diff options
author | Matthias Weisser <weisserm@arcor.de> | 2011-03-10 21:36:32 +0000 |
---|---|---|
committer | Albert ARIBAUD <albert.u.boot@aribaud.net> | 2011-04-27 19:38:07 +0200 |
commit | d8834a1323af72f6145bc81adadd75185ef6065f (patch) | |
tree | c8e04b59f251c9ea1e8dc0097ba8ca83a6fef31f /arch/arm/include/asm | |
parent | b65a77a8613d974cb665d14c51a012a8c12b8ba1 (diff) | |
download | u-boot-imx-d8834a1323af72f6145bc81adadd75185ef6065f.zip u-boot-imx-d8834a1323af72f6145bc81adadd75185ef6065f.tar.gz u-boot-imx-d8834a1323af72f6145bc81adadd75185ef6065f.tar.bz2 |
arm: Use optimized memcpy and memset from linux
Using optimized versions of memset and memcpy from linux brings a quite
noticeable speed (x2 or better) improvement for these two functions.
Here are some numbers for test done with jadecpu
| HEAD(1)| HEAD(1)| HEAD(2)| HEAD(2)|
| | +patch | | +patch |
---------------------------+--------+--------+--------+--------+
Reset to prompt | 438ms | 330ms | 228ms | 120ms |
| | | | |
TFTP a 3MB img | 4782ms | 3428ms | 3245ms | 2820ms |
| | | | |
FATLOAD USB a 3MB img* | 8515ms | 8510ms | ------ | ------ |
| | | | |
BOOTM LZO img in RAM | 3473ms | 3168ms | 592ms | 592ms |
where CRC is | 615ms | 615ms | 54ms | 54ms |
uncompress | 2460ms | 2462ms | 450ms | 451ms |
final boot_elf | 376ms | 68ms | 65ms | 65ms |
| | | | |
BOOTM LZO img in FLASH | 3207ms | 2902ms | 1050ms | 1050ms |
where CRC is | 600ms | 600ms | 135ms | 135ms |
uncompress | 2209ms | 2211ms | 828ms | 828ms |
| | | | |
Copy 1.4MB from NOR to RAM | 134ms | 72ms | 120ms | 70ms |
(1) No dcache
(2) dcache enabled in board_init
*Does not work when dcache is on
Size impact:
C version:
text data bss dec hex filename
202862 18912 266456 488230 77326 u-boot
ASM version:
text data bss dec hex filename
203798 18912 266288 488998 77626 u-boot
222712 u-boot.bin
Signed-off-by: Matthias Weisser <weisserm@arcor.de>
Diffstat (limited to 'arch/arm/include/asm')
-rw-r--r-- | arch/arm/include/asm/assembler.h | 60 | ||||
-rw-r--r-- | arch/arm/include/asm/string.h | 10 |
2 files changed, 68 insertions, 2 deletions
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h new file mode 100644 index 0000000..5e4789b --- /dev/null +++ b/arch/arm/include/asm/assembler.h @@ -0,0 +1,60 @@ +/* + * arch/arm/include/asm/assembler.h + * + * Copyright (C) 1996-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This file contains arm architecture specific defines + * for the different processors. + * + * Do not include any C declarations in this file - it is included by + * assembler source. + */ + +/* + * Endian independent macros for shifting bytes within registers. + */ +#ifndef __ARMEB__ +#define pull lsr +#define push lsl +#define get_byte_0 lsl #0 +#define get_byte_1 lsr #8 +#define get_byte_2 lsr #16 +#define get_byte_3 lsr #24 +#define put_byte_0 lsl #0 +#define put_byte_1 lsl #8 +#define put_byte_2 lsl #16 +#define put_byte_3 lsl #24 +#else +#define pull lsl +#define push lsr +#define get_byte_0 lsr #24 +#define get_byte_1 lsr #16 +#define get_byte_2 lsr #8 +#define get_byte_3 lsl #0 +#define put_byte_0 lsl #24 +#define put_byte_1 lsl #16 +#define put_byte_2 lsl #8 +#define put_byte_3 lsl #0 +#endif + +/* + * Data preload for architectures that support it + */ +#if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ + defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7R__) +#define PLD(code...) code +#else +#define PLD(code...) +#endif + +/* + * Cache alligned + */ +#define CALGN(code...) code diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h index c3ea582..c6dfb25 100644 --- a/arch/arm/include/asm/string.h +++ b/arch/arm/include/asm/string.h @@ -1,6 +1,8 @@ #ifndef __ASM_ARM_STRING_H #define __ASM_ARM_STRING_H +#include <config.h> + /* * We don't do inline string functions, since the * optimised inline asm versions are not small. @@ -12,7 +14,9 @@ extern char * strrchr(const char * s, int c); #undef __HAVE_ARCH_STRCHR extern char * strchr(const char * s, int c); -#undef __HAVE_ARCH_MEMCPY +#ifdef CONFIG_USE_ARCH_MEMCPY +#define __HAVE_ARCH_MEMCPY +#endif extern void * memcpy(void *, const void *, __kernel_size_t); #undef __HAVE_ARCH_MEMMOVE @@ -22,7 +26,9 @@ extern void * memmove(void *, const void *, __kernel_size_t); extern void * memchr(const void *, int, __kernel_size_t); #undef __HAVE_ARCH_MEMZERO -#undef __HAVE_ARCH_MEMSET +#ifdef CONFIG_USE_ARCH_MEMSET +#define __HAVE_ARCH_MEMSET +#endif extern void * memset(void *, int, __kernel_size_t); #if 0 |