From 3ee8c12071f0e3bdda25125b63c9d3fd54a7c9d8 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Thu, 19 Nov 2009 13:44:16 +0100 Subject: crc32: Impl. linux optimized crc32() Ported over the more efficient linux crc32() function. A quick comparsion on ppc: After changing the old crc32 to do 4 bytes in the inner loop to be able to compare with new version one can note: - old inner loop has 61 insn, new has 19 insn. - new crc32 does one 32 bit load of data to crc while the old does four 8 bits loads. - size is bit bigger for the new crc32: 1392(old) 1416(new) of text. The is because the new version shares code with crc32_no_comp() instead of duplicating code. - about 33% faster on ppc: New > crc 0 0xfffffff -> 39 secs Old > crc 0 0xfffffff -> 60 secs Signed-off-by: Joakim Tjernlund --- lib_generic/crc32.c | 200 +++++++++++++++++++++++++++++----------------------- 1 file changed, 112 insertions(+), 88 deletions(-) (limited to 'lib_generic') diff --git a/lib_generic/crc32.c b/lib_generic/crc32.c index b27048c..468b397 100644 --- a/lib_generic/crc32.c +++ b/lib_generic/crc32.c @@ -8,11 +8,11 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#ifndef USE_HOSTCC /* Shut down "ANSI does not permit..." warnings */ +#ifndef USE_HOSTCC #include -#else -#include #endif +#include +#include #if defined(CONFIG_HW_WATCHDOG) || defined(CONFIG_WATCHDOG) #include @@ -22,6 +22,8 @@ #define local static #define ZEXPORT /* empty */ +#define tole(x) cpu_to_le32(x) + #ifdef DYNAMIC_CRC_TABLE local int crc_table_empty = 1; @@ -70,7 +72,7 @@ local void make_crc_table() c = (uLong)n; for (k = 0; k < 8; k++) c = c & 1 ? poly ^ (c >> 1) : c >> 1; - crc_table[n] = c; + crc_table[n] = tole(c); } crc_table_empty = 0; } @@ -78,59 +80,72 @@ local void make_crc_table() /* ======================================================================== * Table of CRC-32's of all single-byte values (made by make_crc_table) */ + local const uint32_t crc_table[256] = { - 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, - 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, - 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, - 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, - 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, - 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, - 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, - 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, - 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, - 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, - 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, - 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, - 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, - 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, - 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, - 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, - 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, - 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, - 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, - 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, - 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, - 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, - 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, - 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, - 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, - 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, - 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, - 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, - 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, - 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, - 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, - 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, - 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, - 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, - 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, - 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, - 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, - 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, - 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, - 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, - 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, - 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, - 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, - 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, - 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, - 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, - 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, - 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, - 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, - 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, - 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, - 0x2d02ef8dL +tole(0x00000000L), tole(0x77073096L), tole(0xee0e612cL), tole(0x990951baL), +tole(0x076dc419L), tole(0x706af48fL), tole(0xe963a535L), tole(0x9e6495a3L), +tole(0x0edb8832L), tole(0x79dcb8a4L), tole(0xe0d5e91eL), tole(0x97d2d988L), +tole(0x09b64c2bL), tole(0x7eb17cbdL), tole(0xe7b82d07L), tole(0x90bf1d91L), +tole(0x1db71064L), tole(0x6ab020f2L), tole(0xf3b97148L), tole(0x84be41deL), +tole(0x1adad47dL), tole(0x6ddde4ebL), tole(0xf4d4b551L), tole(0x83d385c7L), +tole(0x136c9856L), tole(0x646ba8c0L), tole(0xfd62f97aL), tole(0x8a65c9ecL), +tole(0x14015c4fL), tole(0x63066cd9L), tole(0xfa0f3d63L), tole(0x8d080df5L), +tole(0x3b6e20c8L), tole(0x4c69105eL), tole(0xd56041e4L), tole(0xa2677172L), +tole(0x3c03e4d1L), tole(0x4b04d447L), tole(0xd20d85fdL), tole(0xa50ab56bL), +tole(0x35b5a8faL), tole(0x42b2986cL), tole(0xdbbbc9d6L), tole(0xacbcf940L), +tole(0x32d86ce3L), tole(0x45df5c75L), tole(0xdcd60dcfL), tole(0xabd13d59L), +tole(0x26d930acL), tole(0x51de003aL), tole(0xc8d75180L), tole(0xbfd06116L), +tole(0x21b4f4b5L), tole(0x56b3c423L), tole(0xcfba9599L), tole(0xb8bda50fL), +tole(0x2802b89eL), tole(0x5f058808L), tole(0xc60cd9b2L), tole(0xb10be924L), +tole(0x2f6f7c87L), tole(0x58684c11L), tole(0xc1611dabL), tole(0xb6662d3dL), +tole(0x76dc4190L), tole(0x01db7106L), tole(0x98d220bcL), tole(0xefd5102aL), +tole(0x71b18589L), tole(0x06b6b51fL), tole(0x9fbfe4a5L), tole(0xe8b8d433L), +tole(0x7807c9a2L), tole(0x0f00f934L), tole(0x9609a88eL), tole(0xe10e9818L), +tole(0x7f6a0dbbL), tole(0x086d3d2dL), tole(0x91646c97L), tole(0xe6635c01L), +tole(0x6b6b51f4L), tole(0x1c6c6162L), tole(0x856530d8L), tole(0xf262004eL), +tole(0x6c0695edL), tole(0x1b01a57bL), tole(0x8208f4c1L), tole(0xf50fc457L), +tole(0x65b0d9c6L), tole(0x12b7e950L), tole(0x8bbeb8eaL), tole(0xfcb9887cL), +tole(0x62dd1ddfL), tole(0x15da2d49L), tole(0x8cd37cf3L), tole(0xfbd44c65L), +tole(0x4db26158L), tole(0x3ab551ceL), tole(0xa3bc0074L), tole(0xd4bb30e2L), +tole(0x4adfa541L), tole(0x3dd895d7L), tole(0xa4d1c46dL), tole(0xd3d6f4fbL), +tole(0x4369e96aL), tole(0x346ed9fcL), tole(0xad678846L), tole(0xda60b8d0L), +tole(0x44042d73L), tole(0x33031de5L), tole(0xaa0a4c5fL), tole(0xdd0d7cc9L), +tole(0x5005713cL), tole(0x270241aaL), tole(0xbe0b1010L), tole(0xc90c2086L), +tole(0x5768b525L), tole(0x206f85b3L), tole(0xb966d409L), tole(0xce61e49fL), +tole(0x5edef90eL), tole(0x29d9c998L), tole(0xb0d09822L), tole(0xc7d7a8b4L), +tole(0x59b33d17L), tole(0x2eb40d81L), tole(0xb7bd5c3bL), tole(0xc0ba6cadL), +tole(0xedb88320L), tole(0x9abfb3b6L), tole(0x03b6e20cL), tole(0x74b1d29aL), +tole(0xead54739L), tole(0x9dd277afL), tole(0x04db2615L), tole(0x73dc1683L), +tole(0xe3630b12L), tole(0x94643b84L), tole(0x0d6d6a3eL), tole(0x7a6a5aa8L), +tole(0xe40ecf0bL), tole(0x9309ff9dL), tole(0x0a00ae27L), tole(0x7d079eb1L), +tole(0xf00f9344L), tole(0x8708a3d2L), tole(0x1e01f268L), tole(0x6906c2feL), +tole(0xf762575dL), tole(0x806567cbL), tole(0x196c3671L), tole(0x6e6b06e7L), +tole(0xfed41b76L), tole(0x89d32be0L), tole(0x10da7a5aL), tole(0x67dd4accL), +tole(0xf9b9df6fL), tole(0x8ebeeff9L), tole(0x17b7be43L), tole(0x60b08ed5L), +tole(0xd6d6a3e8L), tole(0xa1d1937eL), tole(0x38d8c2c4L), tole(0x4fdff252L), +tole(0xd1bb67f1L), tole(0xa6bc5767L), tole(0x3fb506ddL), tole(0x48b2364bL), +tole(0xd80d2bdaL), tole(0xaf0a1b4cL), tole(0x36034af6L), tole(0x41047a60L), +tole(0xdf60efc3L), tole(0xa867df55L), tole(0x316e8eefL), tole(0x4669be79L), +tole(0xcb61b38cL), tole(0xbc66831aL), tole(0x256fd2a0L), tole(0x5268e236L), +tole(0xcc0c7795L), tole(0xbb0b4703L), tole(0x220216b9L), tole(0x5505262fL), +tole(0xc5ba3bbeL), tole(0xb2bd0b28L), tole(0x2bb45a92L), tole(0x5cb36a04L), +tole(0xc2d7ffa7L), tole(0xb5d0cf31L), tole(0x2cd99e8bL), tole(0x5bdeae1dL), +tole(0x9b64c2b0L), tole(0xec63f226L), tole(0x756aa39cL), tole(0x026d930aL), +tole(0x9c0906a9L), tole(0xeb0e363fL), tole(0x72076785L), tole(0x05005713L), +tole(0x95bf4a82L), tole(0xe2b87a14L), tole(0x7bb12baeL), tole(0x0cb61b38L), +tole(0x92d28e9bL), tole(0xe5d5be0dL), tole(0x7cdcefb7L), tole(0x0bdbdf21L), +tole(0x86d3d2d4L), tole(0xf1d4e242L), tole(0x68ddb3f8L), tole(0x1fda836eL), +tole(0x81be16cdL), tole(0xf6b9265bL), tole(0x6fb077e1L), tole(0x18b74777L), +tole(0x88085ae6L), tole(0xff0f6a70L), tole(0x66063bcaL), tole(0x11010b5cL), +tole(0x8f659effL), tole(0xf862ae69L), tole(0x616bffd3L), tole(0x166ccf45L), +tole(0xa00ae278L), tole(0xd70dd2eeL), tole(0x4e048354L), tole(0x3903b3c2L), +tole(0xa7672661L), tole(0xd06016f7L), tole(0x4969474dL), tole(0x3e6e77dbL), +tole(0xaed16a4aL), tole(0xd9d65adcL), tole(0x40df0b66L), tole(0x37d83bf0L), +tole(0xa9bcae53L), tole(0xdebb9ec5L), tole(0x47b2cf7fL), tole(0x30b5ffe9L), +tole(0xbdbdf21cL), tole(0xcabac28aL), tole(0x53b39330L), tole(0x24b4a3a6L), +tole(0xbad03605L), tole(0xcdd70693L), tole(0x54de5729L), tole(0x23d967bfL), +tole(0xb3667a2eL), tole(0xc4614ab8L), tole(0x5d681b02L), tole(0x2a6f2b94L), +tole(0xb40bbe37L), tole(0xc30c8ea1L), tole(0x5a05df1bL), tole(0x2d02ef8dL) }; #endif @@ -148,54 +163,63 @@ const uint32_t * ZEXPORT get_crc_table() #endif /* ========================================================================= */ -#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8); -#define DO2(buf) DO1(buf); DO1(buf); -#define DO4(buf) DO2(buf); DO2(buf); -#define DO8(buf) DO4(buf); DO4(buf); +# ifdef __LITTLE_ENDIAN +# define DO_CRC(x) crc = tab[(crc ^ (x)) & 255] ^ (crc >> 8) +# else +# define DO_CRC(x) crc = tab[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) +# endif /* ========================================================================= */ -uint32_t ZEXPORT crc32 (uint32_t crc, const Bytef *buf, uInt len) -{ -#ifdef DYNAMIC_CRC_TABLE - if (crc_table_empty) - make_crc_table(); -#endif - crc = crc ^ 0xffffffffL; - while (len >= 8) - { - DO8(buf); - len -= 8; - } - if (len) do { - DO1(buf); - } while (--len); - return crc ^ 0xffffffffL; -} - -#if defined(CONFIG_CMD_JFFS2) || defined(CONFIG_CMD_NAND) /* No ones complement version. JFFS2 (and other things ?) * don't use ones compliment in their CRC calculations. */ uint32_t ZEXPORT crc32_no_comp(uint32_t crc, const Bytef *buf, uInt len) { + const uint32_t *tab = crc_table; + const uint32_t *b =(const uint32_t *)buf; + size_t rem_len; #ifdef DYNAMIC_CRC_TABLE if (crc_table_empty) make_crc_table(); #endif - while (len >= 8) - { - DO8(buf); - len -= 8; + crc = cpu_to_le32(crc); + /* Align it */ + if (((long)b) & 3 && len) { + uint8_t *p = (uint8_t *)b; + do { + DO_CRC(*p++); + } while ((--len) && ((long)p)&3); + b = (uint32_t *)p; + } + + rem_len = len & 3; + len = len >> 2; + for (--b; len; --len) { + /* load data 32 bits wide, xor data 32 bits wide. */ + crc ^= *++b; /* use pre increment for speed */ + DO_CRC(0); + DO_CRC(0); + DO_CRC(0); + DO_CRC(0); + } + len = rem_len; + /* And the last few bytes */ + if (len) { + uint8_t *p = (uint8_t *)(b + 1) - 1; + do { + DO_CRC(*++p); /* use pre increment for speed */ + } while (--len); } - if (len) do { - DO1(buf); - } while (--len); - return crc; + return le32_to_cpu(crc); } +#undef DO_CRC -#endif +uint32_t ZEXPORT crc32 (uint32_t crc, const Bytef *p, uInt len) +{ + return crc32_no_comp(crc ^ 0xffffffffL, p, len) ^ 0xffffffffL; +} /* * Calculate the crc32 checksum triggering the watchdog every 'chunk_sz' bytes -- cgit v1.1 From 3eb90bad651fab39cffba750ec4421a9c01d60e7 Mon Sep 17 00:00:00 2001 From: Ingo van Lil Date: Tue, 24 Nov 2009 14:09:21 +0100 Subject: Generic udelay() with watchdog support According to the PPC reference implementation the udelay() function is responsible for resetting the watchdog timer as frequently as needed. Most other architectures do not meet that requirement, so long-running operations might result in a watchdog reset. This patch adds a generic udelay() function which takes care of resetting the watchdog before calling an architecture-specific __udelay(). Signed-off-by: Ingo van Lil --- lib_generic/Makefile | 1 + lib_generic/time.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 lib_generic/time.c (limited to 'lib_generic') diff --git a/lib_generic/Makefile b/lib_generic/Makefile index 686601c..7291fa3 100644 --- a/lib_generic/Makefile +++ b/lib_generic/Makefile @@ -44,6 +44,7 @@ COBJS-y += sha1.o COBJS-$(CONFIG_SHA256) += sha256.o COBJS-y += string.o COBJS-y += strmhz.o +COBJS-y += time.o COBJS-y += vsprintf.o COBJS-y += zlib.o COBJS-$(CONFIG_RBTREE) += rbtree.o diff --git a/lib_generic/time.c b/lib_generic/time.c new file mode 100644 index 0000000..a309c26 --- /dev/null +++ b/lib_generic/time.c @@ -0,0 +1,43 @@ +/* + * (C) Copyright 2000-2009 + * Wolfgang Denk, DENX Software Engineering, wd@denx.de. + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#include +#include + +#ifndef CONFIG_WD_PERIOD +# define CONFIG_WD_PERIOD (10 * 1000 * 1000) /* 10 seconds default*/ +#endif + +/* ------------------------------------------------------------------------- */ + +void udelay(unsigned long usec) +{ + ulong kv; + + do { + WATCHDOG_RESET(); + kv = usec > CONFIG_WD_PERIOD ? CONFIG_WD_PERIOD : usec; + __udelay (kv); + usec -= kv; + } while(usec); +} -- cgit v1.1 From 20dde48bcadd856c86a91d5463831a10be46db83 Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Thu, 19 Nov 2009 11:37:51 +0100 Subject: add lzop decompression support Add lzop decompression support to the existing lzo bitstream handling (think gzip versus zlib), and support it for uImage decompression if CONFIG_LZO is enabled. Lzop doesn't compress as good as gzip (~10% worse), but decompression is very fast (~0.7s faster here on a slow ppc). The lzop decompression code is based on Albin Tonnerre's recent ARM Linux lzo support patch. Cc: albin.tonnerre@free-electrons.com Signed-off-by: Peter Korsgaard --- lib_generic/lzo/lzo1x_decompress.c | 87 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) (limited to 'lib_generic') diff --git a/lib_generic/lzo/lzo1x_decompress.c b/lib_generic/lzo/lzo1x_decompress.c index 2780e11..09bdc8f 100644 --- a/lib_generic/lzo/lzo1x_decompress.c +++ b/lib_generic/lzo/lzo1x_decompress.c @@ -24,6 +24,93 @@ #define COPY4(dst, src) \ put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) +static const unsigned char lzop_magic[] = { + 0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a +}; + +#define HEADER_HAS_FILTER 0x00000800L + +static inline const unsigned char *parse_header(const unsigned char *src) +{ + u8 level = 0; + u16 version; + int i; + + /* read magic: 9 first bytes */ + for (i = 0; i < ARRAY_SIZE(lzop_magic); i++) { + if (*src++ != lzop_magic[i]) + return NULL; + } + /* get version (2bytes), skip library version (2), + * 'need to be extracted' version (2) and + * method (1) */ + version = get_unaligned_be16(src); + src += 7; + if (version >= 0x0940) + level = *src++; + if (get_unaligned_be32(src) & HEADER_HAS_FILTER) + src += 4; /* filter info */ + + /* skip flags, mode and mtime_low */ + src += 12; + if (version >= 0x0940) + src += 4; /* skip mtime_high */ + + i = *src++; + /* don't care about the file name, and skip checksum */ + src += i + 4; + + return src; +} + +int lzop_decompress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len) +{ + unsigned char *start = dst; + const unsigned char *send = src + src_len; + u32 slen, dlen; + size_t tmp; + int r; + + src = parse_header(src); + if (!src) + return LZO_E_ERROR; + + while (src < send) { + /* read uncompressed block size */ + dlen = get_unaligned_be32(src); + src += 4; + + /* exit if last block */ + if (dlen == 0) { + *dst_len = dst - start; + return LZO_E_OK; + } + + /* read compressed block size, and skip block checksum info */ + slen = get_unaligned_be32(src); + src += 8; + + if (slen <= 0 || slen > dlen) + return LZO_E_ERROR; + + /* decompress */ + tmp = dlen; + r = lzo1x_decompress_safe((u8 *) src, slen, dst, &tmp); + + if (r != LZO_E_OK) + return r; + + if (dlen != tmp) + return LZO_E_ERROR; + + src += slen; + dst += dlen; + } + + return LZO_E_INPUT_OVERRUN; +} + int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len) { -- cgit v1.1 From cd514aeb996e2f7aefbe1f78481965d9d074aed4 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Thu, 19 Nov 2009 13:22:44 +0100 Subject: zlib: Optimize decompression This patch optimizes the direct copy procedure. Uses get_unaligned() but only in one place. The copy loop just above this one can also use this optimization, but I havn't done so as I have not tested if it is a win there too. On my MPC8321 this is about 17% faster on my JFFS2 root FS than the original. No speed test has been performed in u-boot. Size increase on ppc: 484 bytes Signed-off-by: Joakim Tjernlund Acked-by: Peter Korsgaard --- lib_generic/zlib.c | 56 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 12 deletions(-) (limited to 'lib_generic') diff --git a/lib_generic/zlib.c b/lib_generic/zlib.c index 8fe3bd0..5721968 100644 --- a/lib_generic/zlib.c +++ b/lib_generic/zlib.c @@ -26,8 +26,10 @@ #define ZUTIL_H #define ZLIB_INTERNAL -#include "u-boot/zlib.h" #include +#include +#include +#include "u-boot/zlib.h" /* To avoid a build time warning */ #ifdef STDC #include @@ -400,6 +402,7 @@ void inflate_fast OF((z_streamp strm, unsigned start)); */ #define OFF 1 #define PUP(a) *++(a) +#define UP_UNALIGNED(a) get_unaligned(++(a)) /* Decode literal, length, and distance codes and write out the resulting @@ -616,18 +619,47 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } } else { + unsigned short *sout; + unsigned long loops; + from = out - dist; /* copy direct from output */ - do { /* minimum length is three */ - PUP(out) = PUP(from); - PUP(out) = PUP(from); - PUP(out) = PUP(from); - len -= 3; - } while (len > 2); - if (len) { - PUP(out) = PUP(from); - if (len > 1) - PUP(out) = PUP(from); - } + /* minimum length is three */ + /* Align out addr */ + if (!((long)(out - 1 + OFF) & 1)) { + PUP(out) = PUP(from); + len--; + } + sout = (unsigned short *)(out - OFF); + if (dist > 2 ) { + unsigned short *sfrom; + + sfrom = (unsigned short *)(from - OFF); + loops = len >> 1; + do + PUP(sout) = UP_UNALIGNED(sfrom); + while (--loops); + out = (unsigned char *)sout + OFF; + from = (unsigned char *)sfrom + OFF; + } else { /* dist == 1 or dist == 2 */ + unsigned short pat16; + + pat16 = *(sout-2+2*OFF); + if (dist == 1) +#if defined(__BIG_ENDIAN) + pat16 = (pat16 & 0xff) | ((pat16 & 0xff ) << 8); +#elif defined(__LITTLE_ENDIAN) + pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00 ) >> 8); +#else +#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined +#endif + loops = len >> 1; + do + PUP(sout) = pat16; + while (--loops); + out = (unsigned char *)sout + OFF; + } + if (len & 1) + PUP(out) = PUP(from); } } else if ((op & 64) == 0) { /* 2nd level distance code */ -- cgit v1.1 From 4b142febff71eabdb7ddbb125c7b583b24ddc434 Mon Sep 17 00:00:00 2001 From: Heiko Schocher Date: Thu, 3 Dec 2009 11:21:21 +0100 Subject: common: delete CONFIG_SYS_64BIT_VSPRINTF and CONFIG_SYS_64BIT_STRTOUL There is more and more usage of printing 64bit values, so enable this feature generally, and delete the CONFIG_SYS_64BIT_VSPRINTF and CONFIG_SYS_64BIT_STRTOUL defines. Signed-off-by: Heiko Schocher --- lib_generic/vsprintf.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'lib_generic') diff --git a/lib_generic/vsprintf.c b/lib_generic/vsprintf.c index 3d95728..8c58a93 100644 --- a/lib_generic/vsprintf.c +++ b/lib_generic/vsprintf.c @@ -21,21 +21,10 @@ extern int do_reset (cmd_tbl_t *cmdtp, int flag, int argc, char *argv[]); #endif -#ifdef CONFIG_SYS_64BIT_VSPRINTF #include # define NUM_TYPE long long -#else -# define NUM_TYPE long -#define do_div(n, base) ({ \ - unsigned int __res; \ - __res = ((unsigned NUM_TYPE) n) % base; \ - n = ((unsigned NUM_TYPE) n) / base; \ - __res; \ -}) -#endif #define noinline __attribute__((noinline)) - const char hex_asc[] = "0123456789abcdef"; #define hex_asc_lo(x) hex_asc[((x) & 0x0f)] #define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4] @@ -104,7 +93,6 @@ int ustrtoul(const char *cp, char **endp, unsigned int base) return result; } -#ifdef CONFIG_SYS_64BIT_STRTOUL unsigned long long simple_strtoull (const char *cp, char **endp, unsigned int base) { unsigned long long result = 0, value; @@ -132,7 +120,6 @@ unsigned long long simple_strtoull (const char *cp, char **endp, unsigned int ba *endp = (char *) cp; return result; } -#endif /* CONFIG_SYS_64BIT_STRTOUL */ /* we use this so that we can do without the ctype library */ #define is_digit(c) ((c) >= '0' && (c) <= '9') @@ -631,12 +618,9 @@ int vsprintf(char *buf, const char *fmt, va_list args) --fmt; continue; } -#ifdef CONFIG_SYS_64BIT_VSPRINTF if (qualifier == 'L') /* "quad" for 64 bit variables */ num = va_arg(args, unsigned long long); - else -#endif - if (qualifier == 'l') { + else if (qualifier == 'l') { num = va_arg(args, unsigned long); if (flags & SIGN) num = (signed long) num; -- cgit v1.1