diff options
author | Denys Vlasenko | 2015-02-02 16:07:07 +0100 |
---|---|---|
committer | Denys Vlasenko | 2015-02-02 16:07:07 +0100 |
commit | f7f70bf1b3025550ea4ad8d13d977b846a868a06 (patch) | |
tree | 2c18d81eab7e897d982a18667fdafa062a93e033 | |
parent | 7f7ade1964f61172125d9f4fe92f0b07ce8bc7a4 (diff) | |
download | busybox-f7f70bf1b3025550ea4ad8d13d977b846a868a06.zip busybox-f7f70bf1b3025550ea4ad8d13d977b846a868a06.tar.gz |
gzip: speed up and shrink put_16bit()
function old new delta
put_16bit 104 98 -6
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | archival/gzip.c | 41 | ||||
-rw-r--r-- | include/platform.h | 2 |
2 files changed, 36 insertions, 7 deletions
diff --git a/archival/gzip.c b/archival/gzip.c
index 46367f9..18d7959 100644
--- a/archival/gzip.c
+++ b/archival/gzip.c
@@ -417,19 +417,46 @@ static void flush_outbuf(void)
 #define put_8bit(c) \
 do { \
 	G1.outbuf[G1.outcnt++] = (c); \
-	if (G1.outcnt == OUTBUFSIZ) flush_outbuf(); \
+	if (G1.outcnt == OUTBUFSIZ) \
+		flush_outbuf(); \
 } while (0)
 
 /* Output a 16 bit value, lsb first */
 static void put_16bit(ush w)
 {
-	if (G1.outcnt < OUTBUFSIZ - 2) {
-		G1.outbuf[G1.outcnt++] = w;
-		G1.outbuf[G1.outcnt++] = w >> 8;
-	} else {
-		put_8bit(w);
-		put_8bit(w >> 8);
+	/* GCC 4.2.1 won't optimize out redundant loads of G1.outcnt
+	 * (probably because of fear of aliasing with G1.outbuf[]
+	 * stores), do it explicitly:
+	 */
+	unsigned outcnt = G1.outcnt;
+	uch *dst = &G1.outbuf[outcnt];
+
+#if BB_UNALIGNED_MEMACCESS_OK && BB_LITTLE_ENDIAN
+	if (outcnt < OUTBUFSIZ-2) {
+		/* Common case */
+		ush *dst16 = (void*) dst;
+		*dst16 = w; /* unaligned LSB 16-bit store */
+		G1.outcnt = outcnt + 2;
+		return;
+	}
+	*dst = (uch)w;
+	w >>= 8;
+#else
+	*dst++ = (uch)w;
+	w >>= 8;
+	if (outcnt < OUTBUFSIZ-2) {
+		/* Common case */
+		*dst = w;
+		G1.outcnt = outcnt + 2;
+		return;
 	}
+#endif
+
+	/* Slowpath: we will need to do flush_outbuf() */
+	G1.outcnt++;
+	if (G1.outcnt == OUTBUFSIZ)
+		flush_outbuf();
+	put_8bit(w);
 }
 
 static void put_32bit(ulg n)
diff --git a/include/platform.h b/include/platform.h
index 0b0fce1..df95945 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -217,6 +217,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
  * a lvalue. This makes it more likely to not swap them by mistake
  */
 #if defined(i386) || defined(__x86_64__) || defined(__powerpc__)
+# define BB_UNALIGNED_MEMACCESS_OK 1
 # define move_from_unaligned_int(v, intp) ((v) = *(bb__aliased_int*)(intp))
 # define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp))
 # define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p))
@@ -225,6 +226,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
 # define move_to_unaligned32(u32p, v) (*(bb__aliased_uint32_t*)(u32p) = (v))
 /* #elif ... - add your favorite arch today! */
 #else
+# define BB_UNALIGNED_MEMACCESS_OK 0
 /* performs reasonably well (gcc usually inlines memcpy here) */
 # define move_from_unaligned_int(v, intp) (memcpy(&(v), (intp), sizeof(int)))
 # define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long)))