diff options
Diffstat (limited to 'archival/bz/blocksort.c')
-rw-r--r-- | archival/bz/blocksort.c | 1072 |
1 files changed, 0 insertions, 1072 deletions
diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c deleted file mode 100644 index f70c370..0000000 --- a/archival/bz/blocksort.c +++ /dev/null @@ -1,1072 +0,0 @@ -/* - * bzip2 is written by Julian Seward <jseward@bzip.org>. - * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>. - * See README and LICENSE files in this directory for more information. - */ - -/*-------------------------------------------------------------*/ -/*--- Block sorting machinery ---*/ -/*--- blocksort.c ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ -This file is part of bzip2/libbzip2, a program and library for -lossless, block-sorting data compression. - -bzip2/libbzip2 version 1.0.4 of 20 December 2006 -Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org> - -Please read the WARNING, DISCLAIMER and PATENTS sections in the -README file. - -This program is released under the terms of the license contained -in the file LICENSE. ------------------------------------------------------------------- */ - -/* #include "bzlib_private.h" */ - -#define mswap(zz1, zz2) \ -{ \ - int32_t zztmp = zz1; \ - zz1 = zz2; \ - zz2 = zztmp; \ -} - -static -/* No measurable speed gain with inlining */ -/* ALWAYS_INLINE */ -void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn) -{ - while (zzn > 0) { - mswap(ptr[zzp1], ptr[zzp2]); - zzp1++; - zzp2++; - zzn--; - } -} - -static -ALWAYS_INLINE -int32_t mmin(int32_t a, int32_t b) -{ - return (a < b) ? a : b; -} - - -/*---------------------------------------------*/ -/*--- Fallback O(N log(N)^2) sorting ---*/ -/*--- algorithm, for repetitive blocks ---*/ -/*---------------------------------------------*/ - -/*---------------------------------------------*/ -static -inline -void fallbackSimpleSort(uint32_t* fmap, - uint32_t* eclass, - int32_t lo, - int32_t hi) -{ - int32_t i, j, tmp; - uint32_t ec_tmp; - - if (lo == hi) return; - - if (hi - lo > 3) { - for (i = hi-4; i >= lo; i--) { - tmp = fmap[i]; - ec_tmp = eclass[tmp]; - for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4) - fmap[j-4] = fmap[j]; - fmap[j-4] = tmp; - } - } - - for (i = hi-1; i >= lo; i--) { - tmp = fmap[i]; - ec_tmp = eclass[tmp]; - for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) - fmap[j-1] = fmap[j]; - fmap[j-1] = tmp; - } -} - - -/*---------------------------------------------*/ -#define fpush(lz,hz) { \ - stackLo[sp] = lz; \ - stackHi[sp] = hz; \ - sp++; \ -} - -#define fpop(lz,hz) { \ - sp--; \ - lz = stackLo[sp]; \ - hz = stackHi[sp]; \ -} - -#define FALLBACK_QSORT_SMALL_THRESH 10 -#define FALLBACK_QSORT_STACK_SIZE 100 - -static -void fallbackQSort3(uint32_t* fmap, - uint32_t* eclass, - int32_t loSt, - int32_t hiSt) -{ - int32_t unLo, unHi, ltLo, gtHi, n, m; - int32_t sp, lo, hi; - uint32_t med, r, r3; - int32_t stackLo[FALLBACK_QSORT_STACK_SIZE]; - int32_t stackHi[FALLBACK_QSORT_STACK_SIZE]; - - r = 0; - - sp = 0; - fpush(loSt, hiSt); - - while (sp > 0) { - AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004); - - fpop(lo, hi); - if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { - fallbackSimpleSort(fmap, eclass, lo, hi); - continue; - } - - /* Random partitioning. Median of 3 sometimes fails to - * avoid bad cases. Median of 9 seems to help but - * looks rather expensive. This too seems to work but - * is cheaper. Guidance for the magic constants - * 7621 and 32768 is taken from Sedgewick's algorithms - * book, chapter 35. - */ - r = ((r * 7621) + 1) % 32768; - r3 = r % 3; - if (r3 == 0) - med = eclass[fmap[lo]]; - else if (r3 == 1) - med = eclass[fmap[(lo+hi)>>1]]; - else - med = eclass[fmap[hi]]; - - unLo = ltLo = lo; - unHi = gtHi = hi; - - while (1) { - while (1) { - if (unLo > unHi) break; - n = (int32_t)eclass[fmap[unLo]] - (int32_t)med; - if (n == 0) { - mswap(fmap[unLo], fmap[ltLo]); - ltLo++; - unLo++; - continue; - }; - if (n > 0) break; - unLo++; - } - while (1) { - if (unLo > unHi) break; - n = (int32_t)eclass[fmap[unHi]] - (int32_t)med; - if (n == 0) { - mswap(fmap[unHi], fmap[gtHi]); - gtHi--; unHi--; - continue; - }; - if (n < 0) break; - unHi--; - } - if (unLo > unHi) break; - mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; - } - - AssertD(unHi == unLo-1, "fallbackQSort3(2)"); - - if (gtHi < ltLo) continue; - - n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n); - m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - if (n - lo > hi - m) { - fpush(lo, n); - fpush(m, hi); - } else { - fpush(m, hi); - fpush(lo, n); - } - } -} - -#undef fpush -#undef fpop -#undef FALLBACK_QSORT_SMALL_THRESH -#undef FALLBACK_QSORT_STACK_SIZE - - -/*---------------------------------------------*/ -/* Pre: - * nblock > 0 - * eclass exists for [0 .. nblock-1] - * ((uint8_t*)eclass) [0 .. nblock-1] holds block - * ptr exists for [0 .. nblock-1] - * - * Post: - * ((uint8_t*)eclass) [0 .. nblock-1] holds block - * All other areas of eclass destroyed - * fmap [0 .. nblock-1] holds sorted order - * bhtab[0 .. 2+(nblock/32)] destroyed -*/ - -#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) -#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) -#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) -#define WORD_BH(zz) bhtab[(zz) >> 5] -#define UNALIGNED_BH(zz) ((zz) & 0x01f) - -static -void fallbackSort(uint32_t* fmap, - uint32_t* eclass, - uint32_t* bhtab, - int32_t nblock) -{ - int32_t ftab[257]; - int32_t ftabCopy[256]; - int32_t H, i, j, k, l, r, cc, cc1; - int32_t nNotDone; - int32_t nBhtab; - uint8_t* eclass8 = (uint8_t*)eclass; - - /* - * Initial 1-char radix sort to generate - * initial fmap and initial BH bits. - */ - for (i = 0; i < 257; i++) ftab[i] = 0; - for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; - for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; - - j = ftab[0]; /* bbox: optimized */ - for (i = 1; i < 257; i++) { - j += ftab[i]; - ftab[i] = j; - } - - for (i = 0; i < nblock; i++) { - j = eclass8[i]; - k = ftab[j] - 1; - ftab[j] = k; - fmap[k] = i; - } - - nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */ - for (i = 0; i < nBhtab; i++) bhtab[i] = 0; - for (i = 0; i < 256; i++) SET_BH(ftab[i]); - - /* - * Inductively refine the buckets. Kind-of an - * "exponential radix sort" (!), inspired by the - * Manber-Myers suffix array construction algorithm. - */ - - /*-- set sentinel bits for block-end detection --*/ - for (i = 0; i < 32; i++) { - SET_BH(nblock + 2*i); - CLEAR_BH(nblock + 2*i + 1); - } - - /*-- the log(N) loop --*/ - H = 1; - while (1) { - j = 0; - for (i = 0; i < nblock; i++) { - if (ISSET_BH(i)) - j = i; - k = fmap[i] - H; - if (k < 0) - k += nblock; - eclass[k] = j; - } - - nNotDone = 0; - r = -1; - while (1) { - - /*-- find the next non-singleton bucket --*/ - k = r + 1; - while (ISSET_BH(k) && UNALIGNED_BH(k)) - k++; - if (ISSET_BH(k)) { - while (WORD_BH(k) == 0xffffffff) k += 32; - while (ISSET_BH(k)) k++; - } - l = k - 1; - if (l >= nblock) - break; - while (!ISSET_BH(k) && UNALIGNED_BH(k)) - k++; - if (!ISSET_BH(k)) { - while (WORD_BH(k) == 0x00000000) k += 32; - while (!ISSET_BH(k)) k++; - } - r = k - 1; - if (r >= nblock) - break; - - /*-- now [l, r] bracket current bucket --*/ - if (r > l) { - nNotDone += (r - l + 1); - fallbackQSort3(fmap, eclass, l, r); - - /*-- scan bucket and generate header bits-- */ - cc = -1; - for (i = l; i <= r; i++) { - cc1 = eclass[fmap[i]]; - if (cc != cc1) { - SET_BH(i); - cc = cc1; - }; - } - } - } - - H *= 2; - if (H > nblock || nNotDone == 0) - break; - } - - /* - * Reconstruct the original block in - * eclass8 [0 .. nblock-1], since the - * previous phase destroyed it. - */ - j = 0; - for (i = 0; i < nblock; i++) { - while (ftabCopy[j] == 0) - j++; - ftabCopy[j]--; - eclass8[fmap[i]] = (uint8_t)j; - } - AssertH(j < 256, 1005); -} - -#undef SET_BH -#undef CLEAR_BH -#undef ISSET_BH -#undef WORD_BH -#undef UNALIGNED_BH - - -/*---------------------------------------------*/ -/*--- The main, O(N^2 log(N)) sorting ---*/ -/*--- algorithm. Faster for "normal" ---*/ -/*--- non-repetitive blocks. ---*/ -/*---------------------------------------------*/ - -/*---------------------------------------------*/ -static -NOINLINE -int mainGtU( - uint32_t i1, - uint32_t i2, - uint8_t* block, - uint16_t* quadrant, - uint32_t nblock, - int32_t* budget) -{ - int32_t k; - uint8_t c1, c2; - uint16_t s1, s2; - -/* Loop unrolling here is actually very useful - * (generated code is much simpler), - * code size increase is only 270 bytes (i386) - * but speeds up compression 10% overall - */ - -#if CONFIG_BZIP2_FEATURE_SPEED >= 1 - -#define TIMES_8(code) \ - code; code; code; code; \ - code; code; code; code; -#define TIMES_12(code) \ - code; code; code; code; \ - code; code; code; code; \ - code; code; code; code; - -#else - -#define TIMES_8(code) \ -{ \ - int nn = 8; \ - do { \ - code; \ - } while (--nn); \ -} -#define TIMES_12(code) \ -{ \ - int nn = 12; \ - do { \ - code; \ - } while (--nn); \ -} - -#endif - - AssertD(i1 != i2, "mainGtU"); - TIMES_12( - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - ) - - k = nblock + 8; - - do { - TIMES_8( - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - ) - - if (i1 >= nblock) i1 -= nblock; - if (i2 >= nblock) i2 -= nblock; - - (*budget)--; - k -= 8; - } while (k >= 0); - - return False; -} -#undef TIMES_8 -#undef TIMES_12 - -/*---------------------------------------------*/ -/* - * Knuth's increments seem to work better - * than Incerpi-Sedgewick here. Possibly - * because the number of elems to sort is - * usually small, typically <= 20. - */ -static -const int32_t incs[14] = { - 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484 -}; - -static -void mainSimpleSort(uint32_t* ptr, - uint8_t* block, - uint16_t* quadrant, - int32_t nblock, - int32_t lo, - int32_t hi, - int32_t d, - int32_t* budget) -{ - int32_t i, j, h, bigN, hp; - uint32_t v; - - bigN = hi - lo + 1; - if (bigN < 2) return; - - hp = 0; - while (incs[hp] < bigN) hp++; - hp--; - - for (; hp >= 0; hp--) { - h = incs[hp]; - - i = lo + h; - while (1) { - /*-- copy 1 --*/ - if (i > hi) break; - v = ptr[i]; - j = i; - while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { - ptr[j] = ptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - ptr[j] = v; - i++; - -/* 1.5% overall speedup, +290 bytes */ -#if CONFIG_BZIP2_FEATURE_SPEED >= 3 - /*-- copy 2 --*/ - if (i > hi) break; - v = ptr[i]; - j = i; - while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { - ptr[j] = ptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - ptr[j] = v; - i++; - - /*-- copy 3 --*/ - if (i > hi) break; - v = ptr[i]; - j = i; - while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { - ptr[j] = ptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - ptr[j] = v; - i++; -#endif - if (*budget < 0) return; - } - } -} - - -/*---------------------------------------------*/ -/* - * The following is an implementation of - * an elegant 3-way quicksort for strings, - * described in a paper "Fast Algorithms for - * Sorting and Searching Strings", by Robert - * Sedgewick and Jon L. Bentley. - */ - -static -ALWAYS_INLINE -uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c) -{ - uint8_t t; - if (a > b) { - t = a; - a = b; - b = t; - }; - /* here b >= a */ - if (b > c) { - b = c; - if (a > b) - b = a; - } - return b; -} - -#define mpush(lz,hz,dz) \ -{ \ - stackLo[sp] = lz; \ - stackHi[sp] = hz; \ - stackD [sp] = dz; \ - sp++; \ -} - -#define mpop(lz,hz,dz) \ -{ \ - sp--; \ - lz = stackLo[sp]; \ - hz = stackHi[sp]; \ - dz = stackD [sp]; \ -} - -#define mnextsize(az) (nextHi[az] - nextLo[az]) - -#define mnextswap(az,bz) \ -{ \ - int32_t tz; \ - tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ - tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ - tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \ -} - -#define MAIN_QSORT_SMALL_THRESH 20 -#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) -#define MAIN_QSORT_STACK_SIZE 100 - -static NOINLINE -void mainQSort3(uint32_t* ptr, - uint8_t* block, - uint16_t* quadrant, - int32_t nblock, - int32_t loSt, - int32_t hiSt, - int32_t dSt, - int32_t* budget) -{ - int32_t unLo, unHi, ltLo, gtHi, n, m, med; - int32_t sp, lo, hi, d; - - int32_t stackLo[MAIN_QSORT_STACK_SIZE]; - int32_t stackHi[MAIN_QSORT_STACK_SIZE]; - int32_t stackD [MAIN_QSORT_STACK_SIZE]; - - int32_t nextLo[3]; - int32_t nextHi[3]; - int32_t nextD [3]; - - sp = 0; - mpush(loSt, hiSt, dSt); - - while (sp > 0) { - AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001); - - mpop(lo, hi, d); - if (hi - lo < MAIN_QSORT_SMALL_THRESH - || d > MAIN_QSORT_DEPTH_THRESH - ) { - mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget); - if (*budget < 0) - return; - continue; - } - med = (int32_t) mmed3(block[ptr[lo ] + d], - block[ptr[hi ] + d], - block[ptr[(lo+hi) >> 1] + d]); - - unLo = ltLo = lo; - unHi = gtHi = hi; - - while (1) { - while (1) { - if (unLo > unHi) - break; - n = ((int32_t)block[ptr[unLo]+d]) - med; - if (n == 0) { - mswap(ptr[unLo], ptr[ltLo]); - ltLo++; - unLo++; - continue; - }; - if (n > 0) break; - unLo++; - } - while (1) { - if (unLo > unHi) - break; - n = ((int32_t)block[ptr[unHi]+d]) - med; - if (n == 0) { - mswap(ptr[unHi], ptr[gtHi]); - gtHi--; - unHi--; - continue; - }; - if (n < 0) break; - unHi--; - } - if (unLo > unHi) - break; - mswap(ptr[unLo], ptr[unHi]); - unLo++; - unHi--; - } - - AssertD(unHi == unLo-1, "mainQSort3(2)"); - - if (gtHi < ltLo) { - mpush(lo, hi, d + 1); - continue; - } - - n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n); - m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; - nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; - nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; - - if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1); - if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2); - if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1); - - AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)"); - AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)"); - - mpush(nextLo[0], nextHi[0], nextD[0]); - mpush(nextLo[1], nextHi[1], nextD[1]); - mpush(nextLo[2], nextHi[2], nextD[2]); - } -} - -#undef mpush -#undef mpop -#undef mnextsize -#undef mnextswap -#undef MAIN_QSORT_SMALL_THRESH -#undef MAIN_QSORT_DEPTH_THRESH -#undef MAIN_QSORT_STACK_SIZE - - -/*---------------------------------------------*/ -/* Pre: - * nblock > N_OVERSHOOT - * block32 exists for [0 .. nblock-1 +N_OVERSHOOT] - * ((uint8_t*)block32) [0 .. nblock-1] holds block - * ptr exists for [0 .. nblock-1] - * - * Post: - * ((uint8_t*)block32) [0 .. nblock-1] holds block - * All other areas of block32 destroyed - * ftab[0 .. 65536] destroyed - * ptr [0 .. nblock-1] holds sorted order - * if (*budget < 0), sorting was abandoned - */ - -#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) -#define SETMASK (1 << 21) -#define CLEARMASK (~(SETMASK)) - -static NOINLINE -void mainSort(EState* state, - uint32_t* ptr, - uint8_t* block, - uint16_t* quadrant, - uint32_t* ftab, - int32_t nblock, - int32_t* budget) -{ - int32_t i, j, k, ss, sb; - uint8_t c1; - int32_t numQSorted; - uint16_t s; - Bool bigDone[256]; - /* bbox: moved to EState to save stack - int32_t runningOrder[256]; - int32_t copyStart[256]; - int32_t copyEnd [256]; - */ -#define runningOrder (state->mainSort__runningOrder) -#define copyStart (state->mainSort__copyStart) -#define copyEnd (state->mainSort__copyEnd) - - /*-- set up the 2-byte frequency table --*/ - /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */ - memset(ftab, 0, 65537 * sizeof(ftab[0])); - - j = block[0] << 8; - i = nblock - 1; -/* 3%, +300 bytes */ -#if CONFIG_BZIP2_FEATURE_SPEED >= 2 - for (; i >= 3; i -= 4) { - quadrant[i] = 0; - j = (j >> 8) | (((uint16_t)block[i]) << 8); - ftab[j]++; - quadrant[i-1] = 0; - j = (j >> 8) | (((uint16_t)block[i-1]) << 8); - ftab[j]++; - quadrant[i-2] = 0; - j = (j >> 8) | (((uint16_t)block[i-2]) << 8); - ftab[j]++; - quadrant[i-3] = 0; - j = (j >> 8) | (((uint16_t)block[i-3]) << 8); - ftab[j]++; - } -#endif - for (; i >= 0; i--) { - quadrant[i] = 0; - j = (j >> 8) | (((uint16_t)block[i]) << 8); - ftab[j]++; - } - - /*-- (emphasises close relationship of block & quadrant) --*/ - for (i = 0; i < BZ_N_OVERSHOOT; i++) { - block [nblock+i] = block[i]; - quadrant[nblock+i] = 0; - } - - /*-- Complete the initial radix sort --*/ - j = ftab[0]; /* bbox: optimized */ - for (i = 1; i <= 65536; i++) { - j += ftab[i]; - ftab[i] = j; - } - - s = block[0] << 8; - i = nblock - 1; -#if CONFIG_BZIP2_FEATURE_SPEED >= 2 - for (; i >= 3; i -= 4) { - s = (s >> 8) | (block[i] << 8); - j = ftab[s] - 1; - ftab[s] = j; - ptr[j] = i; - s = (s >> 8) | (block[i-1] << 8); - j = ftab[s] - 1; - ftab[s] = j; - ptr[j] = i-1; - s = (s >> 8) | (block[i-2] << 8); - j = ftab[s] - 1; - ftab[s] = j; - ptr[j] = i-2; - s = (s >> 8) | (block[i-3] << 8); - j = ftab[s] - 1; - ftab[s] = j; - ptr[j] = i-3; - } -#endif - for (; i >= 0; i--) { - s = (s >> 8) | (block[i] << 8); - j = ftab[s] - 1; - ftab[s] = j; - ptr[j] = i; - } - - /* - * Now ftab contains the first loc of every small bucket. - * Calculate the running order, from smallest to largest - * big bucket. - */ - for (i = 0; i <= 255; i++) { - bigDone [i] = False; - runningOrder[i] = i; - } - - { - int32_t vv; - /* bbox: was: int32_t h = 1; */ - /* do h = 3 * h + 1; while (h <= 256); */ - uint32_t h = 364; - - do { - /*h = h / 3;*/ - h = (h * 171) >> 9; /* bbox: fast h/3 */ - for (i = h; i <= 255; i++) { - vv = runningOrder[i]; - j = i; - while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) { - runningOrder[j] = runningOrder[j-h]; - j = j - h; - if (j <= (h - 1)) - goto zero; - } - zero: - runningOrder[j] = vv; - } - } while (h != 1); - } - - /* - * The main sorting loop. - */ - - numQSorted = 0; - - for (i = 0; i <= 255; i++) { - - /* - * Process big buckets, starting with the least full. - * Basically this is a 3-step process in which we call - * mainQSort3 to sort the small buckets [ss, j], but - * also make a big effort to avoid the calls if we can. - */ - ss = runningOrder[i]; - - /* - * Step 1: - * Complete the big bucket [ss] by quicksorting - * any unsorted small buckets [ss, j], for j != ss. - * Hopefully previous pointer-scanning phases have already - * completed many of the small buckets [ss, j], so - * we don't have to sort them at all. - */ - for (j = 0; j <= 255; j++) { - if (j != ss) { - sb = (ss << 8) + j; - if (!(ftab[sb] & SETMASK)) { - int32_t lo = ftab[sb] & CLEARMASK; - int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; - if (hi > lo) { - mainQSort3( - ptr, block, quadrant, nblock, - lo, hi, BZ_N_RADIX, budget - ); - if (*budget < 0) return; - numQSorted += (hi - lo + 1); - } - } - ftab[sb] |= SETMASK; - } - } - - AssertH(!bigDone[ss], 1006); - - /* - * Step 2: - * Now scan this big bucket [ss] so as to synthesise the - * sorted order for small buckets [t, ss] for all t, - * including, magically, the bucket [ss,ss] too. - * This will avoid doing Real Work in subsequent Step 1's. - */ - { - for (j = 0; j <= 255; j++) { - copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; - copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; - } - for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { - k = ptr[j] - 1; - if (k < 0) - k += nblock; - c1 = block[k]; - if (!bigDone[c1]) - ptr[copyStart[c1]++] = k; - } - for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { - k = ptr[j]-1; - if (k < 0) - k += nblock; - c1 = block[k]; - if (!bigDone[c1]) - ptr[copyEnd[c1]--] = k; - } - } - - /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. - * Necessity for this case is demonstrated by compressing - * a sequence of approximately 48.5 million of character - * 251; 1.0.0/1.0.1 will then die here. */ - AssertH((copyStart[ss]-1 == copyEnd[ss]) \ - || (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007); - - for (j = 0; j <= 255; j++) - ftab[(j << 8) + ss] |= SETMASK; - - /* - * Step 3: - * The [ss] big bucket is now done. Record this fact, - * and update the quadrant descriptors. Remember to - * update quadrants in the overshoot area too, if - * necessary. The "if (i < 255)" test merely skips - * this updating for the last bucket processed, since - * updating for the last bucket is pointless. - * - * The quadrant array provides a way to incrementally - * cache sort orderings, as they appear, so as to - * make subsequent comparisons in fullGtU() complete - * faster. For repetitive blocks this makes a big - * difference (but not big enough to be able to avoid - * the fallback sorting mechanism, exponential radix sort). - * - * The precise meaning is: at all times: - * - * for 0 <= i < nblock and 0 <= j <= nblock - * - * if block[i] != block[j], - * - * then the relative values of quadrant[i] and - * quadrant[j] are meaningless. - * - * else { - * if quadrant[i] < quadrant[j] - * then the string starting at i lexicographically - * precedes the string starting at j - * - * else if quadrant[i] > quadrant[j] - * then the string starting at j lexicographically - * precedes the string starting at i - * - * else - * the relative ordering of the strings starting - * at i and j has not yet been determined. - * } - */ - bigDone[ss] = True; - - if (i < 255) { - int32_t bbStart = ftab[ss << 8] & CLEARMASK; - int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; - int32_t shifts = 0; - - while ((bbSize >> shifts) > 65534) shifts++; - - for (j = bbSize-1; j >= 0; j--) { - int32_t a2update = ptr[bbStart + j]; - uint16_t qVal = (uint16_t)(j >> shifts); - quadrant[a2update] = qVal; - if (a2update < BZ_N_OVERSHOOT) - quadrant[a2update + nblock] = qVal; - } - AssertH(((bbSize-1) >> shifts) <= 65535, 1002); - } - } -#undef runningOrder -#undef copyStart -#undef copyEnd -} - -#undef BIGFREQ -#undef SETMASK -#undef CLEARMASK - - -/*---------------------------------------------*/ -/* Pre: - * nblock > 0 - * arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] - * ((uint8_t*)arr2)[0 .. nblock-1] holds block - * arr1 exists for [0 .. nblock-1] - * - * Post: - * ((uint8_t*)arr2) [0 .. nblock-1] holds block - * All other areas of block destroyed - * ftab[0 .. 65536] destroyed - * arr1[0 .. nblock-1] holds sorted order - */ -static NOINLINE -void BZ2_blockSort(EState* s) -{ - /* In original bzip2 1.0.4, it's a parameter, but 30 - * (which was the default) should work ok. */ - enum { wfact = 30 }; - - uint32_t* ptr = s->ptr; - uint8_t* block = s->block; - uint32_t* ftab = s->ftab; - int32_t nblock = s->nblock; - uint16_t* quadrant; - int32_t budget; - int32_t i; - - if (nblock < 10000) { - fallbackSort(s->arr1, s->arr2, ftab, nblock); - } else { - /* Calculate the location for quadrant, remembering to get - * the alignment right. Assumes that &(block[0]) is at least - * 2-byte aligned -- this should be ok since block is really - * the first section of arr2. - */ - i = nblock + BZ_N_OVERSHOOT; - if (i & 1) i++; - quadrant = (uint16_t*)(&(block[i])); - - /* (wfact-1) / 3 puts the default-factor-30 - * transition point at very roughly the same place as - * with v0.1 and v0.9.0. - * Not that it particularly matters any more, since the - * resulting compressed stream is now the same regardless - * of whether or not we use the main sort or fallback sort. - */ - budget = nblock * ((wfact-1) / 3); - - mainSort(s, ptr, block, quadrant, ftab, nblock, &budget); - if (budget < 0) { - fallbackSort(s->arr1, s->arr2, ftab, nblock); - } - } - - s->origPtr = -1; - for (i = 0; i < s->nblock; i++) - if (ptr[i] == 0) { - s->origPtr = i; - break; - }; - - AssertH(s->origPtr != -1, 1003); -} - - -/*-------------------------------------------------------------*/ -/*--- end blocksort.c ---*/ -/*-------------------------------------------------------------*/ |