From 6452c300366cf0e322227ad39991ec2fdfd6beed Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 22 Dec 2020 20:08:40 +0100 Subject: factor: detect squares If we have a square, the speedup can be extremely large (in best case example below, it's ~40000 times faster): $ time ./busybox_old factor 18446743988964486098 18446743988964486098: 2 3037000493 3037000493 real 0m4.246s $ time ./busybox factor 18446743988964486098 18446743988964486098: 2 3037000493 3037000493 real 0m0.000s function old new delta isqrt_odd - 57 +57 print_w - 36 +36 factorize 218 236 +18 print_h - 7 +7 factorize_numstr 65 72 +7 ------------------------------------------------------------------------------ (add/remove: 3/0 grow/shrink: 2/0 up/down: 125/0) Total: 125 bytes Signed-off-by: Denys Vlasenko --- coreutils/factor.c | 42 +++++++++++++++++++++++++++++++++++++++--- testsuite/factor.tests | 23 +++++++++++++++++++++++ 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/coreutils/factor.c b/coreutils/factor.c index 27f26fe..729bd36 100644 --- a/coreutils/factor.c +++ b/coreutils/factor.c @@ -85,7 +85,8 @@ static const uint64_t packed_wheel[] = { #undef R #define WHEEL_START 5 #define WHEEL_SIZE (5 + 24 * 20) -#define wheel_tab ((uint8_t*)&bb_common_bufsiz1) +#define square_count (((uint8_t*)&bb_common_bufsiz1)[0]) +#define wheel_tab (((uint8_t*)&bb_common_bufsiz1) + 1) /* * Why, you ask? * plain byte array: @@ -119,9 +120,39 @@ static void unpack_wheel(void) } } +/* Prevent inlining, factorize() needs all help it can get with reducing register pressure */ +static NOINLINE void print_w(wide_t n) +{ + unsigned rep = square_count; + do + printf(" %llu", n); + while (--rep != 0); +} +static NOINLINE void print_h(half_t n) +{ + print_w(n); +} + +static void factorize(wide_t N); + static half_t isqrt_odd(wide_t N) { half_t s = isqrt(N); + /* s^2 is <= N, (s+1)^2 > N */ + + /* If s^2 in fact is EQUAL to N, it's very lucky. + * Examples: + * factor 18446743988964486098 = 2 * 3037000493 * 3037000493 + * factor 18446743902517389507 = 3 * 2479700513 * 2479700513 + */ + if ((wide_t)s * s == N) { + /* factorize sqrt(N), printing each factor twice */ + square_count *= 2; + factorize(s); + /* Let caller know we recursed */ + return 0; + } + /* Subtract 1 from even s, odd s won't change: */ /* (doesnt work for zero, but we know that s != 0 here) */ s = (s - 1) | 1; @@ -152,6 +183,8 @@ static NOINLINE void factorize(wide_t N) * factor 18446744073709551557 (0xffffffffffffffc5). */ max_factor = isqrt_odd(N); + if (!max_factor) + return; /* square was detected and recursively factored */ factor = 2; w = 0; for (;;) { @@ -162,8 +195,10 @@ static NOINLINE void factorize(wide_t N) */ while ((N % factor) == 0) { /* not likely */ N = N / factor; - printf(" %"HALF_FMT"u", factor); + print_h(factor); max_factor = isqrt_odd(N); + if (!max_factor) + return; /* square was detected */ } if (factor >= max_factor) break; @@ -178,7 +213,7 @@ static NOINLINE void factorize(wide_t N) } end: if (N > 1) - printf(" %llu", N); + print_w(N); bb_putchar('\n'); } @@ -193,6 +228,7 @@ static void factorize_numstr(const char *numstr) if (errno) bb_show_usage(); printf("%llu:", N); + square_count = 1; factorize(N); } diff --git a/testsuite/factor.tests b/testsuite/factor.tests index 2cf4a54..e404e29 100755 --- a/testsuite/factor.tests +++ b/testsuite/factor.tests @@ -45,4 +45,27 @@ testing "factor \$((2*3*5*7*11*13*17*19*23*29*31*37*41*43*47))" \ "614889782588491410: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47\n" \ "" "" +# Test that square-detection code is not buggy +testing "factor 2 * 3037000493 * 3037000493" \ + "factor 18446743988964486098" \ + "18446743988964486098: 2 3037000493 3037000493\n" \ + "" "" +testing "factor 3 * 2479700513 * 2479700513" \ + "factor 18446743902517389507" \ + "18446743902517389507: 3 2479700513 2479700513\n" \ + "" "" +# including square-of-square cases: +testing "factor 3 * 37831 * 37831 * 37831 * 37831" \ + "factor 6144867742934288163" \ + "6144867742934288163: 3 37831 37831 37831 37831\n" \ + "" "" +testing "factor 3 * 13^16" \ + "factor 1996249827549539523" \ + "1996249827549539523: 3 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13\n" \ + "" "" +testing "factor 13^16" \ + "factor 665416609183179841" \ + "665416609183179841: 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13\n" \ + "" "" + exit $FAILCOUNT -- cgit v1.1