diff options
Diffstat (limited to 'coreutils/shuf.c')
-rw-r--r-- | coreutils/shuf.c | 150 |
1 files changed, 150 insertions, 0 deletions
diff --git a/coreutils/shuf.c b/coreutils/shuf.c new file mode 100644 index 0000000..f213d67 --- /dev/null +++ b/coreutils/shuf.c @@ -0,0 +1,150 @@ +/* vi: set sw=4 ts=4: */ +/* + * shuf: Write a random permutation of the input lines to standard output. + * + * Copyright (C) 2014 by Bartosz Golaszewski <bartekgola@gmail.com> + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +//config:config SHUF +//config: bool "shuf" +//config: default y +//config: help +//config: Generate random permutations + +//kbuild:lib-$(CONFIG_SHUF) += shuf.o +//applet:IF_SHUF(APPLET_NOEXEC(shuf, shuf, BB_DIR_USR_BIN, BB_SUID_DROP, shuf)) + +//usage:#define shuf_trivial_usage +//usage: "[-e|-i LO-HI] [-n NUM] [-o FILE] [-z] [FILE|ARG...]" +//usage:#define shuf_full_usage "\n\n" +//usage: "Write a random permutation of the input lines to standard output\n" +//usage: "\n -e Treat each ARG as an input line" +//usage: "\n -i L-H Treat each number L through H as an input line" +//usage: "\n -n NUM Output at most NUM lines" +//usage: "\n -o Write result to FILE instead of standard output" +//usage: "\n -z End lines with zero byte, not newline" + +#include "libbb.h" + +/* This is a NOEXEC applet. Be very careful! */ + +#define OPT_e (1 << 0) +#define OPT_i (1 << 1) +#define OPT_n (1 << 2) +#define OPT_o (1 << 3) +#define OPT_z (1 << 4) +#define OPT_STR "ei:n:o:z" + +/* + * Use the Fisher-Yates shuffle algorithm on an array of lines. + */ +static void shuffle_lines(char **lines, unsigned numlines) +{ + unsigned i; + unsigned r; + char *tmp; + + srand(monotonic_us()); + + for (i = numlines-1; i > 0; i--) { + r = rand(); + /* RAND_MAX can be as small as 32767 */ + if (i > RAND_MAX) + r ^= rand() << 15; + r %= i; + tmp = lines[i]; + lines[i] = lines[r]; + lines[r] = tmp; + } +} + +int shuf_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int shuf_main(int argc, char **argv) +{ + unsigned opts; + char *opt_i_str, *opt_n_str, *opt_o_str; + unsigned i; + char **lines; + unsigned numlines; + char eol; + + opt_complementary = "e--i:i--e"; /* mutually exclusive */ + opts = getopt32(argv, OPT_STR, &opt_i_str, &opt_n_str, &opt_o_str); + + argc -= optind; + argv += optind; + + /* Prepare lines for shuffling - either: */ + if (opts & OPT_e) { + /* make lines from command-line arguments */ + + numlines = argc; + lines = argv; + } else + if (opts & OPT_i) { + /* create a range of numbers */ + char *dash; + unsigned lo, hi; + + dash = strchr(opt_i_str, '-'); + if (!dash) { + bb_error_msg_and_die("bad range '%s'", opt_i_str); + } + *dash = '\0'; + lo = xatou(opt_i_str); + hi = xatou(dash + 1); + *dash = '-'; + if (hi < lo) { + bb_error_msg_and_die("bad range '%s'", opt_i_str); + } + + numlines = (hi+1) - lo; + lines = xmalloc(numlines * sizeof(lines[0])); + for (i = 0; i < numlines; i++) { + lines[i] = xstrdup(utoa(lo)); + lo++; + } + } else { + /* default - read lines from stdin or the input file */ + FILE *fp; + + if (argc > 1) + bb_show_usage(); + + fp = xfopen_stdin(argv[0] ? argv[0] : "-"); + lines = NULL; + numlines = 0; + for (;;) { + char *line = xmalloc_fgetline(fp); + if (!line) + break; + lines = xrealloc_vector(lines, 6, numlines); + lines[numlines++] = line; + } + fclose_if_not_stdin(fp); + } + + shuffle_lines(lines, numlines); + + if (opts & OPT_o) + xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO); + + if (opts & OPT_n) { + unsigned maxlines; + maxlines = xatou(opt_n_str); + if (numlines > maxlines) + numlines = maxlines; + } + + eol = '\n'; + if (opts & OPT_z) + eol = '\0'; + + for (i = 0; i < numlines; i++) { + printf("%s%c", lines[i], eol); + } + + fflush_stdout_and_exit(EXIT_SUCCESS); +} |