diff options
Diffstat (limited to 'coreutils/uniq.c')
-rw-r--r-- | coreutils/uniq.c | 126 |
1 files changed, 75 insertions, 51 deletions
diff --git a/coreutils/uniq.c b/coreutils/uniq.c index cb63c42..90686c9 100644 --- a/coreutils/uniq.c +++ b/coreutils/uniq.c @@ -1,10 +1,8 @@ /* vi: set sw=4 ts=4: */ /* - * Mini uniq implementation for busybox + * uniq implementation for busybox * - * Copyright (C) 1999 by Lineo, inc. and John Beppu - * Copyright (C) 1999,2000,2001 by John Beppu <beppu@codepoet.org> - * Rewritten by Matt Kraai <kraai@alumni.carnegiemellon.edu> + * Copyright (C) 2003 Manuel Novoa III <mjn3@codepoet.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,67 +20,93 @@ * */ +/* BB_AUDIT SUSv3 compliant */ +/* http://www.opengroup.org/onlinepubs/007904975/utilities/uniq.html */ + #include <stdio.h> -#include <string.h> -#include <getopt.h> -#include <errno.h> #include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <unistd.h> #include "busybox.h" +#include "libcoreutils/coreutils.h" -static int print_count; -static int print_uniq = 1; -static int print_duplicates = 1; - -static void print_line(char *line, int count, FILE *fp) -{ - if ((print_duplicates && count > 1) || (print_uniq && count == 1)) { - if (print_count) - fprintf(fp, "%7d\t%s", count, line); - else - fputs(line, fp); - } -} +static const char uniq_opts[] = "f:s:cdu\0\7\3\5\1\2\4"; int uniq_main(int argc, char **argv) { - FILE *in = stdin, *out = stdout; - char *lastline = NULL, *input; - int opt, count = 0; + FILE *in, *out; + /* Note: Ignore the warning about dups and e0 possibly being uninitialized. + * They will be initialized on the fist pass of the loop (since s0 is NULL). */ + unsigned long dups, skip_fields, skip_chars, i; + const char *s0, *e0, *s1, *e1, *input_filename; + int opt; + int uniq_flags = 6; /* -u */ - /* parse argv[] */ - while ((opt = getopt(argc, argv, "cdu")) > 0) { - switch (opt) { - case 'c': - print_count = 1; - break; - case 'd': - print_duplicates = 1; - print_uniq = 0; - break; - case 'u': - print_duplicates = 0; - print_uniq = 1; - break; + skip_fields = skip_chars = 0; + + while ((opt = getopt(argc, argv, uniq_opts)) > 0) { + if (opt == 'f') { + skip_fields = bb_xgetularg10(optarg); + } else if (opt == 's') { + skip_chars = bb_xgetularg10(optarg); + } else if ((s0 = strchr(uniq_opts, opt)) != NULL) { + uniq_flags &= s0[4]; + uniq_flags |= s0[7]; + } else { + bb_show_usage(); } } - if (argv[optind] != NULL) { - in = xfopen(argv[optind], "r"); - if (argv[optind+1] != NULL) - out = xfopen(argv[optind+1], "w"); + input_filename = *(argv += optind); + + in = xgetoptfile_sort_uniq(argv, "r"); + if (*argv) { + ++argv; } + out = xgetoptfile_sort_uniq(argv, "w"); + if (*argv && argv[1]) { + bb_show_usage(); + } + + s0 = NULL; - while ((input = get_line_from_file(in)) != NULL) { - if (lastline == NULL || strcmp(input, lastline) != 0) { - print_line(lastline, count, out); - free(lastline); - lastline = input; - count = 0; + /* gnu uniq ignores newlines */ + while ((s1 = bb_get_chomped_line_from_file(in)) != NULL) { + e1 = s1; + for (i=skip_fields ; i ; i--) { + e1 = bb_skip_whitespace(e1); + while (*e1 && !isspace(*e1)) { + ++e1; + } + } + for (i = skip_chars ; *e1 && i ; i--) { + ++e1; + } + if (s0) { + if (strcmp(e0, e1) == 0) { + ++dups; /* Note: Testing for overflow seems excessive. */ + continue; + } + DO_LAST: + if ((dups && (uniq_flags & 2)) || (!dups && (uniq_flags & 4))) { + bb_fprintf(out, "\0%7d\t" + (uniq_flags & 1), dups + 1); + bb_fprintf(out, "%s\n", s0); + } + free((void *)s0); } - count++; + + s0 = s1; + e0 = e1; + dups = 0; + } + + if (s0) { + e1 = NULL; + goto DO_LAST; } - print_line(lastline, count, out); - free(lastline); - return EXIT_SUCCESS; + bb_xferror(in, input_filename); + + bb_fflush_stdout_and_exit(EXIT_SUCCESS); } |