summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenis Vlasenko2008-05-03 07:21:27 +0000
committerDenis Vlasenko2008-05-03 07:21:27 +0000
commit96b99b860cc15f13b85b1b2d5b5b20ab7183a652 (patch)
treef7f5236085dcc40f02bd29078900d971a787344e
parent687a26fe0dcf10f227cb0541882d1daa8a21991d (diff)
downloadbusybox-96b99b860cc15f13b85b1b2d5b5b20ab7183a652.zip
busybox-96b99b860cc15f13b85b1b2d5b5b20ab7183a652.tar.gz
uniq: support -w. closes bug 3094.
function old new delta packed_usage 24136 24132 -4 uniq_main 399 384 -15
-rw-r--r--coreutils/uniq.c23
-rw-r--r--include/usage.h10
-rwxr-xr-xtestsuite/uniq.tests17
3 files changed, 33 insertions, 17 deletions
diff --git a/coreutils/uniq.c b/coreutils/uniq.c
index 32327c6..41f1fed 100644
--- a/coreutils/uniq.c
+++ b/coreutils/uniq.c
@@ -12,8 +12,6 @@
#include "libbb.h"
-static const char uniq_opts[] ALIGN1 = "cdu" "f:s:" "cdu\0\1\2\4";
-
static FILE *xgetoptfile_uniq_s(char **argv, int read0write2)
{
const char *n;
@@ -31,9 +29,11 @@ int uniq_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv)
{
FILE *in, *out;
- unsigned long dups, skip_fields, skip_chars, i;
const char *s0, *e0, *s1, *e1, *input_filename;
+ unsigned long dups;
+ unsigned skip_fields, skip_chars, max_chars;
unsigned opt;
+ unsigned i;
enum {
OPT_c = 0x1,
@@ -41,15 +41,14 @@ int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv)
OPT_u = 0x4,
OPT_f = 0x8,
OPT_s = 0x10,
+ OPT_w = 0x20,
};
skip_fields = skip_chars = 0;
+ max_chars = -1;
- opt = getopt32(argv, "cduf:s:", &s0, &s1);
- if (opt & OPT_f)
- skip_fields = xatoul(s0);
- if (opt & OPT_s)
- skip_chars = xatoul(s1);
+ opt_complementary = "f+:s+:w+";
+ opt = getopt32(argv, "cduf:s:w:", &skip_fields, &skip_chars, &max_chars);
argv += optind;
input_filename = *argv;
@@ -63,7 +62,7 @@ int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv)
bb_show_usage();
}
- s1 = e1 = NULL; /* prime the pump */
+ s1 = e1 = NULL; /* prime the pump */
do {
s0 = s1;
@@ -81,16 +80,16 @@ int uniq_main(int argc ATTRIBUTE_UNUSED, char **argv)
++e1;
}
- if (!s0 || strcmp(e0, e1)) {
+ if (!s0 || strncmp(e0, e1, max_chars)) {
break;
}
- ++dups; /* Note: Testing for overflow seems excessive. */
+ ++dups; /* note: testing for overflow seems excessive. */
}
if (s0) {
if (!(opt & (OPT_d << !!dups))) { /* (if dups, opt & OPT_e) */
- fprintf(out, "\0%d " + (opt & 1), dups + 1);
+ fprintf(out, "\0%ld " + (opt & 1), dups + 1); /* 1 == OPT_c */
fprintf(out, "%s\n", s0);
}
free((void *)s0);
diff --git a/include/usage.h b/include/usage.h
index cbc5cb0..e791ba6 100644
--- a/include/usage.h
+++ b/include/usage.h
@@ -4303,16 +4303,16 @@
)
#define uniq_trivial_usage \
- "[-fscdu]... [INPUT [OUTPUT]]"
+ "[-fscduw]... [INPUT [OUTPUT]]"
#define uniq_full_usage "\n\n" \
- "Discard all but one of successive identical lines from INPUT\n" \
- "(or standard input), writing to OUTPUT (or standard output)\n" \
+ "Discard duplicate lines\n" \
"\nOptions:" \
"\n -c Prefix lines by the number of occurrences" \
"\n -d Only print duplicate lines" \
"\n -u Only print unique lines" \
- "\n -f N Skip the first N fields" \
- "\n -s N Skip the first N chars (after any skipped fields)" \
+ "\n -f N Skip first N fields" \
+ "\n -s N Skip first N chars (after any skipped fields)" \
+ "\n -w N Compare N characters in line" \
#define uniq_example_usage \
"$ echo -e \"a\\na\\nb\\nc\\nc\\na\" | sort | uniq\n" \
diff --git a/testsuite/uniq.tests b/testsuite/uniq.tests
index 49d4bed..8961d66 100755
--- a/testsuite/uniq.tests
+++ b/testsuite/uniq.tests
@@ -41,6 +41,7 @@ testing "uniq input - (specify stdout)" "uniq input -" \
#-c occurrences
#-d dups only
#-u
+#-w max chars
# Test various command line options
@@ -60,6 +61,22 @@ aa bb cc9
bb cc dd8
aa bb cc9
"
+testing "uniq -w (compare max characters)" "uniq -w 2" \
+"cc1
+" "" \
+"cc1
+cc2
+cc3
+"
+
+testing "uniq -s -w (skip fields and compare max chars)" \
+"uniq -s 2 -w 2" \
+"aaccaa
+" "" \
+"aaccaa
+aaccbb
+bbccaa
+"
# -d is "Suppress the writing fo lines that are not repeated in the input."
# -u is "Suppress the writing of lines that are repeated in the input."