diff options
author | Rob Landley | 2005-01-24 07:00:02 +0000 |
---|---|---|
committer | Rob Landley | 2005-01-24 07:00:02 +0000 |
commit | c0dedd05e81ac03a1793abd8cbfacf8c546e976f (patch) | |
tree | cd761995a05504c6d068ba33ddc62e86f9342e4c | |
parent | f4bb212d6ccc1a14724ba56fa57c3bc3ca66cf22 (diff) | |
download | busybox-c0dedd05e81ac03a1793abd8cbfacf8c546e976f.zip busybox-c0dedd05e81ac03a1793abd8cbfacf8c546e976f.tar.gz |
Sort rewrite to be SUSv3 compliant. New config option, updated help, and
a couple of infrastructure bits.
-rw-r--r-- | coreutils/Config.in | 12 | ||||
-rw-r--r-- | coreutils/sort.c | 350 | ||||
-rw-r--r-- | include/libbb.h | 1 | ||||
-rw-r--r-- | include/usage.h | 50 | ||||
-rw-r--r-- | libbb/get_line_from_file.c | 8 |
5 files changed, 351 insertions, 70 deletions
diff --git a/coreutils/Config.in b/coreutils/Config.in index e1f0516..4aff5ce 100644 --- a/coreutils/Config.in +++ b/coreutils/Config.in @@ -398,6 +398,18 @@ config CONFIG_SORT help sort is used to sort lines of text in specified files. +config CONFIG_SORT_BIG + bool " full SuSv3 compliant sort (Support -ktcsbdfiozgM)" + default y + depends on CONFIG_SORT + help + Without this, sort only supports -r, -u, and an integer version + of -n. Selecting this adds sort keys, floating point support, and + more. This adds a little over 3k to a nonstatic build on x86. + + The SuSv3 sort standard is available at: + http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html + config CONFIG_STTY bool "stty" default n diff --git a/coreutils/sort.c b/coreutils/sort.c index 8cc4d88..c701b5e 100644 --- a/coreutils/sort.c +++ b/coreutils/sort.c @@ -1,8 +1,8 @@ /* vi: set sw=4 ts=4: */ /* - * Mini sort implementation for busybox + * SuS3 compliant sort implementation for busybox * - * Copyright (C) 2000 by Matt Kraai <kraai@alumni.carnegiemellon.edu> + * Copyright (C) 2004 by Rob Landley <rob@landley.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,83 +18,321 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * + * See SuS3 sort standard at: + * http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html */ -/* BB_AUDIT SUSv3 _NOT_ compliant -- a number of options are not supported. */ -/* http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html */ - -/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) - * - * Now does proper error checking on i/o. Plus some space savings. - */ - +#include <ctype.h> +#include <math.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <time.h> #include <unistd.h> #include "busybox.h" -#include "libcoreutils/coreutils.h" -static int compare_ascii(const void *x, const void *y) +int global_flags; + +/* + sort [-m][-o output][-bdfinru][-t char][-k keydef]... [file...] + sort -c [-bdfinru][-t char][-k keydef][file] +*/ + +/* These are sort types */ +#define FLAG_n 1 /* Numeric sort */ +#define FLAG_g 2 /* Sort using strtod() */ +#define FLAG_M 4 /* Sort date */ +/* ucsz apply to root level only, not keys. b at root level implies bb */ +#define FLAG_u 8 /* Unique */ +#define FLAG_c 16 /* Check: no output, exit(!ordered) */ +#define FLAG_s 32 /* Stable sort, no ascii fallback at end */ +#define FLAG_z 64 /* Input is null terminated, not \n */ +/* These can be applied to search keys, the previous four can't */ +#define FLAG_b 128 /* Ignore leading blanks */ +#define FLAG_r 256 /* Reverse */ +#define FLAG_d 512 /* Ignore !(isalnum()|isspace()) */ +#define FLAG_f 1024 /* Force uppercase */ +#define FLAG_i 2048 /* Ignore !isprint() */ +#define FLAG_bb 32768 /* Ignore trailing blanks */ + + +#ifdef CONFIG_SORT_BIG +char key_separator; + +struct sort_key { - return strcmp(*(char **)x, *(char **)y); -} + struct sort_key *next_key; /* linked list */ + unsigned short range[4]; /* start word, start char, end word, end char */ + int flags; +} *key_list; -static int compare_numeric(const void *x, const void *y) +static char *get_key(char *str, struct sort_key *key, int flags) { - int z = atoi(*(char **)x) - atoi(*(char **)y); - return z ? z : strcmp(*(char **)x, *(char **)y); + int start=0,end,len,i,j; + + /* Special case whole string, so we don't have to make a copy */ + if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3] + && !(flags&(FLAG_b&FLAG_d&FLAG_f&FLAG_i&FLAG_bb))) return str; + /* Find start of key on first pass, end on second pass*/ + len=strlen(str); + + for(j=0;j<2;j++) { + if(!key->range[2*j]) end=len; + /* Loop through fields */ + else { + end=0; + for(i=1;i<key->range[2*j]+j;i++) { + /* Skip leading blanks or first separator */ + if(str[end]) { + if(key_separator) { + if(str[end]==key_separator) end++; + } else if(isspace(str[end])) + while(isspace(str[end])) end++; + } + /* Skip body of key */ + for(;str[end];end++) { + if(key_separator) { + if(str[end]==key_separator) break; + } else if(isspace(str[end])) break; + } + } + } + if(!j) start=end; + } + /* Key with explicit separator starts after separator */ + if(key_separator && str[start]==key_separator) start++; + /* Strip leading whitespace if necessary */ + if(flags&FLAG_b) while(isspace(str[start])) start++; + /* Strip trailing whitespace if necessary */ + if(flags&FLAG_bb) while(end>start && isspace(str[end-1])) end--; + /* Handle offsets on start and end */ + if(key->range[3]) { + end+=key->range[3]-1; + if(end>len) end=len; + } + if(key->range[1]) { + start+=key->range[1]-1; + if(start>len) start=len; + } + /* Make the copy */ + if(end<start) end=start; + str=bb_xstrndup(str+start,end-start); + /* Handle -d */ + if(flags&FLAG_d) { + for(start=end=0;str[end];end++) + if(isspace(str[end]) || isalnum(str[end])) str[start++]=str[end]; + str[start]=0; + } + /* Handle -i */ + if(flags&FLAG_i) { + for(start=end=0;str[end];end++) + if(isprint(str[end])) str[start++]=str[end]; + str[start]=0; + } + /* Handle -f */ + if(flags*FLAG_f) for(i=0;str[i];i++) str[i]=toupper(str[i]); + + return str; } -int sort_main(int argc, char **argv) +static struct sort_key *add_key(void) { - FILE *fp; - char *line, **lines = NULL; - int i, nlines = 0, inc; - int (*compare)(const void *, const void *) = compare_ascii; + struct sort_key **pkey=&key_list; + while(*pkey) pkey=&((*pkey)->next_key); + return *pkey=xcalloc(1,sizeof(struct sort_key)); +} - int flags; +#define GET_LINE(fp) (global_flags&FLAG_z) ? bb_get_chunk_from_file(fp) \ + : bb_get_chomped_line_from_file(fp) +#else +#define GET_LINE(fp) bb_get_chomped_line_from_file(fp) +#endif - bb_default_error_retval = 2; +/* Iterate through keys list and perform comparisons */ +static int compare_keys(const void *xarg, const void *yarg) +{ + int flags=global_flags,retval=0; + char *x,*y; - flags = bb_getopt_ulflags(argc, argv, "nru"); - if (flags & 1) { - compare = compare_numeric; - } +#ifdef CONFIG_SORT_BIG + struct sort_key *key; + + for(key=key_list;!retval && key;key=key->next_key) { + flags=(key->flags) ? key->flags : global_flags; + /* Chop out and modify key chunks, handling -dfib */ + x=get_key(*(char **)xarg,key,flags); + y=get_key(*(char **)yarg,key,flags); +#else + /* This curly bracket serves no purpose but to match the nesting + level of the for() loop we're not using */ + { + x=*(char **)xarg; + y=*(char **)yarg; +#endif + /* Perform actual comparison */ + switch(flags&7) { + default: + bb_error_msg_and_die("Unknown sort type."); + break; + /* Ascii sort */ + case 0: + retval=strcmp(x,y); + break; +#ifdef CONFIG_SORT_BIG + case FLAG_g: + { + char *xx,*yy; + double dx=strtod(x,&xx), dy=strtod(y,&yy); + /* not numbers < NaN < -infinity < numbers < +infinity) */ + if(x==xx) retval=(y==yy ? 0 : -1); + else if(y==yy) retval=1; + else if(isnan(dx)) retval=isnan(dy) ? 0 : -1; + else if(isnan(dy)) retval=1; + else if(isinf(dx)) { + if(dx<0) retval=((isinf(dy) && dy<0) ? 0 : -1); + else retval=((isinf(dy) && dy>0) ? 0 : 1); + } else if(isinf(dy)) retval=dy<0 ? 1 : -1; + else retval=dx>dy ? 1 : (dx<dy ? -1 : 0); + break; + } + case FLAG_M: + { + struct tm thyme; + int dx; + char *xx,*yy; - argv += optind; - if (!*argv) { - *--argv = "-"; + xx=strptime(x,"%b",&thyme); + dx=thyme.tm_mon; + yy=strptime(y,"%b",&thyme); + if(!xx) retval=(!yy ? 0 : -1); + else if(!yy) retval=1; + else retval=(dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon); + break; + } + /* Full floating point version of -n */ + case FLAG_n: + { + double dx=atof(x),dy=atof(y); + retval=dx>dy ? 1 : (dx<dy ? -1 : 0); + break; + } + } + /* Free key copies. */ + if(x!=*(char **)xarg) free(x); + if(y!=*(char **)yarg) free(y); + if(retval) break; +#else + /* Integer version of -n for tiny systems */ + case FLAG_n: + retval=atoi(x)-atoi(y); + break; + } +#endif } + /* Perform fallback sort if necessary */ + if(!retval && !(global_flags&FLAG_s)) + retval=strcmp(*(char **)xarg, *(char **)yarg); + return ((flags&FLAG_r)?-1:1)*retval; +} - do { - fp = xgetoptfile_sort_uniq(argv, "r"); - while ((line = bb_get_chomped_line_from_file(fp)) != NULL) { - lines = xrealloc(lines, sizeof(char *) * (nlines + 1)); - lines[nlines++] = line; +int sort_main(int argc, char **argv) +{ + FILE *fp,*outfile=NULL; + int linecount=0,i,flag; + char *line,**lines=NULL,c,*optlist="ngMucszbrdfimS:T:o:k:t:"; + + bb_default_error_retval = 2; + /* Parse command line options */ + while((c=getopt(argc,argv,optlist))>0) { + line=index(optlist,c); + if(!line) bb_show_usage(); + switch(*line) { +#ifdef CONFIG_SORT_BIG + case 'o': + if(outfile) bb_error_msg_and_die("Too many -o."); + outfile=bb_xfopen(optarg,"w"); + break; + case 't': + if(key_separator || optarg[1]) + bb_error_msg_and_die("Too many -t."); + key_separator=*optarg; + break; + /* parse sort key */ + case 'k': + { + struct sort_key *key=add_key(); + char *temp, *temp2; + + temp=optarg; + for(i=0;*temp;) { + /* Start of range */ + key->range[2*i]=(unsigned short)strtol(temp,&temp,10); + if(*temp=='.') + key->range[(2*i)+1]=(unsigned short)strtol(temp+1,&temp,10); + for(;*temp;temp++) { + if(*temp==',' && !i++) { + temp++; + break; + } /* no else needed: fall through to syntax error + because comma isn't in optlist */ + temp2=index(optlist,*temp); + flag=(1<<(temp2-optlist)); + if(!temp2 || (flag>FLAG_M && flag<FLAG_b)) + bb_error_msg_and_die("Unknown key option."); + /* b after , means strip _trailing_ space */ + if(i && flag==FLAG_b) flag=FLAG_bb; + key->flags|=flag; + } + } + break; + } +#endif + default: + global_flags|=(1<<(line-optlist)); + /* global b strips leading and trailing spaces */ + if(global_flags&FLAG_b) global_flags|=FLAG_bb; + break; } - bb_xferror(fp, *argv); - bb_fclose_nonstdin(fp); - } while (*++argv); - - /* sort it */ - qsort(lines, nlines, sizeof(char *), compare); - - /* print it */ - i = 0; - --nlines; - if ((inc = 1 - (flags & 2)) < 0) { /* reverse */ - i = nlines; } - flags &= 4; - - while (nlines >= 0) { - if (!flags || !nlines || strcmp(lines[i+inc], lines[i])) { - puts(lines[i]); + /* Open input files and read data */ + for(i=argv[optind] ? optind : optind-1;argv[i];i++) { + if(i<optind || (*argv[i]=='-' && !argv[i][1])) fp=stdin; + else fp=bb_xfopen(argv[i],"r"); + for(;;) { + line=GET_LINE(fp); + if(!line) break; + if(!(linecount&63)) + lines=xrealloc(lines, sizeof(char *)*(linecount+64)); + lines[linecount++]=line; } - i += inc; - --nlines; + fclose(fp); } - +#ifdef CONFIG_SORT_BIG + /* if no key, perform alphabetic sort */ + if(!key_list) add_key()->range[0]=1; + /* handle -c */ + if(global_flags&FLAG_c) { + int j=(global_flags&FLAG_u) ? -1 : 0; + for(i=1;i<linecount;i++) + if(compare_keys(&lines[i-1],&lines[i])>j) { + fprintf(stderr,"Check line %d\n",i); + return 1; + } + return 0; + } +#endif + /* Perform the actual sort */ + qsort(lines,linecount,sizeof(char *),compare_keys); + /* handle -u */ + if(global_flags&FLAG_u) { + for(flag=0,i=1;i<linecount;i++) { + if(!compare_keys(&lines[flag],&lines[i])) free(lines[i]); + else lines[++flag]=lines[i]; + } + if(linecount) linecount=flag+1; + } + /* Print it */ + if(!outfile) outfile=stdout; + for(i=0;i<linecount;i++) fprintf(outfile,"%s\n",lines[i]); bb_fflush_stdout_and_exit(EXIT_SUCCESS); } diff --git a/include/libbb.h b/include/libbb.h index 93ab537..0119aab 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -137,6 +137,7 @@ extern long *find_pid_by_name( const char* pidName); extern char *find_real_root_device_name(void); extern char *bb_get_line_from_file(FILE *file); extern char *bb_get_chomped_line_from_file(FILE *file); +extern char *bb_get_chunk_from_file(FILE *file); extern int bb_copyfd_size(int fd1, int fd2, const off_t size); extern int bb_copyfd_eof(int fd1, int fd2); extern void bb_xprint_and_close_file(FILE *file); diff --git a/include/usage.h b/include/usage.h index 7cf44d7..c53ead0 100644 --- a/include/usage.h +++ b/include/usage.h @@ -2193,24 +2193,40 @@ USAGE_FANCY_SLEEP("$ sleep 1d 3h 22m 8s\n" \ "[98528 second delay results]\n") -#ifdef CONFIG_FEATURE_SORT_UNIQUE - #define USAGE_SORT_UNIQUE(a) a +#ifdef CONFIG_SORT_BIG + #define USAGE_SORT_BIG(a) a #else - #define USAGE_SORT_UNIQUE(a) -#endif -#ifdef CONFIG_FEATURE_SORT_REVERSE - #define USAGE_SORT_REVERSE(a) a -#else - #define USAGE_SORT_REVERSE(a) + #define USAGE_SORT_BIG(a) #endif + + #define sort_trivial_usage \ - "[-n" USAGE_SORT_REVERSE("r") USAGE_SORT_UNIQUE("u") "] [FILE]..." + "[-nru" USAGE_SORT_BIG("gMcszbdfimSTokt] [-o outfile] [-k start[.offset][opts][,end[.offset][opts]] [-t char") "] [FILE]..." #define sort_full_usage \ "Sorts lines of text in the specified files\n\n"\ "Options:\n" \ - USAGE_SORT_UNIQUE("\t-u\tsuppress duplicate lines\n") \ - USAGE_SORT_REVERSE("\t-r\tsort in reverse order\n") \ - "\t-n\tsort numerics" + USAGE_SORT_BIG( \ + "\t-b\tignore leading blanks\n" \ + "\t-c\tcheck whether input is sorted\n" \ + "\t-d\tdictionary order (blank or alphanumeric only)\n" \ + "\t-f\tignore case\n" \ + "\t-g\tgeneral numerical sort\n" \ + "\t-i\tignore unprintable characters\n" \ + "\t-k\tspecify sort key\n" \ + "\t-M\tsort month\n" \ + ) \ + "\t-n\tsort numbers\n" \ + USAGE_SORT_BIG( \ + "\t-o\toutput to file\n" \ + "\t-k\tsort by key\n" \ + "\t-t\tuse key separator other than whitespace\n" \ + ) \ + "\t-r\treverse sort order\n" \ + USAGE_SORT_BIG("\t-s\tstable (don't sort ties alphabetically)\n") \ + "\t-u\tsuppress duplicate lines" \ + USAGE_SORT_BIG("\n\t-z\tinput terminated by nulls, not newlines\n") \ + USAGE_SORT_BIG("\t-mST\tignored for GNU compatability") \ + "" #define sort_example_usage \ "$ echo -e \"e\\nf\\nb\\nd\\nc\\na\" | sort\n" \ "a\n" \ @@ -2218,7 +2234,15 @@ "c\n" \ "d\n" \ "e\n" \ - "f\n" + "f\n" \ + USAGE_SORT_BIG( \ + "$ echo -e \"c 3\\nb 2\\nd 2\" | $SORT -k 2,2n -k 1,1r\n" \ + "d 2\n" \ + "b 2\n" \ + "c 3\n" \ + ) \ + "" + #define start_stop_daemon_trivial_usage \ "[OPTIONS] [--start|--stop] ... [-- arguments...]\n" diff --git a/libbb/get_line_from_file.c b/libbb/get_line_from_file.c index 6d12b21..a27edc3 100644 --- a/libbb/get_line_from_file.c +++ b/libbb/get_line_from_file.c @@ -44,7 +44,8 @@ static char *private_get_line_from_file(FILE *file, int c) linebuf = xrealloc(linebuf, linebufsz += GROWBY); } linebuf[idx++] = (char)ch; - if (ch == '\n' || ch == '\0') { + if (!ch) return linebuf; + if (c<2 && ch == '\n') { if (c) { --idx; } @@ -71,6 +72,11 @@ extern char *bb_get_chomped_line_from_file(FILE *file) return private_get_line_from_file(file, 1); } +extern char *bb_get_chunk_from_file(FILE *file) +{ + return private_get_line_from_file(file, 2); +} + /* END CODE */ /* |