diff options
author | Denis Vlasenko | 2007-06-12 08:12:33 +0000 |
---|---|---|
committer | Denis Vlasenko | 2007-06-12 08:12:33 +0000 |
commit | cc5e090f12fb4e3834fb1a55bc91d7618af8ce78 (patch) | |
tree | 34813e8836287c21cb893ab7d3aee666db415d62 /editors/diff.c | |
parent | aa198dd39cad6cb41fbf6c8b64301b581a9ba206 (diff) | |
download | busybox-cc5e090f12fb4e3834fb1a55bc91d7618af8ce78.zip busybox-cc5e090f12fb4e3834fb1a55bc91d7618af8ce78.tar.gz |
move several applets to more correct ex-project. No code changes.
Diffstat (limited to 'editors/diff.c')
-rw-r--r-- | editors/diff.c | 1282 |
1 files changed, 1282 insertions, 0 deletions
diff --git a/editors/diff.c b/editors/diff.c new file mode 100644 index 0000000..830c15e --- /dev/null +++ b/editors/diff.c @@ -0,0 +1,1282 @@ +/* vi: set sw=4 ts=4: */ +/* + * Mini diff implementation for busybox, adapted from OpenBSD diff. + * + * Copyright (C) 2006 by Robert Sullivan <cogito.ergo.cogito@hotmail.com> + * Copyright (c) 2003 Todd C. Miller <Todd.Miller@courtesan.com> + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" + +#define FSIZE_MAX 32768 + +/* + * Output flags + */ +#define D_HEADER 1 /* Print a header/footer between files */ +#define D_EMPTY1 2 /* Treat first file as empty (/dev/null) */ +#define D_EMPTY2 4 /* Treat second file as empty (/dev/null) */ + +/* + * Status values for print_status() and diffreg() return values + * Guide: + * D_SAME - files are the same + * D_DIFFER - files differ + * D_BINARY - binary files differ + * D_COMMON - subdirectory common to both dirs + * D_ONLY - file only exists in one dir + * D_MISMATCH1 - path1 a dir, path2 a file + * D_MISMATCH2 - path1 a file, path2 a dir + * D_ERROR - error occurred + * D_SKIPPED1 - skipped path1 as it is a special file + * D_SKIPPED2 - skipped path2 as it is a special file + */ + +#define D_SAME 0 +#define D_DIFFER (1<<0) +#define D_BINARY (1<<1) +#define D_COMMON (1<<2) +#define D_ONLY (1<<3) +#define D_MISMATCH1 (1<<4) +#define D_MISMATCH2 (1<<5) +#define D_ERROR (1<<6) +#define D_SKIPPED1 (1<<7) +#define D_SKIPPED2 (1<<8) + +/* Command line options */ +#define FLAG_a (1<<0) +#define FLAG_b (1<<1) +#define FLAG_d (1<<2) +#define FLAG_i (1<<3) +#define FLAG_L (1<<4) +#define FLAG_N (1<<5) +#define FLAG_q (1<<6) +#define FLAG_r (1<<7) +#define FLAG_s (1<<8) +#define FLAG_S (1<<9) +#define FLAG_t (1<<10) +#define FLAG_T (1<<11) +#define FLAG_U (1<<12) +#define FLAG_w (1<<13) + +struct cand { + int x; + int y; + int pred; +}; + +struct line { + int serial; + int value; +}; + +/* + * The following struct is used to record change information + * doing a "context" or "unified" diff. (see routine "change" to + * understand the highly mnemonic field names) + */ +struct context_vec { + int a; /* start line in old file */ + int b; /* end line in old file */ + int c; /* start line in new file */ + int d; /* end line in new file */ +}; + +struct globals { + USE_FEATURE_DIFF_DIR(char **dl;) + USE_FEATURE_DIFF_DIR(int dl_count;) + /* This is the default number of lines of context. */ + int context; + size_t max_context; + int status; + char *start; + const char *label1; + const char *label2; + struct line *file[2]; + int *J; /* will be overlaid on class */ + int *class; /* will be overlaid on file[0] */ + int *klist; /* will be overlaid on file[0] after class */ + int *member; /* will be overlaid on file[1] */ + int clen; + int len[2]; + int pref, suff; /* length of prefix and suffix */ + int slen[2]; + bool anychange; + long *ixnew; /* will be overlaid on file[1] */ + long *ixold; /* will be overlaid on klist */ + struct cand *clist; /* merely a free storage pot for candidates */ + int clistlen; /* the length of clist */ + struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ + struct context_vec *context_vec_start; + struct context_vec *context_vec_end; + struct context_vec *context_vec_ptr; + struct stat stb1, stb2; +}; +#define G (*ptr_to_globals) +#define dl (G.dl ) +#define dl_count (G.dl_count ) +#define context (G.context ) +#define max_context (G.max_context ) +#define status (G.status ) +#define start (G.start ) +#define label1 (G.label1 ) +#define label2 (G.label2 ) +#define file (G.file ) +#define J (G.J ) +#define class (G.class ) +#define klist (G.klist ) +#define member (G.member ) +#define clen (G.clen ) +#define len (G.len ) +#define pref (G.pref ) +#define suff (G.suff ) +#define slen (G.slen ) +#define anychange (G.anychange ) +#define ixnew (G.ixnew ) +#define ixold (G.ixold ) +#define clist (G.clist ) +#define clistlen (G.clistlen ) +#define sfile (G.sfile ) +#define context_vec_start (G.context_vec_start ) +#define context_vec_end (G.context_vec_end ) +#define context_vec_ptr (G.context_vec_ptr ) +#define stb1 (G.stb1 ) +#define stb2 (G.stb2 ) +#define INIT_G() do { \ + PTR_TO_GLOBALS = xzalloc(sizeof(G)); \ + context = 3; \ + max_context = 64; \ +} while (0) + + +static void print_only(const char *path, size_t dirlen, const char *entry) +{ + if (dirlen > 1) + dirlen--; + printf("Only in %.*s: %s\n", (int) dirlen, path, entry); +} + +static void print_status(int val, char *path1, char *path2, char *entry) +{ + const char * const _entry = entry ? entry : ""; + char * const _path1 = entry ? concat_path_file(path1, _entry) : path1; + char * const _path2 = entry ? concat_path_file(path2, _entry) : path2; + + switch (val) { + case D_ONLY: + print_only(path1, strlen(path1), entry); + break; + case D_COMMON: + printf("Common subdirectories: %s and %s\n", _path1, _path2); + break; + case D_BINARY: + printf("Binary files %s and %s differ\n", _path1, _path2); + break; + case D_DIFFER: + if (option_mask32 & FLAG_q) + printf("Files %s and %s differ\n", _path1, _path2); + break; + case D_SAME: + if (option_mask32 & FLAG_s) + printf("Files %s and %s are identical\n", _path1, _path2); + break; + case D_MISMATCH1: + printf("File %s is a %s while file %s is a %s\n", + _path1, "directory", _path2, "regular file"); + break; + case D_MISMATCH2: + printf("File %s is a %s while file %s is a %s\n", + _path1, "regular file", _path2, "directory"); + break; + case D_SKIPPED1: + printf("File %s is not a regular file or directory and was skipped\n", + _path1); + break; + case D_SKIPPED2: + printf("File %s is not a regular file or directory and was skipped\n", + _path2); + break; + } + if (entry) { + free(_path1); + free(_path2); + } +} +static void fiddle_sum(int *sum, int t) +{ + *sum = (int)(*sum * 127 + t); +} +/* + * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. + */ +static int readhash(FILE * f) +{ + int i, t, space; + int sum; + + sum = 1; + space = 0; + if (!(option_mask32 & (FLAG_b | FLAG_w))) { + for (i = 0; (t = getc(f)) != '\n'; i++) { + if (t == EOF) { + if (i == 0) + return 0; + break; + } + fiddle_sum(&sum, t); + } + } else { + for (i = 0;;) { + switch (t = getc(f)) { + case '\t': + case '\r': + case '\v': + case '\f': + case ' ': + space++; + continue; + default: + if (space && !(option_mask32 & FLAG_w)) { + i++; + space = 0; + } + fiddle_sum(&sum, t); + i++; + continue; + case EOF: + if (i == 0) + return 0; + /* FALLTHROUGH */ + case '\n': + break; + } + break; + } + } + /* + * There is a remote possibility that we end up with a zero sum. + * Zero is used as an EOF marker, so return 1 instead. + */ + return (sum == 0 ? 1 : sum); +} + + +/* + * Check to see if the given files differ. + * Returns 0 if they are the same, 1 if different, and -1 on error. + */ +static int files_differ(FILE * f1, FILE * f2, int flags) +{ + size_t i, j; + + if ((flags & (D_EMPTY1 | D_EMPTY2)) || stb1.st_size != stb2.st_size + || (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT) + ) { + return 1; + } + while (1) { + i = fread(bb_common_bufsiz1, 1, BUFSIZ/2, f1); + j = fread(bb_common_bufsiz1 + BUFSIZ/2, 1, BUFSIZ/2, f2); + if (i != j) + return 1; + if (i == 0) + return (ferror(f1) || ferror(f2)); + if (memcmp(bb_common_bufsiz1, + bb_common_bufsiz1 + BUFSIZ/2, i) != 0) + return 1; + } +} + + +static void prepare(int i, FILE * fd, off_t filesize) +{ + struct line *p; + int h; + size_t j, sz; + + rewind(fd); + + sz = (filesize <= FSIZE_MAX ? filesize : FSIZE_MAX) / 25; + if (sz < 100) + sz = 100; + + p = xmalloc((sz + 3) * sizeof(struct line)); + j = 0; + while ((h = readhash(fd))) { + if (j == sz) { + sz = sz * 3 / 2; + p = xrealloc(p, (sz + 3) * sizeof(struct line)); + } + p[++j].value = h; + } + len[i] = j; + file[i] = p; +} + + +static void prune(void) +{ + int i, j; + + for (pref = 0; pref < len[0] && pref < len[1] && + file[0][pref + 1].value == file[1][pref + 1].value; pref++) + ; + for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && + file[0][len[0] - suff].value == file[1][len[1] - suff].value; + suff++) + ; + for (j = 0; j < 2; j++) { + sfile[j] = file[j] + pref; + slen[j] = len[j] - pref - suff; + for (i = 0; i <= slen[j]; i++) + sfile[j][i].serial = i; + } +} + + +static void equiv(struct line *a, int n, struct line *b, int m, int *c) +{ + int i, j; + + i = j = 1; + while (i <= n && j <= m) { + if (a[i].value < b[j].value) + a[i++].value = 0; + else if (a[i].value == b[j].value) + a[i++].value = j; + else + j++; + } + while (i <= n) + a[i++].value = 0; + b[m + 1].value = 0; + j = 0; + while (++j <= m) { + c[j] = -b[j].serial; + while (b[j + 1].value == b[j].value) { + j++; + c[j] = b[j].serial; + } + } + c[j] = -1; +} + + +static int isqrt(int n) +{ + int y, x; + + if (n == 0) + return 0; + x = 1; + do { + y = x; + x = n / x; + x += y; + x /= 2; + } while ((x - y) > 1 || (x - y) < -1); + + return x; +} + + +static int newcand(int x, int y, int pred) +{ + struct cand *q; + + if (clen == clistlen) { + clistlen = clistlen * 11 / 10; + clist = xrealloc(clist, clistlen * sizeof(struct cand)); + } + q = clist + clen; + q->x = x; + q->y = y; + q->pred = pred; + return clen++; +} + + +static int search(int *c, int k, int y) +{ + int i, j, l, t; + + if (clist[c[k]].y < y) /* quick look for typical case */ + return k + 1; + i = 0; + j = k + 1; + while (1) { + l = i + j; + if ((l >>= 1) <= i) + break; + t = clist[c[l]].y; + if (t > y) + j = l; + else if (t < y) + i = l; + else + return l; + } + return l + 1; +} + + +static int stone(int *a, int n, int *b, int *c) +{ + int i, k, y, j, l; + int oldc, tc, oldl; + unsigned int numtries; + +#if ENABLE_FEATURE_DIFF_MINIMAL + const unsigned int bound = + (option_mask32 & FLAG_d) ? UINT_MAX : MAX(256, isqrt(n)); +#else + const unsigned int bound = MAX(256, isqrt(n)); +#endif + k = 0; + c[0] = newcand(0, 0, 0); + for (i = 1; i <= n; i++) { + j = a[i]; + if (j == 0) + continue; + y = -b[j]; + oldl = 0; + oldc = c[0]; + numtries = 0; + do { + if (y <= clist[oldc].y) + continue; + l = search(c, k, y); + if (l != oldl + 1) + oldc = c[l - 1]; + if (l <= k) { + if (clist[c[l]].y <= y) + continue; + tc = c[l]; + c[l] = newcand(i, y, oldc); + oldc = tc; + oldl = l; + numtries++; + } else { + c[l] = newcand(i, y, oldc); + k++; + break; + } + } while ((y = b[++j]) > 0 && numtries < bound); + } + return k; +} + + +static void unravel(int p) +{ + struct cand *q; + int i; + + for (i = 0; i <= len[0]; i++) + J[i] = i <= pref ? i : i > len[0] - suff ? i + len[1] - len[0] : 0; + for (q = clist + p; q->y != 0; q = clist + q->pred) + J[q->x + pref] = q->y + pref; +} + + +static void unsort(struct line *f, int l, int *b) +{ + int *a, i; + + a = xmalloc((l + 1) * sizeof(int)); + for (i = 1; i <= l; i++) + a[f[i].serial] = f[i].value; + for (i = 1; i <= l; i++) + b[i] = a[i]; + free(a); +} + + +static int skipline(FILE * f) +{ + int i, c; + + for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) + continue; + return i; +} + + +/* + * Check does double duty: + * 1. ferret out any fortuitous correspondences due + * to confounding by hashing (which result in "jackpot") + * 2. collect random access indexes to the two files + */ +static void check(FILE * f1, FILE * f2) +{ + int i, j, jackpot, c, d; + long ctold, ctnew; + + rewind(f1); + rewind(f2); + j = 1; + ixold[0] = ixnew[0] = 0; + jackpot = 0; + ctold = ctnew = 0; + for (i = 1; i <= len[0]; i++) { + if (J[i] == 0) { + ixold[i] = ctold += skipline(f1); + continue; + } + while (j < J[i]) { + ixnew[j] = ctnew += skipline(f2); + j++; + } + if ((option_mask32 & FLAG_b) || (option_mask32 & FLAG_w) + || (option_mask32 & FLAG_i)) { + while (1) { + c = getc(f1); + d = getc(f2); + /* + * GNU diff ignores a missing newline + * in one file if bflag || wflag. + */ + if (((option_mask32 & FLAG_b) || (option_mask32 & FLAG_w)) && + ((c == EOF && d == '\n') || (c == '\n' && d == EOF))) { + break; + } + ctold++; + ctnew++; + if ((option_mask32 & FLAG_b) && isspace(c) && isspace(d)) { + do { + if (c == '\n') + break; + ctold++; + } while (isspace(c = getc(f1))); + do { + if (d == '\n') + break; + ctnew++; + } while (isspace(d = getc(f2))); + } else if (option_mask32 & FLAG_w) { + while (isspace(c) && c != '\n') { + c = getc(f1); + ctold++; + } + while (isspace(d) && d != '\n') { + d = getc(f2); + ctnew++; + } + } + if (c != d) { + jackpot++; + J[i] = 0; + if (c != '\n' && c != EOF) + ctold += skipline(f1); + if (d != '\n' && c != EOF) + ctnew += skipline(f2); + break; + } + if (c == '\n' || c == EOF) + break; + } + } else { + while (1) { + ctold++; + ctnew++; + if ((c = getc(f1)) != (d = getc(f2))) { + J[i] = 0; + if (c != '\n' && c != EOF) + ctold += skipline(f1); + if (d != '\n' && c != EOF) + ctnew += skipline(f2); + break; + } + if (c == '\n' || c == EOF) + break; + } + } + ixold[i] = ctold; + ixnew[j] = ctnew; + j++; + } + for (; j <= len[1]; j++) + ixnew[j] = ctnew += skipline(f2); +} + + +/* shellsort CACM #201 */ +static void sort(struct line *a, int n) +{ + struct line *ai, *aim, w; + int j, m = 0, k; + + if (n == 0) + return; + for (j = 1; j <= n; j *= 2) + m = 2 * j - 1; + for (m /= 2; m != 0; m /= 2) { + k = n - m; + for (j = 1; j <= k; j++) { + for (ai = &a[j]; ai > a; ai -= m) { + aim = &ai[m]; + if (aim < ai) + break; /* wraparound */ + if (aim->value > ai[0].value || + (aim->value == ai[0].value && aim->serial > ai[0].serial)) + break; + w.value = ai[0].value; + ai[0].value = aim->value; + aim->value = w.value; + w.serial = ai[0].serial; + ai[0].serial = aim->serial; + aim->serial = w.serial; + } + } + } +} + + +static void uni_range(int a, int b) +{ + if (a < b) + printf("%d,%d", a, b - a + 1); + else if (a == b) + printf("%d", b); + else + printf("%d,0", b); +} + + +static void fetch(long *f, int a, int b, FILE * lb, int ch) +{ + int i, j, c, lastc, col, nc; + + if (a > b) + return; + for (i = a; i <= b; i++) { + fseek(lb, f[i - 1], SEEK_SET); + nc = f[i] - f[i - 1]; + if (ch != '\0') { + putchar(ch); + if (option_mask32 & FLAG_T) + putchar('\t'); + } + col = 0; + for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { + if ((c = getc(lb)) == EOF) { + printf("\n\\ No newline at end of file\n"); + return; + } + if (c == '\t' && (option_mask32 & FLAG_t)) { + do { + putchar(' '); + } while (++col & 7); + } else { + putchar(c); + col++; + } + } + } +} + + +static int asciifile(FILE * f) +{ +#if ENABLE_FEATURE_DIFF_BINARY + int i, cnt; +#endif + + if ((option_mask32 & FLAG_a) || f == NULL) + return 1; + +#if ENABLE_FEATURE_DIFF_BINARY + rewind(f); + cnt = fread(bb_common_bufsiz1, 1, BUFSIZ, f); + for (i = 0; i < cnt; i++) { + if (!isprint(bb_common_bufsiz1[i]) + && !isspace(bb_common_bufsiz1[i])) { + return 0; + } + } +#endif + return 1; +} + + +/* dump accumulated "unified" diff changes */ +static void dump_unified_vec(FILE * f1, FILE * f2) +{ + struct context_vec *cvp = context_vec_start; + int lowa, upb, lowc, upd; + int a, b, c, d; + char ch; + + if (context_vec_start > context_vec_ptr) + return; + + b = d = 0; /* gcc */ + lowa = MAX(1, cvp->a - context); + upb = MIN(len[0], context_vec_ptr->b + context); + lowc = MAX(1, cvp->c - context); + upd = MIN(len[1], context_vec_ptr->d + context); + + printf("@@ -"); + uni_range(lowa, upb); + printf(" +"); + uni_range(lowc, upd); + printf(" @@\n"); + + /* + * Output changes in "unified" diff format--the old and new lines + * are printed together. + */ + for (; cvp <= context_vec_ptr; cvp++) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + /* + * c: both new and old changes + * d: only changes in the old file + * a: only changes in the new file + */ + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; +#if 0 + switch (ch) { + case 'c': + fetch(ixold, lowa, a - 1, f1, ' '); + fetch(ixold, a, b, f1, '-'); + fetch(ixnew, c, d, f2, '+'); + break; + case 'd': + fetch(ixold, lowa, a - 1, f1, ' '); + fetch(ixold, a, b, f1, '-'); + break; + case 'a': + fetch(ixnew, lowc, c - 1, f2, ' '); + fetch(ixnew, c, d, f2, '+'); + break; + } +#else + if (ch == 'c' || ch == 'd') { + fetch(ixold, lowa, a - 1, f1, ' '); + fetch(ixold, a, b, f1, '-'); + } + if (ch == 'a') + fetch(ixnew, lowc, c - 1, f2, ' '); + if (ch == 'c' || ch == 'a') + fetch(ixnew, c, d, f2, '+'); +#endif + lowa = b + 1; + lowc = d + 1; + } + fetch(ixnew, d + 1, upd, f2, ' '); + + context_vec_ptr = context_vec_start - 1; +} + + +static void print_header(const char *file1, const char *file2) +{ + if (label1) + printf("--- %s\n", label1); + else + printf("--- %s\t%s", file1, ctime(&stb1.st_mtime)); + if (label2) + printf("+++ %s\n", label2); + else + printf("+++ %s\t%s", file2, ctime(&stb2.st_mtime)); +} + + +/* + * Indicate that there is a difference between lines a and b of the from file + * to get to lines c to d of the to file. If a is greater than b then there + * are no lines in the from file involved and this means that there were + * lines appended (beginning at b). If c is greater than d then there are + * lines missing from the to file. + */ +static void change(char *file1, FILE * f1, char *file2, FILE * f2, int a, + int b, int c, int d) +{ + if ((a > b && c > d) || (option_mask32 & FLAG_q)) { + anychange = 1; + return; + } + + /* + * Allocate change records as needed. + */ + if (context_vec_ptr == context_vec_end - 1) { + ptrdiff_t offset = context_vec_ptr - context_vec_start; + + max_context <<= 1; + context_vec_start = xrealloc(context_vec_start, + max_context * sizeof(struct context_vec)); + context_vec_end = context_vec_start + max_context; + context_vec_ptr = context_vec_start + offset; + } + if (anychange == 0) { + /* + * Print the context/unidiff header first time through. + */ + print_header(file1, file2); + } else if (a > context_vec_ptr->b + (2 * context) + 1 && + c > context_vec_ptr->d + (2 * context) + 1) { + /* + * If this change is more than 'context' lines from the + * previous change, dump the record and reset it. + */ + dump_unified_vec(f1, f2); + } + context_vec_ptr++; + context_vec_ptr->a = a; + context_vec_ptr->b = b; + context_vec_ptr->c = c; + context_vec_ptr->d = d; + anychange = 1; +} + + +static void output(char *file1, FILE * f1, char *file2, FILE * f2) +{ + /* Note that j0 and j1 can't be used as they are defined in math.h. + * This also allows the rather amusing variable 'j00'... */ + int m, i0, i1, j00, j01; + + rewind(f1); + rewind(f2); + m = len[0]; + J[0] = 0; + J[m + 1] = len[1] + 1; + for (i0 = 1; i0 <= m; i0 = i1 + 1) { + while (i0 <= m && J[i0] == J[i0 - 1] + 1) + i0++; + j00 = J[i0 - 1] + 1; + i1 = i0 - 1; + while (i1 < m && J[i1 + 1] == 0) + i1++; + j01 = J[i1 + 1] - 1; + J[i1] = j01; + change(file1, f1, file2, f2, i0, i1, j00, j01); + } + if (m == 0) { + change(file1, f1, file2, f2, 1, 0, 1, len[1]); + } + if (anychange != 0 && !(option_mask32 & FLAG_q)) { + dump_unified_vec(f1, f2); + } +} + +/* + * The following code uses an algorithm due to Harold Stone, + * which finds a pair of longest identical subsequences in + * the two files. + * + * The major goal is to generate the match vector J. + * J[i] is the index of the line in file1 corresponding + * to line i file0. J[i] = 0 if there is no + * such line in file1. + * + * Lines are hashed so as to work in core. All potential + * matches are located by sorting the lines of each file + * on the hash (called ``value''). In particular, this + * collects the equivalence classes in file1 together. + * Subroutine equiv replaces the value of each line in + * file0 by the index of the first element of its + * matching equivalence in (the reordered) file1. + * To save space equiv squeezes file1 into a single + * array member in which the equivalence classes + * are simply concatenated, except that their first + * members are flagged by changing sign. + * + * Next the indices that point into member are unsorted into + * array class according to the original order of file0. + * + * The cleverness lies in routine stone. This marches + * through the lines of file0, developing a vector klist + * of "k-candidates". At step i a k-candidate is a matched + * pair of lines x,y (x in file0 y in file1) such that + * there is a common subsequence of length k + * between the first i lines of file0 and the first y + * lines of file1, but there is no such subsequence for + * any smaller y. x is the earliest possible mate to y + * that occurs in such a subsequence. + * + * Whenever any of the members of the equivalence class of + * lines in file1 matable to a line in file0 has serial number + * less than the y of some k-candidate, that k-candidate + * with the smallest such y is replaced. The new + * k-candidate is chained (via pred) to the current + * k-1 candidate so that the actual subsequence can + * be recovered. When a member has serial number greater + * that the y of all k-candidates, the klist is extended. + * At the end, the longest subsequence is pulled out + * and placed in the array J by unravel + * + * With J in hand, the matches there recorded are + * checked against reality to assure that no spurious + * matches have crept in due to hashing. If they have, + * they are broken, and "jackpot" is recorded--a harmless + * matter except that a true match for a spuriously + * mated line may now be unnecessarily reported as a change. + * + * Much of the complexity of the program comes simply + * from trying to minimize core utilization and + * maximize the range of doable problems by dynamically + * allocating what is needed and reusing what is not. + * The core requirements for problems larger than somewhat + * are (in words) 2*length(file0) + length(file1) + + * 3*(number of k-candidates installed), typically about + * 6n words for files of length n. + */ +static unsigned diffreg(char * ofile1, char * ofile2, int flags) +{ + char *file1 = ofile1; + char *file2 = ofile2; + FILE *f1 = stdin, *f2 = stdin; + unsigned rval; + int i; + + anychange = 0; + context_vec_ptr = context_vec_start - 1; + + if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) + return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); + + rval = D_SAME; + + if (LONE_DASH(file1) && LONE_DASH(file2)) + goto closem; + + if (flags & D_EMPTY1) + f1 = xfopen(bb_dev_null, "r"); + else if (NOT_LONE_DASH(file1)) + f1 = xfopen(file1, "r"); + if (flags & D_EMPTY2) + f2 = xfopen(bb_dev_null, "r"); + else if (NOT_LONE_DASH(file2)) + f2 = xfopen(file2, "r"); + +/* We can't diff non-seekable stream - we use rewind(), fseek(). + * This can be fixed (volunteers?). + * Meanwhile we should check it here by stat'ing input fds, + * but I am lazy and check that in main() instead. + * Check in main won't catch "diffing fifos buried in subdirectories" + * failure scenario - not very likely in real life... */ + + i = files_differ(f1, f2, flags); + if (i == 0) + goto closem; + else if (i != 1) { /* 1 == ok */ + /* error */ + status |= 2; + goto closem; + } + + if (!asciifile(f1) || !asciifile(f2)) { + rval = D_BINARY; + status |= 1; + goto closem; + } + + prepare(0, f1, stb1.st_size); + prepare(1, f2, stb2.st_size); + prune(); + sort(sfile[0], slen[0]); + sort(sfile[1], slen[1]); + + member = (int *) file[1]; + equiv(sfile[0], slen[0], sfile[1], slen[1], member); + member = xrealloc(member, (slen[1] + 2) * sizeof(int)); + + class = (int *) file[0]; + unsort(sfile[0], slen[0], class); + class = xrealloc(class, (slen[0] + 2) * sizeof(int)); + + klist = xmalloc((slen[0] + 2) * sizeof(int)); + clen = 0; + clistlen = 100; + clist = xmalloc(clistlen * sizeof(struct cand)); + i = stone(class, slen[0], member, klist); + free(member); + free(class); + + J = xrealloc(J, (len[0] + 2) * sizeof(int)); + unravel(klist[i]); + free(clist); + free(klist); + + ixold = xrealloc(ixold, (len[0] + 2) * sizeof(long)); + ixnew = xrealloc(ixnew, (len[1] + 2) * sizeof(long)); + check(f1, f2); + output(file1, f1, file2, f2); + + closem: + if (anychange) { + status |= 1; + if (rval == D_SAME) + rval = D_DIFFER; + } + fclose_if_not_stdin(f1); + fclose_if_not_stdin(f2); + if (file1 != ofile1) + free(file1); + if (file2 != ofile2) + free(file2); + return rval; +} + + +#if ENABLE_FEATURE_DIFF_DIR +static void do_diff(char *dir1, char *path1, char *dir2, char *path2) +{ + int flags = D_HEADER; + int val; + char *fullpath1 = NULL; /* if -N */ + char *fullpath2 = NULL; + + if (path1) + fullpath1 = concat_path_file(dir1, path1); + if (path2) + fullpath2 = concat_path_file(dir2, path2); + + if (!fullpath1 || stat(fullpath1, &stb1) != 0) { + flags |= D_EMPTY1; + memset(&stb1, 0, sizeof(stb1)); + if (path2) { + free(fullpath1); + fullpath1 = concat_path_file(dir1, path2); + } + } + if (!fullpath2 || stat(fullpath2, &stb2) != 0) { + flags |= D_EMPTY2; + memset(&stb2, 0, sizeof(stb2)); + stb2.st_mode = stb1.st_mode; + if (path1) { + free(fullpath2); + fullpath2 = concat_path_file(dir2, path1); + } + } + + if (stb1.st_mode == 0) + stb1.st_mode = stb2.st_mode; + + if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { + printf("Common subdirectories: %s and %s\n", fullpath1, fullpath2); + goto ret; + } + + if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode)) + val = D_SKIPPED1; + else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode)) + val = D_SKIPPED2; + else + val = diffreg(fullpath1, fullpath2, flags); + + print_status(val, fullpath1, fullpath2, NULL); + ret: + free(fullpath1); + free(fullpath2); +} +#endif + + +#if ENABLE_FEATURE_DIFF_DIR +static int dir_strcmp(const void *p1, const void *p2) +{ + return strcmp(*(char *const *) p1, *(char *const *) p2); +} + + +/* This function adds a filename to dl, the directory listing. */ +static int add_to_dirlist(const char *filename, + struct stat ATTRIBUTE_UNUSED * sb, void *userdata, + int depth ATTRIBUTE_UNUSED) +{ + /* +2: with space for eventual trailing NULL */ + dl = xrealloc(dl, (dl_count+2) * sizeof(dl[0])); + dl[dl_count] = xstrdup(filename + (int)(ptrdiff_t)userdata); + dl_count++; + return TRUE; +} + + +/* This returns a sorted directory listing. */ +static char **get_dir(char *path) +{ + dl_count = 0; + dl = xzalloc(sizeof(dl[0])); + + /* If -r has been set, then the recursive_action function will be + * used. Unfortunately, this outputs the root directory along with + * the recursed paths, so use void *userdata to specify the string + * length of the root directory - '(void*)(strlen(path)+)'. + * add_to_dirlist then removes root dir prefix. */ + + if (option_mask32 & FLAG_r) { + recursive_action(path, ACTION_RECURSE|ACTION_FOLLOWLINKS, + add_to_dirlist, NULL, + (void*)(strlen(path)+1), 0); + } else { + DIR *dp; + struct dirent *ep; + + dp = warn_opendir(path); + while ((ep = readdir(dp))) { + if (!strcmp(ep->d_name, "..") || LONE_CHAR(ep->d_name, '.')) + continue; + add_to_dirlist(ep->d_name, NULL, (void*)(int)0, 0); + } + closedir(dp); + } + + /* Sort dl alphabetically. */ + qsort(dl, dl_count, sizeof(char *), dir_strcmp); + + dl[dl_count] = NULL; + return dl; +} + + +static void diffdir(char *p1, char *p2) +{ + char **dirlist1, **dirlist2; + char *dp1, *dp2; + int pos; + + /* Check for trailing slashes. */ + dp1 = last_char_is(p1, '/'); + if (dp1 != NULL) + *dp1 = '\0'; + dp2 = last_char_is(p2, '/'); + if (dp2 != NULL) + *dp2 = '\0'; + + /* Get directory listings for p1 and p2. */ + + dirlist1 = get_dir(p1); + dirlist2 = get_dir(p2); + + /* If -S was set, find the starting point. */ + if (start) { + while (*dirlist1 != NULL && strcmp(*dirlist1, start) < 0) + dirlist1++; + while (*dirlist2 != NULL && strcmp(*dirlist2, start) < 0) + dirlist2++; + if ((*dirlist1 == NULL) || (*dirlist2 == NULL)) + bb_error_msg(bb_msg_invalid_arg, "NULL", "-S"); + } + + /* Now that both dirlist1 and dirlist2 contain sorted directory + * listings, we can start to go through dirlist1. If both listings + * contain the same file, then do a normal diff. Otherwise, behaviour + * is determined by whether the -N flag is set. */ + while (*dirlist1 != NULL || *dirlist2 != NULL) { + dp1 = *dirlist1; + dp2 = *dirlist2; + pos = dp1 == NULL ? 1 : dp2 == NULL ? -1 : strcmp(dp1, dp2); + if (pos == 0) { + do_diff(p1, dp1, p2, dp2); + dirlist1++; + dirlist2++; + } else if (pos < 0) { + if (option_mask32 & FLAG_N) + do_diff(p1, dp1, p2, NULL); + else + print_only(p1, strlen(p1) + 1, dp1); + dirlist1++; + } else { + if (option_mask32 & FLAG_N) + do_diff(p1, NULL, p2, dp2); + else + print_only(p2, strlen(p2) + 1, dp2); + dirlist2++; + } + } +} +#endif + + +int diff_main(int argc, char **argv); +int diff_main(int argc, char **argv) +{ + bool gotstdin = 0; + char *U_opt; + char *f1, *f2; + llist_t *L_arg = NULL; + + INIT_G(); + + /* exactly 2 params; collect multiple -L <label> */ + opt_complementary = "=2:L::"; + getopt32(argc, argv, "abdiL:NqrsS:tTU:wu" + "p" /* ignored (for compatibility) */, + &L_arg, &start, &U_opt); + /*argc -= optind;*/ + argv += optind; + while (L_arg) { + if (label1 && label2) + bb_show_usage(); + if (!label1) + label1 = L_arg->data; + else { /* then label2 is NULL */ + label2 = label1; + label1 = L_arg->data; + } + /* we leak L_arg here... */ + L_arg = L_arg->link; + } + if (option_mask32 & FLAG_U) + context = xatoi_u(U_opt); + + /* + * Do sanity checks, fill in stb1 and stb2 and call the appropriate + * driver routine. Both drivers use the contents of stb1 and stb2. + */ + + f1 = argv[0]; + f2 = argv[1]; + if (LONE_DASH(f1)) { + fstat(STDIN_FILENO, &stb1); + gotstdin = 1; + } else + xstat(f1, &stb1); + if (LONE_DASH(f2)) { + fstat(STDIN_FILENO, &stb2); + gotstdin = 1; + } else + xstat(f2, &stb2); + if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode))) + bb_error_msg_and_die("can't compare - to a directory"); + if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { +#if ENABLE_FEATURE_DIFF_DIR + diffdir(f1, f2); +#else + bb_error_msg_and_die("directory comparison not supported"); +#endif + } else { + if (S_ISDIR(stb1.st_mode)) { + f1 = concat_path_file(f1, f2); + xstat(f1, &stb1); + } + if (S_ISDIR(stb2.st_mode)) { + f2 = concat_path_file(f2, f1); + xstat(f2, &stb2); + } +/* XXX: FIXME: */ +/* We can't diff e.g. stdin supplied by a pipe - we use rewind(), fseek(). + * This can be fixed (volunteers?) */ + if (!S_ISREG(stb1.st_mode) || !S_ISREG(stb2.st_mode)) + bb_error_msg_and_die("can't diff non-seekable stream"); + print_status(diffreg(f1, f2, 0), f1, f2, NULL); + } + return status; +} |