diff options
-rw-r--r-- | editors/sed.c | 249 | ||||
-rw-r--r-- | sed.c | 249 | ||||
-rw-r--r-- | tests/testcases | 10 |
3 files changed, 273 insertions, 235 deletions
diff --git a/editors/sed.c b/editors/sed.c index 95be018..73ed058 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -156,7 +156,7 @@ static int index_of_next_unescaped_regexp_delim(struct sed_cmd *sed_cmd, const c /* * returns the index in the string just past where the address ends. */ -static int get_address(struct sed_cmd *sed_cmd, const char *str, int *line, regex_t **regex) +static int get_address(struct sed_cmd *sed_cmd, const char *str, int *linenum, regex_t **regex) { char *my_str = strdup(str); int idx = 0; @@ -169,10 +169,10 @@ static int get_address(struct sed_cmd *sed_cmd, const char *str, int *line, rege idx++; } while (isdigit(my_str[idx])); my_str[idx] = 0; - *line = atoi(my_str); + *linenum = atoi(my_str); } else if (my_str[idx] == '$') { - *line = -1; + *linenum = -1; idx++; } else if (my_str[idx] == '/') { @@ -423,13 +423,13 @@ static char *parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr) if (strchr("pd", cmdstr[idx])) { idx++; } - /* handle (s)ubstitution */ + /* handle (s)ubstitution command */ else if (sed_cmd->cmd == 's') { idx += parse_subst_cmd(sed_cmd, &cmdstr[idx]); } /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ - else if (strchr("aic", cmdstr[idx])) { - if (sed_cmd->end_line || sed_cmd->end_match) + else if (strchr("aic", sed_cmd->cmd)) { + if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') error_msg_and_die("only a beginning address can be specified for edit commands"); idx += parse_edit_cmd(sed_cmd, &cmdstr[idx]); } @@ -584,91 +584,13 @@ static int do_subst_command(const struct sed_cmd *sed_cmd, const char *line) return altered; } -static int do_sed_command(const struct sed_cmd *sed_cmd, const char *line) -{ - int altered = 0; - - switch (sed_cmd->cmd) { - - case 'p': - puts(line); - break; - - case 'd': - altered++; - break; - - case 's': - - /* - * Some special cases for 's' printing to make it compliant with - * GNU sed printing behavior (aka "The -n | s///p Matrix"): - * - * -n ONLY = never print anything regardless of any successful - * substitution - * - * s///p ONLY = always print successful substitutions, even if - * the line is going to be printed anyway (line will be printed - * twice). - * - * -n AND s///p = print ONLY a successful substitution ONE TIME; - * no other lines are printed - this is the reason why the 'p' - * flag exists in the first place. - */ - - /* if the user specified that they didn't want anything printed (i.e., a -n - * flag and no 'p' flag after the s///), then there's really no point doing - * anything here. */ - if (be_quiet && !sed_cmd->sub_p) - break; - - /* we print the line once, unless we were told to be quiet */ - if (!be_quiet) - altered = do_subst_command(sed_cmd, line); - - /* we also print the line if we were given the 'p' flag - * (this is quite possibly the second printing) */ - if (sed_cmd->sub_p) - altered = do_subst_command(sed_cmd, line); - - break; - - case 'a': - puts(line); - fputs(sed_cmd->editline, stdout); - altered++; - break; - - case 'i': - fputs(sed_cmd->editline, stdout); - break; - - case 'c': - fputs(sed_cmd->editline, stdout); - altered++; - break; - - case 'r': { - FILE *file; - puts(line); - file = fopen(sed_cmd->filename, "r"); - if (file) - print_file(file); - /* else if we couldn't open the file, no biggie, just don't print anything */ - altered++; - } - break; - } - - return altered; -} static void process_file(FILE *file) { char *line = NULL; static int linenum = 0; /* GNU sed does not restart counting lines at EOF */ unsigned int still_in_range = 0; - int line_altered; + int altered; int i; /* go through every line in the file */ @@ -676,51 +598,144 @@ static void process_file(FILE *file) chomp(line); linenum++; - line_altered = 0; + altered = 0; /* for every line, go through all the commands */ for (i = 0; i < ncmds; i++) { - /* are we acting on a range of matched lines? */ - if (sed_cmds[i].beg_match && sed_cmds[i].end_match) { - if (still_in_range || regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) { - line_altered += do_sed_command(&sed_cmds[i], line); - if (still_in_range && regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0) - still_in_range = 0; - else - still_in_range = 1; - } - } - /* are we trying to match a single line? */ - else if (sed_cmds[i].beg_match) { - if (regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) - line_altered += do_sed_command(&sed_cmds[i], line); - } + /* + * entry point into sedding... + */ + if ( + /* this line number is the first address we're looking for */ + (sed_cmds[i].beg_line && (sed_cmds[i].beg_line == linenum)) || + /* this line matches our first address regex */ + (sed_cmds[i].beg_match && (regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0)) || + /* we are currently within the beginning & ending address range */ + still_in_range + ) { + + /* + * actual sedding + */ + switch (sed_cmds[i].cmd) { + + case 'p': + puts(line); + break; + + case 'd': + altered++; + break; + + case 's': + + /* + * Some special cases for 's' printing to make it compliant with + * GNU sed printing behavior (aka "The -n | s///p Matrix"): + * + * -n ONLY = never print anything regardless of any successful + * substitution + * + * s///p ONLY = always print successful substitutions, even if + * the line is going to be printed anyway (line will be printed + * twice). + * + * -n AND s///p = print ONLY a successful substitution ONE TIME; + * no other lines are printed - this is the reason why the 'p' + * flag exists in the first place. + */ + + /* if the user specified that they didn't want anything printed (i.e., a -n + * flag and no 'p' flag after the s///), then there's really no point doing + * anything here. */ + if (be_quiet && !sed_cmds[i].sub_p) + break; + + /* we print the line once, unless we were told to be quiet */ + if (!be_quiet) + altered = do_subst_command(&sed_cmds[i], line); + + /* we also print the line if we were given the 'p' flag + * (this is quite possibly the second printing) */ + if (sed_cmds[i].sub_p) + altered = do_subst_command(&sed_cmds[i], line); + + break; + + case 'a': + puts(line); + fputs(sed_cmds[i].editline, stdout); + altered++; + break; + + case 'i': + fputs(sed_cmds[i].editline, stdout); + break; + + case 'c': + /* single-address case */ + if (sed_cmds[i].end_match == NULL && sed_cmds[i].end_line == 0) { + fputs(sed_cmds[i].editline, stdout); + } + /* multi-address case */ + else { + /* matching text */ + if (sed_cmds[i].end_match && (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0)) + fputs(sed_cmds[i].editline, stdout); + /* matching line numbers */ + if (sed_cmds[i].end_line > 0 && sed_cmds[i].end_line == linenum) + fputs(sed_cmds[i].editline, stdout); + } + altered++; + + break; + + case 'r': { + FILE *outfile; + puts(line); + outfile = fopen(sed_cmds[i].filename, "r"); + if (outfile) + print_file(outfile); + /* else if we couldn't open the output file, + * no biggie, just don't print anything */ + altered++; + } + break; + } - /* are we acting on a range of line numbers? */ - else if (sed_cmds[i].beg_line > 0 && sed_cmds[i].end_line != 0) { - if (linenum >= sed_cmds[i].beg_line && - (sed_cmds[i].end_line == -1 || linenum <= sed_cmds[i].end_line)) - line_altered += do_sed_command(&sed_cmds[i], line); - } + /* + * exit point from sedding... + */ + if ( + /* this is a single-address command or... */ + (sed_cmds[i].end_line == 0 && sed_cmds[i].end_match == NULL) || ( + /* we were in the middle of our address range (this + * isn't the first time through) and.. */ + (still_in_range == 1) && ( + /* this line number is the last address we're looking for or... */ + (sed_cmds[i].end_line && (sed_cmds[i].end_line == linenum)) || + /* this line matches our last address regex */ + (sed_cmds[i].end_match && (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0)) + ) + ) + ) { + /* we're out of our address range */ + still_in_range = 0; + } - /* are we acting on a specified line number */ - else if (sed_cmds[i].beg_line > 0) { - if (linenum == sed_cmds[i].beg_line) - line_altered += do_sed_command(&sed_cmds[i], line); + /* didn't hit the exit? then we're still in the middle of an address range */ + else { + still_in_range = 1; + } } - - /* not acting on matches or line numbers. act on every line */ - else - line_altered += do_sed_command(&sed_cmds[i], line); - } /* we will print the line unless we were told to be quiet or if the * line was altered (via a 'd'elete or 's'ubstitution), in which case * the altered line was already printed */ - if (!be_quiet && !line_altered) + if (!be_quiet && !altered) puts(line); free(line); @@ -156,7 +156,7 @@ static int index_of_next_unescaped_regexp_delim(struct sed_cmd *sed_cmd, const c /* * returns the index in the string just past where the address ends. */ -static int get_address(struct sed_cmd *sed_cmd, const char *str, int *line, regex_t **regex) +static int get_address(struct sed_cmd *sed_cmd, const char *str, int *linenum, regex_t **regex) { char *my_str = strdup(str); int idx = 0; @@ -169,10 +169,10 @@ static int get_address(struct sed_cmd *sed_cmd, const char *str, int *line, rege idx++; } while (isdigit(my_str[idx])); my_str[idx] = 0; - *line = atoi(my_str); + *linenum = atoi(my_str); } else if (my_str[idx] == '$') { - *line = -1; + *linenum = -1; idx++; } else if (my_str[idx] == '/') { @@ -423,13 +423,13 @@ static char *parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr) if (strchr("pd", cmdstr[idx])) { idx++; } - /* handle (s)ubstitution */ + /* handle (s)ubstitution command */ else if (sed_cmd->cmd == 's') { idx += parse_subst_cmd(sed_cmd, &cmdstr[idx]); } /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ - else if (strchr("aic", cmdstr[idx])) { - if (sed_cmd->end_line || sed_cmd->end_match) + else if (strchr("aic", sed_cmd->cmd)) { + if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') error_msg_and_die("only a beginning address can be specified for edit commands"); idx += parse_edit_cmd(sed_cmd, &cmdstr[idx]); } @@ -584,91 +584,13 @@ static int do_subst_command(const struct sed_cmd *sed_cmd, const char *line) return altered; } -static int do_sed_command(const struct sed_cmd *sed_cmd, const char *line) -{ - int altered = 0; - - switch (sed_cmd->cmd) { - - case 'p': - puts(line); - break; - - case 'd': - altered++; - break; - - case 's': - - /* - * Some special cases for 's' printing to make it compliant with - * GNU sed printing behavior (aka "The -n | s///p Matrix"): - * - * -n ONLY = never print anything regardless of any successful - * substitution - * - * s///p ONLY = always print successful substitutions, even if - * the line is going to be printed anyway (line will be printed - * twice). - * - * -n AND s///p = print ONLY a successful substitution ONE TIME; - * no other lines are printed - this is the reason why the 'p' - * flag exists in the first place. - */ - - /* if the user specified that they didn't want anything printed (i.e., a -n - * flag and no 'p' flag after the s///), then there's really no point doing - * anything here. */ - if (be_quiet && !sed_cmd->sub_p) - break; - - /* we print the line once, unless we were told to be quiet */ - if (!be_quiet) - altered = do_subst_command(sed_cmd, line); - - /* we also print the line if we were given the 'p' flag - * (this is quite possibly the second printing) */ - if (sed_cmd->sub_p) - altered = do_subst_command(sed_cmd, line); - - break; - - case 'a': - puts(line); - fputs(sed_cmd->editline, stdout); - altered++; - break; - - case 'i': - fputs(sed_cmd->editline, stdout); - break; - - case 'c': - fputs(sed_cmd->editline, stdout); - altered++; - break; - - case 'r': { - FILE *file; - puts(line); - file = fopen(sed_cmd->filename, "r"); - if (file) - print_file(file); - /* else if we couldn't open the file, no biggie, just don't print anything */ - altered++; - } - break; - } - - return altered; -} static void process_file(FILE *file) { char *line = NULL; static int linenum = 0; /* GNU sed does not restart counting lines at EOF */ unsigned int still_in_range = 0; - int line_altered; + int altered; int i; /* go through every line in the file */ @@ -676,51 +598,144 @@ static void process_file(FILE *file) chomp(line); linenum++; - line_altered = 0; + altered = 0; /* for every line, go through all the commands */ for (i = 0; i < ncmds; i++) { - /* are we acting on a range of matched lines? */ - if (sed_cmds[i].beg_match && sed_cmds[i].end_match) { - if (still_in_range || regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) { - line_altered += do_sed_command(&sed_cmds[i], line); - if (still_in_range && regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0) - still_in_range = 0; - else - still_in_range = 1; - } - } - /* are we trying to match a single line? */ - else if (sed_cmds[i].beg_match) { - if (regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) - line_altered += do_sed_command(&sed_cmds[i], line); - } + /* + * entry point into sedding... + */ + if ( + /* this line number is the first address we're looking for */ + (sed_cmds[i].beg_line && (sed_cmds[i].beg_line == linenum)) || + /* this line matches our first address regex */ + (sed_cmds[i].beg_match && (regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0)) || + /* we are currently within the beginning & ending address range */ + still_in_range + ) { + + /* + * actual sedding + */ + switch (sed_cmds[i].cmd) { + + case 'p': + puts(line); + break; + + case 'd': + altered++; + break; + + case 's': + + /* + * Some special cases for 's' printing to make it compliant with + * GNU sed printing behavior (aka "The -n | s///p Matrix"): + * + * -n ONLY = never print anything regardless of any successful + * substitution + * + * s///p ONLY = always print successful substitutions, even if + * the line is going to be printed anyway (line will be printed + * twice). + * + * -n AND s///p = print ONLY a successful substitution ONE TIME; + * no other lines are printed - this is the reason why the 'p' + * flag exists in the first place. + */ + + /* if the user specified that they didn't want anything printed (i.e., a -n + * flag and no 'p' flag after the s///), then there's really no point doing + * anything here. */ + if (be_quiet && !sed_cmds[i].sub_p) + break; + + /* we print the line once, unless we were told to be quiet */ + if (!be_quiet) + altered = do_subst_command(&sed_cmds[i], line); + + /* we also print the line if we were given the 'p' flag + * (this is quite possibly the second printing) */ + if (sed_cmds[i].sub_p) + altered = do_subst_command(&sed_cmds[i], line); + + break; + + case 'a': + puts(line); + fputs(sed_cmds[i].editline, stdout); + altered++; + break; + + case 'i': + fputs(sed_cmds[i].editline, stdout); + break; + + case 'c': + /* single-address case */ + if (sed_cmds[i].end_match == NULL && sed_cmds[i].end_line == 0) { + fputs(sed_cmds[i].editline, stdout); + } + /* multi-address case */ + else { + /* matching text */ + if (sed_cmds[i].end_match && (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0)) + fputs(sed_cmds[i].editline, stdout); + /* matching line numbers */ + if (sed_cmds[i].end_line > 0 && sed_cmds[i].end_line == linenum) + fputs(sed_cmds[i].editline, stdout); + } + altered++; + + break; + + case 'r': { + FILE *outfile; + puts(line); + outfile = fopen(sed_cmds[i].filename, "r"); + if (outfile) + print_file(outfile); + /* else if we couldn't open the output file, + * no biggie, just don't print anything */ + altered++; + } + break; + } - /* are we acting on a range of line numbers? */ - else if (sed_cmds[i].beg_line > 0 && sed_cmds[i].end_line != 0) { - if (linenum >= sed_cmds[i].beg_line && - (sed_cmds[i].end_line == -1 || linenum <= sed_cmds[i].end_line)) - line_altered += do_sed_command(&sed_cmds[i], line); - } + /* + * exit point from sedding... + */ + if ( + /* this is a single-address command or... */ + (sed_cmds[i].end_line == 0 && sed_cmds[i].end_match == NULL) || ( + /* we were in the middle of our address range (this + * isn't the first time through) and.. */ + (still_in_range == 1) && ( + /* this line number is the last address we're looking for or... */ + (sed_cmds[i].end_line && (sed_cmds[i].end_line == linenum)) || + /* this line matches our last address regex */ + (sed_cmds[i].end_match && (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0)) + ) + ) + ) { + /* we're out of our address range */ + still_in_range = 0; + } - /* are we acting on a specified line number */ - else if (sed_cmds[i].beg_line > 0) { - if (linenum == sed_cmds[i].beg_line) - line_altered += do_sed_command(&sed_cmds[i], line); + /* didn't hit the exit? then we're still in the middle of an address range */ + else { + still_in_range = 1; + } } - - /* not acting on matches or line numbers. act on every line */ - else - line_altered += do_sed_command(&sed_cmds[i], line); - } /* we will print the line unless we were told to be quiet or if the * line was altered (via a 'd'elete or 's'ubstitution), in which case * the altered line was already printed */ - if (!be_quiet && !line_altered) + if (!be_quiet && !altered) puts(line); free(line); diff --git a/tests/testcases b/tests/testcases index 5043c30..a38d317 100644 --- a/tests/testcases +++ b/tests/testcases @@ -294,11 +294,19 @@ route # rpmunpack -# sed - we can do some one-liners here; probably needs it's own input file +# sed - we can do some one-liners here, some testing is a little +# difficult to do in just this space (like a,i,c cmds). + +# test ^$ matching echo foo | sed -ne '/^$/p' +echo -e "foo\\n\\nbar" | sed -ne '/^$/p' + sed -e '/test$/d' testcases sed -e '/^echo/d' testcases sed -e '/test/s/dangerous/PELIGROSO/' testcases +sed -ne '1,/getopt/p' ../pwd.c +sed -e '/getopt/r ../pwd.c' ../sed.c + # setkeycodes |