diff options
-rw-r--r-- | editors/sed.c | 159 |
1 files changed, 88 insertions, 71 deletions
diff --git a/editors/sed.c b/editors/sed.c index 96e0dd8..ac765c8 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -30,7 +30,8 @@ - edit commands: (a)ppend, (i)nsert, (c)hange - file commands: (r)ead - backreferences in substitution expressions (\1, \2...\9) - + - grouped commands: {cmd1;cmd2} + (Note: Specifying an address (range) to match is *optional*; commands default to the whole pattern space if no specific address match was requested.) @@ -226,7 +227,7 @@ static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr) /* verify that the 's' is followed by something. That something * (typically a 'slash') is now our regexp delimiter... */ - if (!substr[++idx]) + if (substr[idx] == '\0') error_msg_and_die("bad format in substitution expression"); else sed_cmd->delimiter=substr[idx]; @@ -287,11 +288,6 @@ out: return idx; } -static void move_back(char *str, int offset) -{ - memmove(str, str + offset, strlen(str + offset) + 1); -} - static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr) { int i, j; @@ -317,15 +313,15 @@ static int parse_edit_cmd(sed_cmd_t *sed_cmd, const char *editstr) * is a-ok. * */ - - if (editstr[1] != '\\' || (editstr[2] != '\n' && editstr[2] != '\r')) + if ((*editstr != '\\') || ((editstr[1] != '\n') && (editstr[1] != '\r'))) { error_msg_and_die("bad format in edit expression"); + } /* store the edit line text */ - sed_cmd->editline = xmalloc(strlen(&editstr[3]) + 2); - for (i = 3, j = 0; editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; + sed_cmd->editline = xmalloc(strlen(&editstr[2]) + 2); + for (i = 2, j = 0; editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; i++, j++) { - if (editstr[i] == '\\' && strchr("\n\r", editstr[i+1]) != NULL) { + if ((editstr[i] == '\\') && strchr("\n\r", editstr[i+1]) != NULL) { sed_cmd->editline[j] = '\n'; i++; } else @@ -382,6 +378,53 @@ static int parse_file_cmd(sed_cmd_t *sed_cmd, const char *filecmdstr) static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr) { + /* if it was a single-letter command that takes no arguments (such as 'p' + * or 'd') all we need to do is increment the index past that command */ + if (strchr("pd=", sed_cmd->cmd)) { + cmdstr++; + } + /* handle (s)ubstitution command */ + else if (sed_cmd->cmd == 's') { + cmdstr += parse_subst_cmd(sed_cmd, cmdstr); + } + /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ + else if (strchr("aic", sed_cmd->cmd)) { + if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') + error_msg_and_die("only a beginning address can be specified for edit commands"); + cmdstr += parse_edit_cmd(sed_cmd, cmdstr); + } + /* handle file cmds: (r)ead */ + else if (sed_cmd->cmd == 'r') { + if (sed_cmd->end_line || sed_cmd->end_match) + error_msg_and_die("Command only uses one address"); + cmdstr += parse_file_cmd(sed_cmd, cmdstr); + } + /* handle grouped commands */ + else { + error_msg_and_die("Unsupported command %c", sed_cmd->cmd); + } + + /* give back whatever's left over */ + return(cmdstr); +} + +static char *add_cmd(sed_cmd_t *sed_cmd, char *cmdstr) +{ + + /* Skip over leading whitespace and semicolons */ + cmdstr += strspn(cmdstr, semicolon_whitespace); + + /* if we ate the whole thing, that means there was just trailing + * whitespace or a final / no-op semicolon. either way, get out */ + if (*cmdstr == '\0') { + return(NULL); + } + + /* if this is a comment, jump past it and keep going */ + if (*cmdstr == '#') { + return(strpbrk(cmdstr, "\n\r")); + } + /* parse the command * format is: [addr][,addr]cmd * |----||-----||-| @@ -389,26 +432,26 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr) */ /* first part (if present) is an address: either a '$', a number or a /regex/ */ - cmdstr += get_address(&sed_cmd->delimiter, cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); + cmdstr += get_address(&(sed_cmd->delimiter), cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); /* second part (if present) will begin with a comma */ if (*cmdstr == ',') { - int tmp_idx; + int idx; cmdstr++; - tmp_idx = get_address(&sed_cmd->delimiter, cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); - if (tmp_idx == 0) { + idx = get_address(&(sed_cmd->delimiter), cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); + if (idx == 0) { error_msg_and_die("get_address: no address found in string\n" "\t(you probably didn't check the string you passed me)"); } - cmdstr += tmp_idx; + cmdstr += idx; } /* skip whitespace before the command */ - while (isspace(*cmdstr)) + while (isspace(*cmdstr)) { cmdstr++; + } /* there my be the inversion flag between part2 and part3 */ - sed_cmd->invert = 0; if (*cmdstr == '!') { sed_cmd->invert = 1; cmdstr++; @@ -419,14 +462,16 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr) * and conforming applications shall not follow a '!' character * with <blank>s. */ - if (isblank(*cmdstr) { + if (isblank(cmdstr[idx]) { error_msg_and_die("blank follows '!'"); } #else /* skip whitespace before the command */ - while (isspace(*cmdstr)) + while (isspace(*cmdstr)) { cmdstr++; + } #endif + } /* last part (mandatory) will be a command */ @@ -434,61 +479,34 @@ static char *parse_cmd_str(sed_cmd_t * const sed_cmd, char *cmdstr) error_msg_and_die("missing command"); sed_cmd->cmd = *cmdstr; - - /* if it was a single-letter command that takes no arguments (such as 'p' - * or 'd') all we need to do is increment the index past that command */ - if (strchr("pd=", sed_cmd->cmd)) { - cmdstr++; + cmdstr++; + + if (sed_cmd->cmd == '{') { + do { + char *end_ptr = strpbrk(cmdstr, ";}"); + *end_ptr = '\0'; + add_cmd(sed_cmd, cmdstr); + cmdstr = end_ptr + 1; + } while (*cmdstr != '\0'); + } else { + + cmdstr = parse_cmd_str(sed_cmd, cmdstr); + + /* Add the command to the command array */ + sed_cmds = xrealloc(sed_cmds, sizeof(sed_cmd_t) * (++ncmds)); + sed_cmds[ncmds-1] = xmalloc(sizeof(sed_cmd_t)); + memcpy(sed_cmds[ncmds-1], sed_cmd, sizeof(sed_cmd_t)); } - /* handle (s)ubstitution command */ - else if (sed_cmd->cmd == 's') { - cmdstr += parse_subst_cmd(sed_cmd, cmdstr); - } - /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ - else if (strchr("aic", sed_cmd->cmd)) { - if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') - error_msg_and_die("only a beginning address can be specified for edit commands"); - cmdstr += parse_edit_cmd(sed_cmd, cmdstr); - } - /* handle file cmds: (r)ead */ - else if (sed_cmd->cmd == 'r') { - if (sed_cmd->end_line || sed_cmd->end_match) - error_msg_and_die("Command only uses one address"); - cmdstr += parse_file_cmd(sed_cmd, cmdstr); - } - else { - error_msg_and_die("Unsupported command %c", sed_cmd->cmd); - } - - /* give back whatever's left over */ return(cmdstr); } -static void add_cmd_str(const char * const cmdstr) +static void add_cmd_str(char *cmdstr) { - char *mystr = (char *)cmdstr; - do { - - /* trim leading whitespace and semicolons */ - move_back(mystr, strspn(mystr, semicolon_whitespace)); - /* if we ate the whole thing, that means there was just trailing - * whitespace or a final / no-op semicolon. either way, get out */ - if (strlen(mystr) == 0) - return; - /* if this is a comment, jump past it and keep going */ - if (mystr[0] == '#') { - mystr = strpbrk(mystr, "\n\r"); - continue; - } - /* grow the array */ - sed_cmds = xrealloc(sed_cmds, sizeof(sed_cmd_t *) * (++ncmds)); - /* zero new element */ - sed_cmds[ncmds-1] = xcalloc(1, sizeof(sed_cmd_t)); - /* load command string into new array element, get remainder */ - mystr = parse_cmd_str(sed_cmds[ncmds-1], mystr); - - } while (mystr && strlen(mystr)); + sed_cmd_t *sed_cmd; + sed_cmd = xcalloc(1, sizeof(sed_cmd_t)); + cmdstr = add_cmd(sed_cmd, cmdstr); + } while (cmdstr && strlen(cmdstr)); } @@ -868,7 +886,6 @@ extern int sed_main(int argc, char **argv) } } - /* argv[(optind)..(argc-1)] should be names of file to process. If no * files were specified or '-' was specified, take input from stdin. * Otherwise, we process all the files specified. */ |