2 files changed, 200 insertions, 87 deletions
diff --git a/coreutils/printf.c b/coreutils/printf.c
index d877e05..ca035cc 100644
--- a/coreutils/printf.c
+++ b/coreutils/printf.c
@@ -36,14 +36,32 @@
    David MacKenzie <djm@gnu.ai.mit.edu>
 */
 
-
 //   19990508 Busy Boxed! Dave Cinege
 
 #include "libbb.h"
 
-typedef void (*converter)(const char *arg, void *result);
-
-static void multiconvert(const char *arg, void *result, converter convert)
+/* A note on bad input: neither bash 3.2 nor coreutils 6.10 stop on it.
+ * They report it:
+ *  bash: printf: XXX: invalid number
+ *  printf: XXX: expected a numeric value
+ *  bash: printf: 123XXX: invalid number
+ *  printf: 123XXX: value not completely converted
+ * but then they use 0 (or partially converted numeric prefix) as a value
+ * and continue. They exit with 1 in this case.
+ * Both accept insane field width/precision (e.g. %9999999999.9999999999d).
+ * Both print error message and assume 0 if %*.*f width/precision is "bad"
+ *  (but negative numbers are not "bad").
+ * Both accept negative numbers for %u specifier.
+ *
+ * We try to be compatible. We are not compatible here:
+ * - we do not accept -NUM for %u
+ * - exit code is 0 even if "invalid number" was seen (FIXME)
+ * See "if (errno)" checks in the code below.
+ */
+
+typedef void FAST_FUNC (*converter)(const char *arg, void *result);
+
+static int multiconvert(const char *arg, void *result, converter convert)
 {
 	char s[sizeof(int)*3 + 2];
 
@@ -51,43 +69,50 @@ static void multiconvert(const char *arg, void *result, converter convert)
 		sprintf(s, "%d", (unsigned char)arg[1]);
 		arg = s;
 	}
+	errno = 0;
 	convert(arg, result);
-	/* if there was conversion error, print unconverted string */
-	if (errno)
-		fputs(arg, stderr);
+	if (errno) {
+		bb_error_msg("%s: invalid number", arg);
+		return 1;
+	}
+	return 0;
 }
 
-static void conv_strtoul(const char *arg, void *result)
+static void FAST_FUNC conv_strtoul(const char *arg, void *result)
 {
 	*(unsigned long*)result = bb_strtoul(arg, NULL, 0);
 }
-static void conv_strtol(const char *arg, void *result)
+static void FAST_FUNC conv_strtol(const char *arg, void *result)
 {
 	*(long*)result = bb_strtol(arg, NULL, 0);
 }
-static void conv_strtod(const char *arg, void *result)
+static void FAST_FUNC conv_strtod(const char *arg, void *result)
 {
 	char *end;
-	/* Well, this one allows leading whitespace... so what */
-	/* What I like much less is that "-" is accepted too! :( */
+	/* Well, this one allows leading whitespace... so what? */
+	/* What I like much less is that "-" accepted too! :( */
 	*(double*)result = strtod(arg, &end);
-	if (end[0]) errno = ERANGE;
+	if (end[0]) {
+		errno = ERANGE;
+		*(double*)result = 0;
+	}
 }
 
+/* Callers should check errno to detect errors */
 static unsigned long my_xstrtoul(const char *arg)
 {
 	unsigned long result;
-	multiconvert(arg, &result, conv_strtoul);
+	if (multiconvert(arg, &result, conv_strtoul))
+		result = 0;
 	return result;
 }
-
 static long my_xstrtol(const char *arg)
 {
 	long result;
-	multiconvert(arg, &result, conv_strtol);
+	if (multiconvert(arg, &result, conv_strtol))
+		result = 0;
 	return result;
 }
-
 static double my_xstrtod(const char *arg)
 {
 	double result;
@@ -97,14 +122,14 @@ static double my_xstrtod(const char *arg)
 
 static void print_esc_string(char *str)
 {
-	for (; *str; str++) {
+	while (*str) {
 		if (*str == '\\') {
 			str++;
 			bb_putchar(bb_process_escape_sequence((const char **)&str));
 		} else {
 			bb_putchar(*str);
+			str++;
 		}
-
 	}
 }
 
@@ -112,88 +137,109 @@ static void print_direc(char *format, unsigned fmt_length,
 		int field_width, int precision,
 		const char *argument)
 {
+	long lv;
+	double dv;
 	char saved;
+	char *have_prec, *have_width;
+
+	have_prec = strstr(format, ".*");
+	have_width = strchr(format, '*');
+	if (have_width - 1 == have_prec)
+		have_width = NULL;
 
 	saved = format[fmt_length];
 	format[fmt_length] = '\0';
 
 	switch (format[fmt_length - 1]) {
+	case 'c':
+		printf(format, *argument);
+		break;
 	case 'd':
 	case 'i':
-		if (field_width < 0) {
-			if (precision < 0)
-				printf(format, my_xstrtol(argument));
+		lv = my_xstrtol(argument);
+ print_long:
+		/* if (errno) return; - see comment at the top */
+		if (!have_width) {
+			if (!have_prec)
+				printf(format, lv);
 			else
-				printf(format, precision, my_xstrtol(argument));
+				printf(format, precision, lv);
 		} else {
-			if (precision < 0)
-				printf(format, field_width, my_xstrtol(argument));
+			if (!have_prec)
+				printf(format, field_width, lv);
 			else
-				printf(format, field_width, precision, my_xstrtol(argument));
+				printf(format, field_width, precision, lv);
 		}
 		break;
 	case 'o':
 	case 'u':
 	case 'x':
 	case 'X':
-		if (field_width < 0) {
-			if (precision < 0)
-				printf(format, my_xstrtoul(argument));
-			else
-				printf(format, precision, my_xstrtoul(argument));
-		} else {
-			if (precision < 0)
-				printf(format, field_width, my_xstrtoul(argument));
-			else
-				printf(format, field_width, precision, my_xstrtoul(argument));
+		lv = my_xstrtoul(argument);
+		/* cheat: unsigned long and long have same width, so... */
+		goto print_long;
+	case 's':
+		/* Are char* and long the same? (true for most arches) */
+		if (sizeof(argument) == sizeof(lv)) {
+			lv = (long)(ptrdiff_t)argument;
+			goto print_long;
+		} else { /* Hope compiler will optimize it out */
+			if (!have_width) {
+				if (!have_prec)
+					printf(format, argument);
+				else
+					printf(format, precision, argument);
+			} else {
+				if (!have_prec)
+					printf(format, field_width, argument);
+				else
+					printf(format, field_width, precision, argument);
+			}
+			break;
 		}
-		break;
 	case 'f':
 	case 'e':
 	case 'E':
 	case 'g':
 	case 'G':
-		if (field_width < 0) {
-			if (precision < 0)
-				printf(format, my_xstrtod(argument));
+		dv = my_xstrtod(argument);
+		/* if (errno) return; */
+		if (!have_width) {
+			if (!have_prec)
+				printf(format, dv);
 			else
-				printf(format, precision, my_xstrtod(argument));
+				printf(format, precision, dv);
 		} else {
-			if (precision < 0)
-				printf(format, field_width, my_xstrtod(argument));
+			if (!have_prec)
+				printf(format, field_width, dv);
 			else
-				printf(format, field_width, precision, my_xstrtod(argument));
+				printf(format, field_width, precision, dv);
 		}
 		break;
-	case 'c':
-		printf(format, *argument);
-		break;
-	case 's':
-		if (field_width < 0) {
-			if (precision < 0)
-				printf(format, argument);
-			else
-				printf(format, precision, argument);
-		} else {
-			if (precision < 0)
-				printf(format, field_width, argument);
-			else
-				printf(format, field_width, precision, argument);
-		}
-		break;
-	}
+	} /* switch */
 
 	format[fmt_length] = saved;
 }
 
+/* Handle params for "%*.*f". Negative numbers are ok (compat). */
+static int get_width_prec(const char *str)
+{
+	int v = bb_strtoi(str, NULL, 10);
+	if (errno) {
+		bb_error_msg("%s: invalid number", str);
+		v = 0;
+	}
+	return v;
+}
+
 /* Print the text in FORMAT, using ARGV for arguments to any '%' directives.
    Return advanced ARGV.  */
 static char **print_formatted(char *f, char **argv)
 {
 	char *direc_start;      /* Start of % directive.  */
 	unsigned direc_length;  /* Length of % directive.  */
-	int field_width;        /* Arg to first '*', or -1 if none.  */
-	int precision;          /* Arg to second '*', or -1 if none.  */
+	int field_width;        /* Arg to first '*' */
+	int precision;          /* Arg to second '*' */
 	char **saved_argv = argv;
 
 	for (; *f; ++f) {
@@ -201,7 +247,7 @@ static char **print_formatted(char *f, char **argv)
 		case '%':
 			direc_start = f++;
 			direc_length = 1;
-			field_width = precision = -1;
+			field_width = precision = 0;
 			if (*f == '%') {
 				bb_putchar('%');
 				break;
@@ -220,11 +266,8 @@ static char **print_formatted(char *f, char **argv)
 			if (*f == '*') {
 				++f;
 				++direc_length;
-				if (*argv) {
-					field_width = my_xstrtoul(*argv);
-					++argv;
-				} else
-					field_width = 0;
+				if (*argv)
+					field_width = get_width_prec(*argv++);
 			} else {
 				while (isdigit(*f)) {
 					++f;
@@ -237,24 +280,22 @@ static char **print_formatted(char *f, char **argv)
 				if (*f == '*') {
 					++f;
 					++direc_length;
-					if (*argv) {
-						precision = my_xstrtoul(*argv);
-						++argv;
-					} else
-						precision = 0;
-				} else
+					if (*argv)
+						precision = get_width_prec(*argv++);
+				} else {
 					while (isdigit(*f)) {
 						++f;
 						++direc_length;
 					}
+				}
 			}
-			if (*f == 'l' || *f == 'L' || *f == 'h') {
+			if ((*f | 0x20) == 'l' || *f == 'h' || *f == 'z') {
 				++f;
 				++direc_length;
 			}
 			/* needed - try "printf %" without it */
 			if (!strchr("diouxXfeEgGcs", *f)) {
-				bb_error_msg("invalid directive '%s'", direc_start);
+				bb_error_msg("%s: invalid format", direc_start);
 				/* causes main() to exit with error */
 				return saved_argv - 1;
 			}
@@ -263,9 +304,11 @@ static char **print_formatted(char *f, char **argv)
 				print_direc(direc_start, direc_length, field_width,
 							precision, *argv);
 				++argv;
-			} else
+			} else {
 				print_direc(direc_start, direc_length, field_width,
 							precision, "");
+			}
+			/* if (errno) return saved_argv - 1; */
 			break;
 		case '\\':
 			if (*++f == 'c') {
diff --git a/testsuite/printf.tests b/testsuite/printf.tests
index d6b60fb..a5c71ec 100755
--- a/testsuite/printf.tests
+++ b/testsuite/printf.tests
@@ -1,6 +1,6 @@
 #!/bin/sh
-
-set -e
+# Copyright 2008 by Denys Vlasenko
+# Licensed under GPL v2, see file LICENSE for details.
 
 . testing.sh
 
@@ -9,25 +9,95 @@ bb="busybox "
 
 # testing "test name" "command" "expected result" "file input" "stdin"
 
-testing "printf produce no further output 1" \
+testing "printf produces no further output 1" \
 	"${bb}printf '\c' foo" \
 	"" \
 	"" ""
 
-testing "printf produce no further output 2" \
-	"${bb}printf '%s\c' foo \$HOME" \
+testing "printf produces no further output 2" \
+	"${bb}printf '%s\c' foo bar" \
 	"foo" \
 	"" ""
 
-testing "printf repeatedly use pattern for each argv" \
+testing "printf repeatedly uses pattern for each argv" \
 	"${bb}printf '%s\n' foo \$HOME" \
 	"foo\n$HOME\n" \
 	"" ""
 
-# Why ()s are necessary I have no idea...
+testing "printf understands %b escaped_string" \
+	"${bb}printf '%b' 'a\tb' 'c\\d\n' 2>&1; echo \$?" \
+	"a\tbc\\d\n""0\n" \
+	"" ""
+
+testing "printf understands %d '\"x' \"'y\" \"'zTAIL\"" \
+	"${bb}printf '%d\n' '\"x' \"'y\" \"'zTAIL\" 2>&1; echo \$?" \
+	"120\n""121\n""122\n""0\n" \
+	"" ""
+
+testing "printf understands %s '\"x' \"'y\" \"'zTAIL\"" \
+	"${bb}printf '%s\n' '\"x' \"'y\" \"'zTAIL\" 2>&1; echo \$?" \
+	"\"x\n""'y\n""'zTAIL\n""0\n" \
+	"" ""
+
+testing "printf understands %23.12f" \
+	"${bb}printf '|%23.12f|\n' 5.25 2>&1; echo \$?" \
+	"|         5.250000000000|\n""0\n" \
+	"" ""
+
+testing "printf understands %*.*f" \
+	"${bb}printf '|%*.*f|\n' 23 12 5.25 2>&1; echo \$?" \
+	"|         5.250000000000|\n""0\n" \
+	"" ""
+
+testing "printf understands %*f with negative width" \
+	"${bb}printf '|%*f|\n' -23 5.25 2>&1; echo \$?" \
+	"|5.250000               |\n""0\n" \
+	"" ""
+
+testing "printf understands %.*f with negative precision" \
+	"${bb}printf '|%.*f|\n' -12 5.25 2>&1; echo \$?" \
+	"|5.250000|\n""0\n" \
+	"" ""
+
+testing "printf understands %*.*f with negative width/precision" \
+	"${bb}printf '|%*.*f|\n' -23 -12 5.25 2>&1; echo \$?" \
+	"|5.250000               |\n""0\n" \
+	"" ""
+
+testing "printf understands %zd" \
+	"${bb}printf '%zd\n' -5 2>&1; echo \$?" \
+	"-5\n""0\n" \
+	"" ""
+
+testing "printf understands %ld" \
+	"${bb}printf '%ld\n' -5 2>&1; echo \$?" \
+	"-5\n""0\n" \
+	"" ""
+
+# We are "more correct" here than bash/coreutils: they happily print -2
+# as if it is a huge unsigned number
+testing "printf handles %u -N" \
+	"${bb}printf '%u\n' 1 -2 3 2>&1; echo \$?" \
+	"1\n""printf: -2: invalid number\n""0\n""3\n""0\n" \
+	"" ""
+
+# Actually, we are wrong here: exit code should be 1
+testing "printf handles %d bad_input" \
+	"${bb}printf '%d\n' 1 - 2 bad 3 123bad 4 2>&1; echo \$?" \
+"1\n""printf: -: invalid number\n""0\n"\
+"2\n""printf: bad: invalid number\n""0\n"\
+"3\n""printf: 123bad: invalid number\n""0\n"\
+"4\n""0\n" \
+	"" ""
+
 testing "printf aborts on bare %" \
-	"(${bb}printf '%' a b c) 2>&1; echo \$?" \
-	"printf: invalid directive '%'\n""1\n" \
+	"${bb}printf '%' a b c 2>&1; echo \$?" \
+	"printf: %: invalid format\n""1\n" \
+	"" ""
+
+testing "printf aborts on %r" \
+	"${bb}printf '%r' a b c 2>&1; echo \$?" \
+	"printf: %r: invalid format\n""1\n" \
 	"" ""
 
 exit $FAILCOUNT