summaryrefslogtreecommitdiff
path: root/libbb/process_escape_sequence.c
blob: 13022b83eea5428a8c999f549b08486e4bf5a144 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/* vi: set sw=4 ts=4: */
/*
 * Utility routines.
 *
 * Copyright (C) Manuel Novoa III <mjn3@codepoet.org>
 * and Vladimir Oleynik <dzo@simtreas.ru>
 *
 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 */
#include "libbb.h"

#define WANT_HEX_ESCAPES 1

/* Usual "this only works for ascii compatible encodings" disclaimer. */
#undef _tolower
#define _tolower(X) ((X)|((char) 0x20))

/*
 * Decode a single escape sequence.
 *
 * On entry *ptr points at the character that follows the backslash.
 * Recognized forms:
 *  - octal: one to three octal digits; scanning stops early if one more
 *    digit would push the value above UCHAR_MAX;
 *  - hex (only if WANT_HEX_ESCAPES): 'x' followed by one or two hex
 *    digits, upper or lower case;
 *  - one-letter: a b e f n r t v and the backslash itself.
 *
 * Returns the decoded character and advances *ptr past everything that
 * was consumed. If nothing could be decoded, '\\' is returned and *ptr
 * is left where it was: pointing at 'x' for "x<non-hex-char>", or at
 * the NUL / unrecognized character otherwise.
 */
char FAST_FUNC bb_process_escape_sequence(const char **ptr)
{
	/* One-letter escapes. The first half lists the letters and ends
	 * with NUL; the second half holds, at the same offsets, what each
	 * letter translates to. The slot matching the NUL holds '\\',
	 * which is the result for "no letter matched".
	 *
	 * bash builtin "echo -e '\ec'" interprets \e as ESC,
	 * but coreutils "/bin/echo -e '\ec'" does not.
	 * Manpages tend to support coreutils way.
	 * Update: coreutils added support for \e on 28 Oct 2009. */
	static const char charmap[] ALIGN1 = {
		'a',  'b', 'e', 'f',  'n',  'r',  't',  'v',  '\\', '\0',
		'\a', '\b', 27, '\f', '\n', '\r', '\t', '\v', '\\', '\\',
	};
	const char *cur = *ptr;
	unsigned radix = 8;
	unsigned value = 0;
	/* Number of digit slots used so far (limit is 3) */
	unsigned count = 0;

	if (WANT_HEX_ESCAPES && *cur == 'x') {
		/* The 'x' takes one of the three slots,
		 * so at most two hex digits are read */
		radix = 16;
		count = 1;
		cur++;
	}

	/* bash requires leading 0 in octal escapes:
	 * \02 works, \2 does not (prints \ and 2).
	 * We treat \2 as a valid octal escape sequence. */
	for (;;) {
		unsigned digit = (unsigned char)(*cur) - '0';
		unsigned next;
#if WANT_HEX_ESCAPES
		if (digit >= 10) {
			/* Not a decimal digit, try a letter.
			 * Adding 10 unconditionally would map 'A'-'F' and
			 * 'a'-'f' to 10-15, but chars just below the letters
			 * (like '@') would then wrap around to 9 < radix.
			 * So add 10 only when the difference is not negative;
			 * otherwise it stays a huge unsigned value
			 * and gets rejected below. */
			digit = (unsigned char)_tolower(*cur) - 'a';
			if ((int)digit >= 0)
				digit += 10;
		}
#endif
		if (digit >= radix) {
			/* Not a digit in this radix */
			if (WANT_HEX_ESCAPES && radix == 16) {
				/* Take back the slot used by 'x'. If no hex
				 * digit was seen at all, this is \x<bad_char>:
				 * return '\' and leave ptr pointing to x */
				if (--count == 0)
					return '\\';
			}
			break;
		}

		next = value * radix + digit;
		if (next > UCHAR_MAX)
			break; /* does not fit, do not consume this digit */

		value = next;
		cur++;
		if (++count >= 3)
			break;
	}

	if (count == 0) {
		/* Not an octal or hex escape sequence.
		 * Is it a one-letter one? */
		const char *m = charmap;

		while (*m != *cur && *++m != '\0')
			continue;
		/* m stopped either on the matching letter
		 * or on the NUL which ends the list of letters.
		 * Only a real match consumes the input character. */
		if (*m != '\0')
			cur++;
		/* The translation lives half an array further.
		 * Note that \NUL and unrecognized sequence \z give '\'
		 * and leave ptr pointing to NUL or z. */
		value = m[sizeof(charmap) / 2];
	}

	*ptr = cur;

	return (char) value;
}

/*
 * Copy src to dst, replacing every backslash escape sequence with the
 * character bb_process_escape_sequence() decodes it to.
 *
 * Copying stops after the terminating NUL of src has been stored.
 * Returns a pointer to that terminating NUL inside dst.
 */
char* FAST_FUNC strcpy_and_process_escape_sequences(char *dst, const char *src)
{
	for (;; dst++) {
		char raw = *src++;
		char out = raw;

		/* src already points past the backslash here,
		 * which is where the decoder expects it */
		if (raw == '\\')
			out = bb_process_escape_sequence(&src);
		*dst = out;
		/* Test the raw input char, not the decoded one:
		 * an escape which decodes to NUL must not end the copy */
		if (raw == '\0')
			return dst;
	}
}