Add JSON-specific escaping, which has different rules from JS escaping.

BUG=http://crbug.com/11431
TEST=base_unittests.exe --gtest_filter=StringEscapeTest.Json*

Review URL: http://codereview.chromium.org/113606

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@16485 0039d316-1c4b-4281-b951-d872f2087c98
Esse commit está contido em:
erikkay@google.com
2009-05-20 16:43:49 +00:00
commit 5f4ef093a5
8 arquivos alterados com 186 adições e 167 exclusões
+10 -11
Ver Arquivo
@@ -92,16 +92,15 @@ void JSONWriter::BuildJSONString(const Value* const node,
case Value::TYPE_STRING:
{
std::string value;
bool result = node->GetAsString(&value);
DCHECK(result);
if (escape) {
std::wstring value;
bool result = node->GetAsString(&value);
DCHECK(result);
AppendQuotedString(value);
string_escape::JsonDoubleQuote(UTF8ToUTF16(value),
true,
json_string_);
} else {
std::string value;
bool result = node->GetAsString(&value);
DCHECK(result);
string_escape::JavascriptDoubleQuote(value, true, json_string_);
string_escape::JsonDoubleQuote(value, true, json_string_);
}
break;
}
@@ -182,9 +181,9 @@ void JSONWriter::BuildJSONString(const Value* const node,
}
void JSONWriter::AppendQuotedString(const std::wstring& str) {
string_escape::JavascriptDoubleQuote(WideToUTF16Hack(str),
true,
json_string_);
string_escape::JsonDoubleQuote(WideToUTF16Hack(str),
true,
json_string_);
}
void JSONWriter::IndentLine(int depth) {
+28 -39
Ver Arquivo
@@ -11,10 +11,13 @@
namespace string_escape {
// Try to escape |c| as a "SingleEscapeCharacter" (\n, etc). If successful,
// returns true and appends the escape sequence to |dst|.
// returns true and appends the escape sequence to |dst|. This isn't required
// by the spec, but it's more readable by humans than the \uXXXX alternatives.
template<typename CHAR>
static bool JavascriptSingleEscapeChar(const CHAR c, std::string* dst) {
static bool JsonSingleEscapeChar(const CHAR c, std::string* dst) {
// WARNING: if you add a new case here, you need to update the reader as well.
// Note: \v is in the reader, but not here since the JSON spec doesn't
// allow it.
switch (c) {
case '\b':
dst->append("\\b");
@@ -31,9 +34,6 @@ static bool JavascriptSingleEscapeChar(const CHAR c, std::string* dst) {
case '\t':
dst->append("\\t");
break;
case '\v':
dst->append("\\v");
break;
case '\\':
dst->append("\\\\");
break;
@@ -46,25 +46,24 @@ static bool JavascriptSingleEscapeChar(const CHAR c, std::string* dst) {
return true;
}
void JavascriptDoubleQuote(const string16& str,
bool put_in_quotes,
std::string* dst) {
template <class STR>
void JsonDoubleQuoteT(const STR& str,
bool put_in_quotes,
std::string* dst) {
if (put_in_quotes)
dst->push_back('"');
for (string16::const_iterator it = str.begin(); it != str.end(); ++it) {
char16 c = *it;
if (!JavascriptSingleEscapeChar(c, dst)) {
if (c > 255) {
// Non-ascii values need to be unicode escaped.
// TODO(tc): Some unicode values are handled specially. See
// spidermonkey code.
StringAppendF(dst, "\\u%04X", c);
} else if (c < 32 || c > 126) {
// Spidermonkey hex escapes these values.
StringAppendF(dst, "\\x%02X", c);
for (typename STR::const_iterator it = str.begin(); it != str.end(); ++it) {
typename ToUnsigned<typename STR::value_type>::Unsigned c = *it;
if (!JsonSingleEscapeChar(c, dst)) {
if (c < 32 || c > 126) {
// Technically, we could also pass through c > 126 as UTF8, but this is
// also optional. It would also be a pain to implement here.
unsigned int as_uint = static_cast<unsigned int>(c);
StringAppendF(dst, "\\u%04X", as_uint);
} else {
dst->push_back(static_cast<char>(c));
unsigned char ascii = static_cast<unsigned char>(*it);
dst->push_back(ascii);
}
}
}
@@ -73,26 +72,16 @@ void JavascriptDoubleQuote(const string16& str,
dst->push_back('"');
}
void JavascriptDoubleQuote(const std::string& str,
bool put_in_quotes,
std::string* dst) {
if (put_in_quotes)
dst->push_back('"');
void JsonDoubleQuote(const std::string& str,
bool put_in_quotes,
std::string* dst) {
JsonDoubleQuoteT(str, put_in_quotes, dst);
}
for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
unsigned char c = *it;
if (!JavascriptSingleEscapeChar(c, dst)) {
// Hex encode if the character is non-printable 7bit ascii
if (c < 32 || c == 127) {
StringAppendF(dst, "\\x%02X", c);
} else {
dst->push_back(static_cast<char>(c));
}
}
}
if (put_in_quotes)
dst->push_back('"');
void JsonDoubleQuote(const string16& str,
bool put_in_quotes,
std::string* dst) {
JsonDoubleQuoteT(str, put_in_quotes, dst);
}
} // namespace string_escape
+11 -14
Ver Arquivo
@@ -7,28 +7,25 @@
#ifndef BASE_STRING_ESCAPE_H__
#define BASE_STRING_ESCAPE_H__
#include <string>
#include "base/string16.h"
namespace string_escape {
// Escape |str| appropriately for a javascript string literal, _appending_ the
// result to |dst|. This will create standard escape sequences (\b, \n),
// hex escape sequences (\x00), and unicode escape sequences (\uXXXX).
// Escape |str| appropriately for a JSON string literal, _appending_ the
// result to |dst|. This will create unicode escape sequences (\uXXXX).
// If |put_in_quotes| is true, the result will be surrounded in double quotes.
// The outputted literal, when interpreted by the browser, should result in a
// javascript string that is identical and the same length as the input |str|.
void JavascriptDoubleQuote(const string16& str,
bool put_in_quotes,
std::string* dst);
void JsonDoubleQuote(const std::string& str,
bool put_in_quotes,
std::string* dst);
void JsonDoubleQuote(const string16& str,
bool put_in_quotes,
std::string* dst);
// Similar to the wide version, but for narrow strings. It will not use
// \uXXXX unicode escape sequences. It will pass non-7bit characters directly
// into the string unencoded, allowing the browser to interpret the encoding.
// The outputted literal, when interpreted by the browser, could result in a
// javascript string of a different length than the input |str|.
void JavascriptDoubleQuote(const std::string& str,
bool put_in_quotes,
std::string* dst);
} // namespace string_escape
+84 -53
Ver Arquivo
@@ -6,59 +6,90 @@
#include "base/string_escape.h"
#include "base/string_util.h"
TEST(StringEscapeTest, JavascriptDoubleQuote) {
static const char* kToEscape = "\b\001aZ\"\\wee";
static const char* kEscaped = "\\b\\x01aZ\\\"\\\\wee";
static const char* kEscapedQuoted = "\"\\b\\x01aZ\\\"\\\\wee\"";
static const wchar_t* kUToEscape = L"\b\x0001" L"a\x123fZ\"\\wee";
static const char* kUEscaped = "\\b\\x01a\\u123FZ\\\"\\\\wee";
static const char* kUEscapedQuoted = "\"\\b\\x01a\\u123FZ\\\"\\\\wee\"";
namespace {
std::string out;
const struct json_narrow_test_data {
const char* to_escape;
const char* escaped;
} json_narrow_cases[] = {
{"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"},
{"a\b\f\n\r\t\v\1\\.\"z",
"a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"},
{"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"},
};
// Test wide unicode escaping
out = "testy: ";
string_escape::JavascriptDoubleQuote(WideToUTF16(kUToEscape), false, &out);
ASSERT_EQ(std::string("testy: ") + kUEscaped, out);
out = "testy: ";
string_escape::JavascriptDoubleQuote(WideToUTF16(kUToEscape), true, &out);
ASSERT_EQ(std::string("testy: ") + kUEscapedQuoted, out);
// Test null and high bit / negative unicode values
string16 str16 = UTF8ToUTF16("TeSt");
str16.push_back(0);
str16.push_back(0xffb1);
str16.push_back(0x00ff);
out = "testy: ";
string_escape::JavascriptDoubleQuote(str16, false, &out);
ASSERT_EQ("testy: TeSt\\x00\\uFFB1\\xFF", out);
// Test escaping of 7bit ascii
out = "testy: ";
string_escape::JavascriptDoubleQuote(std::string(kToEscape), false, &out);
ASSERT_EQ(std::string("testy: ") + kEscaped, out);
out = "testy: ";
string_escape::JavascriptDoubleQuote(std::string(kToEscape), true, &out);
ASSERT_EQ(std::string("testy: ") + kEscapedQuoted, out);
// Test null, non-printable, and non-7bit
std::string str("TeSt");
str.push_back(0);
str.push_back(15);
str.push_back(127);
str.push_back(-16);
str.push_back(-128);
str.push_back('!');
out = "testy: ";
string_escape::JavascriptDoubleQuote(str, false, &out);
ASSERT_EQ("testy: TeSt\\x00\\x0F\\x7F\xf0\x80!", out);
// Test escape sequences
out = "testy: ";
string_escape::JavascriptDoubleQuote("a\b\f\n\r\t\v\1\\.\"z", false, &out);
ASSERT_EQ("testy: a\\b\\f\\n\\r\\t\\v\\x01\\\\.\\\"z", out);
}
// Checks the std::string overload of string_escape::JsonDoubleQuote against
// the json_narrow_cases table, then checks the put_in_quotes flag and the
// escaping of an embedded NUL character.
TEST(StringEscapeTest, JsonDoubleQuoteNarrow) {
  // Table-driven pass: each input must escape to its paired expectation.
  for (size_t idx = 0; idx < arraysize(json_narrow_cases); ++idx) {
    std::string input = json_narrow_cases[idx].to_escape;
    std::string got;
    string_escape::JsonDoubleQuote(input, false, &got);
    EXPECT_EQ(std::string(json_narrow_cases[idx].escaped), got);
  }

  // Escape the first table entry without quotes; used as the baseline below.
  std::string first_input = json_narrow_cases[0].to_escape;
  std::string unquoted;
  string_escape::JsonDoubleQuote(first_input, false, &unquoted);

  // With put_in_quotes set, the result must be two characters longer and must
  // contain the unquoted text starting at offset 1.
  std::string quoted;
  string_escape::JsonDoubleQuote(first_input, true, &quoted);
  EXPECT_EQ(unquoted.length() + 2, quoted.length());
  EXPECT_EQ(quoted.find(unquoted), 1U);

  // Embedded NUL: "test", a NUL character, then the first table input must
  // produce "test\u0000" followed by the first table expectation.
  std::string nul_input = "test";
  nul_input.push_back(0);
  nul_input += first_input;
  std::string nul_expected = "test\\u0000";
  nul_expected += json_narrow_cases[0].escaped;
  std::string nul_got;
  string_escape::JsonDoubleQuote(nul_input, false, &nul_got);
  EXPECT_EQ(nul_expected, nul_got);
}
namespace {
// Table of cases for the JsonDoubleQuoteWide test: each entry pairs a wide
// string input with the escaped text that the test expects
// string_escape::JsonDoubleQuote to append for it (without surrounding quotes).
const struct json_wide_test_data {
// Input, converted to string16 by the test before escaping.
const wchar_t* to_escape;
// Expected escaped output.
const char* escaped;
} json_wide_cases[] = {
// Code units above the printable ASCII range are expected as \uXXXX.
{L"b\uffb1\u00ff", "b\\uFFB1\\u00FF"},
// Backspace, a control character, a double quote and a backslash.
{L"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"},
// The short escapes \b \f \n \r \t are expected as-is; \v is expected in
// \u000B form rather than as a short escape.
{L"a\b\f\n\r\t\v\1\\.\"z",
"a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"},
// Values 0x0F, 0x7F, 0xF0 and 0xFF are all expected in \u00XX form.
{L"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"},
};
}
// Checks the string16 overload of string_escape::JsonDoubleQuote against the
// json_wide_cases table, then checks the put_in_quotes flag and the escaping
// of an embedded NUL code unit.
TEST(StringEscapeTest, JsonDoubleQuoteWide) {
  // Table-driven pass: each input must escape to its paired expectation.
  for (size_t idx = 0; idx < arraysize(json_wide_cases); ++idx) {
    string16 input = WideToUTF16(json_wide_cases[idx].to_escape);
    std::string got;
    string_escape::JsonDoubleQuote(input, false, &got);
    EXPECT_EQ(std::string(json_wide_cases[idx].escaped), got);
  }

  // Escape the first table entry without quotes; used as the baseline below.
  string16 first_input = WideToUTF16(json_wide_cases[0].to_escape);
  std::string unquoted;
  string_escape::JsonDoubleQuote(first_input, false, &unquoted);

  // With put_in_quotes set, the result must be two characters longer and must
  // contain the unquoted text starting at offset 1.
  std::string quoted;
  string_escape::JsonDoubleQuote(first_input, true, &quoted);
  EXPECT_EQ(unquoted.length() + 2, quoted.length());
  EXPECT_EQ(quoted.find(unquoted), 1U);

  // Embedded NUL: "test", a NUL code unit, then the first table input must
  // produce "test\u0000" followed by the first table expectation.
  string16 nul_input = WideToUTF16(L"test");
  nul_input.push_back(0);
  nul_input += first_input;
  std::string nul_expected = "test\\u0000";
  nul_expected += json_wide_cases[0].escaped;
  std::string nul_got;
  string_escape::JsonDoubleQuote(nul_input, false, &nul_got);
  EXPECT_EQ(nul_expected, nul_got);
}
-29
Ver Arquivo
@@ -37,35 +37,6 @@ struct EmptyStrings {
const string16 s16;
};
// Type trait that maps a char-like type to its unsigned counterpart, exposed
// as the nested typedef |Unsigned|. Types with no specialization below (for
// example unsigned char) map to themselves.
template <typename CharT>
struct ToUnsigned {
  typedef CharT Unsigned;
};

// char -> unsigned char.
template <>
struct ToUnsigned<char> {
  typedef unsigned char Unsigned;
};

// signed char -> unsigned char.
template <>
struct ToUnsigned<signed char> {
  typedef unsigned char Unsigned;
};

// short -> unsigned short.
template <>
struct ToUnsigned<short> {
  typedef unsigned short Unsigned;
};

// wchar_t -> unsigned short or uint32, selected by the WCHAR_T_IS_UTF16 /
// WCHAR_T_IS_UTF32 macros. If neither macro is defined, no |Unsigned| member
// is declared.
template <>
struct ToUnsigned<wchar_t> {
#if defined(WCHAR_T_IS_UTF16)
  typedef unsigned short Unsigned;
#elif defined(WCHAR_T_IS_UTF32)
  typedef uint32 Unsigned;
#endif
};
// Used by ReplaceStringPlaceholders to track the position in the string of
// replaced parameters.
struct ReplacementOffset {
+28
Ver Arquivo
@@ -595,5 +595,33 @@ bool MatchPattern(const std::string& string, const std::string& pattern);
// std::numeric_limits<size_t>::max() / 2
std::string HexEncode(const void* bytes, size_t size);
// Type trait that maps a char-like type to its unsigned counterpart, exposed
// as the nested typedef |Unsigned|. Types with no specialization below (for
// example unsigned char) map to themselves.
template <typename CharT>
struct ToUnsigned {
  typedef CharT Unsigned;
};

// char -> unsigned char.
template <>
struct ToUnsigned<char> {
  typedef unsigned char Unsigned;
};

// signed char -> unsigned char.
template <>
struct ToUnsigned<signed char> {
  typedef unsigned char Unsigned;
};

// short -> unsigned short.
template <>
struct ToUnsigned<short> {
  typedef unsigned short Unsigned;
};

// wchar_t -> unsigned short or uint32, selected by the WCHAR_T_IS_UTF16 /
// WCHAR_T_IS_UTF32 macros. If neither macro is defined, no |Unsigned| member
// is declared.
template <>
struct ToUnsigned<wchar_t> {
#if defined(WCHAR_T_IS_UTF16)
  typedef unsigned short Unsigned;
#elif defined(WCHAR_T_IS_UTF32)
  typedef uint32 Unsigned;
#endif
};
#endif // BASE_STRING_UTIL_H_
+20 -15
Ver Arquivo
@@ -73,19 +73,24 @@ TEST(JSONValueSerializerTest, StringEscape) {
// for (var i = 1; i < 256; ++i) { s += String.fromCharCode(i); }
// uneval(s).replace(/\\/g, "\\\\");
std::string all_chars_expected =
"\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0E\\x0F\\x10"
"\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1A\\x1B\\x1C\\x1D\\x1E"
"\\x1F !\\\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\"
"\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7F\\x80\\x81\\x82\\x83\\x84\\x85"
"\\x86\\x87\\x88\\x89\\x8A\\x8B\\x8C\\x8D\\x8E\\x8F\\x90\\x91\\x92\\x93"
"\\x94\\x95\\x96\\x97\\x98\\x99\\x9A\\x9B\\x9C\\x9D\\x9E\\x9F\\xA0\\xA1"
"\\xA2\\xA3\\xA4\\xA5\\xA6\\xA7\\xA8\\xA9\\xAA\\xAB\\xAC\\xAD\\xAE\\xAF"
"\\xB0\\xB1\\xB2\\xB3\\xB4\\xB5\\xB6\\xB7\\xB8\\xB9\\xBA\\xBB\\xBC\\xBD"
"\\xBE\\xBF\\xC0\\xC1\\xC2\\xC3\\xC4\\xC5\\xC6\\xC7\\xC8\\xC9\\xCA\\xCB"
"\\xCC\\xCD\\xCE\\xCF\\xD0\\xD1\\xD2\\xD3\\xD4\\xD5\\xD6\\xD7\\xD8\\xD9"
"\\xDA\\xDB\\xDC\\xDD\\xDE\\xDF\\xE0\\xE1\\xE2\\xE3\\xE4\\xE5\\xE6\\xE7"
"\\xE8\\xE9\\xEA\\xEB\\xEC\\xED\\xEE\\xEF\\xF0\\xF1\\xF2\\xF3\\xF4\\xF5"
"\\xF6\\xF7\\xF8\\xF9\\xFA\\xFB\\xFC\\xFD\\xFE\\xFF";
"\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n\\u000B\\f\\r"
"\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017"
"\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001E"
"\\u001F !\\\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\"
"\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\u007F\\u0080\\u0081\\u0082\\u0083"
"\\u0084\\u0085\\u0086\\u0087\\u0088\\u0089\\u008A\\u008B\\u008C\\u008D"
"\\u008E\\u008F\\u0090\\u0091\\u0092\\u0093\\u0094\\u0095\\u0096\\u0097"
"\\u0098\\u0099\\u009A\\u009B\\u009C\\u009D\\u009E\\u009F\\u00A0\\u00A1"
"\\u00A2\\u00A3\\u00A4\\u00A5\\u00A6\\u00A7\\u00A8\\u00A9\\u00AA\\u00AB"
"\\u00AC\\u00AD\\u00AE\\u00AF\\u00B0\\u00B1\\u00B2\\u00B3\\u00B4\\u00B5"
"\\u00B6\\u00B7\\u00B8\\u00B9\\u00BA\\u00BB\\u00BC\\u00BD\\u00BE\\u00BF"
"\\u00C0\\u00C1\\u00C2\\u00C3\\u00C4\\u00C5\\u00C6\\u00C7\\u00C8\\u00C9"
"\\u00CA\\u00CB\\u00CC\\u00CD\\u00CE\\u00CF\\u00D0\\u00D1\\u00D2\\u00D3"
"\\u00D4\\u00D5\\u00D6\\u00D7\\u00D8\\u00D9\\u00DA\\u00DB\\u00DC\\u00DD"
"\\u00DE\\u00DF\\u00E0\\u00E1\\u00E2\\u00E3\\u00E4\\u00E5\\u00E6\\u00E7"
"\\u00E8\\u00E9\\u00EA\\u00EB\\u00EC\\u00ED\\u00EE\\u00EF\\u00F0\\u00F1"
"\\u00F2\\u00F3\\u00F4\\u00F5\\u00F6\\u00F7\\u00F8\\u00F9\\u00FA\\u00FB"
"\\u00FC\\u00FD\\u00FE\\u00FF";
std::string expected_output = "{\"all_chars\":\"" + all_chars_expected +
"\"}";
@@ -132,7 +137,7 @@ TEST(JSONValueSerializerTest, HexStrings) {
std::wstring test(L"\x01\x02");
root.SetString(L"test", test);
std::string expected = "{\"test\":\"\\x01\\x02\"}";
std::string expected = "{\"test\":\"\\u0001\\u0002\"}";
std::string actual;
JSONStringValueSerializer serializer(&actual);
@@ -150,7 +155,7 @@ TEST(JSONValueSerializerTest, HexStrings) {
ASSERT_EQ(test, test_value);
// Test converting escaped regular chars
std::string escaped_chars = "{\"test\":\"\\x67\\x6f\"}";
std::string escaped_chars = "{\"test\":\"\\u0067\\u006f\"}";
JSONStringValueSerializer deserializer2(escaped_chars);
deserial_root.reset(deserializer2.Deserialize(NULL));
ASSERT_TRUE(deserial_root.get());
+5 -6
Ver Arquivo
@@ -824,7 +824,7 @@ std::string GetDirectoryListingHeader(const std::string& title) {
std::string result(header.data(), header.size());
result.append("<script>start(");
string_escape::JavascriptDoubleQuote(title, true, &result);
string_escape::JsonDoubleQuote(title, true, &result);
result.append(");</script>\n");
return result;
@@ -836,17 +836,16 @@ std::string GetDirectoryListingEntry(const std::string& name,
const Time& modified) {
std::string result;
result.append("<script>addRow(");
string_escape::JavascriptDoubleQuote(name, true, &result);
string_escape::JsonDoubleQuote(name, true, &result);
result.append(",");
string_escape::JavascriptDoubleQuote(
EscapePath(name), true, &result);
string_escape::JsonDoubleQuote(EscapePath(name), true, &result);
if (is_dir) {
result.append(",1,");
} else {
result.append(",0,");
}
string_escape::JavascriptDoubleQuote(
string_escape::JsonDoubleQuote(
WideToUTF16Hack(FormatBytes(size, GetByteDisplayUnits(size), true)), true,
&result);
@@ -857,7 +856,7 @@ std::string GetDirectoryListingEntry(const std::string& name,
if (!modified.is_null()) {
modified_str = WideToUTF16Hack(base::TimeFormatShortDateAndTime(modified));
}
string_escape::JavascriptDoubleQuote(modified_str, true, &result);
string_escape::JsonDoubleQuote(modified_str, true, &result);
result.append(");</script>\n");