change use of html_supported_charset to avoid duplicating work

Change callers of determine_charset to check its result for failure (cs_unknown, which string_html_decode surfaces as a null return), instead of first calling html_supported_charset to pre-validate the charset name and then calling determine_charset, which has to walk the same list of names anyway.
Esse commit está contido em:
steveo
2013-03-26 07:14:52 -07:00
commit de Sara Golemon
commit b4ca4bbbd0
5 arquivos alterados com 34 adições e 25 exclusões
+6 -4
Ver Arquivo
@@ -449,13 +449,15 @@ String StringUtil::HtmlDecode(CStrRef input, QuoteStyle quoteStyle,
assert(charset);
if (!html_supported_charset(charset)) {
throw NotImplementedException(charset);
}
int len = input.size();
char *ret = string_html_decode(input, len, quoteStyle != NoQuotes,
quoteStyle == BothQuotes, charset, all);
if (!ret) {
// null iff charset was not recognized
throw NotImplementedException(charset);
// (charset is not null, see assertion above)
}
return String(ret, len, AttachString);
}
+3 -1
Ver Arquivo
@@ -807,8 +807,10 @@ static const HtmlBasicEntity basic_entities[] = {
};
Array f_get_html_translation_table(int table, int quote_style) {
static entity_charset charset = determine_charset(nullptr); // get default one
char ind[2]; ind[1] = 0;
entity_charset charset = determine_charset(NULL);
assert(charset != entity_charset_enum::cs_unknown);
const int HTML_SPECIALCHARS = 0;
const int HTML_ENTITIES = 1;
+8 -1
Ver Arquivo
@@ -67,8 +67,15 @@ bool ScannerToken::htmlTrim() {
// HTML-decodes m_text in place: hands the text to string_html_decode with
// the xhp flag set, copies the decoded bytes back into m_text, and frees
// the buffer the decoder returned.
void ScannerToken::xhpDecode() {
int len = m_text.size();
// note: 5th arg is charset_hint string; here we pass nullptr to indicate
// "use the default one" which is UTF-8. (Just saves a charset lookup.)
// len is passed by reference; presumably it comes back as the decoded
// length, since it is used to size the result below -- TODO confirm.
char *ret = string_html_decode(m_text.c_str(), len, true,
false, "UTF-8", true, true);
false, nullptr, true, true);
// safety check: the decode function returns null iff the charset is
// unrecognized; i.e. a nullptr result here would mean UTF-8 is NOT
// available. Pretty sure it is universally available!
// (Do the assertion anyway.)
// NOTE(review): assert is normally compiled out under NDEBUG, so a null
// ret would reach string(ret, len) unchecked in such builds -- confirm
// that is acceptable.
assert(ret);
m_text = string(ret, len);
free(ret);
}
+4 -18
Ver Arquivo
@@ -310,14 +310,13 @@ static const struct {
///////////////////////////////////////////////////////////////////////////////
entity_charset determine_charset(const char *charset_hint) {
entity_charset charset = cs_utf_8;
entity_charset charset = cs_unknown;
if (charset_hint == nullptr) {
// default to utf-8
return cs_utf_8;
}
DEBUG_ONLY bool found = false;
size_t len = strlen(charset_hint);
/* now walk the charset map and look for the codeset */
@@ -325,15 +324,10 @@ entity_charset determine_charset(const char *charset_hint) {
if (len == strlen(charset_map[i].codeset) &&
strncasecmp(charset_hint, charset_map[i].codeset, len) == 0) {
charset = charset_map[i].charset;
found = true;
break;
}
}
// All code paths that go into this check html_supported_charset()
// and throw if not.
assert(found && "currently we expect to only use supported charsets");
return charset;
}
@@ -766,6 +760,9 @@ char *string_html_decode(const char *input, int &len,
}
entity_charset charset = determine_charset(charset_hint);
if (charset == cs_unknown) {
return nullptr;
}
char *ret = (char *)malloc(len + 1);
char *q = ret;
@@ -826,16 +823,5 @@ const html_entity_map* html_get_entity_map() {
return entity_map;
}
/**
 * Reports whether `charset` names one of the codesets listed in charset_map.
 * The match is case-insensitive and must cover the whole name.
 * `charset` must be non-null: it is passed straight to strlen.
 */
bool html_supported_charset(const char *charset) {
  const size_t wanted = strlen(charset);
  // The table is terminated by an entry whose codeset is null.
  for (int idx = 0; charset_map[idx].codeset != nullptr; ++idx) {
    const char *candidate = charset_map[idx].codeset;
    if (strlen(candidate) != wanted) {
      continue;  // different length, cannot be the same name
    }
    if (strncasecmp(charset, candidate, wanted) == 0) {
      return true;
    }
  }
  return false;
}
///////////////////////////////////////////////////////////////////////////////
}
+13 -1
Ver Arquivo
@@ -62,6 +62,7 @@ enum entity_charset {
cs_8859_15, cs_utf_8, cs_big5, cs_gb2312,
cs_big5hkscs, cs_sjis, cs_eucjp, cs_koi8r,
cs_cp1251, cs_8859_5, cs_cp866, cs_macroman,
cs_unknown,
cs_end
};
}
@@ -85,6 +86,10 @@ struct html_entity_map {
const html_entity_map* html_get_entity_map();
/*
* returns cs_unknown iff not found;
* if input null, returns default charset of cs_utf_8
*/
entity_charset determine_charset(const char*);
char *string_html_encode(const char *input, int &len, bool encode_double_quote,
@@ -92,11 +97,18 @@ char *string_html_encode(const char *input, int &len, bool encode_double_quote,
char *string_html_encode_extra(const char *input, int &len,
StringHtmlEncoding flags,
const AsciiMap *asciiMap);
/**
* returns decoded string;
* note, can return nullptr if the charset could not be detected
* using the given charset_hint; can also pass in nullptr
* for the charset_hint to use the default one (UTF-8).
* (see determine_charset).
*/
char *string_html_decode(const char *input, int &len,
bool decode_double_quote, bool decode_single_quote,
const char *charset_hint,
bool all, bool xhp = false );
bool html_supported_charset(const char *charset);
///////////////////////////////////////////////////////////////////////////////
}