Arquivos
hhvm/hphp/runtime/base/string_util.cpp
T
2013-06-19 09:56:50 -07:00

598 linhas
17 KiB
C++

/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#include "hphp/runtime/base/string_util.h"
#include "hphp/util/zend/zend_html.h"
#include "hphp/runtime/base/zend/zend_string.h"
#include "hphp/runtime/base/zend/zend_url.h"
#include "hphp/runtime/base/runtime_error.h"
#include "hphp/runtime/base/array/array_iterator.h"
#include "hphp/runtime/base/builtin_functions.h"
namespace HPHP {
///////////////////////////////////////////////////////////////////////////////
// manipulations
String StringUtil::ToLower(CStrRef input, ToLowerType type /*= ToLowerAll */) {
if (input.empty()) return input;
int len = input.size();
char *ret = nullptr;
switch (type) {
case ToLowerAll:
ret = string_to_lower(input.data(), len);
break;
case ToLowerFirst:
ret = string_to_lower_first(input.data(), len);
break;
case ToLowerWords:
ret = string_to_lower_words(input.data(), len);
break;
default:
assert(false);
break;
}
return String(ret, len, AttachString);
}
String StringUtil::ToUpper(CStrRef input, ToUpperType type /*= ToUpperAll */) {
if (input.empty()) return input;
int len = input.size();
char *ret = nullptr;
switch (type) {
case ToUpperAll:
ret = string_to_upper(input.data(), len);
break;
case ToUpperFirst:
ret = string_to_upper_first(input.data(), len);
break;
case ToUpperWords:
ret = string_to_upper_words(input.data(), len);
break;
default:
assert(false);
break;
}
return String(ret, len, AttachString);
}
String StringUtil::Trim(CStrRef input, TrimType type /* = TrimBoth */,
CStrRef charlist /* = k_HPHP_TRIM_CHARLIST */) {
if (input.empty()) return input;
int len = input.size();
char *ret = string_trim(input.data(), len,
charlist.data(), charlist.length(), type);
if (!ret) {
return input;
}
return String(ret, len, AttachString);
}
String StringUtil::Pad(CStrRef input, int final_length,
CStrRef pad_string /* = " " */,
PadType type /* = PadRight */) {
int len = input.size();
char *ret = string_pad(input.data(), len, final_length, pad_string.data(),
pad_string.size(), type);
if (ret) return String(ret, len, AttachString);
return String();
}
String StringUtil::Reverse(CStrRef input) {
if (input.empty()) return input;
int len = input.size();
return String(string_reverse(input.data(), len), len, AttachString);
}
String StringUtil::Repeat(CStrRef input, int count) {
if (count < 0) {
raise_warning("Second argument has to be greater than or equal to 0");
return String();
}
if (count == 0) {
return "";
}
if (!input.empty()) {
int len = input.size();
char *ret = string_repeat(input.data(), len, count);
if (ret) {
return String(ret, len, AttachString);
}
}
return input;
}
String StringUtil::Shuffle(CStrRef input) {
if (!input.empty()) {
int len = input.size();
char *ret = string_shuffle(input.data(), len);
if (ret) {
return String(ret, len, AttachString);
}
}
return input;
}
String StringUtil::StripHTMLTags(CStrRef input,
CStrRef allowable_tags /* = "" */) {
if (input.empty()) return input;
int len = input.size();
char *ret = string_strip_tags(input.data(), len, allowable_tags.data(),
allowable_tags.size(), false);
return String(ret, len, AttachString);
}
String StringUtil::WordWrap(CStrRef input, int width,
CStrRef wordbreak /* = "\n" */,
bool cut /* = false */) {
if (!input.empty()) {
int len = input.size();
char *ret = string_wordwrap(input.data(), len, width, wordbreak.data(),
wordbreak.size(), cut);
if (ret) {
return String(ret, len, AttachString);
}
return String();
}
return input;
}
///////////////////////////////////////////////////////////////////////////////
// splits/joins
Variant StringUtil::Explode(CStrRef input, CStrRef delimiter,
int limit /* = 0x7FFFFFFF */) {
if (delimiter.empty()) {
throw_invalid_argument("delimiter: (empty)");
return false;
}
Array ret(Array::Create());
if (input.empty()) {
if (limit >= 0) {
ret.append("");
}
return ret;
}
if (limit > 1) {
int pos = input.find(delimiter);
if (pos < 0) {
ret.append(input);
} else {
int len = delimiter.size();
int pos0 = 0;
do {
ret.append(input.substr(pos0, pos - pos0));
pos += len;
pos0 = pos;
} while ((pos = input.find(delimiter, pos)) >= 0 && --limit > 1);
if (pos0 <= input.size()) {
ret.append(input.substr(pos0));
}
}
} else if (limit < 0) {
int pos = input.find(delimiter);
if (pos >= 0) {
vector<int> positions;
int len = delimiter.size();
int pos0 = 0;
int found = 0;
do {
positions.push_back(pos0);
positions.push_back(pos - pos0);
pos += len;
pos0 = pos;
found++;
} while ((pos = input.find(delimiter, pos)) >= 0);
if (pos0 <= input.size()) {
positions.push_back(pos0);
positions.push_back(input.size() - pos0);
found++;
}
int iMax = (found + limit) << 1;
for (int i = 0; i < iMax; i += 2) {
ret.append(input.substr(positions[i], positions[i+1]));
}
} // else we have negative limit and delimiter not found
} else {
ret.append(input);
}
return ret;
}
String StringUtil::Implode(CArrRef items, CStrRef delim) {
int size = items.size();
if (size == 0) return "";
String* sitems = (String*)smart_malloc(size * sizeof(String));
int len = 0;
int lenDelim = delim.size();
int i = 0;
for (ArrayIter iter(items); iter; ++iter) {
new (&sitems[i]) String(iter.second().toString());
len += sitems[i].size() + lenDelim;
i++;
}
len -= lenDelim; // always one delimiter less than count of items
assert(i == size);
String s = String(len, ReserveString);
char *buffer = s.mutableSlice().ptr;
const char *sdelim = delim.data();
char *p = buffer;
for (int i = 0; i < size; i++) {
String &item = sitems[i];
if (i && lenDelim) {
memcpy(p, sdelim, lenDelim);
p += lenDelim;
}
int lenItem = item.size();
if (lenItem) {
memcpy(p, item.data(), lenItem);
p += lenItem;
}
sitems[i].~String();
}
smart_free(sitems);
assert(p - buffer == len);
return s.setSize(len);
}
Variant StringUtil::Split(CStrRef str, int split_length /* = 1 */) {
if (split_length <= 0) {
throw_invalid_argument("split_length: (non-positive)");
return false;
}
Array ret;
int len = str.size();
if (split_length >= len) {
ret.append(str);
} else {
for (int i = 0; i < len; i += split_length) {
ret.append(str.substr(i, split_length));
}
}
return ret;
}
Variant StringUtil::ChunkSplit(CStrRef body, int chunklen /* = 76 */,
CStrRef end /* = "\r\n" */) {
if (chunklen <= 0) {
throw_invalid_argument("chunklen: (non-positive)");
return false;
}
String ret;
int len = body.size();
if (chunklen >= len) {
ret = body;
ret += end;
} else {
char *chunked = string_chunk_split(body.data(), len, end.c_str(),
end.size(), chunklen);
return String(chunked, len, AttachString);
}
return ret;
}
///////////////////////////////////////////////////////////////////////////////
// encoding/decoding
String StringUtil::CEncode(CStrRef input, CStrRef charlist) {
String chars = charlist;
if (chars.isNull()) {
chars = String("\\\x00\x01..\x1f\x7f..\xff", 10, AttachLiteral);
}
if (input.empty() || chars.empty()) return input;
int len = input.size();
char *ret = string_addcslashes(input.c_str(), len, chars.data(),
chars.size());
return String(ret, len, AttachString);
}
String StringUtil::CDecode(CStrRef input) {
if (input.empty()) return input;
int len = input.size();
char *ret = string_stripcslashes(input.c_str(), len);
return String(ret, len, AttachString);
}
String StringUtil::SqlEncode(CStrRef input) {
if (input.empty()) return input;
int len = input.size();
char *ret = string_addslashes(input.c_str(), len);
return String(ret, len, AttachString);
}
String StringUtil::SqlDecode(CStrRef input) {
if (input.empty()) return input;
int len = input.size();
char *ret = string_stripslashes(input.c_str(), len);
return String(ret, len, AttachString);
}
String StringUtil::RegExEncode(CStrRef input) {
if (input.empty()) return input;
int len = input.size();
char *ret = string_quotemeta(input.c_str(), len);
return String(ret, len, AttachString);
}
String StringUtil::HtmlEncode(CStrRef input, QuoteStyle quoteStyle,
const char *charset, bool nbsp) {
if (input.empty()) return input;
assert(charset);
bool utf8 = true;
if (strcasecmp(charset, "ISO-8859-1") == 0) {
utf8 = false;
} else if (strcasecmp(charset, "UTF-8")) {
throw NotImplementedException(charset);
}
int len = input.size();
char *ret = string_html_encode(input.data(), len, quoteStyle != NoQuotes,
quoteStyle == BothQuotes, utf8, nbsp);
if (!ret) {
raise_error("HtmlEncode called on too large input (%d)", len);
}
return String(ret, len, AttachString);
}
#define A1(v, ch) ((v)|((ch) & 64 ? 0 : 1uLL<<((ch)&63)))
#define A2(v, ch) ((v)|((ch) & 64 ? 1uLL<<((ch)&63) : 0))
static const AsciiMap mapNoQuotes = {
{ A1(A1(A1(A1(A1(A1(0, '<'), '>'), '&'), '{'), '}'), '@'),
A2(A2(A2(A2(A2(A2(0, '<'), '>'), '&'), '{'), '}'), '@') }
};
static const AsciiMap mapDoubleQuotes = {
{ A1(A1(A1(A1(A1(A1(A1(0, '<'), '>'), '&'), '{'), '}'), '@'), '"'),
A2(A2(A2(A2(A2(A2(A2(0, '<'), '>'), '&'), '{'), '}'), '@'), '"') }
};
static const AsciiMap mapBothQuotes = {
{ A1(A1(A1(A1(A1(A1(A1(A1(0, '<'), '>'), '&'), '{'), '}'), '@'), '"'), '\''),
A2(A2(A2(A2(A2(A2(A2(A2(0, '<'), '>'), '&'), '{'), '}'), '@'), '"'), '\'') }
};
static const AsciiMap mapNothing = {};
String StringUtil::HtmlEncodeExtra(CStrRef input, QuoteStyle quoteStyle,
const char *charset, bool nbsp,
Array extra) {
if (input.empty()) return input;
assert(charset);
int flags = STRING_HTML_ENCODE_UTF8;
if (nbsp) {
flags |= STRING_HTML_ENCODE_NBSP;
}
if (RuntimeOption::Utf8izeReplace) {
flags |= STRING_HTML_ENCODE_UTF8IZE_REPLACE;
}
if (!*charset || strcasecmp(charset, "UTF-8") == 0) {
} else if (strcasecmp(charset, "ISO-8859-1") == 0) {
flags &= ~STRING_HTML_ENCODE_UTF8;
} else {
throw NotImplementedException(charset);
}
const AsciiMap *am;
AsciiMap tmp;
switch (quoteStyle) {
case FBUtf8Only:
am = &mapNothing;
flags |= STRING_HTML_ENCODE_HIGH;
break;
case FBUtf8:
am = &mapBothQuotes;
flags |= STRING_HTML_ENCODE_HIGH;
break;
case BothQuotes:
am = &mapBothQuotes;
break;
case DoubleQuotes:
am = &mapDoubleQuotes;
break;
case NoQuotes:
am = &mapNoQuotes;
break;
default:
am = &mapNothing;
raise_error("Unknown quote style: %d", (int)quoteStyle);
}
if (quoteStyle != FBUtf8Only && extra.toBoolean()) {
tmp = *am;
am = &tmp;
for (ArrayIter iter(extra); iter; ++iter) {
String item = iter.second().toString();
char c = item.data()[0];
tmp.map[c & 64 ? 1 : 0] |= 1uLL << (c & 63);
}
}
int len = input.size();
char *ret = string_html_encode_extra(input.data(), len,
(StringHtmlEncoding)flags, am);
if (!ret) {
raise_error("HtmlEncode called on too large input (%d)", len);
}
return String(ret, len, AttachString);
}
String StringUtil::HtmlDecode(CStrRef input, QuoteStyle quoteStyle,
const char *charset, bool all) {
if (input.empty()) return input;
assert(charset);
int len = input.size();
char *ret = string_html_decode(input.data(), len, quoteStyle != NoQuotes,
quoteStyle == BothQuotes, charset, all);
if (!ret) {
// null iff charset was not recognized
throw NotImplementedException(charset);
// (charset is not null, see assertion above)
}
return String(ret, len, AttachString);
}
String StringUtil::QuotedPrintableEncode(CStrRef input) {
if (input.empty()) return input;
int len = input.size();
char *ret = string_quoted_printable_encode(input.data(), len);
return String(ret, len, AttachString);
}
String StringUtil::QuotedPrintableDecode(CStrRef input) {
if (input.empty()) return input;
int len = input.size();
char *ret = string_quoted_printable_decode(input.data(), len, false);
return String(ret, len, AttachString);
}
String StringUtil::HexEncode(CStrRef input) {
if (input.empty()) return input;
int len = input.size();
char *ret = string_bin2hex(input.data(), len);
return String(ret, len, AttachString);
}
String StringUtil::HexDecode(CStrRef input) {
if (input.empty()) return input;
int len = input.size();
char *ret = string_hex2bin(input.data(), len);
return String(ret, len, AttachString);
}
String StringUtil::UUEncode(CStrRef input) {
if (input.empty()) return input;
int len;
char *encoded = string_uuencode(input.data(), input.size(), len);
return String(encoded, len, AttachString);
}
String StringUtil::UUDecode(CStrRef input) {
if (!input.empty()) {
int len;
char *decoded = string_uudecode(input.data(), input.size(), len);
if (decoded) {
return String(decoded, len, AttachString);
}
}
return String();
}
String StringUtil::Base64Encode(CStrRef input) {
int len = input.size();
char *ret = string_base64_encode(input.data(), len);
return String(ret, len, AttachString);
}
String StringUtil::Base64Decode(CStrRef input, bool strict /* = false */) {
int len = input.size();
char *ret = string_base64_decode(input.data(), len, strict);
return String(ret, len, AttachString);
}
String StringUtil::UrlEncode(CStrRef input, bool encodePlus /* = true */) {
int len = input.size();
char *ret;
if (encodePlus) {
ret = url_encode(input.data(), len);
} else {
ret = url_raw_encode(input.data(), len);
}
return String(ret, len, AttachString);
}
String StringUtil::UrlDecode(CStrRef input, bool decodePlus /* = true */) {
int len = input.size();
char *ret;
if (decodePlus) {
ret = url_decode(input.data(), len);
} else {
ret = url_raw_decode(input.data(), len);
}
return String(ret, len, AttachString);
}
///////////////////////////////////////////////////////////////////////////////
// formatting
String StringUtil::MoneyFormat(const char *format, double value) {
assert(format);
char *formatted = string_money_format(format, value);
return formatted ? String(formatted, AttachString) : String();
}
///////////////////////////////////////////////////////////////////////////////
// hashing
String StringUtil::Translate(CStrRef input, CStrRef from, CStrRef to) {
if (input.empty()) return input;
int len = input.size();
String retstr(len, ReserveString);
char *ret = retstr.mutableSlice().ptr;
memcpy(ret, input.data(), len);
auto trlen = std::min(from.size(), to.size());
string_translate(ret, len, from.data(), to.data(), trlen);
return retstr.setSize(len);
}
String StringUtil::ROT13(CStrRef input) {
if (input.empty()) return input;
return String(string_rot13(input.data(), input.size()),
input.size(), AttachString);
}
int64_t StringUtil::CRC32(CStrRef input) {
return string_crc32(input.data(), input.size());
}
String StringUtil::Crypt(CStrRef input, const char *salt /* = "" */) {
return String(string_crypt(input.c_str(), salt), AttachString);
}
String StringUtil::MD5(CStrRef input, bool raw /* = false */) {
int len;
char *ret = string_md5(input.data(), input.size(), raw, len);
return String(ret, len, AttachString);
}
String StringUtil::SHA1(CStrRef input, bool raw /* = false */) {
int len;
char *ret = string_sha1(input.data(), input.size(), raw, len);
return String(ret, len, AttachString);
}
///////////////////////////////////////////////////////////////////////////////
}