721f89b890
This moves runtime/base/*/* to runtime/base, and fixes paths.
628 linhas
19 KiB
C++
628 linhas
19 KiB
C++
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
   +----------------------------------------------------------------------+
   | This source file is subject to version 2.00 of the Zend license,     |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.zend.com/license/2_00.txt.                                |
   | If you did not receive a copy of the Zend license and are unable to  |
   | obtain it through the world-wide-web, please send a note to          |
   | license@zend.com so we can mail you a copy immediately.              |
   +----------------------------------------------------------------------+
*/
|
|
#include "hphp/runtime/base/zend_collator.h"
|
|
#include "hphp/runtime/base/zend_strtod.h"
|
|
#include "hphp/runtime/base/intl_convert.h"
|
|
#include "hphp/runtime/base/type_conversions.h"
|
|
#include "hphp/runtime/base/builtin_functions.h"
|
|
#include "hphp/runtime/base/types.h"
|
|
#include "hphp/runtime/base/complex_types.h"
|
|
#include "hphp/runtime/base/runtime_error.h"
|
|
#include "hphp/runtime/base/array_iterator.h"
|
|
#include "hphp/runtime/base/comparisons.h"
|
|
|
|
namespace HPHP {
|
|
|
|
IMPLEMENT_REQUEST_LOCAL(IntlError, s_intl_error);
|
|
|
|
#define UCHARS(len) ((len) / sizeof(UChar))
|
|
#define UBYTES(len) ((len) * sizeof(UChar))
|
|
|
|
static Variant collator_convert_string_to_number_if_possible(CVarRef str);
|
|
|
|
static double collator_u_strtod(const UChar *nptr, UChar **endptr) {
|
|
const UChar *u = nptr, *nstart;
|
|
UChar c = *u;
|
|
int any = 0;
|
|
|
|
while (u_isspace(c)) {
|
|
c = *++u;
|
|
}
|
|
nstart = u;
|
|
|
|
if (c == 0x2D /*'-'*/ || c == 0x2B /*'+'*/) {
|
|
c = *++u;
|
|
}
|
|
|
|
while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
|
|
any = 1;
|
|
c = *++u;
|
|
}
|
|
|
|
if (c == 0x2E /*'.'*/) {
|
|
c = *++u;
|
|
while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
|
|
any = 1;
|
|
c = *++u;
|
|
}
|
|
}
|
|
|
|
if ((c == 0x65 /*'e'*/ || c == 0x45 /*'E'*/) && any) {
|
|
const UChar *e = u;
|
|
int any_exp = 0;
|
|
|
|
c = *++u;
|
|
if (c == 0x2D /*'-'*/ || c == 0x2B /*'+'*/) {
|
|
c = *++u;
|
|
}
|
|
|
|
while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
|
|
any_exp = 1;
|
|
c = *++u;
|
|
}
|
|
|
|
if (!any_exp) {
|
|
u = e;
|
|
}
|
|
}
|
|
|
|
if (any) {
|
|
char buf[64], *numbuf, *bufpos;
|
|
int length = u - nstart;
|
|
double value;
|
|
|
|
if (length < (int)sizeof(buf)) {
|
|
numbuf = buf;
|
|
} else {
|
|
numbuf = (char *) malloc(length + 1);
|
|
}
|
|
|
|
bufpos = numbuf;
|
|
|
|
while (nstart < u) {
|
|
*bufpos++ = (char) *nstart++;
|
|
}
|
|
|
|
*bufpos = '\0';
|
|
value = zend_strtod(numbuf, nullptr);
|
|
|
|
if (numbuf != buf) {
|
|
free(numbuf);
|
|
}
|
|
|
|
if (endptr != nullptr) {
|
|
*endptr = (UChar *)u;
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
if (endptr != nullptr) {
|
|
*endptr = (UChar *)nptr;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static long collator_u_strtol(const UChar *nptr, UChar **endptr,
|
|
int base) {
|
|
const UChar *s = nptr;
|
|
unsigned long acc;
|
|
UChar c;
|
|
unsigned long cutoff;
|
|
int neg = 0, any, cutlim;
|
|
|
|
if (s == nullptr) {
|
|
errno = ERANGE;
|
|
if (endptr != nullptr) {
|
|
*endptr = nullptr;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Skip white space and pick up leading +/- sign if any.
|
|
* If base is 0, allow 0x for hex and 0 for octal, else
|
|
* assume decimal; if base is already 16, allow 0x.
|
|
*/
|
|
do {
|
|
c = *s++;
|
|
} while (u_isspace(c));
|
|
if (c == 0x2D /*'-'*/) {
|
|
neg = 1;
|
|
c = *s++;
|
|
} else if (c == 0x2B /*'+'*/)
|
|
c = *s++;
|
|
if ((base == 0 || base == 16) &&
|
|
(c == 0x30 /*'0'*/)
|
|
&& (*s == 0x78 /*'x'*/ || *s == 0x58 /*'X'*/)) {
|
|
c = s[1];
|
|
s += 2;
|
|
base = 16;
|
|
}
|
|
if (base == 0)
|
|
base = (c == 0x30 /*'0'*/) ? 8 : 10;
|
|
|
|
/*
|
|
* Compute the cutoff value between legal numbers and illegal
|
|
* numbers. That is the largest legal value, divided by the
|
|
* base. An input number that is greater than this value, if
|
|
* followed by a legal input character, is too big. One that
|
|
* is equal to this value may be valid or not; the limit
|
|
* between valid and invalid numbers is then based on the last
|
|
* digit. For instance, if the range for longs is
|
|
* [-2147483648..2147483647] and the input base is 10,
|
|
* cutoff will be set to 214748364 and cutlim to either
|
|
* 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
|
|
* a value > 214748364, or equal but the next digit is > 7 (or 8),
|
|
* the number is too big, and we will return a range error.
|
|
*
|
|
* Set any if any `digits' consumed; make it negative to indicate
|
|
* overflow.
|
|
*/
|
|
cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX;
|
|
cutlim = cutoff % (unsigned long)base;
|
|
cutoff /= (unsigned long)base;
|
|
for (acc = 0, any = 0;; c = *s++) {
|
|
if (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/)
|
|
c -= 0x30 /*'0'*/;
|
|
else if (c >= 0x41 /*'A'*/ && c <= 0x5A /*'Z'*/)
|
|
c -= 0x41 /*'A'*/ - 10;
|
|
else if (c >= 0x61 /*'a'*/ && c <= 0x7A /*'z'*/)
|
|
c -= 0x61 /*'a'*/ - 10;
|
|
else
|
|
break;
|
|
if (c >= base)
|
|
break;
|
|
|
|
if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
|
|
any = -1;
|
|
else {
|
|
any = 1;
|
|
acc *= base;
|
|
acc += c;
|
|
}
|
|
}
|
|
if (any < 0) {
|
|
acc = neg ? LONG_MIN : LONG_MAX;
|
|
errno = ERANGE;
|
|
} else if (neg)
|
|
acc = -acc;
|
|
if (endptr != nullptr)
|
|
*endptr = (UChar *)(any ? s - 1 : nptr);
|
|
return (acc);
|
|
}
|
|
|
|
|
|
/**
 * Decide whether a UChar string is an integer, a floating point number or
 * neither.
 *
 * @param str           text to classify
 * @param length        number of UChar units in str
 * @param lval          if not null, receives the value for KindOfInt64
 * @param dval          if not null, receives the value for KindOfDouble
 * @param allow_errors  0: the whole string must be numeric;
 *                      -1: a numeric prefix is accepted and a notice raised;
 *                      any other value: a numeric prefix is accepted silently
 * @return KindOfInt64, KindOfDouble, or KindOfNull when not numeric
 */
static DataType collator_is_numeric(UChar *str, int length, int64_t *lval,
                                    double *dval, int allow_errors ) {
  if (!length) {
    return KindOfNull;
  }

  /* handle hex numbers */
  const bool looks_hex =
    length>=2 && str[0]=='0' && (str[1]=='x' || str[1]=='X');
  int conv_base = looks_hex ? 16 : 10;

  UChar *long_end, *double_end;

  /* First attempt: integer. */
  errno=0;
  int64_t as_long = collator_u_strtol(str, &long_end, conv_base);
  if (errno == ERANGE) {
    /* Out of range for an integer; the double parser gets a try below. */
    long_end = nullptr;
  } else if (long_end == str+length) { /* integer string */
    if (lval) {
      *lval = as_long;
    }
    return KindOfInt64;
  } else if (long_end == str &&
             *long_end != '\0' &&
             *str != '.' &&
             *str != '-') { /* ignore partial string matches */
    return KindOfNull;
  }

  if (conv_base == 16) { /* hex string, under UNIX strtod() messes it up */
    /* UTODO: keep compatibility with is_numeric_string() here? */
    return KindOfNull;
  }

  /* Second attempt: floating point. */
  double as_double = collator_u_strtod(str, &double_end);
  if (as_double == 0 && double_end == str) {
    /* Nothing was parsed. */
    double_end = nullptr;
  } else if (double_end == str+length) { /* floating point string */
    if (!finite(as_double)) {
      /* "inf","nan" and maybe other weird ones */
      return KindOfNull;
    }

    if (dval) {
      *dval = as_double;
    }
    return KindOfDouble;
  }

  /* Only part of the string (or none of it) is numeric. */
  if (!allow_errors) {
    return KindOfNull;
  }
  if (allow_errors == -1) {
    raise_notice("A non well formed numeric value encountered");
  }

  /* Prefer whichever parser consumed more of the input. */
  if (double_end > long_end && dval) {
    *dval = as_double;
    return KindOfDouble;
  }
  if (long_end && lval) {
    *lval = as_long;
    return KindOfInt64;
  }
  return KindOfNull;
}
|
|
|
|
/**
 * Convert a UTF-8 String into a String whose bytes are UTF-16 code units.
 *
 * @param utf8_str  text to convert
 * @param status    receives the conversion outcome
 * @return the converted text (length counted in bytes), or an empty string
 *         when *status reports a failure
 */
static String intl_convert_str_utf8_to_utf16(CStrRef utf8_str,
                                             UErrorCode * status) {
  UChar* converted = nullptr;
  int converted_len = 0;
  intl_convert_utf8_to_utf16(&converted, &converted_len,
                             utf8_str.data(), utf8_str.length(),
                             status);
  if (!U_FAILURE(*status)) {
    /* The String attaches to the buffer produced by the converter. */
    return String((char*)converted, UBYTES(converted_len), AttachString);
  }
  return (const char *)(L"");
}
|
|
|
|
/**
 * Convert a String whose bytes are UTF-16 code units into a UTF-8 String.
 *
 * @param utf16_str  text to convert; its byte length is turned into a
 *                   UChar count with UCHARS
 * @param status     receives the conversion outcome
 * @return the converted text, or an empty string when *status reports a
 *         failure
 */
static String intl_convert_str_utf16_to_utf8(CStrRef utf16_str,
                                             UErrorCode * status) {
  char* converted = nullptr;
  int converted_len = 0;
  intl_convert_utf16_to_utf8(&converted, &converted_len,
                             (UChar*)(utf16_str.data()),
                             UCHARS(utf16_str.length()),
                             status);
  if (!U_FAILURE(*status)) {
    /* The String attaches to the buffer produced by the converter. */
    return String(converted, converted_len, AttachString);
  }
  return "";
}
|
|
|
|
/**
 * Convert a (UTF-16) string value to a number.
 *
 * @return the integer or double produced by
 *         collator_convert_string_to_number_if_possible(), or 0 when that
 *         helper reports failure by returning false
 */
static Variant collator_convert_string_to_number(CVarRef str) {
  Variant converted = collator_convert_string_to_number_if_possible(str);
  if (!same(converted, false)) {
    return converted;
  }
  /* Not numeric => fall back to zero. */
  return 0;
}
|
|
|
|
/**
 * Convert a (UTF-16) string value to a double; input that is not numeric
 * becomes 0 via collator_convert_string_to_number().
 */
static Variant collator_convert_string_to_double(CVarRef str) {
  return collator_convert_string_to_number(str).toDouble();
}
|
|
|
|
/**
 * Try to read a (UTF-16) string value as a number.
 *
 * The string is classified by collator_is_numeric() with allow_errors set
 * to 1, so a numeric prefix is enough.
 *
 * @return an integer or a double on success; false when str is not a
 *         string or holds no number
 */
static Variant collator_convert_string_to_number_if_possible(CVarRef str) {
  if (!str.isString()) return false;

  int64_t as_int = 0;
  double as_double = 0;

  /* The String's bytes are reinterpreted as UChar units. */
  String text = str.toString();
  DataType kind = collator_is_numeric((UChar*)(text.data()),
                                      UCHARS(text.length()),
                                      &as_int, &as_double, 1);
  switch (kind) {
    case KindOfInt64:
      return as_int;
    case KindOfDouble:
      return as_double;
    default:
      return false;
  }
}
|
|
|
|
/**
 * Replace an object by its string form, converted from UTF-8 to UTF-16.
 *
 * @param obj  any value
 * @return obj itself when it is not an object or when its string
 *         conversion throws; the UTF-16 string otherwise; null (after a
 *         warning) when the UTF-8 to UTF-16 conversion fails
 */
static Variant collator_convert_object_to_string(CVarRef obj) {
  if (!obj.isObject()) return obj;
  String str;
  try {
    str = obj.toString();
  } catch (Exception &e) {
    /* The object cannot be stringified: hand it back untouched. */
    return obj;
  }
  /* ICU-style status codes are expected to start out as U_ZERO_ERROR;
   * initialise explicitly so U_FAILURE() below never reads an
   * indeterminate value, whatever the converter does with it. */
  UErrorCode status = U_ZERO_ERROR;
  String ustr = intl_convert_str_utf8_to_utf16(str, &status);
  if (U_FAILURE(status)) {
    raise_warning("Error casting object to string in "
                  "collator_convert_object_to_string()");
    return uninit_null();
  }
  return ustr;
}
|
|
|
|
/**
 * Convert every string element of array from UTF-16 to UTF-8 in place.
 * Elements that are not strings are left alone.
 *
 * @param array   array whose string elements are rewritten
 * @param status  receives the outcome; on the first failure the function
 *                returns at once, leaving later elements unconverted
 */
static void collator_convert_array_from_utf16_to_utf8(Array &array,
                                                      UErrorCode * status) {
  for (ArrayIter it(array); it; ++it) {
    CVarRef element = it.secondRef();
    if (element.isString()) {
      String utf8 = intl_convert_str_utf16_to_utf8(element.toString(), status);
      if (U_FAILURE(*status)) {
        return;
      }
      /* Overwrite the element through the iterator's reference. */
      const_cast<Variant&>(element) = utf8;
    }
  }
}
|
|
|
|
/**
 * Convert every string element of array from UTF-8 to UTF-16.
 * Elements that are not strings are left alone.
 *
 * @param array   array whose string elements are rewritten
 * @param status  receives the outcome; on the first failure the function
 *                returns at once, leaving later elements unconverted
 */
static void collator_convert_array_from_utf8_to_utf16(Array &array,
                                                      UErrorCode * status) {
  for (ArrayIter it(array); it; ++it) {
    CVarRef element = it.secondRef();
    if (element.isString()) {
      String utf16 = intl_convert_str_utf8_to_utf16(element.toString(), status);
      if (U_FAILURE(*status)) {
        return;
      }
      /* Store the converted text back under the same key. */
      Variant slot = it.first();
      array.set(slot, utf16);
    }
  }
}
|
|
|
|
/**
 * Prepare one operand for the generic (non-ICU) comparison.
 *
 * @param arg  operand; strings are expected to hold UTF-16 text
 * @return arg unchanged when it is not a string; its numeric value when
 *         the string is numeric; otherwise the string converted to UTF-8
 *         (a warning is raised when that conversion fails)
 */
static Variant collator_normalize_sort_argument(CVarRef arg) {
  if (!arg.isString()) return arg;

  Variant n_arg = collator_convert_string_to_number_if_possible(arg);
  if (same(n_arg, false)) {
    /* Conversion to number failed. */
    /* ICU-style status codes are expected to start out as U_ZERO_ERROR;
     * initialise explicitly so U_FAILURE() below never reads an
     * indeterminate value. */
    UErrorCode status = U_ZERO_ERROR;
    n_arg = intl_convert_str_utf16_to_utf8(arg.toString(), &status);
    if (U_FAILURE(status)) {
      raise_warning("Error converting utf16 to utf8 in "
                    "collator_normalize_sort_argument()");
    }
  }
  return n_arg;
}
|
|
|
|
/**
 * Comparison used for COLLATOR_SORT_REGULAR.
 *
 * Objects are first replaced by their UTF-16 string form.  If both
 * operands are then strings and at least one of them is not numeric, they
 * are compared with ucol_strcoll().  In every other case the operands are
 * normalised (numeric strings to numbers, other strings to UTF-8) and
 * compared with less()/equal().
 *
 * @param v1, v2     operands; strings are expected to hold UTF-16 text
 * @param data       the UCollator to use, passed as an opaque pointer
 * @param ascending  true for ascending order, false for descending
 * @return negative, zero or positive for "before", "equal", "after" in
 *         the requested order
 */
static int collator_regular_compare_function(CVarRef v1, CVarRef v2,
                                             const void *data,
                                             bool ascending) {
  Variant str1 = collator_convert_object_to_string(v1);
  Variant str2 = collator_convert_object_to_string(v2);
  Variant num1;
  Variant num2;
  Variant norm1;
  Variant norm2;

  /* If both args are strings AND either of args is not numeric string
   * then use ICU-compare. Otherwise PHP-compare. */
  if (str1.isString() && str2.isString()) {
    num1 = collator_convert_string_to_number_if_possible(str1);
    if (!same(num1, false)) {
      num2 = collator_convert_string_to_number_if_possible(str2);
    }
    if (same(num1, false) || same(num2, false)) {
      assert(data);
      String text1 = str1.toString();
      String text2 = str2.toString();
      int ret = ucol_strcoll((const UCollator *)data,
                             (UChar*)(text1.data()),
                             UCHARS(text1.length()),
                             (UChar*)(text2.data()),
                             UCHARS(text2.length()));
      return ascending ? ret : (-ret);
    }
  }

  /* num1 is set if str1 and str2 are strings. */
  if (!num1.isNull()) {
    if (same(num1, false)) {
      /* NOTE: this branch cannot be reached at present: when num1 is
       * false the ICU path above has already returned.  It is kept so the
       * function stays correct if that early return ever changes. */
      /* str1 is string but not numeric string just convert it to utf8. */
      /* Start from U_ZERO_ERROR so U_FAILURE() below never reads an
       * indeterminate value. */
      UErrorCode status = U_ZERO_ERROR;
      norm1 = intl_convert_str_utf16_to_utf8(str1.toString(), &status);
      if (U_FAILURE(status)) {
        raise_warning("Error converting utf16 to utf8 in "
                      "collator_regular_compare_function()");
      }
      /* num2 is not set but str2 is string => do normalization. */
      norm2 = collator_normalize_sort_argument(str2);
    } else {
      /* str1 is numeric strings => passthru to PHP-compare. */
      norm1 = num1;
      norm2 = num2;
    }
  } else {
    /* str1 or str2 is not a string => do normalization. */
    norm1 = collator_normalize_sort_argument(str1);
    norm2 = collator_normalize_sort_argument(str2);
  }

  /* Work out the ascending answer once, then flip it for descending. */
  int order;
  if (less(norm1, norm2)) {
    order = -1;
  } else if (equal(norm1, norm2)) {
    order = 0;
  } else {
    order = 1;
  }
  return ascending ? order : -order;
}
|
|
|
|
/** Sort callback: regular comparison, ascending order. */
static int collator_regular_compare_ascending(CVarRef v1, CVarRef v2,
                                              const void *data) {
  const bool ascending = true;
  return collator_regular_compare_function(v1, v2, data, ascending);
}
|
|
|
|
/** Sort callback: regular comparison, descending order. */
static int collator_regular_compare_descending(CVarRef v1, CVarRef v2,
                                               const void *data) {
  const bool ascending = false;
  return collator_regular_compare_function(v1, v2, data, ascending);
}
|
|
|
|
/**
 * Comparison used for COLLATOR_SORT_NUMERIC: both operands are turned
 * into doubles and compared with less()/equal().
 *
 * @param v1, v2     operands; strings go through
 *                   collator_convert_string_to_double(), everything else
 *                   through toDouble()
 * @param data       not used by this comparison
 * @param ascending  true for ascending order, false for descending
 * @return -1, 0 or 1 in the requested order
 */
static int collator_numeric_compare_function(CVarRef v1, CVarRef v2,
                                             const void *data,
                                             bool ascending) {
  Variant lhs;
  if (!v1.isString()) {
    lhs = v1.toDouble();
  } else {
    lhs = collator_convert_string_to_double(v1);
  }

  Variant rhs;
  if (!v2.isString()) {
    rhs = v2.toDouble();
  } else {
    rhs = collator_convert_string_to_double(v2);
  }

  /* Work out the ascending answer once, then flip it for descending. */
  int order;
  if (less(lhs, rhs)) {
    order = -1;
  } else if (equal(lhs, rhs)) {
    order = 0;
  } else {
    order = 1;
  }
  return ascending ? order : -order;
}
|
|
|
|
/** Sort callback: numeric comparison, ascending order. */
static int collator_numeric_compare_ascending(CVarRef v1, CVarRef v2,
                                              const void *data) {
  const bool ascending = true;
  return collator_numeric_compare_function(v1, v2, data, ascending);
}
|
|
|
|
/** Sort callback: numeric comparison, descending order. */
static int collator_numeric_compare_descending(CVarRef v1, CVarRef v2,
                                               const void *data) {
  const bool ascending = false;
  return collator_numeric_compare_function(v1, v2, data, ascending);
}
|
|
|
|
static int collator_string_compare_function(CVarRef v1, CVarRef v2,
|
|
const void *data,
|
|
bool ascending) {
|
|
assert(data);
|
|
String str1;
|
|
if (v1.isString()) {
|
|
str1 = v1.toString();
|
|
} else {
|
|
UErrorCode status;
|
|
str1 = intl_convert_str_utf8_to_utf16(v1.toString(), &status);
|
|
if (U_FAILURE(status)) {
|
|
raise_warning("Error converting utf8 to utf16 in "
|
|
"collator_string_compare_function()");
|
|
}
|
|
}
|
|
String str2;
|
|
if (v2.isString()) {
|
|
str2 = v2.toString();
|
|
} else {
|
|
UErrorCode status;
|
|
str2 = intl_convert_str_utf8_to_utf16(v2.toString(), &status);
|
|
if (U_FAILURE(status)) {
|
|
raise_warning("Error converting utf8 to utf16 in "
|
|
"collator_string_compare_function()");
|
|
}
|
|
}
|
|
|
|
int ret = ucol_strcoll((const UCollator *)data,
|
|
(UChar*)(str1.data()),
|
|
UCHARS(str1.length()),
|
|
(UChar*)(str2.data()),
|
|
UCHARS(str2.length()));
|
|
return ascending ? ret : (-ret);
|
|
}
|
|
|
|
/** Sort callback: string comparison, ascending order. */
static int collator_string_compare_ascending(CVarRef v1, CVarRef v2,
                                             const void *data) {
  const bool ascending = true;
  return collator_string_compare_function(v1, v2, data, ascending);
}
|
|
|
|
/** Sort callback: string comparison, descending order. */
static int collator_string_compare_descending(CVarRef v1, CVarRef v2,
                                              const void *data) {
  const bool ascending = false;
  return collator_string_compare_function(v1, v2, data, ascending);
}
|
|
|
|
static bool collator_sort_internal(bool renumber, Variant &array,
|
|
int sort_flags, bool ascending,
|
|
UCollator *coll, intl_error * errcode) {
|
|
assert(coll);
|
|
errcode->clear();
|
|
s_intl_error->m_error.clear();
|
|
Array temp = array.toArray();
|
|
Array::PFUNC_CMP cmp_func;
|
|
|
|
switch (sort_flags) {
|
|
case COLLATOR_SORT_NUMERIC:
|
|
cmp_func = ascending ? collator_numeric_compare_ascending
|
|
: collator_numeric_compare_descending;
|
|
break;
|
|
case COLLATOR_SORT_STRING:
|
|
cmp_func = ascending ? collator_string_compare_ascending
|
|
: collator_string_compare_descending;
|
|
break;
|
|
case COLLATOR_SORT_REGULAR:
|
|
default:
|
|
cmp_func = ascending ? collator_regular_compare_ascending
|
|
: collator_regular_compare_descending;
|
|
break;
|
|
}
|
|
|
|
/* Convert strings in the specified array from UTF-8 to UTF-16. */
|
|
collator_convert_array_from_utf8_to_utf16(temp, &(errcode->code));
|
|
if (U_FAILURE(errcode->code)) {
|
|
errcode->custom_error_message =
|
|
"Error converting array from UTF-8 to UTF-16";
|
|
s_intl_error->m_error.code = errcode->code;
|
|
s_intl_error->m_error.custom_error_message = errcode->custom_error_message;
|
|
return false;
|
|
}
|
|
|
|
/* Sort specified array. */
|
|
temp.sort(cmp_func, false, renumber, coll);
|
|
|
|
/* Convert strings in the specified array back to UTF-8. */
|
|
errcode->clear();
|
|
s_intl_error->m_error.clear();
|
|
collator_convert_array_from_utf16_to_utf8(temp, &(errcode->code));
|
|
if (U_FAILURE(errcode->code)) {
|
|
errcode->custom_error_message =
|
|
"Error converting array from UTF-16 to UTF-8";
|
|
s_intl_error->m_error.code = errcode->code;
|
|
s_intl_error->m_error.custom_error_message = errcode->custom_error_message;
|
|
return false;
|
|
}
|
|
array = temp;
|
|
return true;
|
|
}
|
|
|
|
/**
 * Sort array with the given collator; the sort is asked to renumber.
 *
 * @return true on success, false when a string conversion failed (details
 *         are left in errcode)
 */
bool collator_sort(Variant &array, int sort_flags, bool ascending,
                   UCollator *coll, intl_error *errcode) {
  assert(coll);
  const bool renumber = true;
  return collator_sort_internal(renumber, array, sort_flags, ascending, coll,
                                errcode);
}
|
|
|
|
/**
 * Sort array with the given collator; the sort is asked not to renumber.
 *
 * @return true on success, false when a string conversion failed (details
 *         are left in errcode)
 */
bool collator_asort(Variant &array, int sort_flags, bool ascending,
                    UCollator *coll, intl_error *errcode) {
  assert(coll);
  const bool renumber = false;
  return collator_sort_internal(renumber, array, sort_flags, ascending, coll,
                                errcode);
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
}
|