b3b767d6b2
uidna_openUTS46 overwrites its error code argument only if there is actually an error. Hence, if the variable is not initialized to zero it may seem to the calling code that an error has ocurred. This diff initializes the variable and re-enables the failing test.
875 linhas
28 KiB
C++
875 linhas
28 KiB
C++
/*
|
|
+----------------------------------------------------------------------+
|
|
| HipHop for PHP |
|
|
+----------------------------------------------------------------------+
|
|
| Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
|
|
| Copyright (c) 1997-2010 The PHP Group |
|
|
+----------------------------------------------------------------------+
|
|
| This source file is subject to version 3.01 of the PHP license, |
|
|
| that is bundled with this package in the file LICENSE, and is |
|
|
| available through the world-wide-web at the following url: |
|
|
| http://www.php.net/license/3_01.txt |
|
|
| If you did not receive a copy of the PHP license and are unable to |
|
|
| obtain it through the world-wide-web, please send a note to |
|
|
| license@php.net so we can mail you a copy immediately. |
|
|
+----------------------------------------------------------------------+
|
|
*/
|
|
|
|
#include <runtime/ext/ext_intl.h>
|
|
#include <runtime/ext/ext_array.h> // for throw_bad_array_exception
|
|
#include <runtime/base/util/request_local.h>
|
|
#include <runtime/base/zend/intl_convert.h>
|
|
#include <runtime/base/zend/zend_collator.h>
|
|
#include <runtime/base/zend/zend_qsort.h>
|
|
#include <unicode/uidna.h>
|
|
#include <unicode/ustring.h>
|
|
#include <unicode/ucol.h> // icu
|
|
#include <unicode/uclean.h> // icu
|
|
#include <unicode/putil.h> // icu
|
|
#include <unicode/utypes.h>
|
|
#include <unicode/unorm.h>
|
|
|
|
#include <system/lib/systemlib.h>
|
|
|
|
#ifdef UIDNA_INFO_INITIALIZER
|
|
#define HAVE_46_API 1 /* has UTS#46 API (introduced in ICU 4.6) */
|
|
#endif
|
|
|
|
namespace HPHP {
|
|
IMPLEMENT_DEFAULT_EXTENSION(idn);
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
int64_t f_intl_get_error_code() {
|
|
return s_intl_error->m_error.code;
|
|
}
|
|
|
|
String f_intl_get_error_message() {
|
|
if (!s_intl_error->m_error.custom_error_message.empty()) {
|
|
return s_intl_error->m_error.custom_error_message;
|
|
}
|
|
return String(u_errorName(s_intl_error->m_error.code), AttachLiteral);
|
|
}
|
|
|
|
String f_intl_error_name(int64_t error_code) {
|
|
return String(u_errorName((UErrorCode)error_code), AttachLiteral);
|
|
}
|
|
|
|
bool f_intl_is_failure(int64_t error_code) {
|
|
if (U_FAILURE((UErrorCode)error_code)) return true;
|
|
return false;
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
const int64_t q_Collator$$SORT_REGULAR = 0;
|
|
const int64_t q_Collator$$SORT_STRING = 1;
|
|
const int64_t q_Collator$$SORT_NUMERIC = 2;
|
|
const int64_t q_Collator$$FRENCH_COLLATION = UCOL_FRENCH_COLLATION;
|
|
const int64_t q_Collator$$ALTERNATE_HANDLING = UCOL_ALTERNATE_HANDLING;
|
|
const int64_t q_Collator$$CASE_FIRST = UCOL_CASE_FIRST;
|
|
const int64_t q_Collator$$CASE_LEVEL = UCOL_CASE_LEVEL;
|
|
const int64_t q_Collator$$NORMALIZATION_MODE = UCOL_NORMALIZATION_MODE;
|
|
const int64_t q_Collator$$STRENGTH = UCOL_STRENGTH;
|
|
const int64_t q_Collator$$HIRAGANA_QUATERNARY_MODE = UCOL_HIRAGANA_QUATERNARY_MODE;
|
|
const int64_t q_Collator$$NUMERIC_COLLATION = UCOL_NUMERIC_COLLATION;
|
|
const int64_t q_Collator$$DEFAULT_VALUE = UCOL_DEFAULT;
|
|
const int64_t q_Collator$$PRIMARY = UCOL_PRIMARY;
|
|
const int64_t q_Collator$$SECONDARY = UCOL_SECONDARY;
|
|
const int64_t q_Collator$$TERTIARY = UCOL_TERTIARY;
|
|
const int64_t q_Collator$$DEFAULT_STRENGTH = UCOL_DEFAULT_STRENGTH;
|
|
const int64_t q_Collator$$QUATERNARY = UCOL_QUATERNARY;
|
|
const int64_t q_Collator$$IDENTICAL = UCOL_IDENTICAL;
|
|
const int64_t q_Collator$$OFF = UCOL_OFF;
|
|
const int64_t q_Collator$$ON = UCOL_ON;
|
|
const int64_t q_Collator$$SHIFTED = UCOL_SHIFTED;
|
|
const int64_t q_Collator$$NON_IGNORABLE = UCOL_NON_IGNORABLE;
|
|
const int64_t q_Collator$$LOWER_FIRST = UCOL_LOWER_FIRST;
|
|
const int64_t q_Collator$$UPPER_FIRST = UCOL_UPPER_FIRST;
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
c_Collator::c_Collator(VM::Class* cb) :
|
|
ExtObjectData(cb), m_locale(), m_ucoll(NULL), m_errcode() {
|
|
}
|
|
|
|
c_Collator::~c_Collator() {
|
|
if (m_ucoll) {
|
|
ucol_close(m_ucoll);
|
|
m_ucoll = NULL;
|
|
}
|
|
}
|
|
|
|
void c_Collator::t___construct(CStrRef locale) {
|
|
if (m_ucoll) {
|
|
ucol_close(m_ucoll);
|
|
m_ucoll = NULL;
|
|
}
|
|
m_errcode.clear();
|
|
if (!locale.empty()) {
|
|
m_locale = locale;
|
|
m_ucoll = ucol_open(locale.data(), &(m_errcode.code));
|
|
if (!U_FAILURE(m_errcode.code)) {
|
|
// If the specified locale opened successfully, return
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
return;
|
|
}
|
|
}
|
|
// If the empty string was given or if the specified locale did
|
|
// not open successfully, so fall back to using the default locale
|
|
m_errcode.code = U_USING_FALLBACK_WARNING;
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
if (m_ucoll) {
|
|
ucol_close(m_ucoll);
|
|
m_ucoll = NULL;
|
|
}
|
|
UErrorCode errcode = U_ZERO_ERROR;
|
|
m_locale = String(uloc_getDefault(), CopyString);
|
|
m_ucoll = ucol_open(m_locale.data(), &errcode);
|
|
if (U_FAILURE(errcode)) {
|
|
m_errcode.code = errcode;
|
|
m_errcode.custom_error_message =
|
|
"collator_create: unable to open ICU collator";
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
|
|
if (m_ucoll) {
|
|
ucol_close(m_ucoll);
|
|
m_ucoll = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
bool c_Collator::t_asort(VRefParam arr,
|
|
int64_t sort_flag /* = q_Collator$$SORT_REGULAR */) {
|
|
if (!arr.isArray()) {
|
|
throw_bad_array_exception();
|
|
return false;
|
|
}
|
|
if (!m_ucoll) {
|
|
raise_warning("asort called on uninitialized Collator object");
|
|
return false;
|
|
}
|
|
m_errcode.clear();
|
|
bool ret = collator_asort(arr, sort_flag, true, m_ucoll, &m_errcode);
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
return false;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
Variant c_Collator::t_compare(CStrRef str1, CStrRef str2) {
|
|
if (!m_ucoll) {
|
|
raise_warning("compare called on uninitialized Collator object");
|
|
return 0;
|
|
}
|
|
UChar* ustr1 = NULL;
|
|
UChar* ustr2 = NULL;
|
|
int ustr1_len = 0;
|
|
int ustr2_len = 0;
|
|
m_errcode.clear();
|
|
intl_convert_utf8_to_utf16(&ustr1, &ustr1_len,
|
|
str1.data(), str1.length(),
|
|
&(m_errcode.code));
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
free(ustr1);
|
|
return false;
|
|
}
|
|
intl_convert_utf8_to_utf16(&ustr2, &ustr2_len,
|
|
str2.data(), str2.length(),
|
|
&(m_errcode.code));
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
free(ustr1);
|
|
free(ustr2);
|
|
return false;
|
|
}
|
|
int64_t ret = ucol_strcoll(m_ucoll, ustr1, ustr1_len, ustr2, ustr2_len);
|
|
free(ustr1);
|
|
free(ustr2);
|
|
return ret;
|
|
}
|
|
|
|
Variant c_Collator::ti_create(const char* cls, CStrRef locale) {
|
|
p_Collator c(NEWOBJ(c_Collator)());
|
|
c.get()->t___construct(locale);
|
|
return c;
|
|
}
|
|
|
|
int64_t c_Collator::t_getattribute(int64_t attr) {
|
|
if (!m_ucoll) {
|
|
raise_warning("getattribute called on uninitialized Collator object");
|
|
return 0;
|
|
}
|
|
m_errcode.clear();
|
|
int64_t ret = (int64_t)ucol_getAttribute(m_ucoll, (UColAttribute)attr,
|
|
&(m_errcode.code));
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
m_errcode.custom_error_message = "Error getting attribute value";
|
|
s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
|
|
return 0;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int64_t c_Collator::t_geterrorcode() {
|
|
return m_errcode.code;
|
|
}
|
|
|
|
String c_Collator::t_geterrormessage() {
|
|
return String(u_errorName(m_errcode.code), AttachLiteral);
|
|
}
|
|
|
|
String c_Collator::t_getlocale(int64_t type /* = 0 */) {
|
|
if (!m_ucoll) {
|
|
raise_warning("getlocale called on uninitialized Collator object");
|
|
return "";
|
|
}
|
|
m_errcode.clear();
|
|
String ret(
|
|
(char*)ucol_getLocaleByType(m_ucoll, (ULocDataLocaleType)type,
|
|
&(m_errcode.code)),
|
|
AttachLiteral);
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
m_errcode.custom_error_message = "Error getting locale by type";
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
s_intl_error->m_error.custom_error_message =
|
|
m_errcode.custom_error_message;
|
|
return "";
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int64_t c_Collator::t_getstrength() {
|
|
if (!m_ucoll) {
|
|
raise_warning("getstrength called on uninitialized Collator object");
|
|
return 0;
|
|
}
|
|
return ucol_getStrength(m_ucoll);
|
|
}
|
|
|
|
bool c_Collator::t_setattribute(int64_t attr, int64_t val) {
|
|
if (!m_ucoll) {
|
|
raise_warning("setattribute called on uninitialized Collator object");
|
|
return false;
|
|
}
|
|
m_errcode.clear();
|
|
ucol_setAttribute(m_ucoll, (UColAttribute)attr,
|
|
(UColAttributeValue)val, &(m_errcode.code));
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
m_errcode.custom_error_message = "Error setting attribute value";
|
|
s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool c_Collator::t_setstrength(int64_t strength) {
|
|
if (!m_ucoll) {
|
|
raise_warning("setstrength called on uninitialized Collator object");
|
|
return false;
|
|
}
|
|
ucol_setStrength(m_ucoll, (UCollationStrength)strength);
|
|
return true;
|
|
}
|
|
|
|
typedef struct _collator_sort_key_index {
|
|
char* key; /* pointer to sort key */
|
|
ssize_t valPos; /* position of the original array element */
|
|
} collator_sort_key_index_t;
|
|
|
|
static const int32_t DEF_SORT_KEYS_BUF_SIZE = 1048576;
|
|
static const int32_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576;
|
|
|
|
static const int32_t DEF_SORT_KEYS_INDX_BUF_SIZE = 1048576;
|
|
static const int32_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 1048576;
|
|
|
|
static const int32_t DEF_UTF16_BUF_SIZE = 1024;
|
|
|
|
/* {{{ collator_cmp_sort_keys
|
|
* Compare sort keys
|
|
*/
|
|
static int collator_cmp_sort_keys(const void* p1, const void* p2, const void*) {
|
|
char* key1 = ((collator_sort_key_index_t*)p1)->key;
|
|
char* key2 = ((collator_sort_key_index_t*)p2)->key;
|
|
return strcmp( key1, key2 );
|
|
}
|
|
|
|
bool c_Collator::t_sortwithsortkeys(VRefParam arr) {
|
|
char* sortKeyBuf = NULL; /* buffer to store sort keys */
|
|
int32_t sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; /* buffer size */
|
|
ptrdiff_t sortKeyBufOffset = 0; /* pos in buffer to store sort key */
|
|
int32_t sortKeyLen = 0; /* the length of currently processing key */
|
|
int32_t bufLeft = 0;
|
|
int32_t bufIncrement = 0;
|
|
|
|
/* buffer to store 'indexes' which will be passed to 'qsort' */
|
|
collator_sort_key_index_t* sortKeyIndxBuf = NULL;
|
|
int32_t sortKeyIndxBufSize = DEF_SORT_KEYS_INDX_BUF_SIZE;
|
|
int32_t sortKeyIndxSize = sizeof( collator_sort_key_index_t );
|
|
|
|
int32_t sortKeyCount = 0;
|
|
int32_t j = 0;
|
|
|
|
/* tmp buffer to hold current processing string in utf-16 */
|
|
UChar* utf16_buf = NULL;
|
|
/* the length of utf16_buf */
|
|
int utf16_buf_size = DEF_UTF16_BUF_SIZE;
|
|
/* length of converted string */
|
|
int utf16_len = 0;
|
|
|
|
m_errcode.clear();
|
|
s_intl_error->m_error.clear();
|
|
|
|
/*
|
|
* Sort specified array.
|
|
*/
|
|
if (!arr.isArray()) {
|
|
return true;
|
|
}
|
|
Array hash = arr.toArray();
|
|
if (hash.size() == 0) {
|
|
return true;
|
|
}
|
|
|
|
/* Create bufers */
|
|
sortKeyBuf = (char*)calloc(sortKeyBufSize, sizeof(char));
|
|
sortKeyIndxBuf = (collator_sort_key_index_t*)malloc(sortKeyIndxBufSize);
|
|
utf16_buf = (UChar*)malloc(utf16_buf_size);
|
|
|
|
/* Iterate through input hash and create a sort key for each value. */
|
|
for (ssize_t pos = hash->iter_begin(); pos != ArrayData::invalid_index;
|
|
pos = hash->iter_advance(pos)) {
|
|
/* Convert current hash item from UTF-8 to UTF-16LE and save the result
|
|
* to utf16_buf. */
|
|
utf16_len = utf16_buf_size;
|
|
/* Process string values only. */
|
|
Variant val(hash->getValue(pos));
|
|
if (val.isString()) {
|
|
String str = val.toString();
|
|
intl_convert_utf8_to_utf16(&utf16_buf, &utf16_len, str.data(),
|
|
str.size(), &(m_errcode.code));
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
m_errcode.custom_error_message = "Sort with sort keys failed";
|
|
if (utf16_buf) {
|
|
free(utf16_buf);
|
|
}
|
|
free(sortKeyIndxBuf);
|
|
free(sortKeyBuf);
|
|
return false;
|
|
}
|
|
} else {
|
|
/* Set empty string */
|
|
utf16_len = 0;
|
|
utf16_buf[utf16_len] = 0;
|
|
}
|
|
|
|
if ((utf16_len + 1) > utf16_buf_size) {
|
|
utf16_buf_size = utf16_len + 1;
|
|
}
|
|
|
|
/* Get sort key, reallocating the buffer if needed. */
|
|
bufLeft = sortKeyBufSize - sortKeyBufOffset;
|
|
|
|
sortKeyLen = ucol_getSortKey(m_ucoll,
|
|
utf16_buf,
|
|
utf16_len,
|
|
(uint8_t*)sortKeyBuf + sortKeyBufOffset,
|
|
bufLeft);
|
|
|
|
/* check for sortKeyBuf overflow, increasing its size of the buffer if
|
|
needed */
|
|
if (sortKeyLen > bufLeft) {
|
|
bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ?
|
|
sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
|
|
sortKeyBufSize += bufIncrement;
|
|
bufLeft += bufIncrement;
|
|
sortKeyBuf = (char*)realloc(sortKeyBuf, sortKeyBufSize);
|
|
sortKeyLen = ucol_getSortKey(m_ucoll, utf16_buf, utf16_len,
|
|
(uint8_t*)sortKeyBuf + sortKeyBufOffset,
|
|
bufLeft);
|
|
}
|
|
|
|
/* check sortKeyIndxBuf overflow, increasing its size of the buffer if
|
|
needed */
|
|
if ((sortKeyCount + 1) * sortKeyIndxSize > sortKeyIndxBufSize) {
|
|
bufIncrement = (sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT) ?
|
|
sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
|
|
sortKeyIndxBufSize += bufIncrement;
|
|
sortKeyIndxBuf = (collator_sort_key_index_t*)realloc(sortKeyIndxBuf,
|
|
sortKeyIndxBufSize);
|
|
}
|
|
sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset;
|
|
sortKeyIndxBuf[sortKeyCount].valPos = pos;
|
|
sortKeyBufOffset += sortKeyLen;
|
|
++sortKeyCount;
|
|
}
|
|
|
|
/* update ptrs to point to valid keys. */
|
|
for( j = 0; j < sortKeyCount; j++ )
|
|
sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;
|
|
|
|
/* sort it */
|
|
zend_qsort(sortKeyIndxBuf, sortKeyCount, sortKeyIndxSize,
|
|
collator_cmp_sort_keys, NULL);
|
|
|
|
/* for resulting hash we'll assign new hash keys rather then reordering */
|
|
Array sortedHash = Array::Create();
|
|
|
|
for (j = 0; j < sortKeyCount; j++) {
|
|
sortedHash.append(hash->getValue(sortKeyIndxBuf[j].valPos));
|
|
}
|
|
|
|
/* Save sorted hash into return variable. */
|
|
arr = sortedHash;
|
|
|
|
if (utf16_buf)
|
|
free(utf16_buf);
|
|
|
|
free(sortKeyIndxBuf);
|
|
free(sortKeyBuf);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool c_Collator::t_sort(VRefParam arr,
|
|
int64_t sort_flag /* = q_Collator$$SORT_REGULAR */) {
|
|
if (!arr.isArray()) {
|
|
throw_bad_array_exception();
|
|
return false;
|
|
}
|
|
if (!m_ucoll) {
|
|
raise_warning("sort called on uninitialized Collator object");
|
|
return false;
|
|
}
|
|
m_errcode.clear();
|
|
bool ret = collator_sort(arr, sort_flag, true, m_ucoll, &(m_errcode));
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
return false;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
#define CHECK_COLL(obj) \
|
|
c_Collator *coll = NULL; \
|
|
if (obj.isObject()) { \
|
|
coll = obj.toObject().getTyped<c_Collator>(); \
|
|
} \
|
|
if (!coll) { \
|
|
raise_warning("Expecting collator object"); \
|
|
return false; \
|
|
} \
|
|
|
|
Variant f_collator_asort(CVarRef obj, VRefParam arr,
|
|
int64_t sort_flag /* = q_Collator$$SORT_REGULAR */) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_asort(ref(arr), sort_flag);
|
|
}
|
|
|
|
Variant f_collator_compare(CVarRef obj, CStrRef str1, CStrRef str2) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_compare(str1, str2);
|
|
}
|
|
|
|
Variant f_collator_create(CStrRef locale) {
|
|
return c_Collator::ti_create(nullptr, locale);
|
|
}
|
|
|
|
Variant f_collator_get_attribute(CVarRef obj, int64_t attr) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_getattribute(attr);
|
|
}
|
|
|
|
Variant f_collator_get_error_code(CVarRef obj) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_geterrorcode();
|
|
}
|
|
|
|
Variant f_collator_get_error_message(CVarRef obj) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_geterrormessage();
|
|
}
|
|
|
|
Variant f_collator_get_locale(CVarRef obj, int64_t type /* = 0 */) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_getlocale(type);
|
|
}
|
|
|
|
Variant f_collator_get_strength(CVarRef obj) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_getstrength();
|
|
}
|
|
|
|
Variant f_collator_set_attribute(CVarRef obj, int64_t attr, int64_t val) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_setattribute(attr, val);
|
|
}
|
|
|
|
Variant f_collator_set_strength(CVarRef obj, int64_t strength) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_setstrength(strength);
|
|
}
|
|
|
|
Variant f_collator_sort_with_sort_keys(CVarRef obj, VRefParam arr) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_sortwithsortkeys(ref(arr));
|
|
}
|
|
|
|
Variant f_collator_sort(CVarRef obj, VRefParam arr,
|
|
int64_t sort_flag /* = q_Collator$$SORT_REGULAR */) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_sort(ref(arr), sort_flag);
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
const int64_t q_Locale$$ACTUAL_LOCALE = 0;
|
|
const int64_t q_Locale$$VALID_LOCALE = 1;
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
c_Locale::c_Locale(VM::Class* cb) : ExtObjectData(cb) {
|
|
}
|
|
|
|
c_Locale::~c_Locale() {
|
|
}
|
|
|
|
void c_Locale::t___construct() {
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
const int64_t q_Normalizer$$NONE = UNORM_NONE;
|
|
const int64_t q_Normalizer$$FORM_D = UNORM_NFD;
|
|
const int64_t q_Normalizer$$NFD = UNORM_NFD;
|
|
const int64_t q_Normalizer$$FORM_KD = UNORM_NFKD;
|
|
const int64_t q_Normalizer$$NFKD = UNORM_NFKD;
|
|
const int64_t q_Normalizer$$FORM_C = UNORM_NFC;
|
|
const int64_t q_Normalizer$$NFC = UNORM_NFC;
|
|
const int64_t q_Normalizer$$FORM_KC = UNORM_NFKC;
|
|
const int64_t q_Normalizer$$NFKC = UNORM_NFKC;
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
c_Normalizer::c_Normalizer(VM::Class* cb) : ExtObjectData(cb) {
|
|
}
|
|
|
|
c_Normalizer::~c_Normalizer() {
|
|
}
|
|
|
|
void c_Normalizer::t___construct() {
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
Variant c_Normalizer::ti_isnormalized(const char* cls , CStrRef input,
|
|
int64_t form /* = q_Normalizer$$FORM_C */) {
|
|
s_intl_error->m_error.clear();
|
|
|
|
switch (form) {
|
|
case UNORM_NFD:
|
|
case UNORM_NFKD:
|
|
case UNORM_NFC:
|
|
case UNORM_NFKC:
|
|
break;
|
|
default:
|
|
s_intl_error->m_error.code = U_ILLEGAL_ARGUMENT_ERROR;
|
|
s_intl_error->m_error.custom_error_message =
|
|
"normalizer_isnormalized: illegal normalization form";
|
|
return uninit_null();
|
|
}
|
|
|
|
/* First convert the string to UTF-16. */
|
|
UChar* uinput = NULL; int uinput_len = 0;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
intl_convert_utf8_to_utf16(&uinput, &uinput_len, input.data(), input.size(),
|
|
&status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
s_intl_error->m_error.code = status;
|
|
s_intl_error->m_error.custom_error_message = "Error converting string to UTF-16.";
|
|
free(uinput);
|
|
return false;
|
|
}
|
|
|
|
/* test string */
|
|
UBool uret = unorm_isNormalizedWithOptions(uinput, uinput_len,
|
|
(UNormalizationMode)form,
|
|
(int32_t)0, &status);
|
|
free(uinput);
|
|
|
|
/* Bail out if an unexpected error occured. */
|
|
if (U_FAILURE(status)) {
|
|
s_intl_error->m_error.code = status;
|
|
s_intl_error->m_error.custom_error_message =
|
|
"Error testing if string is the given normalization form.";
|
|
return false;
|
|
}
|
|
|
|
return uret;
|
|
}
|
|
|
|
Variant c_Normalizer::ti_normalize(const char* cls , CStrRef input,
|
|
int64_t form /* = q_Normalizer$$FORM_C */) {
|
|
s_intl_error->m_error.clear();
|
|
|
|
int expansion_factor = 1;
|
|
switch(form) {
|
|
case UNORM_NONE:
|
|
case UNORM_NFC:
|
|
case UNORM_NFKC:
|
|
break;
|
|
case UNORM_NFD:
|
|
case UNORM_NFKD:
|
|
expansion_factor = 3;
|
|
break;
|
|
default:
|
|
s_intl_error->m_error.code = U_ILLEGAL_ARGUMENT_ERROR;
|
|
s_intl_error->m_error.custom_error_message =
|
|
"normalizer_normalize: illegal normalization form";
|
|
return uninit_null();
|
|
}
|
|
|
|
/* First convert the string to UTF-16. */
|
|
UChar* uinput = NULL; int uinput_len = 0;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
intl_convert_utf8_to_utf16(&uinput, &uinput_len, input.data(), input.size(),
|
|
&status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
s_intl_error->m_error.code = status;
|
|
s_intl_error->m_error.custom_error_message =
|
|
"Error converting string to UTF-16.";
|
|
free(uinput);
|
|
return uninit_null();
|
|
}
|
|
|
|
/* Allocate memory for the destination buffer for normalization */
|
|
int uret_len = uinput_len * expansion_factor;
|
|
UChar *uret_buf = (UChar*)malloc((uret_len + 1) * sizeof(UChar));
|
|
|
|
/* normalize */
|
|
int size_needed = unorm_normalize(uinput, uinput_len,
|
|
(UNormalizationMode)form, (int32_t) 0,
|
|
uret_buf, uret_len, &status);
|
|
|
|
/* Bail out if an unexpected error occured.
|
|
* (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
|
|
* (U_STRING_NOT_TERMINATED_WARNING usually means that the input string
|
|
* is empty).
|
|
*/
|
|
if (U_FAILURE(status) &&
|
|
status != U_BUFFER_OVERFLOW_ERROR &&
|
|
status != U_STRING_NOT_TERMINATED_WARNING) {
|
|
free(uret_buf);
|
|
free(uinput);
|
|
return uninit_null();
|
|
}
|
|
|
|
if (size_needed > uret_len) {
|
|
/* realloc does not seem to work properly - memory is corrupted
|
|
* uret_buf = eurealloc(uret_buf, size_needed + 1); */
|
|
free(uret_buf);
|
|
uret_buf = (UChar*)malloc((size_needed + 1) * sizeof(UChar));
|
|
uret_len = size_needed;
|
|
|
|
status = U_ZERO_ERROR;
|
|
|
|
/* try normalize again */
|
|
size_needed = unorm_normalize( uinput, uinput_len,
|
|
(UNormalizationMode)form, (int32_t) 0,
|
|
uret_buf, uret_len, &status);
|
|
|
|
/* Bail out if an unexpected error occured. */
|
|
if (U_FAILURE(status)) {
|
|
/* Set error messages. */
|
|
s_intl_error->m_error.code = status;
|
|
s_intl_error->m_error.custom_error_message = "Error normalizing string";
|
|
free(uret_buf);
|
|
free(uinput);
|
|
return uninit_null();
|
|
}
|
|
}
|
|
|
|
free(uinput);
|
|
|
|
/* the buffer we actually used */
|
|
uret_len = size_needed;
|
|
|
|
/* Convert normalized string from UTF-16 to UTF-8. */
|
|
char* ret_buf = NULL; int ret_len = 0;
|
|
intl_convert_utf16_to_utf8(&ret_buf, &ret_len, uret_buf, uret_len, &status);
|
|
free(uret_buf);
|
|
if (U_FAILURE(status)) {
|
|
s_intl_error->m_error.code = status;
|
|
s_intl_error->m_error.custom_error_message =
|
|
"normalizer_normalize: error converting normalized text UTF-8";
|
|
return uninit_null();
|
|
}
|
|
|
|
return String(ret_buf, ret_len, AttachString);
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
enum IdnVariant {
|
|
INTL_IDN_VARIANT_2003 = 0,
|
|
INTL_IDN_VARIANT_UTS46
|
|
};
|
|
|
|
enum {
|
|
INTL_IDN_TO_ASCII = 0,
|
|
INTL_IDN_TO_UTF8
|
|
};
|
|
|
|
#ifdef HAVE_46_API
|
|
static Variant php_intl_idn_to_46(CStrRef domain, int64_t options, IdnVariant idn_variant, VRefParam idna_info, int mode) {
|
|
int32_t converted_capacity;
|
|
char *converted = NULL;
|
|
int32_t converted_len;
|
|
UIDNA *uts46;
|
|
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
// Get UIDNA instance which implements UTS #46.
|
|
uts46 = uidna_openUTS46(options, &status);
|
|
SCOPE_EXIT { uidna_close(uts46); };
|
|
if (U_FAILURE(status)) return false;
|
|
|
|
// Call the appropriate IDN function
|
|
status = U_ZERO_ERROR;
|
|
converted_capacity = 255; // no domain name may exceed this
|
|
String result(converted_capacity, ReserveString); // reserves converted_capacity+1 characters.
|
|
converted = result.mutableSlice().ptr;
|
|
if (mode == INTL_IDN_TO_ASCII) {
|
|
converted_len = uidna_nameToASCII_UTF8(uts46, (char*)domain.data(), domain.size(),
|
|
converted, converted_capacity, &info, &status);
|
|
} else {
|
|
converted_len = uidna_nameToUnicodeUTF8(uts46, (char*)domain.data(), domain.size(),
|
|
converted, converted_capacity, &info, &status);
|
|
}
|
|
if (U_FAILURE(status) || converted_len > converted_capacity) return false;
|
|
if (info.errors == 0) {
|
|
result.setSize(converted_len);
|
|
} else {
|
|
result.setSize(0);
|
|
}
|
|
|
|
// Set up the array returned in idna_info.
|
|
Array arr;
|
|
arr.set("result", result);
|
|
arr.set("isTransitionalDifferent", info.isTransitionalDifferent);
|
|
arr.set("errors", (long)info.errors);
|
|
idna_info = arr; // As in Zend, the previous value of idn_variant is overwritten, not modified.
|
|
|
|
if (info.errors == 0) {
|
|
return result;
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
static Variant php_intl_idn_to(CStrRef domain, int64_t options, IdnVariant idn_variant, VRefParam idna_info, int mode) {
|
|
UChar* ustring = NULL;
|
|
int ustring_len = 0;
|
|
UErrorCode status;
|
|
char *converted_utf8 = NULL;
|
|
int32_t converted_utf8_len;
|
|
UChar* converted = NULL;
|
|
int32_t converted_ret_len;
|
|
|
|
if (idn_variant != INTL_IDN_VARIANT_2003) {
|
|
#ifdef HAVE_46_API
|
|
if (idn_variant == INTL_IDN_VARIANT_UTS46) {
|
|
return php_intl_idn_to_46(domain, options, idn_variant, ref(idna_info), mode);
|
|
}
|
|
#endif
|
|
return false;
|
|
}
|
|
|
|
// Convert the string to UTF-16
|
|
status = U_ZERO_ERROR;
|
|
intl_convert_utf8_to_utf16(&ustring, &ustring_len,
|
|
(char*)domain.data(), domain.size(), &status);
|
|
if (U_FAILURE(status)) {
|
|
free(ustring);
|
|
return false;
|
|
}
|
|
|
|
// Call the appropriate IDN function
|
|
int converted_len = (ustring_len > 1) ? ustring_len : 1;
|
|
for (;;) {
|
|
UParseError parse_error;
|
|
status = U_ZERO_ERROR;
|
|
converted = (UChar*)malloc(sizeof(UChar)*converted_len);
|
|
// If the malloc failed, bail out
|
|
if (!converted) {
|
|
free(ustring);
|
|
return false;
|
|
}
|
|
if (mode == INTL_IDN_TO_ASCII) {
|
|
converted_ret_len = uidna_IDNToASCII(ustring,
|
|
ustring_len, converted, converted_len,
|
|
(int32_t)options, &parse_error, &status);
|
|
} else {
|
|
converted_ret_len = uidna_IDNToUnicode(ustring,
|
|
ustring_len, converted, converted_len,
|
|
(int32_t)options, &parse_error, &status);
|
|
}
|
|
if (status != U_BUFFER_OVERFLOW_ERROR)
|
|
break;
|
|
// If we have a buffer overflow error, try again with a larger buffer
|
|
free(converted);
|
|
converted = NULL;
|
|
converted_len = converted_len * 2;
|
|
}
|
|
free(ustring);
|
|
if (U_FAILURE(status)) {
|
|
free(converted);
|
|
return false;
|
|
}
|
|
|
|
// Convert the string back to UTF-8
|
|
status = U_ZERO_ERROR;
|
|
intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len,
|
|
converted, converted_ret_len, &status);
|
|
free(converted);
|
|
if (U_FAILURE(status)) {
|
|
free(converted_utf8);
|
|
return false;
|
|
}
|
|
|
|
// Return the string
|
|
return String(converted_utf8, converted_utf8_len, AttachString);
|
|
}
|
|
|
|
Variant f_idn_to_ascii(CStrRef domain, int64_t options /* = 0 */, int64_t variant /* = 0 */, VRefParam idna_info /* = null */) {
|
|
return php_intl_idn_to(domain, options, (IdnVariant)variant, idna_info, INTL_IDN_TO_ASCII);
|
|
}
|
|
|
|
Variant f_idn_to_unicode(CStrRef domain, int64_t options /* = 0 */, int64_t variant /* = 0 */, VRefParam idna_info /* = null */) {
|
|
return php_intl_idn_to(domain, options, (IdnVariant)variant, idna_info, INTL_IDN_TO_UTF8);
|
|
}
|
|
|
|
Variant f_idn_to_utf8(CStrRef domain, int64_t options /* = 0 */, int64_t variant /* = 0 */, VRefParam idna_info /* = null */) {
|
|
return php_intl_idn_to(domain, options, (IdnVariant)variant, idna_info, INTL_IDN_TO_UTF8);
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
}
|
|
|