fef62f03a2
Now that HHVM is the default runtime, this namespace doesn't mean anything.
879 linhas
28 KiB
C++
879 linhas
28 KiB
C++
/*
|
|
+----------------------------------------------------------------------+
|
|
| HipHop for PHP |
|
|
+----------------------------------------------------------------------+
|
|
| Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
|
|
| Copyright (c) 1997-2010 The PHP Group |
|
|
+----------------------------------------------------------------------+
|
|
| This source file is subject to version 3.01 of the PHP license, |
|
|
| that is bundled with this package in the file LICENSE, and is |
|
|
| available through the world-wide-web at the following url: |
|
|
| http://www.php.net/license/3_01.txt |
|
|
| If you did not receive a copy of the PHP license and are unable to |
|
|
| obtain it through the world-wide-web, please send a note to |
|
|
| license@php.net so we can mail you a copy immediately. |
|
|
+----------------------------------------------------------------------+
|
|
*/
|
|
|
|
#include <runtime/ext/ext_intl.h>
|
|
#include <runtime/ext/ext_array.h> // for throw_bad_array_exception
|
|
#include <runtime/base/util/request_local.h>
|
|
#include <runtime/base/zend/intl_convert.h>
|
|
#include <runtime/base/zend/zend_collator.h>
|
|
#include <runtime/base/zend/zend_qsort.h>
|
|
#include <unicode/uidna.h>
|
|
#include <unicode/ustring.h>
|
|
#include <unicode/ucol.h> // icu
|
|
#include <unicode/uclean.h> // icu
|
|
#include <unicode/putil.h> // icu
|
|
#include <unicode/utypes.h>
|
|
#include <unicode/unorm.h>
|
|
|
|
#include <system/lib/systemlib.h>
|
|
|
|
#ifdef UIDNA_INFO_INITIALIZER
|
|
#define HAVE_46_API 1 /* has UTS#46 API (introduced in ICU 4.6) */
|
|
#endif
|
|
|
|
namespace HPHP {
|
|
IMPLEMENT_DEFAULT_EXTENSION(idn);
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
int64_t f_intl_get_error_code() {
|
|
return s_intl_error->m_error.code;
|
|
}
|
|
|
|
String f_intl_get_error_message() {
|
|
if (!s_intl_error->m_error.custom_error_message.empty()) {
|
|
return s_intl_error->m_error.custom_error_message;
|
|
}
|
|
return String(u_errorName(s_intl_error->m_error.code), AttachLiteral);
|
|
}
|
|
|
|
String f_intl_error_name(int64_t error_code) {
|
|
return String(u_errorName((UErrorCode)error_code), AttachLiteral);
|
|
}
|
|
|
|
bool f_intl_is_failure(int64_t error_code) {
|
|
if (U_FAILURE((UErrorCode)error_code)) return true;
|
|
return false;
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
const int64_t q_Collator$$SORT_REGULAR = 0;
|
|
const int64_t q_Collator$$SORT_STRING = 1;
|
|
const int64_t q_Collator$$SORT_NUMERIC = 2;
|
|
const int64_t q_Collator$$FRENCH_COLLATION = UCOL_FRENCH_COLLATION;
|
|
const int64_t q_Collator$$ALTERNATE_HANDLING = UCOL_ALTERNATE_HANDLING;
|
|
const int64_t q_Collator$$CASE_FIRST = UCOL_CASE_FIRST;
|
|
const int64_t q_Collator$$CASE_LEVEL = UCOL_CASE_LEVEL;
|
|
const int64_t q_Collator$$NORMALIZATION_MODE = UCOL_NORMALIZATION_MODE;
|
|
const int64_t q_Collator$$STRENGTH = UCOL_STRENGTH;
|
|
const int64_t q_Collator$$HIRAGANA_QUATERNARY_MODE = UCOL_HIRAGANA_QUATERNARY_MODE;
|
|
const int64_t q_Collator$$NUMERIC_COLLATION = UCOL_NUMERIC_COLLATION;
|
|
const int64_t q_Collator$$DEFAULT_VALUE = UCOL_DEFAULT;
|
|
const int64_t q_Collator$$PRIMARY = UCOL_PRIMARY;
|
|
const int64_t q_Collator$$SECONDARY = UCOL_SECONDARY;
|
|
const int64_t q_Collator$$TERTIARY = UCOL_TERTIARY;
|
|
const int64_t q_Collator$$DEFAULT_STRENGTH = UCOL_DEFAULT_STRENGTH;
|
|
const int64_t q_Collator$$QUATERNARY = UCOL_QUATERNARY;
|
|
const int64_t q_Collator$$IDENTICAL = UCOL_IDENTICAL;
|
|
const int64_t q_Collator$$OFF = UCOL_OFF;
|
|
const int64_t q_Collator$$ON = UCOL_ON;
|
|
const int64_t q_Collator$$SHIFTED = UCOL_SHIFTED;
|
|
const int64_t q_Collator$$NON_IGNORABLE = UCOL_NON_IGNORABLE;
|
|
const int64_t q_Collator$$LOWER_FIRST = UCOL_LOWER_FIRST;
|
|
const int64_t q_Collator$$UPPER_FIRST = UCOL_UPPER_FIRST;
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
c_Collator::c_Collator(Class* cb) :
|
|
ExtObjectData(cb), m_locale(), m_ucoll(NULL), m_errcode() {
|
|
}
|
|
|
|
c_Collator::~c_Collator() {
|
|
if (m_ucoll) {
|
|
ucol_close(m_ucoll);
|
|
m_ucoll = NULL;
|
|
}
|
|
}
|
|
|
|
void c_Collator::t___construct(CStrRef locale) {
|
|
if (m_ucoll) {
|
|
ucol_close(m_ucoll);
|
|
m_ucoll = NULL;
|
|
}
|
|
m_errcode.clear();
|
|
if (!locale.empty()) {
|
|
m_locale = locale;
|
|
m_ucoll = ucol_open(locale.data(), &(m_errcode.code));
|
|
if (!U_FAILURE(m_errcode.code)) {
|
|
// If the specified locale opened successfully, return
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
return;
|
|
}
|
|
}
|
|
// If the empty string was given or if the specified locale did
|
|
// not open successfully, so fall back to using the default locale
|
|
m_errcode.code = U_USING_FALLBACK_WARNING;
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
if (m_ucoll) {
|
|
ucol_close(m_ucoll);
|
|
m_ucoll = NULL;
|
|
}
|
|
UErrorCode errcode = U_ZERO_ERROR;
|
|
m_locale = String(uloc_getDefault(), CopyString);
|
|
m_ucoll = ucol_open(m_locale.data(), &errcode);
|
|
if (U_FAILURE(errcode)) {
|
|
m_errcode.code = errcode;
|
|
m_errcode.custom_error_message =
|
|
"collator_create: unable to open ICU collator";
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
|
|
if (m_ucoll) {
|
|
ucol_close(m_ucoll);
|
|
m_ucoll = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
bool c_Collator::t_asort(VRefParam arr,
|
|
int64_t sort_flag /* = q_Collator$$SORT_REGULAR */) {
|
|
if (!arr.isArray()) {
|
|
throw_bad_array_exception();
|
|
return false;
|
|
}
|
|
if (!m_ucoll) {
|
|
raise_warning("asort called on uninitialized Collator object");
|
|
return false;
|
|
}
|
|
m_errcode.clear();
|
|
bool ret = collator_asort(arr, sort_flag, true, m_ucoll, &m_errcode);
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
return false;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
Variant c_Collator::t_compare(CStrRef str1, CStrRef str2) {
|
|
if (!m_ucoll) {
|
|
raise_warning("compare called on uninitialized Collator object");
|
|
return 0;
|
|
}
|
|
UChar* ustr1 = NULL;
|
|
UChar* ustr2 = NULL;
|
|
int ustr1_len = 0;
|
|
int ustr2_len = 0;
|
|
m_errcode.clear();
|
|
intl_convert_utf8_to_utf16(&ustr1, &ustr1_len,
|
|
str1.data(), str1.length(),
|
|
&(m_errcode.code));
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
free(ustr1);
|
|
return false;
|
|
}
|
|
intl_convert_utf8_to_utf16(&ustr2, &ustr2_len,
|
|
str2.data(), str2.length(),
|
|
&(m_errcode.code));
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
free(ustr1);
|
|
free(ustr2);
|
|
return false;
|
|
}
|
|
int64_t ret = ucol_strcoll(m_ucoll, ustr1, ustr1_len, ustr2, ustr2_len);
|
|
free(ustr1);
|
|
free(ustr2);
|
|
return ret;
|
|
}
|
|
|
|
Variant c_Collator::ti_create(CStrRef locale) {
|
|
p_Collator c(NEWOBJ(c_Collator)());
|
|
c.get()->t___construct(locale);
|
|
return c;
|
|
}
|
|
|
|
int64_t c_Collator::t_getattribute(int64_t attr) {
|
|
if (!m_ucoll) {
|
|
raise_warning("getattribute called on uninitialized Collator object");
|
|
return 0;
|
|
}
|
|
m_errcode.clear();
|
|
int64_t ret = (int64_t)ucol_getAttribute(m_ucoll, (UColAttribute)attr,
|
|
&(m_errcode.code));
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
m_errcode.custom_error_message = "Error getting attribute value";
|
|
s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
|
|
return 0;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int64_t c_Collator::t_geterrorcode() {
|
|
return m_errcode.code;
|
|
}
|
|
|
|
String c_Collator::t_geterrormessage() {
|
|
return String(u_errorName(m_errcode.code), AttachLiteral);
|
|
}
|
|
|
|
String c_Collator::t_getlocale(int64_t type /* = 0 */) {
|
|
if (!m_ucoll) {
|
|
raise_warning("getlocale called on uninitialized Collator object");
|
|
return "";
|
|
}
|
|
m_errcode.clear();
|
|
String ret(
|
|
(char*)ucol_getLocaleByType(m_ucoll, (ULocDataLocaleType)type,
|
|
&(m_errcode.code)),
|
|
AttachLiteral);
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
m_errcode.custom_error_message = "Error getting locale by type";
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
s_intl_error->m_error.custom_error_message =
|
|
m_errcode.custom_error_message;
|
|
return "";
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int64_t c_Collator::t_getstrength() {
|
|
if (!m_ucoll) {
|
|
raise_warning("getstrength called on uninitialized Collator object");
|
|
return 0;
|
|
}
|
|
return ucol_getStrength(m_ucoll);
|
|
}
|
|
|
|
bool c_Collator::t_setattribute(int64_t attr, int64_t val) {
|
|
if (!m_ucoll) {
|
|
raise_warning("setattribute called on uninitialized Collator object");
|
|
return false;
|
|
}
|
|
m_errcode.clear();
|
|
ucol_setAttribute(m_ucoll, (UColAttribute)attr,
|
|
(UColAttributeValue)val, &(m_errcode.code));
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
m_errcode.custom_error_message = "Error setting attribute value";
|
|
s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool c_Collator::t_setstrength(int64_t strength) {
|
|
if (!m_ucoll) {
|
|
raise_warning("setstrength called on uninitialized Collator object");
|
|
return false;
|
|
}
|
|
ucol_setStrength(m_ucoll, (UCollationStrength)strength);
|
|
return true;
|
|
}
|
|
|
|
typedef struct _collator_sort_key_index {
|
|
char* key; /* pointer to sort key */
|
|
ssize_t valPos; /* position of the original array element */
|
|
} collator_sort_key_index_t;
|
|
|
|
static const int32_t DEF_SORT_KEYS_BUF_SIZE = 1048576;
|
|
static const int32_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576;
|
|
|
|
static const int32_t DEF_SORT_KEYS_INDX_BUF_SIZE = 1048576;
|
|
static const int32_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 1048576;
|
|
|
|
static const int32_t DEF_UTF16_BUF_SIZE = 1024;
|
|
|
|
/* {{{ collator_cmp_sort_keys
|
|
* Compare sort keys
|
|
*/
|
|
static int collator_cmp_sort_keys(const void* p1, const void* p2, const void*) {
|
|
char* key1 = ((collator_sort_key_index_t*)p1)->key;
|
|
char* key2 = ((collator_sort_key_index_t*)p2)->key;
|
|
return strcmp( key1, key2 );
|
|
}
|
|
|
|
bool c_Collator::t_sortwithsortkeys(VRefParam arr) {
|
|
char* sortKeyBuf = NULL; /* buffer to store sort keys */
|
|
int32_t sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; /* buffer size */
|
|
ptrdiff_t sortKeyBufOffset = 0; /* pos in buffer to store sort key */
|
|
int32_t sortKeyLen = 0; /* the length of currently processing key */
|
|
int32_t bufLeft = 0;
|
|
int32_t bufIncrement = 0;
|
|
|
|
/* buffer to store 'indexes' which will be passed to 'qsort' */
|
|
collator_sort_key_index_t* sortKeyIndxBuf = NULL;
|
|
int32_t sortKeyIndxBufSize = DEF_SORT_KEYS_INDX_BUF_SIZE;
|
|
int32_t sortKeyIndxSize = sizeof( collator_sort_key_index_t );
|
|
|
|
int32_t sortKeyCount = 0;
|
|
int32_t j = 0;
|
|
|
|
/* tmp buffer to hold current processing string in utf-16 */
|
|
UChar* utf16_buf = NULL;
|
|
/* the length of utf16_buf */
|
|
int utf16_buf_size = DEF_UTF16_BUF_SIZE;
|
|
/* length of converted string */
|
|
int utf16_len = 0;
|
|
|
|
m_errcode.clear();
|
|
s_intl_error->m_error.clear();
|
|
|
|
/*
|
|
* Sort specified array.
|
|
*/
|
|
if (!arr.isArray()) {
|
|
return true;
|
|
}
|
|
Array hash = arr.toArray();
|
|
if (hash.size() == 0) {
|
|
return true;
|
|
}
|
|
|
|
/* Create bufers */
|
|
sortKeyBuf = (char*)calloc(sortKeyBufSize, sizeof(char));
|
|
sortKeyIndxBuf = (collator_sort_key_index_t*)malloc(sortKeyIndxBufSize);
|
|
utf16_buf = (UChar*)malloc(utf16_buf_size);
|
|
|
|
/* Iterate through input hash and create a sort key for each value. */
|
|
for (ssize_t pos = hash->iter_begin(); pos != ArrayData::invalid_index;
|
|
pos = hash->iter_advance(pos)) {
|
|
/* Convert current hash item from UTF-8 to UTF-16LE and save the result
|
|
* to utf16_buf. */
|
|
utf16_len = utf16_buf_size;
|
|
/* Process string values only. */
|
|
Variant val(hash->getValue(pos));
|
|
if (val.isString()) {
|
|
String str = val.toString();
|
|
intl_convert_utf8_to_utf16(&utf16_buf, &utf16_len, str.data(),
|
|
str.size(), &(m_errcode.code));
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
m_errcode.custom_error_message = "Sort with sort keys failed";
|
|
if (utf16_buf) {
|
|
free(utf16_buf);
|
|
}
|
|
free(sortKeyIndxBuf);
|
|
free(sortKeyBuf);
|
|
return false;
|
|
}
|
|
} else {
|
|
/* Set empty string */
|
|
utf16_len = 0;
|
|
utf16_buf[utf16_len] = 0;
|
|
}
|
|
|
|
if ((utf16_len + 1) > utf16_buf_size) {
|
|
utf16_buf_size = utf16_len + 1;
|
|
}
|
|
|
|
/* Get sort key, reallocating the buffer if needed. */
|
|
bufLeft = sortKeyBufSize - sortKeyBufOffset;
|
|
|
|
sortKeyLen = ucol_getSortKey(m_ucoll,
|
|
utf16_buf,
|
|
utf16_len,
|
|
(uint8_t*)sortKeyBuf + sortKeyBufOffset,
|
|
bufLeft);
|
|
|
|
/* check for sortKeyBuf overflow, increasing its size of the buffer if
|
|
needed */
|
|
if (sortKeyLen > bufLeft) {
|
|
bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ?
|
|
sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
|
|
sortKeyBufSize += bufIncrement;
|
|
bufLeft += bufIncrement;
|
|
sortKeyBuf = (char*)realloc(sortKeyBuf, sortKeyBufSize);
|
|
sortKeyLen = ucol_getSortKey(m_ucoll, utf16_buf, utf16_len,
|
|
(uint8_t*)sortKeyBuf + sortKeyBufOffset,
|
|
bufLeft);
|
|
}
|
|
|
|
/* check sortKeyIndxBuf overflow, increasing its size of the buffer if
|
|
needed */
|
|
if ((sortKeyCount + 1) * sortKeyIndxSize > sortKeyIndxBufSize) {
|
|
bufIncrement = (sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT) ?
|
|
sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
|
|
sortKeyIndxBufSize += bufIncrement;
|
|
sortKeyIndxBuf = (collator_sort_key_index_t*)realloc(sortKeyIndxBuf,
|
|
sortKeyIndxBufSize);
|
|
}
|
|
sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset;
|
|
sortKeyIndxBuf[sortKeyCount].valPos = pos;
|
|
sortKeyBufOffset += sortKeyLen;
|
|
++sortKeyCount;
|
|
}
|
|
|
|
/* update ptrs to point to valid keys. */
|
|
for( j = 0; j < sortKeyCount; j++ )
|
|
sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;
|
|
|
|
/* sort it */
|
|
zend_qsort(sortKeyIndxBuf, sortKeyCount, sortKeyIndxSize,
|
|
collator_cmp_sort_keys, NULL);
|
|
|
|
/* for resulting hash we'll assign new hash keys rather then reordering */
|
|
Array sortedHash = Array::Create();
|
|
|
|
for (j = 0; j < sortKeyCount; j++) {
|
|
sortedHash.append(hash->getValue(sortKeyIndxBuf[j].valPos));
|
|
}
|
|
|
|
/* Save sorted hash into return variable. */
|
|
arr = sortedHash;
|
|
|
|
if (utf16_buf)
|
|
free(utf16_buf);
|
|
|
|
free(sortKeyIndxBuf);
|
|
free(sortKeyBuf);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool c_Collator::t_sort(VRefParam arr,
|
|
int64_t sort_flag /* = q_Collator$$SORT_REGULAR */) {
|
|
if (!arr.isArray()) {
|
|
throw_bad_array_exception();
|
|
return false;
|
|
}
|
|
if (!m_ucoll) {
|
|
raise_warning("sort called on uninitialized Collator object");
|
|
return false;
|
|
}
|
|
m_errcode.clear();
|
|
bool ret = collator_sort(arr, sort_flag, true, m_ucoll, &(m_errcode));
|
|
s_intl_error->m_error.clear();
|
|
s_intl_error->m_error.code = m_errcode.code;
|
|
s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
|
|
if (U_FAILURE(m_errcode.code)) {
|
|
return false;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
#define CHECK_COLL(obj) \
|
|
c_Collator *coll = NULL; \
|
|
if (obj.isObject()) { \
|
|
coll = obj.toObject().getTyped<c_Collator>(); \
|
|
} \
|
|
if (!coll) { \
|
|
raise_warning("Expecting collator object"); \
|
|
return false; \
|
|
} \
|
|
|
|
Variant f_collator_asort(CVarRef obj, VRefParam arr,
|
|
int64_t sort_flag /* = q_Collator$$SORT_REGULAR */) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_asort(ref(arr), sort_flag);
|
|
}
|
|
|
|
Variant f_collator_compare(CVarRef obj, CStrRef str1, CStrRef str2) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_compare(str1, str2);
|
|
}
|
|
|
|
Variant f_collator_create(CStrRef locale) {
|
|
return c_Collator::ti_create(locale);
|
|
}
|
|
|
|
Variant f_collator_get_attribute(CVarRef obj, int64_t attr) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_getattribute(attr);
|
|
}
|
|
|
|
Variant f_collator_get_error_code(CVarRef obj) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_geterrorcode();
|
|
}
|
|
|
|
Variant f_collator_get_error_message(CVarRef obj) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_geterrormessage();
|
|
}
|
|
|
|
Variant f_collator_get_locale(CVarRef obj, int64_t type /* = 0 */) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_getlocale(type);
|
|
}
|
|
|
|
Variant f_collator_get_strength(CVarRef obj) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_getstrength();
|
|
}
|
|
|
|
Variant f_collator_set_attribute(CVarRef obj, int64_t attr, int64_t val) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_setattribute(attr, val);
|
|
}
|
|
|
|
Variant f_collator_set_strength(CVarRef obj, int64_t strength) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_setstrength(strength);
|
|
}
|
|
|
|
Variant f_collator_sort_with_sort_keys(CVarRef obj, VRefParam arr) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_sortwithsortkeys(ref(arr));
|
|
}
|
|
|
|
Variant f_collator_sort(CVarRef obj, VRefParam arr,
|
|
int64_t sort_flag /* = q_Collator$$SORT_REGULAR */) {
|
|
CHECK_COLL(obj);
|
|
return coll->t_sort(ref(arr), sort_flag);
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
const int64_t q_Locale$$ACTUAL_LOCALE = 0;
|
|
const int64_t q_Locale$$VALID_LOCALE = 1;
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
c_Locale::c_Locale(Class* cb) : ExtObjectData(cb) {
|
|
}
|
|
|
|
c_Locale::~c_Locale() {
|
|
}
|
|
|
|
void c_Locale::t___construct() {
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
const int64_t q_Normalizer$$NONE = UNORM_NONE;
|
|
const int64_t q_Normalizer$$FORM_D = UNORM_NFD;
|
|
const int64_t q_Normalizer$$NFD = UNORM_NFD;
|
|
const int64_t q_Normalizer$$FORM_KD = UNORM_NFKD;
|
|
const int64_t q_Normalizer$$NFKD = UNORM_NFKD;
|
|
const int64_t q_Normalizer$$FORM_C = UNORM_NFC;
|
|
const int64_t q_Normalizer$$NFC = UNORM_NFC;
|
|
const int64_t q_Normalizer$$FORM_KC = UNORM_NFKC;
|
|
const int64_t q_Normalizer$$NFKC = UNORM_NFKC;
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
c_Normalizer::c_Normalizer(Class* cb) : ExtObjectData(cb) {
|
|
}
|
|
|
|
c_Normalizer::~c_Normalizer() {
|
|
}
|
|
|
|
void c_Normalizer::t___construct() {
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
Variant c_Normalizer::ti_isnormalized(CStrRef input,
|
|
int64_t form /* = q_Normalizer$$FORM_C */) {
|
|
s_intl_error->m_error.clear();
|
|
|
|
switch (form) {
|
|
case UNORM_NFD:
|
|
case UNORM_NFKD:
|
|
case UNORM_NFC:
|
|
case UNORM_NFKC:
|
|
break;
|
|
default:
|
|
s_intl_error->m_error.code = U_ILLEGAL_ARGUMENT_ERROR;
|
|
s_intl_error->m_error.custom_error_message =
|
|
"normalizer_isnormalized: illegal normalization form";
|
|
return uninit_null();
|
|
}
|
|
|
|
/* First convert the string to UTF-16. */
|
|
UChar* uinput = NULL; int uinput_len = 0;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
intl_convert_utf8_to_utf16(&uinput, &uinput_len, input.data(), input.size(),
|
|
&status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
s_intl_error->m_error.code = status;
|
|
s_intl_error->m_error.custom_error_message = "Error converting string to UTF-16.";
|
|
free(uinput);
|
|
return false;
|
|
}
|
|
|
|
/* test string */
|
|
UBool uret = unorm_isNormalizedWithOptions(uinput, uinput_len,
|
|
(UNormalizationMode)form,
|
|
(int32_t)0, &status);
|
|
free(uinput);
|
|
|
|
/* Bail out if an unexpected error occured. */
|
|
if (U_FAILURE(status)) {
|
|
s_intl_error->m_error.code = status;
|
|
s_intl_error->m_error.custom_error_message =
|
|
"Error testing if string is the given normalization form.";
|
|
return false;
|
|
}
|
|
|
|
return uret;
|
|
}
|
|
|
|
Variant c_Normalizer::ti_normalize(CStrRef input,
|
|
int64_t form /* = q_Normalizer$$FORM_C */) {
|
|
s_intl_error->m_error.clear();
|
|
|
|
int expansion_factor = 1;
|
|
switch(form) {
|
|
case UNORM_NONE:
|
|
case UNORM_NFC:
|
|
case UNORM_NFKC:
|
|
break;
|
|
case UNORM_NFD:
|
|
case UNORM_NFKD:
|
|
expansion_factor = 3;
|
|
break;
|
|
default:
|
|
s_intl_error->m_error.code = U_ILLEGAL_ARGUMENT_ERROR;
|
|
s_intl_error->m_error.custom_error_message =
|
|
"normalizer_normalize: illegal normalization form";
|
|
return uninit_null();
|
|
}
|
|
|
|
/* First convert the string to UTF-16. */
|
|
UChar* uinput = NULL; int uinput_len = 0;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
intl_convert_utf8_to_utf16(&uinput, &uinput_len, input.data(), input.size(),
|
|
&status);
|
|
|
|
if (U_FAILURE(status)) {
|
|
s_intl_error->m_error.code = status;
|
|
s_intl_error->m_error.custom_error_message =
|
|
"Error converting string to UTF-16.";
|
|
free(uinput);
|
|
return uninit_null();
|
|
}
|
|
|
|
/* Allocate memory for the destination buffer for normalization */
|
|
int uret_len = uinput_len * expansion_factor;
|
|
UChar *uret_buf = (UChar*)malloc((uret_len + 1) * sizeof(UChar));
|
|
|
|
/* normalize */
|
|
int size_needed = unorm_normalize(uinput, uinput_len,
|
|
(UNormalizationMode)form, (int32_t) 0,
|
|
uret_buf, uret_len, &status);
|
|
|
|
/* Bail out if an unexpected error occured.
|
|
* (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
|
|
* (U_STRING_NOT_TERMINATED_WARNING usually means that the input string
|
|
* is empty).
|
|
*/
|
|
if (U_FAILURE(status) &&
|
|
status != U_BUFFER_OVERFLOW_ERROR &&
|
|
status != U_STRING_NOT_TERMINATED_WARNING) {
|
|
free(uret_buf);
|
|
free(uinput);
|
|
return uninit_null();
|
|
}
|
|
|
|
if (size_needed > uret_len) {
|
|
/* realloc does not seem to work properly - memory is corrupted
|
|
* uret_buf = eurealloc(uret_buf, size_needed + 1); */
|
|
free(uret_buf);
|
|
uret_buf = (UChar*)malloc((size_needed + 1) * sizeof(UChar));
|
|
uret_len = size_needed;
|
|
|
|
status = U_ZERO_ERROR;
|
|
|
|
/* try normalize again */
|
|
size_needed = unorm_normalize( uinput, uinput_len,
|
|
(UNormalizationMode)form, (int32_t) 0,
|
|
uret_buf, uret_len, &status);
|
|
|
|
/* Bail out if an unexpected error occured. */
|
|
if (U_FAILURE(status)) {
|
|
/* Set error messages. */
|
|
s_intl_error->m_error.code = status;
|
|
s_intl_error->m_error.custom_error_message = "Error normalizing string";
|
|
free(uret_buf);
|
|
free(uinput);
|
|
return uninit_null();
|
|
}
|
|
}
|
|
|
|
free(uinput);
|
|
|
|
/* the buffer we actually used */
|
|
uret_len = size_needed;
|
|
|
|
/* Convert normalized string from UTF-16 to UTF-8. */
|
|
char* ret_buf = NULL; int ret_len = 0;
|
|
intl_convert_utf16_to_utf8(&ret_buf, &ret_len, uret_buf, uret_len, &status);
|
|
free(uret_buf);
|
|
if (U_FAILURE(status)) {
|
|
s_intl_error->m_error.code = status;
|
|
s_intl_error->m_error.custom_error_message =
|
|
"normalizer_normalize: error converting normalized text UTF-8";
|
|
return uninit_null();
|
|
}
|
|
|
|
return String(ret_buf, ret_len, AttachString);
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
enum IdnVariant {
|
|
INTL_IDN_VARIANT_2003 = 0,
|
|
INTL_IDN_VARIANT_UTS46
|
|
};
|
|
|
|
enum {
|
|
INTL_IDN_TO_ASCII = 0,
|
|
INTL_IDN_TO_UTF8
|
|
};
|
|
|
|
#ifdef HAVE_46_API
|
|
|
|
static const StaticString s_result("result");
|
|
static const StaticString s_isTransitionalDifferent("isTransitionalDifferent");
|
|
static const StaticString s_errors("errors");
|
|
|
|
static Variant php_intl_idn_to_46(CStrRef domain, int64_t options, IdnVariant idn_variant, VRefParam idna_info, int mode) {
|
|
int32_t converted_capacity;
|
|
char *converted = NULL;
|
|
int32_t converted_len;
|
|
UIDNA *uts46;
|
|
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
// Get UIDNA instance which implements UTS #46.
|
|
uts46 = uidna_openUTS46(options, &status);
|
|
SCOPE_EXIT { uidna_close(uts46); };
|
|
if (U_FAILURE(status)) return false;
|
|
|
|
// Call the appropriate IDN function
|
|
status = U_ZERO_ERROR;
|
|
converted_capacity = 255; // no domain name may exceed this
|
|
String result(converted_capacity, ReserveString); // reserves converted_capacity+1 characters.
|
|
converted = result.mutableSlice().ptr;
|
|
if (mode == INTL_IDN_TO_ASCII) {
|
|
converted_len = uidna_nameToASCII_UTF8(uts46, (char*)domain.data(), domain.size(),
|
|
converted, converted_capacity, &info, &status);
|
|
} else {
|
|
converted_len = uidna_nameToUnicodeUTF8(uts46, (char*)domain.data(), domain.size(),
|
|
converted, converted_capacity, &info, &status);
|
|
}
|
|
if (U_FAILURE(status) || converted_len > converted_capacity) return false;
|
|
if (info.errors == 0) {
|
|
result.setSize(converted_len);
|
|
} else {
|
|
result.setSize(0);
|
|
}
|
|
|
|
// Set up the array returned in idna_info.
|
|
ArrayInit arr(3);
|
|
arr.set(s_result, result);
|
|
arr.set(s_isTransitionalDifferent, info.isTransitionalDifferent);
|
|
arr.set(s_errors, (long)info.errors);
|
|
// As in Zend, the previous value of idn_variant is overwritten, not modified.
|
|
idna_info = arr.create();
|
|
if (info.errors == 0) {
|
|
return result;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
#endif
|
|
|
|
static Variant php_intl_idn_to(CStrRef domain, int64_t options, IdnVariant idn_variant, VRefParam idna_info, int mode) {
|
|
UChar* ustring = NULL;
|
|
int ustring_len = 0;
|
|
UErrorCode status;
|
|
char *converted_utf8 = NULL;
|
|
int32_t converted_utf8_len;
|
|
UChar* converted = NULL;
|
|
int32_t converted_ret_len;
|
|
|
|
if (idn_variant != INTL_IDN_VARIANT_2003) {
|
|
#ifdef HAVE_46_API
|
|
if (idn_variant == INTL_IDN_VARIANT_UTS46) {
|
|
return php_intl_idn_to_46(domain, options, idn_variant, ref(idna_info), mode);
|
|
}
|
|
#endif
|
|
return false;
|
|
}
|
|
|
|
// Convert the string to UTF-16
|
|
status = U_ZERO_ERROR;
|
|
intl_convert_utf8_to_utf16(&ustring, &ustring_len,
|
|
(char*)domain.data(), domain.size(), &status);
|
|
if (U_FAILURE(status)) {
|
|
free(ustring);
|
|
return false;
|
|
}
|
|
|
|
// Call the appropriate IDN function
|
|
int converted_len = (ustring_len > 1) ? ustring_len : 1;
|
|
for (;;) {
|
|
UParseError parse_error;
|
|
status = U_ZERO_ERROR;
|
|
converted = (UChar*)malloc(sizeof(UChar)*converted_len);
|
|
// If the malloc failed, bail out
|
|
if (!converted) {
|
|
free(ustring);
|
|
return false;
|
|
}
|
|
if (mode == INTL_IDN_TO_ASCII) {
|
|
converted_ret_len = uidna_IDNToASCII(ustring,
|
|
ustring_len, converted, converted_len,
|
|
(int32_t)options, &parse_error, &status);
|
|
} else {
|
|
converted_ret_len = uidna_IDNToUnicode(ustring,
|
|
ustring_len, converted, converted_len,
|
|
(int32_t)options, &parse_error, &status);
|
|
}
|
|
if (status != U_BUFFER_OVERFLOW_ERROR)
|
|
break;
|
|
// If we have a buffer overflow error, try again with a larger buffer
|
|
free(converted);
|
|
converted = NULL;
|
|
converted_len = converted_len * 2;
|
|
}
|
|
free(ustring);
|
|
if (U_FAILURE(status)) {
|
|
free(converted);
|
|
return false;
|
|
}
|
|
|
|
// Convert the string back to UTF-8
|
|
status = U_ZERO_ERROR;
|
|
intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len,
|
|
converted, converted_ret_len, &status);
|
|
free(converted);
|
|
if (U_FAILURE(status)) {
|
|
free(converted_utf8);
|
|
return false;
|
|
}
|
|
|
|
// Return the string
|
|
return String(converted_utf8, converted_utf8_len, AttachString);
|
|
}
|
|
|
|
Variant f_idn_to_ascii(CStrRef domain, int64_t options /* = 0 */, int64_t variant /* = 0 */, VRefParam idna_info /* = null */) {
|
|
return php_intl_idn_to(domain, options, (IdnVariant)variant, idna_info, INTL_IDN_TO_ASCII);
|
|
}
|
|
|
|
Variant f_idn_to_unicode(CStrRef domain, int64_t options /* = 0 */, int64_t variant /* = 0 */, VRefParam idna_info /* = null */) {
|
|
return php_intl_idn_to(domain, options, (IdnVariant)variant, idna_info, INTL_IDN_TO_UTF8);
|
|
}
|
|
|
|
Variant f_idn_to_utf8(CStrRef domain, int64_t options /* = 0 */, int64_t variant /* = 0 */, VRefParam idna_info /* = null */) {
|
|
return php_intl_idn_to(domain, options, (IdnVariant)variant, idna_info, INTL_IDN_TO_UTF8);
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
}
|
|
|