c646a30002
Many callsites of Array.set(litstr, ...)
4166 linhas
120 KiB
C++
4166 linhas
120 KiB
C++
/*
|
|
+----------------------------------------------------------------------+
|
|
| HipHop for PHP |
|
|
+----------------------------------------------------------------------+
|
|
| Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
|
|
| Copyright (c) 1997-2010 The PHP Group |
|
|
+----------------------------------------------------------------------+
|
|
| This source file is subject to version 3.01 of the PHP license, |
|
|
| that is bundled with this package in the file LICENSE, and is |
|
|
| available through the world-wide-web at the following url: |
|
|
| http://www.php.net/license/3_01.txt |
|
|
| If you did not receive a copy of the PHP license and are unable to |
|
|
| obtain it through the world-wide-web, please send a note to |
|
|
| license@php.net so we can mail you a copy immediately. |
|
|
+----------------------------------------------------------------------+
|
|
*/
|
|
|
|
#include <runtime/ext/ext_mb.h>
|
|
#include <runtime/base/util/string_buffer.h>
|
|
#include <runtime/base/util/request_local.h>
|
|
#include <runtime/ext/php_unicode.h>
|
|
#include <runtime/ext/unicode_data.h>
|
|
#include <runtime/ext/ext_process.h>
|
|
#include <runtime/base/zend/zend_url.h>
|
|
#include <runtime/base/zend/zend_string.h>
|
|
#include <runtime/base/ini_setting.h>
|
|
|
|
extern "C" {
|
|
#include <mbfl/mbfl_convert.h>
|
|
#include <mbfl/mbfilter.h>
|
|
#include <oniguruma.h>
|
|
}
|
|
|
|
#define php_mb_re_pattern_buffer re_pattern_buffer
|
|
#define php_mb_regex_t regex_t
|
|
#define php_mb_re_registers re_registers
|
|
|
|
extern void mbfl_memory_device_unput(mbfl_memory_device *device);
|
|
|
|
#define PARSE_POST 0
|
|
#define PARSE_GET 1
|
|
#define PARSE_COOKIE 2
|
|
#define PARSE_STRING 3
|
|
#define PARSE_ENV 4
|
|
#define PARSE_SERVER 5
|
|
#define PARSE_SESSION 6
|
|
|
|
namespace HPHP {
|
|
|
|
static class mbstringExtension : public Extension {
|
|
public:
|
|
mbstringExtension() : Extension("mbstring") {}
|
|
|
|
virtual void moduleInit() {
|
|
IniSetting::SetGlobalDefault("mbstring.http_input", "pass");
|
|
IniSetting::SetGlobalDefault("mbstring.http_output", "pass");
|
|
}
|
|
|
|
} s_mbstring_extension;
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// statics
|
|
|
|
#define PHP_MBSTR_STACK_BLOCK_SIZE 32
|
|
|
|
typedef struct _php_mb_nls_ident_list {
|
|
mbfl_no_language lang;
|
|
mbfl_no_encoding* list;
|
|
int list_size;
|
|
} php_mb_nls_ident_list;
|
|
|
|
static mbfl_no_encoding php_mb_default_identify_list_ja[] = {
|
|
mbfl_no_encoding_ascii,
|
|
mbfl_no_encoding_jis,
|
|
mbfl_no_encoding_utf8,
|
|
mbfl_no_encoding_euc_jp,
|
|
mbfl_no_encoding_sjis
|
|
};
|
|
|
|
static mbfl_no_encoding php_mb_default_identify_list_cn[] = {
|
|
mbfl_no_encoding_ascii,
|
|
mbfl_no_encoding_utf8,
|
|
mbfl_no_encoding_euc_cn,
|
|
mbfl_no_encoding_cp936
|
|
};
|
|
|
|
static mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
|
|
mbfl_no_encoding_ascii,
|
|
mbfl_no_encoding_utf8,
|
|
mbfl_no_encoding_euc_tw,
|
|
mbfl_no_encoding_big5
|
|
};
|
|
|
|
static mbfl_no_encoding php_mb_default_identify_list_kr[] = {
|
|
mbfl_no_encoding_ascii,
|
|
mbfl_no_encoding_utf8,
|
|
mbfl_no_encoding_euc_kr,
|
|
mbfl_no_encoding_uhc
|
|
};
|
|
|
|
static mbfl_no_encoding php_mb_default_identify_list_ru[] = {
|
|
mbfl_no_encoding_ascii,
|
|
mbfl_no_encoding_utf8,
|
|
mbfl_no_encoding_koi8r,
|
|
mbfl_no_encoding_cp1251,
|
|
mbfl_no_encoding_cp866
|
|
};
|
|
|
|
static mbfl_no_encoding php_mb_default_identify_list_hy[] = {
|
|
mbfl_no_encoding_ascii,
|
|
mbfl_no_encoding_utf8,
|
|
mbfl_no_encoding_armscii8
|
|
};
|
|
|
|
static mbfl_no_encoding php_mb_default_identify_list_tr[] = {
|
|
mbfl_no_encoding_ascii,
|
|
mbfl_no_encoding_utf8,
|
|
mbfl_no_encoding_8859_9
|
|
};
|
|
|
|
static mbfl_no_encoding php_mb_default_identify_list_neut[] = {
|
|
mbfl_no_encoding_ascii,
|
|
mbfl_no_encoding_utf8
|
|
};
|
|
|
|
static php_mb_nls_ident_list php_mb_default_identify_list[] = {
|
|
{ mbfl_no_language_japanese, php_mb_default_identify_list_ja,
|
|
sizeof(php_mb_default_identify_list_ja) /
|
|
sizeof(php_mb_default_identify_list_ja[0]) },
|
|
{ mbfl_no_language_korean, php_mb_default_identify_list_kr,
|
|
sizeof(php_mb_default_identify_list_kr) /
|
|
sizeof(php_mb_default_identify_list_kr[0]) },
|
|
{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk,
|
|
sizeof(php_mb_default_identify_list_tw_hk) /
|
|
sizeof(php_mb_default_identify_list_tw_hk[0]) },
|
|
{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn,
|
|
sizeof(php_mb_default_identify_list_cn) /
|
|
sizeof(php_mb_default_identify_list_cn[0]) },
|
|
{ mbfl_no_language_russian, php_mb_default_identify_list_ru,
|
|
sizeof(php_mb_default_identify_list_ru) /
|
|
sizeof(php_mb_default_identify_list_ru[0]) },
|
|
{ mbfl_no_language_armenian, php_mb_default_identify_list_hy,
|
|
sizeof(php_mb_default_identify_list_hy) /
|
|
sizeof(php_mb_default_identify_list_hy[0]) },
|
|
{ mbfl_no_language_turkish, php_mb_default_identify_list_tr,
|
|
sizeof(php_mb_default_identify_list_tr) /
|
|
sizeof(php_mb_default_identify_list_tr[0]) },
|
|
{ mbfl_no_language_neutral, php_mb_default_identify_list_neut,
|
|
sizeof(php_mb_default_identify_list_neut) /
|
|
sizeof(php_mb_default_identify_list_neut[0]) }
|
|
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// globals
|
|
typedef std::map<std::string, php_mb_regex_t *> RegexCache;
|
|
|
|
class MBGlobals : public RequestEventHandler {
|
|
public:
|
|
mbfl_no_language language;
|
|
mbfl_no_language current_language;
|
|
mbfl_no_encoding internal_encoding;
|
|
mbfl_no_encoding current_internal_encoding;
|
|
mbfl_no_encoding http_output_encoding;
|
|
mbfl_no_encoding current_http_output_encoding;
|
|
mbfl_no_encoding http_input_identify;
|
|
mbfl_no_encoding http_input_identify_get;
|
|
mbfl_no_encoding http_input_identify_post;
|
|
mbfl_no_encoding http_input_identify_cookie;
|
|
mbfl_no_encoding http_input_identify_string;
|
|
mbfl_no_encoding *http_input_list;
|
|
int http_input_list_size;
|
|
mbfl_no_encoding *detect_order_list;
|
|
int detect_order_list_size;
|
|
mbfl_no_encoding *current_detect_order_list;
|
|
int current_detect_order_list_size;
|
|
mbfl_no_encoding *default_detect_order_list;
|
|
int default_detect_order_list_size;
|
|
int filter_illegal_mode;
|
|
int filter_illegal_substchar;
|
|
int current_filter_illegal_mode;
|
|
int current_filter_illegal_substchar;
|
|
bool encoding_translation;
|
|
long strict_detection;
|
|
long illegalchars;
|
|
mbfl_buffer_converter *outconv;
|
|
|
|
OnigEncoding default_mbctype;
|
|
OnigEncoding current_mbctype;
|
|
RegexCache ht_rc;
|
|
std::string search_str;
|
|
unsigned int search_pos;
|
|
php_mb_regex_t *search_re;
|
|
OnigRegion *search_regs;
|
|
OnigOptionType regex_default_options;
|
|
OnigSyntaxType *regex_default_syntax;
|
|
|
|
MBGlobals() :
|
|
language(mbfl_no_language_uni),
|
|
current_language(mbfl_no_language_uni),
|
|
internal_encoding(mbfl_no_encoding_utf8),
|
|
current_internal_encoding(mbfl_no_encoding_utf8),
|
|
http_output_encoding(mbfl_no_encoding_pass),
|
|
current_http_output_encoding(mbfl_no_encoding_pass),
|
|
http_input_identify(mbfl_no_encoding_invalid),
|
|
http_input_identify_get(mbfl_no_encoding_invalid),
|
|
http_input_identify_post(mbfl_no_encoding_invalid),
|
|
http_input_identify_cookie(mbfl_no_encoding_invalid),
|
|
http_input_identify_string(mbfl_no_encoding_invalid),
|
|
http_input_list(NULL),
|
|
http_input_list_size(0),
|
|
detect_order_list(NULL),
|
|
detect_order_list_size(0),
|
|
current_detect_order_list(NULL),
|
|
current_detect_order_list_size(0),
|
|
default_detect_order_list
|
|
((mbfl_no_encoding *)php_mb_default_identify_list_neut),
|
|
default_detect_order_list_size
|
|
(sizeof(php_mb_default_identify_list_neut) /
|
|
sizeof(php_mb_default_identify_list_neut[0])),
|
|
filter_illegal_mode(MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR),
|
|
filter_illegal_substchar(0x3f), /* '?' */
|
|
current_filter_illegal_mode(MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR),
|
|
current_filter_illegal_substchar(0x3f), /* '?' */
|
|
encoding_translation(0),
|
|
strict_detection(0),
|
|
illegalchars(0),
|
|
outconv(NULL),
|
|
default_mbctype(ONIG_ENCODING_EUC_JP),
|
|
current_mbctype(ONIG_ENCODING_EUC_JP),
|
|
search_pos(0),
|
|
search_re((php_mb_regex_t*)NULL),
|
|
search_regs((OnigRegion*)NULL),
|
|
regex_default_options(ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE),
|
|
regex_default_syntax(ONIG_SYNTAX_RUBY) {
|
|
}
|
|
|
|
virtual void requestInit() {
|
|
current_language = language;
|
|
current_internal_encoding = internal_encoding;
|
|
current_http_output_encoding = http_output_encoding;
|
|
current_filter_illegal_mode = filter_illegal_mode;
|
|
current_filter_illegal_substchar = filter_illegal_substchar;
|
|
if (!encoding_translation) {
|
|
illegalchars = 0;
|
|
}
|
|
|
|
mbfl_no_encoding *list=NULL, *entry;
|
|
int n = 0;
|
|
if (detect_order_list) {
|
|
list = detect_order_list;
|
|
n = detect_order_list_size;
|
|
}
|
|
if (n <= 0) {
|
|
list = default_detect_order_list;
|
|
n = default_detect_order_list_size;
|
|
}
|
|
entry = (mbfl_no_encoding *)malloc(n * sizeof(int));
|
|
current_detect_order_list = entry;
|
|
current_detect_order_list_size = n;
|
|
while (n > 0) {
|
|
*entry++ = *list++;
|
|
n--;
|
|
}
|
|
}
|
|
|
|
virtual void requestShutdown() {
|
|
if (current_detect_order_list != NULL) {
|
|
free(current_detect_order_list);
|
|
current_detect_order_list = NULL;
|
|
current_detect_order_list_size = 0;
|
|
}
|
|
if (outconv != NULL) {
|
|
illegalchars += mbfl_buffer_illegalchars(outconv);
|
|
mbfl_buffer_converter_delete(outconv);
|
|
outconv = NULL;
|
|
}
|
|
|
|
/* clear http input identification. */
|
|
http_input_identify = mbfl_no_encoding_invalid;
|
|
http_input_identify_post = mbfl_no_encoding_invalid;
|
|
http_input_identify_get = mbfl_no_encoding_invalid;
|
|
http_input_identify_cookie = mbfl_no_encoding_invalid;
|
|
http_input_identify_string = mbfl_no_encoding_invalid;
|
|
|
|
current_mbctype = default_mbctype;
|
|
|
|
search_str.clear();
|
|
search_pos = 0;
|
|
|
|
if (search_regs != NULL) {
|
|
onig_region_free(search_regs, 1);
|
|
search_regs = (OnigRegion *)NULL;
|
|
}
|
|
for (RegexCache::const_iterator it = ht_rc.begin(); it != ht_rc.end();
|
|
++it) {
|
|
onig_free(it->second);
|
|
}
|
|
ht_rc.clear();
|
|
}
|
|
};
|
|
IMPLEMENT_STATIC_REQUEST_LOCAL(MBGlobals, s_mb_globals);
|
|
#define MBSTRG(name) s_mb_globals->name
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// unicode functions
|
|
|
|
/*
|
|
* A simple array of 32-bit masks for lookup.
|
|
*/
|
|
static unsigned long masks32[32] = {
|
|
0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020,
|
|
0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800,
|
|
0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000,
|
|
0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000,
|
|
0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
|
|
0x40000000, 0x80000000
|
|
};
|
|
|
|
static int prop_lookup(unsigned long code, unsigned long n) {
|
|
long l, r, m;
|
|
|
|
/*
|
|
* There is an extra node on the end of the offsets to allow this routine
|
|
* to work right. If the index is 0xffff, then there are no nodes for the
|
|
* property.
|
|
*/
|
|
if ((l = _ucprop_offsets[n]) == 0xffff)
|
|
return 0;
|
|
|
|
/*
|
|
* Locate the next offset that is not 0xffff. The sentinel at the end of
|
|
* the array is the max index value.
|
|
*/
|
|
for (m = 1; n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++)
|
|
;
|
|
|
|
r = _ucprop_offsets[n + m] - 1;
|
|
|
|
while (l <= r) {
|
|
/*
|
|
* Determine a "mid" point and adjust to make sure the mid point is at
|
|
* the beginning of a range pair.
|
|
*/
|
|
m = (l + r) >> 1;
|
|
m -= (m & 1);
|
|
if (code > _ucprop_ranges[m + 1])
|
|
l = m + 2;
|
|
else if (code < _ucprop_ranges[m])
|
|
r = m - 2;
|
|
else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1])
|
|
return 1;
|
|
}
|
|
return 0;
|
|
|
|
}
|
|
|
|
static int php_unicode_is_prop(unsigned long code, unsigned long mask1,
|
|
unsigned long mask2) {
|
|
unsigned long i;
|
|
|
|
if (mask1 == 0 && mask2 == 0)
|
|
return 0;
|
|
|
|
for (i = 0; mask1 && i < 32; i++) {
|
|
if ((mask1 & masks32[i]) && prop_lookup(code, i))
|
|
return 1;
|
|
}
|
|
|
|
for (i = 32; mask2 && i < _ucprop_size; i++) {
|
|
if ((mask2 & masks32[i & 31]) && prop_lookup(code, i))
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static unsigned long case_lookup(unsigned long code, long l, long r,
|
|
int field) {
|
|
long m;
|
|
|
|
/*
|
|
* Do the binary search.
|
|
*/
|
|
while (l <= r) {
|
|
/*
|
|
* Determine a "mid" point and adjust to make sure the mid point is at
|
|
* the beginning of a case mapping triple.
|
|
*/
|
|
m = (l + r) >> 1;
|
|
m -= (m % 3);
|
|
if (code > _uccase_map[m])
|
|
l = m + 3;
|
|
else if (code < _uccase_map[m])
|
|
r = m - 3;
|
|
else if (code == _uccase_map[m])
|
|
return _uccase_map[m + field];
|
|
}
|
|
|
|
return code;
|
|
}
|
|
|
|
static unsigned long php_turkish_toupper(unsigned long code, long l, long r,
|
|
int field) {
|
|
if (code == 0x0069L) {
|
|
return 0x0130L;
|
|
}
|
|
return case_lookup(code, l, r, field);
|
|
}
|
|
|
|
static unsigned long php_turkish_tolower(unsigned long code, long l, long r,
|
|
int field) {
|
|
if (code == 0x0049L) {
|
|
return 0x0131L;
|
|
}
|
|
return case_lookup(code, l, r, field);
|
|
}
|
|
|
|
static unsigned long php_unicode_toupper(unsigned long code,
|
|
enum mbfl_no_encoding enc) {
|
|
int field;
|
|
long l, r;
|
|
|
|
if (php_unicode_is_upper(code))
|
|
return code;
|
|
|
|
if (php_unicode_is_lower(code)) {
|
|
/*
|
|
* The character is lower case.
|
|
*/
|
|
field = 2;
|
|
l = _uccase_len[0];
|
|
r = (l + _uccase_len[1]) - 3;
|
|
|
|
if (enc == mbfl_no_encoding_8859_9) {
|
|
return php_turkish_toupper(code, l, r, field);
|
|
}
|
|
|
|
} else {
|
|
/*
|
|
* The character is title case.
|
|
*/
|
|
field = 1;
|
|
l = _uccase_len[0] + _uccase_len[1];
|
|
r = _uccase_size - 3;
|
|
}
|
|
return case_lookup(code, l, r, field);
|
|
}
|
|
|
|
static unsigned long php_unicode_tolower(unsigned long code,
|
|
enum mbfl_no_encoding enc) {
|
|
int field;
|
|
long l, r;
|
|
|
|
if (php_unicode_is_lower(code))
|
|
return code;
|
|
|
|
if (php_unicode_is_upper(code)) {
|
|
/*
|
|
* The character is upper case.
|
|
*/
|
|
field = 1;
|
|
l = 0;
|
|
r = _uccase_len[0] - 3;
|
|
|
|
if (enc == mbfl_no_encoding_8859_9) {
|
|
return php_turkish_tolower(code, l, r, field);
|
|
}
|
|
|
|
} else {
|
|
/*
|
|
* The character is title case.
|
|
*/
|
|
field = 2;
|
|
l = _uccase_len[0] + _uccase_len[1];
|
|
r = _uccase_size - 3;
|
|
}
|
|
return case_lookup(code, l, r, field);
|
|
}
|
|
|
|
static unsigned long php_unicode_totitle(unsigned long code,
|
|
enum mbfl_no_encoding enc) {
|
|
int field;
|
|
long l, r;
|
|
|
|
if (php_unicode_is_title(code))
|
|
return code;
|
|
|
|
/*
|
|
* The offset will always be the same for converting to title case.
|
|
*/
|
|
field = 2;
|
|
|
|
if (php_unicode_is_upper(code)) {
|
|
/*
|
|
* The character is upper case.
|
|
*/
|
|
l = 0;
|
|
r = _uccase_len[0] - 3;
|
|
} else {
|
|
/*
|
|
* The character is lower case.
|
|
*/
|
|
l = _uccase_len[0];
|
|
r = (l + _uccase_len[1]) - 3;
|
|
}
|
|
return case_lookup(code, l, r, field);
|
|
|
|
}
|
|
|
|
#define BE_ARY_TO_UINT32(ptr) (\
|
|
((unsigned char*)(ptr))[0]<<24 |\
|
|
((unsigned char*)(ptr))[1]<<16 |\
|
|
((unsigned char*)(ptr))[2]<< 8 |\
|
|
((unsigned char*)(ptr))[3] )
|
|
|
|
#define UINT32_TO_BE_ARY(ptr,val) { \
|
|
unsigned int v = val; \
|
|
((unsigned char*)(ptr))[0] = (v>>24) & 0xff,\
|
|
((unsigned char*)(ptr))[1] = (v>>16) & 0xff,\
|
|
((unsigned char*)(ptr))[2] = (v>> 8) & 0xff,\
|
|
((unsigned char*)(ptr))[3] = (v ) & 0xff;\
|
|
}
|
|
|
|
/**
|
|
* Return 0 if input contains any illegal encoding, otherwise 1.
|
|
* Even if any illegal encoding is detected the result may contain a list
|
|
* of parsed encodings.
|
|
*/
|
|
static int php_mb_parse_encoding_list(const char *value, int value_length,
|
|
mbfl_no_encoding **return_list,
|
|
int *return_size, int persistent) {
|
|
int n, l, size, bauto, ret = 1;
|
|
char *p, *p1, *p2, *endp, *tmpstr;
|
|
mbfl_no_encoding no_encoding;
|
|
mbfl_no_encoding *src, *entry, *list;
|
|
|
|
list = NULL;
|
|
if (value == NULL || value_length <= 0) {
|
|
if (return_list) {
|
|
*return_list = NULL;
|
|
}
|
|
if (return_size) {
|
|
*return_size = 0;
|
|
}
|
|
return 0;
|
|
} else {
|
|
mbfl_no_encoding *identify_list;
|
|
int identify_list_size;
|
|
|
|
identify_list = MBSTRG(default_detect_order_list);
|
|
identify_list_size = MBSTRG(default_detect_order_list_size);
|
|
|
|
/* copy the value string for work */
|
|
if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
|
|
tmpstr = (char *)strndup(value+1, value_length-2);
|
|
value_length -= 2;
|
|
}
|
|
else
|
|
tmpstr = (char *)strndup(value, value_length);
|
|
if (tmpstr == NULL) {
|
|
return 0;
|
|
}
|
|
/* count the number of listed encoding names */
|
|
endp = tmpstr + value_length;
|
|
n = 1;
|
|
p1 = tmpstr;
|
|
while ((p2 = (char*)string_memnstr(p1, ",", 1, endp)) != NULL) {
|
|
p1 = p2 + 1;
|
|
n++;
|
|
}
|
|
size = n + identify_list_size;
|
|
/* make list */
|
|
list = (mbfl_no_encoding *)calloc(size, sizeof(int));
|
|
if (list != NULL) {
|
|
entry = list;
|
|
n = 0;
|
|
bauto = 0;
|
|
p1 = tmpstr;
|
|
do {
|
|
p2 = p = (char*)string_memnstr(p1, ",", 1, endp);
|
|
if (p == NULL) {
|
|
p = endp;
|
|
}
|
|
*p = '\0';
|
|
/* trim spaces */
|
|
while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
|
|
p1++;
|
|
}
|
|
p--;
|
|
while (p > p1 && (*p == ' ' || *p == '\t')) {
|
|
*p = '\0';
|
|
p--;
|
|
}
|
|
/* convert to the encoding number and check encoding */
|
|
if (strcasecmp(p1, "auto") == 0) {
|
|
if (!bauto) {
|
|
bauto = 1;
|
|
l = identify_list_size;
|
|
src = identify_list;
|
|
while (l > 0) {
|
|
*entry++ = *src++;
|
|
l--;
|
|
n++;
|
|
}
|
|
}
|
|
} else {
|
|
no_encoding = mbfl_name2no_encoding(p1);
|
|
if (no_encoding != mbfl_no_encoding_invalid) {
|
|
*entry++ = no_encoding;
|
|
n++;
|
|
} else {
|
|
ret = 0;
|
|
}
|
|
}
|
|
p1 = p2 + 1;
|
|
} while (n < size && p2 != NULL);
|
|
if (n > 0) {
|
|
if (return_list) {
|
|
*return_list = list;
|
|
} else {
|
|
free(list);
|
|
}
|
|
} else {
|
|
free(list);
|
|
if (return_list) {
|
|
*return_list = NULL;
|
|
}
|
|
ret = 0;
|
|
}
|
|
if (return_size) {
|
|
*return_size = n;
|
|
}
|
|
} else {
|
|
if (return_list) {
|
|
*return_list = NULL;
|
|
}
|
|
if (return_size) {
|
|
*return_size = 0;
|
|
}
|
|
ret = 0;
|
|
}
|
|
free(tmpstr);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static char *php_mb_convert_encoding(const char *input, size_t length,
|
|
const char *_to_encoding,
|
|
const char *_from_encodings,
|
|
unsigned int *output_len) {
|
|
mbfl_string string, result, *ret;
|
|
mbfl_no_encoding from_encoding, to_encoding;
|
|
mbfl_buffer_converter *convd;
|
|
int size;
|
|
mbfl_no_encoding *list;
|
|
char *output = NULL;
|
|
|
|
if (output_len) {
|
|
*output_len = 0;
|
|
}
|
|
if (!input) {
|
|
return NULL;
|
|
}
|
|
/* new encoding */
|
|
if (_to_encoding && strlen(_to_encoding)) {
|
|
to_encoding = mbfl_name2no_encoding(_to_encoding);
|
|
if (to_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", _to_encoding);
|
|
return NULL;
|
|
}
|
|
} else {
|
|
to_encoding = MBSTRG(current_internal_encoding);
|
|
}
|
|
|
|
/* initialize string */
|
|
mbfl_string_init(&string);
|
|
mbfl_string_init(&result);
|
|
from_encoding = MBSTRG(current_internal_encoding);
|
|
string.no_encoding = from_encoding;
|
|
string.no_language = MBSTRG(current_language);
|
|
string.val = (unsigned char *)input;
|
|
string.len = length;
|
|
|
|
/* pre-conversion encoding */
|
|
if (_from_encodings) {
|
|
list = NULL;
|
|
size = 0;
|
|
php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings),
|
|
&list, &size, 0);
|
|
if (size == 1) {
|
|
from_encoding = *list;
|
|
string.no_encoding = from_encoding;
|
|
} else if (size > 1) {
|
|
/* auto detect */
|
|
from_encoding = mbfl_identify_encoding_no(&string, list, size,
|
|
MBSTRG(strict_detection));
|
|
if (from_encoding != mbfl_no_encoding_invalid) {
|
|
string.no_encoding = from_encoding;
|
|
} else {
|
|
raise_warning("Unable to detect character encoding");
|
|
from_encoding = mbfl_no_encoding_pass;
|
|
to_encoding = from_encoding;
|
|
string.no_encoding = from_encoding;
|
|
}
|
|
} else {
|
|
raise_warning("Illegal character encoding specified");
|
|
}
|
|
if (list != NULL) {
|
|
free((void *)list);
|
|
}
|
|
}
|
|
|
|
/* initialize converter */
|
|
convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
|
|
if (convd == NULL) {
|
|
raise_warning("Unable to create character encoding converter");
|
|
return NULL;
|
|
}
|
|
mbfl_buffer_converter_illegal_mode
|
|
(convd, MBSTRG(current_filter_illegal_mode));
|
|
mbfl_buffer_converter_illegal_substchar
|
|
(convd, MBSTRG(current_filter_illegal_substchar));
|
|
|
|
/* do it */
|
|
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
|
|
if (ret) {
|
|
if (output_len) {
|
|
*output_len = ret->len;
|
|
}
|
|
output = (char *)ret->val;
|
|
}
|
|
|
|
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
|
|
mbfl_buffer_converter_delete(convd);
|
|
return output;
|
|
}
|
|
|
|
static char *php_unicode_convert_case(int case_mode, const char *srcstr,
|
|
size_t srclen, unsigned int *ret_len,
|
|
const char *src_encoding) {
|
|
char *unicode, *newstr;
|
|
unsigned int unicode_len;
|
|
unsigned char *unicode_ptr;
|
|
size_t i;
|
|
enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding);
|
|
|
|
unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding,
|
|
&unicode_len);
|
|
if (unicode == NULL)
|
|
return NULL;
|
|
|
|
unicode_ptr = (unsigned char *)unicode;
|
|
|
|
switch(case_mode) {
|
|
case PHP_UNICODE_CASE_UPPER:
|
|
for (i = 0; i < unicode_len; i+=4) {
|
|
UINT32_TO_BE_ARY(&unicode_ptr[i],
|
|
php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]),
|
|
_src_encoding));
|
|
}
|
|
break;
|
|
|
|
case PHP_UNICODE_CASE_LOWER:
|
|
for (i = 0; i < unicode_len; i+=4) {
|
|
UINT32_TO_BE_ARY(&unicode_ptr[i],
|
|
php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]),
|
|
_src_encoding));
|
|
}
|
|
break;
|
|
|
|
case PHP_UNICODE_CASE_TITLE:
|
|
{
|
|
int mode = 0;
|
|
|
|
for (i = 0; i < unicode_len; i+=4) {
|
|
int res = php_unicode_is_prop
|
|
(BE_ARY_TO_UINT32(&unicode_ptr[i]),
|
|
UC_MN|UC_ME|UC_CF|UC_LM|UC_SK|UC_LU|UC_LL|UC_LT, 0);
|
|
if (mode) {
|
|
if (res) {
|
|
UINT32_TO_BE_ARY
|
|
(&unicode_ptr[i],
|
|
php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]),
|
|
_src_encoding));
|
|
} else {
|
|
mode = 0;
|
|
}
|
|
} else {
|
|
if (res) {
|
|
mode = 1;
|
|
UINT32_TO_BE_ARY
|
|
(&unicode_ptr[i],
|
|
php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]),
|
|
_src_encoding));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
newstr = php_mb_convert_encoding(unicode, unicode_len, src_encoding,
|
|
"UCS-4BE", ret_len);
|
|
free(unicode);
|
|
return newstr;
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// helpers
|
|
|
|
/**
|
|
* Return 0 if input contains any illegal encoding, otherwise 1.
|
|
* Even if any illegal encoding is detected the result may contain a list
|
|
* of parsed encodings.
|
|
*/
|
|
static int php_mb_parse_encoding_array(CArrRef array,
|
|
mbfl_no_encoding **return_list,
|
|
int *return_size, int persistent) {
|
|
int n, l, size, bauto,ret = 1;
|
|
mbfl_no_encoding no_encoding;
|
|
mbfl_no_encoding *src, *list, *entry;
|
|
|
|
list = NULL;
|
|
mbfl_no_encoding *identify_list = MBSTRG(default_detect_order_list);
|
|
int identify_list_size = MBSTRG(default_detect_order_list_size);
|
|
|
|
size = array.size() + identify_list_size;
|
|
list = (mbfl_no_encoding *)calloc(size, sizeof(int));
|
|
if (list != NULL) {
|
|
entry = list;
|
|
bauto = 0;
|
|
n = 0;
|
|
for (ArrayIter iter(array); iter; ++iter) {
|
|
String hash_entry = iter.second();
|
|
if (strcasecmp(hash_entry.data(), "auto") == 0) {
|
|
if (!bauto) {
|
|
bauto = 1;
|
|
l = identify_list_size;
|
|
src = identify_list;
|
|
while (l > 0) {
|
|
*entry++ = *src++;
|
|
l--;
|
|
n++;
|
|
}
|
|
}
|
|
} else {
|
|
no_encoding = mbfl_name2no_encoding(hash_entry.data());
|
|
if (no_encoding != mbfl_no_encoding_invalid) {
|
|
*entry++ = no_encoding;
|
|
n++;
|
|
} else {
|
|
ret = 0;
|
|
}
|
|
}
|
|
}
|
|
if (n > 0) {
|
|
if (return_list) {
|
|
*return_list = list;
|
|
} else {
|
|
free(list);
|
|
}
|
|
} else {
|
|
free(list);
|
|
if (return_list) {
|
|
*return_list = NULL;
|
|
}
|
|
ret = 0;
|
|
}
|
|
if (return_size) {
|
|
*return_size = n;
|
|
}
|
|
} else {
|
|
if (return_list) {
|
|
*return_list = NULL;
|
|
}
|
|
if (return_size) {
|
|
*return_size = 0;
|
|
}
|
|
ret = 0;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static bool php_mb_parse_encoding(CVarRef encoding,
|
|
mbfl_no_encoding **return_list,
|
|
int *return_size, bool persistent) {
|
|
bool ret;
|
|
if (encoding.is(KindOfArray)) {
|
|
ret = php_mb_parse_encoding_array(encoding.toArray(),
|
|
return_list, return_size,
|
|
persistent ? 1 : 0);
|
|
} else {
|
|
String enc = encoding.toString();
|
|
ret = php_mb_parse_encoding_list(enc.data(), enc.size(),
|
|
return_list, return_size,
|
|
persistent ? 1 : 0);
|
|
}
|
|
if (!ret) {
|
|
if (return_list && *return_list) {
|
|
free(*return_list);
|
|
*return_list = NULL;
|
|
}
|
|
return_size = 0;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static int php_mb_nls_get_default_detect_order_list(mbfl_no_language lang,
|
|
mbfl_no_encoding **plist,
|
|
int* plist_size) {
|
|
size_t i;
|
|
*plist = (mbfl_no_encoding *) php_mb_default_identify_list_neut;
|
|
*plist_size = sizeof(php_mb_default_identify_list_neut) /
|
|
sizeof(php_mb_default_identify_list_neut[0]);
|
|
|
|
for (i = 0; i < sizeof(php_mb_default_identify_list) /
|
|
sizeof(php_mb_default_identify_list[0]); i++) {
|
|
if (php_mb_default_identify_list[i].lang == lang) {
|
|
*plist = php_mb_default_identify_list[i].list;
|
|
*plist_size = php_mb_default_identify_list[i].list_size;
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc) {
|
|
if (enc != NULL) {
|
|
if (enc->flag & MBFL_ENCTYPE_MBCS) {
|
|
if (enc->mblen_table != NULL) {
|
|
if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
|
|
}
|
|
} else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
|
|
return 2;
|
|
} else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
|
|
return 4;
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
static int php_mb_stripos(int mode,
|
|
const char *old_haystack, int old_haystack_len,
|
|
const char *old_needle, int old_needle_len,
|
|
long offset, const char *from_encoding) {
|
|
int n;
|
|
mbfl_string haystack, needle;
|
|
n = -1;
|
|
|
|
mbfl_string_init(&haystack);
|
|
mbfl_string_init(&needle);
|
|
haystack.no_language = MBSTRG(current_language);
|
|
haystack.no_encoding = MBSTRG(current_internal_encoding);
|
|
needle.no_language = MBSTRG(current_language);
|
|
needle.no_encoding = MBSTRG(current_internal_encoding);
|
|
|
|
do {
|
|
haystack.val = (unsigned char *)php_unicode_convert_case
|
|
(PHP_UNICODE_CASE_UPPER, old_haystack, (size_t)old_haystack_len,
|
|
&haystack.len, from_encoding);
|
|
if (!haystack.val) {
|
|
break;
|
|
}
|
|
if (haystack.len <= 0) {
|
|
break;
|
|
}
|
|
|
|
needle.val = (unsigned char *)php_unicode_convert_case
|
|
(PHP_UNICODE_CASE_UPPER, old_needle, (size_t)old_needle_len,
|
|
&needle.len, from_encoding);
|
|
if (!needle.val) {
|
|
break;
|
|
}
|
|
if (needle.len <= 0) {
|
|
break;
|
|
}
|
|
|
|
haystack.no_encoding = needle.no_encoding =
|
|
mbfl_name2no_encoding(from_encoding);
|
|
if (haystack.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", from_encoding);
|
|
break;
|
|
}
|
|
|
|
int haystack_char_len = mbfl_strlen(&haystack);
|
|
if (mode) {
|
|
if ((offset > 0 && offset > haystack_char_len) ||
|
|
(offset < 0 && -offset > haystack_char_len)) {
|
|
raise_warning("Offset is greater than the length of haystack string");
|
|
break;
|
|
}
|
|
} else {
|
|
if (offset < 0 || offset > haystack_char_len) {
|
|
raise_warning("Offset not contained in string.");
|
|
break;
|
|
}
|
|
}
|
|
|
|
n = mbfl_strpos(&haystack, &needle, offset, mode);
|
|
} while(0);
|
|
|
|
if (haystack.val) {
|
|
free(haystack.val);
|
|
}
|
|
if (needle.val) {
|
|
free(needle.val);
|
|
}
|
|
return n;
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
Array f_mb_list_encodings() {
|
|
Array ret;
|
|
int i = 0;
|
|
const mbfl_encoding **encodings = mbfl_get_supported_encodings();
|
|
const mbfl_encoding *encoding;
|
|
while ((encoding = encodings[i++]) != NULL) {
|
|
ret.append(String(encoding->name, CopyString));
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
Variant f_mb_list_encodings_alias_names(CStrRef name /* = null_string */) {
|
|
const mbfl_encoding **encodings;
|
|
const mbfl_encoding *encoding;
|
|
mbfl_no_encoding no_encoding;
|
|
int i, j;
|
|
|
|
Array ret;
|
|
if (name.isNull()) {
|
|
i = 0;
|
|
encodings = mbfl_get_supported_encodings();
|
|
while ((encoding = encodings[i++]) != NULL) {
|
|
Array row;
|
|
if (encoding->aliases != NULL) {
|
|
j = 0;
|
|
while ((*encoding->aliases)[j] != NULL) {
|
|
row.append(String((*encoding->aliases)[j], CopyString));
|
|
j++;
|
|
}
|
|
}
|
|
ret.set(String(encoding->name, CopyString), row);
|
|
}
|
|
} else {
|
|
no_encoding = mbfl_name2no_encoding(name.data());
|
|
if (no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", name.data());
|
|
return false;
|
|
}
|
|
|
|
char *name = (char *)mbfl_no_encoding2name(no_encoding);
|
|
if (name != NULL) {
|
|
i = 0;
|
|
encodings = mbfl_get_supported_encodings();
|
|
while ((encoding = encodings[i++]) != NULL) {
|
|
if (strcmp(encoding->name, name) != 0) continue;
|
|
|
|
if (encoding->aliases != NULL) {
|
|
j = 0;
|
|
while ((*encoding->aliases)[j] != NULL) {
|
|
ret.append(String((*encoding->aliases)[j], CopyString));
|
|
j++;
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
Variant f_mb_list_mime_names(CStrRef name /* = null_string */) {
|
|
const mbfl_encoding **encodings;
|
|
const mbfl_encoding *encoding;
|
|
mbfl_no_encoding no_encoding;
|
|
int i;
|
|
|
|
Array ret;
|
|
if (name.isNull()) {
|
|
i = 0;
|
|
encodings = mbfl_get_supported_encodings();
|
|
while ((encoding = encodings[i++]) != NULL) {
|
|
if (encoding->mime_name != NULL) {
|
|
ret.set(String(encoding->name, CopyString),
|
|
String(encoding->mime_name, CopyString));
|
|
} else{
|
|
ret.set(String(encoding->name, CopyString), "");
|
|
}
|
|
}
|
|
} else {
|
|
no_encoding = mbfl_name2no_encoding(name.data());
|
|
if (no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", name.data());
|
|
return false;
|
|
}
|
|
|
|
char *name = (char *)mbfl_no_encoding2name(no_encoding);
|
|
if (name != NULL) {
|
|
i = 0;
|
|
encodings = mbfl_get_supported_encodings();
|
|
while ((encoding = encodings[i++]) != NULL) {
|
|
if (strcmp(encoding->name, name) != 0) continue;
|
|
if (encoding->mime_name != NULL) {
|
|
return String(encoding->mime_name, CopyString);
|
|
}
|
|
break;
|
|
}
|
|
return "";
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
bool f_mb_check_encoding(CStrRef var /* = null_string */,
|
|
CStrRef encoding /* = null_string */) {
|
|
mbfl_buffer_converter *convd;
|
|
mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding);
|
|
mbfl_string string, result, *ret = NULL;
|
|
long illegalchars = 0;
|
|
|
|
if (var.isNull()) {
|
|
return MBSTRG(illegalchars) == 0;
|
|
}
|
|
|
|
if (!encoding.isNull()) {
|
|
no_encoding = mbfl_name2no_encoding(encoding.data());
|
|
if (no_encoding == mbfl_no_encoding_invalid ||
|
|
no_encoding == mbfl_no_encoding_pass) {
|
|
raise_warning("Invalid encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0);
|
|
if (convd == NULL) {
|
|
raise_warning("Unable to create converter");
|
|
return false;
|
|
}
|
|
mbfl_buffer_converter_illegal_mode
|
|
(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
|
|
mbfl_buffer_converter_illegal_substchar
|
|
(convd, 0);
|
|
|
|
/* initialize string */
|
|
mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding);
|
|
mbfl_string_init(&result);
|
|
|
|
string.val = (unsigned char *)var.data();
|
|
string.len = var.size();
|
|
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
|
|
illegalchars = mbfl_buffer_illegalchars(convd);
|
|
mbfl_buffer_converter_delete(convd);
|
|
|
|
if (ret != NULL) {
|
|
MBSTRG(illegalchars) += illegalchars;
|
|
if (illegalchars == 0 && string.len == ret->len &&
|
|
memcmp((const char *)string.val, (const char *)ret->val,
|
|
string.len) == 0) {
|
|
mbfl_string_clear(&result);
|
|
return true;
|
|
} else {
|
|
mbfl_string_clear(&result);
|
|
return false;
|
|
}
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
Variant f_mb_convert_case(CStrRef str, int mode,
|
|
CStrRef encoding /* = null_string */) {
|
|
const char *enc = NULL;
|
|
if (encoding.empty()) {
|
|
enc = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
|
|
}
|
|
|
|
unsigned int ret_len;
|
|
char *newstr = php_unicode_convert_case(mode, str.data(), str.size(),
|
|
&ret_len, enc);
|
|
if (newstr) {
|
|
return String(newstr, ret_len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_convert_encoding(CStrRef str, CStrRef to_encoding,
|
|
CVarRef from_encoding /* = null_variant */) {
|
|
String encoding = from_encoding.toString();
|
|
if (from_encoding.is(KindOfArray)) {
|
|
StringBuffer _from_encodings;
|
|
Array encs = from_encoding.toArray();
|
|
for (ArrayIter iter(encs); iter; ++iter) {
|
|
if (!_from_encodings.empty()) {
|
|
_from_encodings.append(",");
|
|
}
|
|
_from_encodings.append(iter.second().toString());
|
|
}
|
|
encoding = _from_encodings.detach();
|
|
}
|
|
|
|
unsigned int size;
|
|
char *ret = php_mb_convert_encoding(str.data(), str.size(),
|
|
to_encoding.data(),
|
|
(!encoding.empty() ?
|
|
encoding.data() : NULL),
|
|
&size);
|
|
if (ret != NULL) {
|
|
return String(ret, size, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_convert_kana(CStrRef str, CStrRef option /* = null_string */,
|
|
CStrRef encoding /* = null_string */) {
|
|
mbfl_string string, result, *ret;
|
|
mbfl_string_init(&string);
|
|
string.no_language = MBSTRG(current_language);
|
|
string.no_encoding = MBSTRG(current_internal_encoding);
|
|
string.val = (unsigned char *)str.data();
|
|
string.len = str.size();
|
|
|
|
int opt = 0x900;
|
|
if (!option.empty()) {
|
|
const char *p = option.data();
|
|
int n = option.size();
|
|
int i = 0;
|
|
opt = 0;
|
|
while (i < n) {
|
|
i++;
|
|
switch (*p++) {
|
|
case 'A': opt |= 0x1; break;
|
|
case 'a': opt |= 0x10; break;
|
|
case 'R': opt |= 0x2; break;
|
|
case 'r': opt |= 0x20; break;
|
|
case 'N': opt |= 0x4; break;
|
|
case 'n': opt |= 0x40; break;
|
|
case 'S': opt |= 0x8; break;
|
|
case 's': opt |= 0x80; break;
|
|
case 'K': opt |= 0x100; break;
|
|
case 'k': opt |= 0x1000; break;
|
|
case 'H': opt |= 0x200; break;
|
|
case 'h': opt |= 0x2000; break;
|
|
case 'V': opt |= 0x800; break;
|
|
case 'C': opt |= 0x10000; break;
|
|
case 'c': opt |= 0x20000; break;
|
|
case 'M': opt |= 0x100000; break;
|
|
case 'm': opt |= 0x200000; break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* encoding */
|
|
if (!encoding.empty()) {
|
|
string.no_encoding = mbfl_name2no_encoding(encoding.data());
|
|
if (string.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
ret = mbfl_ja_jp_hantozen(&string, &result, opt);
|
|
if (ret != NULL) {
|
|
return String((const char*)ret->val, ret->len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static bool php_mbfl_encoding_detect(CVarRef var,
|
|
mbfl_encoding_detector *identd,
|
|
mbfl_string *string) {
|
|
if (var.is(KindOfArray) || var.is(KindOfObject)) {
|
|
Array items = var.toArray();
|
|
for (ArrayIter iter(items); iter; ++iter) {
|
|
if (php_mbfl_encoding_detect(iter.second(), identd, string)) {
|
|
return true;
|
|
}
|
|
}
|
|
} else if (var.isString()) {
|
|
String svar = var.toString();
|
|
string->val = (unsigned char *)svar.data();
|
|
string->len = svar.size();
|
|
if (mbfl_encoding_detector_feed(identd, string)) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static Variant php_mbfl_convert(CVarRef var,
|
|
mbfl_buffer_converter *convd,
|
|
mbfl_string *string,
|
|
mbfl_string *result) {
|
|
if (var.is(KindOfArray)) {
|
|
Array ret;
|
|
Array items = var.toArray();
|
|
for (ArrayIter iter(items); iter; ++iter) {
|
|
ret.set(iter.first(),
|
|
php_mbfl_convert(iter.second(), convd, string, result));
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
if (var.is(KindOfObject)) {
|
|
Object obj = var.toObject();
|
|
Array items = var.toArray();
|
|
for (ArrayIter iter(items); iter; ++iter) {
|
|
obj->o_set(iter.first().toString(),
|
|
php_mbfl_convert(iter.second().toString().data(), convd,
|
|
string, result));
|
|
}
|
|
return var; // which still has obj
|
|
}
|
|
|
|
if (var.isString()) {
|
|
String svar = var.toString();
|
|
string->val = (unsigned char *)svar.data();
|
|
string->len = svar.size();
|
|
mbfl_string *ret =
|
|
mbfl_buffer_converter_feed_result(convd, string, result);
|
|
return String((const char*)ret->val, ret->len, AttachString);
|
|
}
|
|
|
|
return var;
|
|
}
|
|
|
|
Variant f_mb_convert_variables(int _argc, CStrRef to_encoding,
|
|
CVarRef from_encoding, VRefParam vars,
|
|
CArrRef _argv /* = null_array */) {
|
|
mbfl_string string, result;
|
|
mbfl_no_encoding _from_encoding, _to_encoding;
|
|
mbfl_encoding_detector *identd;
|
|
mbfl_buffer_converter *convd;
|
|
int elistsz;
|
|
mbfl_no_encoding *elist;
|
|
char *name;
|
|
|
|
/* new encoding */
|
|
_to_encoding = mbfl_name2no_encoding(to_encoding.data());
|
|
if (_to_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", to_encoding.data());
|
|
return false;
|
|
}
|
|
|
|
/* initialize string */
|
|
mbfl_string_init(&string);
|
|
mbfl_string_init(&result);
|
|
_from_encoding = MBSTRG(current_internal_encoding);
|
|
string.no_encoding = _from_encoding;
|
|
string.no_language = MBSTRG(current_language);
|
|
|
|
/* pre-conversion encoding */
|
|
elist = NULL;
|
|
elistsz = 0;
|
|
php_mb_parse_encoding(from_encoding, &elist, &elistsz, false);
|
|
if (elistsz <= 0) {
|
|
_from_encoding = mbfl_no_encoding_pass;
|
|
} else if (elistsz == 1) {
|
|
_from_encoding = *elist;
|
|
} else {
|
|
/* auto detect */
|
|
_from_encoding = mbfl_no_encoding_invalid;
|
|
identd = mbfl_encoding_detector_new(elist, elistsz,
|
|
MBSTRG(strict_detection));
|
|
if (identd != NULL) {
|
|
for (int n = -1; n < _argv.size(); n++) {
|
|
if (php_mbfl_encoding_detect(n < 0 ? (Variant&)vars : _argv[n],
|
|
identd, &string)) {
|
|
break;
|
|
}
|
|
}
|
|
_from_encoding = mbfl_encoding_detector_judge(identd);
|
|
mbfl_encoding_detector_delete(identd);
|
|
}
|
|
|
|
if (_from_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unable to detect encoding");
|
|
_from_encoding = mbfl_no_encoding_pass;
|
|
}
|
|
}
|
|
if (elist != NULL) {
|
|
free((void *)elist);
|
|
}
|
|
|
|
/* create converter */
|
|
convd = NULL;
|
|
if (_from_encoding != mbfl_no_encoding_pass) {
|
|
convd = mbfl_buffer_converter_new(_from_encoding, _to_encoding, 0);
|
|
if (convd == NULL) {
|
|
raise_warning("Unable to create converter");
|
|
return false;
|
|
}
|
|
mbfl_buffer_converter_illegal_mode
|
|
(convd, MBSTRG(current_filter_illegal_mode));
|
|
mbfl_buffer_converter_illegal_substchar
|
|
(convd, MBSTRG(current_filter_illegal_substchar));
|
|
}
|
|
|
|
/* convert */
|
|
if (convd != NULL) {
|
|
vars = php_mbfl_convert(vars, convd, &string, &result);
|
|
for (int n = 0; n < _argv.size(); n++) {
|
|
lval(((Array&)_argv).lval(n)) =
|
|
php_mbfl_convert(_argv[n], convd, &string, &result);
|
|
}
|
|
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
|
|
mbfl_buffer_converter_delete(convd);
|
|
}
|
|
|
|
name = (char *)mbfl_no_encoding2name(_from_encoding);
|
|
if (name != NULL) {
|
|
return String(name, CopyString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_decode_mimeheader(CStrRef str) {
|
|
mbfl_string string, result, *ret;
|
|
|
|
mbfl_string_init(&string);
|
|
string.no_language = MBSTRG(current_language);
|
|
string.no_encoding = MBSTRG(current_internal_encoding);
|
|
string.val = (unsigned char *)str.data();
|
|
string.len = str.size();
|
|
|
|
mbfl_string_init(&result);
|
|
ret = mbfl_mime_header_decode(&string, &result,
|
|
MBSTRG(current_internal_encoding));
|
|
if (ret != NULL) {
|
|
return String((const char*)ret->val, ret->len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static Variant php_mb_numericentity_exec(CStrRef str, CVarRef convmap,
|
|
CStrRef encoding, int type) {
|
|
int mapsize=0;
|
|
mbfl_string string, result, *ret;
|
|
mbfl_no_encoding no_encoding;
|
|
|
|
mbfl_string_init(&string);
|
|
string.no_language = MBSTRG(current_language);
|
|
string.no_encoding = MBSTRG(current_internal_encoding);
|
|
string.val = (unsigned char *)str.data();
|
|
string.len = str.size();
|
|
|
|
/* encoding */
|
|
if (!encoding.empty()) {
|
|
no_encoding = mbfl_name2no_encoding(encoding.data());
|
|
if (no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
} else {
|
|
string.no_encoding = no_encoding;
|
|
}
|
|
}
|
|
|
|
/* conversion map */
|
|
int *iconvmap = NULL;
|
|
if (convmap.is(KindOfArray)) {
|
|
Array convs = convmap.toArray();
|
|
mapsize = convs.size();
|
|
if (mapsize > 0) {
|
|
iconvmap = (int*)malloc(mapsize * sizeof(int));
|
|
int *mapelm = iconvmap;
|
|
for (ArrayIter iter(convs); iter; ++iter) {
|
|
*mapelm++ = iter.second().toInt32();
|
|
}
|
|
}
|
|
}
|
|
if (iconvmap == NULL) {
|
|
return false;
|
|
}
|
|
mapsize /= 4;
|
|
|
|
ret = mbfl_html_numeric_entity(&string, &result, iconvmap, mapsize, type);
|
|
free(iconvmap);
|
|
if (ret != NULL) {
|
|
return String((const char*)ret->val, ret->len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_decode_numericentity(CStrRef str, CVarRef convmap,
|
|
CStrRef encoding /* = null_string */) {
|
|
return php_mb_numericentity_exec(str, convmap, encoding, 1);
|
|
}
|
|
|
|
Variant f_mb_detect_encoding(CStrRef str, CVarRef encoding_list /* = null_variant */,
|
|
CVarRef strict /* = null_variant */) {
|
|
mbfl_string string;
|
|
const char *ret;
|
|
mbfl_no_encoding *elist;
|
|
int size;
|
|
mbfl_no_encoding *list = 0;
|
|
|
|
/* make encoding list */
|
|
list = NULL;
|
|
size = 0;
|
|
php_mb_parse_encoding(encoding_list, &list, &size, false);
|
|
if (size > 0 && list != NULL) {
|
|
elist = list;
|
|
} else {
|
|
elist = MBSTRG(current_detect_order_list);
|
|
size = MBSTRG(current_detect_order_list_size);
|
|
}
|
|
|
|
long nstrict = 0;
|
|
if (!strict.isNull()) {
|
|
nstrict = strict.toInt64();
|
|
} else {
|
|
nstrict = MBSTRG(strict_detection);
|
|
}
|
|
|
|
mbfl_string_init(&string);
|
|
string.no_language = MBSTRG(current_language);
|
|
string.val = (unsigned char *)str.data();
|
|
string.len = str.size();
|
|
ret = mbfl_identify_encoding_name(&string, elist, size, nstrict);
|
|
if (list != NULL) {
|
|
free(list);
|
|
}
|
|
if (ret != NULL) {
|
|
return String(ret, CopyString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_detect_order(CVarRef encoding_list /* = null_variant */) {
|
|
int n, size;
|
|
mbfl_no_encoding *list, *entry;
|
|
|
|
if (encoding_list.isNull()) {
|
|
Array ret;
|
|
entry = MBSTRG(current_detect_order_list);
|
|
n = MBSTRG(current_detect_order_list_size);
|
|
while (n > 0) {
|
|
char *name = (char *)mbfl_no_encoding2name(*entry);
|
|
if (name) {
|
|
ret.append(String(name, CopyString));
|
|
}
|
|
entry++;
|
|
n--;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
list = NULL;
|
|
size = 0;
|
|
if (!php_mb_parse_encoding(encoding_list, &list, &size, false) ||
|
|
list == NULL) {
|
|
return false;
|
|
}
|
|
if (MBSTRG(current_detect_order_list)) {
|
|
free(MBSTRG(current_detect_order_list));
|
|
}
|
|
MBSTRG(current_detect_order_list) = list;
|
|
MBSTRG(current_detect_order_list_size) = size;
|
|
return true;
|
|
}
|
|
|
|
Variant f_mb_encode_mimeheader(CStrRef str, CStrRef charset /* = null_string */,
|
|
CStrRef transfer_encoding /* = null_string */,
|
|
CStrRef linefeed /* = "\r\n" */,
|
|
int indent /* = 0 */) {
|
|
mbfl_no_encoding charsetenc, transenc;
|
|
mbfl_string string, result, *ret;
|
|
|
|
mbfl_string_init(&string);
|
|
string.no_language = MBSTRG(current_language);
|
|
string.no_encoding = MBSTRG(current_internal_encoding);
|
|
string.val = (unsigned char *)str.data();
|
|
string.len = str.size();
|
|
|
|
charsetenc = mbfl_no_encoding_pass;
|
|
transenc = mbfl_no_encoding_base64;
|
|
|
|
if (!charset.empty()) {
|
|
charsetenc = mbfl_name2no_encoding(charset.data());
|
|
if (charsetenc == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", charset.data());
|
|
return false;
|
|
}
|
|
} else {
|
|
const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
|
|
if (lang != NULL) {
|
|
charsetenc = lang->mail_charset;
|
|
transenc = lang->mail_header_encoding;
|
|
}
|
|
}
|
|
|
|
if (!transfer_encoding.empty()) {
|
|
char ch = *transfer_encoding.data();
|
|
if (ch == 'B' || ch == 'b') {
|
|
transenc = mbfl_no_encoding_base64;
|
|
} else if (ch == 'Q' || ch == 'q') {
|
|
transenc = mbfl_no_encoding_qprint;
|
|
}
|
|
}
|
|
|
|
mbfl_string_init(&result);
|
|
ret = mbfl_mime_header_encode(&string, &result, charsetenc, transenc,
|
|
linefeed.data(), indent);
|
|
if (ret != NULL) {
|
|
return String((const char*)ret->val, ret->len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_encode_numericentity(CStrRef str, CVarRef convmap,
|
|
CStrRef encoding /* = null_string */) {
|
|
return php_mb_numericentity_exec(str, convmap, encoding, 0);
|
|
}
|
|
|
|
static const StaticString s_internal_encoding("internal_encoding");
|
|
static const StaticString s_http_input("http_input");
|
|
static const StaticString s_http_output("http_output");
|
|
static const StaticString s_mail_charset("mail_charset");
|
|
static const StaticString s_mail_header_encoding("mail_header_encoding");
|
|
static const StaticString s_mail_body_encoding("mail_body_encoding");
|
|
static const StaticString s_illegal_chars("illegal_chars");
|
|
static const StaticString s_encoding_translation("encoding_translation");
|
|
static const StaticString s_On("On");
|
|
static const StaticString s_Off("Off");
|
|
static const StaticString s_language("language");
|
|
static const StaticString s_detect_order("detect_order");
|
|
static const StaticString s_substitute_character("substitute_character");
|
|
static const StaticString s_strict_detection("strict_detection");
|
|
static const StaticString s_none("none");
|
|
static const StaticString s_long("long");
|
|
static const StaticString s_entity("entity");
|
|
|
|
Variant f_mb_get_info(CStrRef type /* = null_string */) {
|
|
const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
|
|
mbfl_no_encoding *entry;
|
|
int n;
|
|
|
|
char *name;
|
|
if (type.empty() || strcasecmp(type.data(), "all") == 0) {
|
|
Array ret;
|
|
if ((name = (char *)mbfl_no_encoding2name
|
|
(MBSTRG(current_internal_encoding))) != NULL) {
|
|
ret.set(s_internal_encoding, String(name, CopyString));
|
|
}
|
|
if ((name = (char *)mbfl_no_encoding2name
|
|
(MBSTRG(http_input_identify))) != NULL) {
|
|
ret.set(s_http_input, String(name, CopyString));
|
|
}
|
|
if ((name = (char *)mbfl_no_encoding2name
|
|
(MBSTRG(current_http_output_encoding))) != NULL) {
|
|
ret.set(s_http_output, String(name, CopyString));
|
|
}
|
|
if (lang != NULL) {
|
|
if ((name = (char *)mbfl_no_encoding2name
|
|
(lang->mail_charset)) != NULL) {
|
|
ret.set(s_mail_charset, String(name, CopyString));
|
|
}
|
|
if ((name = (char *)mbfl_no_encoding2name
|
|
(lang->mail_header_encoding)) != NULL) {
|
|
ret.set(s_mail_header_encoding, String(name, CopyString));
|
|
}
|
|
if ((name = (char *)mbfl_no_encoding2name
|
|
(lang->mail_body_encoding)) != NULL) {
|
|
ret.set(s_mail_body_encoding, String(name, CopyString));
|
|
}
|
|
}
|
|
ret.set(s_illegal_chars, MBSTRG(illegalchars));
|
|
ret.set(s_encoding_translation,
|
|
MBSTRG(encoding_translation) ? s_On : s_Off);
|
|
if ((name = (char *)mbfl_no_language2name
|
|
(MBSTRG(current_language))) != NULL) {
|
|
ret.set(s_language, String(name, CopyString));
|
|
}
|
|
n = MBSTRG(current_detect_order_list_size);
|
|
entry = MBSTRG(current_detect_order_list);
|
|
if (n > 0) {
|
|
Array row;
|
|
while (n > 0) {
|
|
if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
|
|
row.append(String(name, CopyString));
|
|
}
|
|
entry++;
|
|
n--;
|
|
}
|
|
ret.set(s_detect_order, row);
|
|
}
|
|
switch (MBSTRG(current_filter_illegal_mode)) {
|
|
case MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE:
|
|
ret.set(s_substitute_character, s_none);
|
|
break;
|
|
case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
|
|
ret.set(s_substitute_character, s_long);
|
|
break;
|
|
case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
|
|
ret.set(s_substitute_character, s_entity);
|
|
break;
|
|
default:
|
|
ret.set(s_substitute_character,
|
|
MBSTRG(current_filter_illegal_substchar));
|
|
}
|
|
ret.set(s_strict_detection, MBSTRG(strict_detection) ? s_On : s_Off);
|
|
return ret;
|
|
} else if (strcasecmp(type.data(), "internal_encoding") == 0) {
|
|
if ((name = (char *)mbfl_no_encoding2name
|
|
(MBSTRG(current_internal_encoding))) != NULL) {
|
|
return String(name, CopyString);
|
|
}
|
|
} else if (strcasecmp(type.data(), "http_input") == 0) {
|
|
if ((name = (char *)mbfl_no_encoding2name
|
|
(MBSTRG(http_input_identify))) != NULL) {
|
|
return String(name, CopyString);
|
|
}
|
|
} else if (strcasecmp(type.data(), "http_output") == 0) {
|
|
if ((name = (char *)mbfl_no_encoding2name
|
|
(MBSTRG(current_http_output_encoding))) != NULL) {
|
|
return String(name, CopyString);
|
|
}
|
|
} else if (strcasecmp(type.data(), "mail_charset") == 0) {
|
|
if (lang != NULL &&
|
|
(name = (char *)mbfl_no_encoding2name
|
|
(lang->mail_charset)) != NULL) {
|
|
return String(name, CopyString);
|
|
}
|
|
} else if (strcasecmp(type.data(), "mail_header_encoding") == 0) {
|
|
if (lang != NULL &&
|
|
(name = (char *)mbfl_no_encoding2name
|
|
(lang->mail_header_encoding)) != NULL) {
|
|
return String(name, CopyString);
|
|
}
|
|
} else if (strcasecmp(type.data(), "mail_body_encoding") == 0) {
|
|
if (lang != NULL &&
|
|
(name = (char *)mbfl_no_encoding2name
|
|
(lang->mail_body_encoding)) != NULL) {
|
|
return String(name, CopyString);
|
|
}
|
|
} else if (strcasecmp(type.data(), "illegal_chars") == 0) {
|
|
return MBSTRG(illegalchars);
|
|
} else if (strcasecmp(type.data(), "encoding_translation") == 0) {
|
|
return MBSTRG(encoding_translation) ? "On" : "Off";
|
|
} else if (strcasecmp(type.data(), "language") == 0) {
|
|
if ((name = (char *)mbfl_no_language2name
|
|
(MBSTRG(current_language))) != NULL) {
|
|
return String(name, CopyString);
|
|
}
|
|
} else if (strcasecmp(type.data(), "detect_order") == 0) {
|
|
n = MBSTRG(current_detect_order_list_size);
|
|
entry = MBSTRG(current_detect_order_list);
|
|
if (n > 0) {
|
|
Array ret;
|
|
while (n > 0) {
|
|
name = (char *)mbfl_no_encoding2name(*entry);
|
|
if (name) {
|
|
ret.append(String(name, CopyString));
|
|
}
|
|
entry++;
|
|
n--;
|
|
}
|
|
}
|
|
} else if (strcasecmp(type.data(), "substitute_character") == 0) {
|
|
if (MBSTRG(current_filter_illegal_mode) ==
|
|
MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
|
|
return s_none;
|
|
} else if (MBSTRG(current_filter_illegal_mode) ==
|
|
MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
|
|
return s_long;
|
|
} else if (MBSTRG(current_filter_illegal_mode) ==
|
|
MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
|
|
return s_entity;
|
|
} else {
|
|
return MBSTRG(current_filter_illegal_substchar);
|
|
}
|
|
} else if (strcasecmp(type.data(), "strict_detection") == 0) {
|
|
return MBSTRG(strict_detection) ? s_On : s_Off;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_http_input(CStrRef type /* = null_string */) {
|
|
int n;
|
|
char *name;
|
|
mbfl_no_encoding *entry;
|
|
mbfl_no_encoding result = mbfl_no_encoding_invalid;
|
|
|
|
if (type.empty()) {
|
|
result = MBSTRG(http_input_identify);
|
|
} else {
|
|
switch (*type.data()) {
|
|
case 'G': case 'g': result = MBSTRG(http_input_identify_get); break;
|
|
case 'P': case 'p': result = MBSTRG(http_input_identify_post); break;
|
|
case 'C': case 'c': result = MBSTRG(http_input_identify_cookie); break;
|
|
case 'S': case 's': result = MBSTRG(http_input_identify_string); break;
|
|
case 'I': case 'i':
|
|
{
|
|
Array ret;
|
|
entry = MBSTRG(http_input_list);
|
|
n = MBSTRG(http_input_list_size);
|
|
while (n > 0) {
|
|
name = (char *)mbfl_no_encoding2name(*entry);
|
|
if (name) {
|
|
ret.append(String(name, CopyString));
|
|
}
|
|
entry++;
|
|
n--;
|
|
}
|
|
return ret;
|
|
}
|
|
case 'L': case 'l':
|
|
{
|
|
entry = MBSTRG(http_input_list);
|
|
n = MBSTRG(http_input_list_size);
|
|
StringBuffer list;
|
|
while (n > 0) {
|
|
name = (char *)mbfl_no_encoding2name(*entry);
|
|
if (name) {
|
|
if (list.empty()) {
|
|
list.append(name);
|
|
} else {
|
|
list.append(',');
|
|
list.append(name);
|
|
}
|
|
}
|
|
entry++;
|
|
n--;
|
|
}
|
|
if (list.empty()) {
|
|
return false;
|
|
}
|
|
return list.detach();
|
|
}
|
|
default:
|
|
result = MBSTRG(http_input_identify);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (result != mbfl_no_encoding_invalid &&
|
|
(name = (char *)mbfl_no_encoding2name(result)) != NULL) {
|
|
return String(name, CopyString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_http_output(CStrRef encoding /* = null_string */) {
|
|
if (encoding.empty()) {
|
|
char *name = (char *)mbfl_no_encoding2name
|
|
(MBSTRG(current_http_output_encoding));
|
|
if (name != NULL) {
|
|
return String(name, CopyString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
mbfl_no_encoding no_encoding = mbfl_name2no_encoding(encoding.data());
|
|
if (no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
MBSTRG(current_http_output_encoding) = no_encoding;
|
|
return true;
|
|
}
|
|
|
|
Variant f_mb_internal_encoding(CStrRef encoding /* = null_string */) {
|
|
if (encoding.empty()) {
|
|
char *name = (char *)mbfl_no_encoding2name
|
|
(MBSTRG(current_internal_encoding));
|
|
if (name != NULL) {
|
|
return String(name, CopyString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
mbfl_no_encoding no_encoding = mbfl_name2no_encoding(encoding.data());
|
|
if (no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
|
|
MBSTRG(current_internal_encoding) = no_encoding;
|
|
return true;
|
|
}
|
|
|
|
Variant f_mb_language(CStrRef language /* = null_string */) {
|
|
if (language.empty()) {
|
|
return String(mbfl_no_language2name(MBSTRG(current_language)), CopyString);
|
|
}
|
|
|
|
mbfl_no_language no_language = mbfl_name2no_language(language.data());
|
|
if (no_language == mbfl_no_language_invalid) {
|
|
raise_warning("Unknown language \"%s\"", language.data());
|
|
return false;
|
|
}
|
|
|
|
php_mb_nls_get_default_detect_order_list
|
|
(no_language, &MBSTRG(default_detect_order_list),
|
|
&MBSTRG(default_detect_order_list_size));
|
|
MBSTRG(current_language) = no_language;
|
|
return true;
|
|
}
|
|
|
|
String f_mb_output_handler(CStrRef contents, int status) {
|
|
mbfl_string string, result;
|
|
int last_feed;
|
|
|
|
mbfl_no_encoding encoding = MBSTRG(current_http_output_encoding);
|
|
|
|
/* start phase only */
|
|
if (status & PHP_OUTPUT_HANDLER_START) {
|
|
/* delete the converter just in case. */
|
|
if (MBSTRG(outconv)) {
|
|
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
|
|
mbfl_buffer_converter_delete(MBSTRG(outconv));
|
|
MBSTRG(outconv) = NULL;
|
|
}
|
|
if (encoding == mbfl_no_encoding_pass) {
|
|
return contents;
|
|
}
|
|
|
|
/* analyze mime type */
|
|
String mimetype = g_context->getMimeType();
|
|
if (!mimetype.empty()) {
|
|
const char *charset = mbfl_no2preferred_mime_name(encoding);
|
|
if (charset) {
|
|
g_context->setContentType(mimetype, charset);
|
|
}
|
|
/* activate the converter */
|
|
MBSTRG(outconv) = mbfl_buffer_converter_new
|
|
(MBSTRG(current_internal_encoding), encoding, 0);
|
|
}
|
|
}
|
|
|
|
/* just return if the converter is not activated. */
|
|
if (MBSTRG(outconv) == NULL) {
|
|
return contents;
|
|
}
|
|
|
|
/* flag */
|
|
last_feed = ((status & PHP_OUTPUT_HANDLER_END) != 0);
|
|
/* mode */
|
|
mbfl_buffer_converter_illegal_mode
|
|
(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
|
|
mbfl_buffer_converter_illegal_substchar
|
|
(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
|
|
|
|
/* feed the string */
|
|
mbfl_string_init(&string);
|
|
string.no_language = MBSTRG(current_language);
|
|
string.no_encoding = MBSTRG(current_internal_encoding);
|
|
string.val = (unsigned char *)contents.data();
|
|
string.len = contents.size();
|
|
mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
|
|
if (last_feed) {
|
|
mbfl_buffer_converter_flush(MBSTRG(outconv));
|
|
}
|
|
/* get the converter output, and return it */
|
|
mbfl_buffer_converter_result(MBSTRG(outconv), &result);
|
|
|
|
/* delete the converter if it is the last feed. */
|
|
if (last_feed) {
|
|
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
|
|
mbfl_buffer_converter_delete(MBSTRG(outconv));
|
|
MBSTRG(outconv) = NULL;
|
|
}
|
|
|
|
return String((const char *)result.val, result.len, AttachString);
|
|
}
|
|
|
|
typedef struct _php_mb_encoding_handler_info_t {
|
|
int data_type;
|
|
const char *separator;
|
|
unsigned int force_register_globals: 1;
|
|
unsigned int report_errors: 1;
|
|
enum mbfl_no_language to_language;
|
|
enum mbfl_no_encoding to_encoding;
|
|
enum mbfl_no_language from_language;
|
|
int num_from_encodings;
|
|
const enum mbfl_no_encoding *from_encodings;
|
|
} php_mb_encoding_handler_info_t;
|
|
|
|
static mbfl_no_encoding _php_mb_encoding_handler_ex
|
|
(const php_mb_encoding_handler_info_t *info, Variant &arg, char *res) {
|
|
char *var, *val;
|
|
const char *s1, *s2;
|
|
char *strtok_buf = NULL, **val_list = NULL;
|
|
int n, num, *len_list = NULL;
|
|
unsigned int val_len;
|
|
mbfl_string string, resvar, resval;
|
|
enum mbfl_no_encoding from_encoding = mbfl_no_encoding_invalid;
|
|
mbfl_encoding_detector *identd = NULL;
|
|
mbfl_buffer_converter *convd = NULL;
|
|
|
|
mbfl_string_init_set(&string, info->to_language, info->to_encoding);
|
|
mbfl_string_init_set(&resvar, info->to_language, info->to_encoding);
|
|
mbfl_string_init_set(&resval, info->to_language, info->to_encoding);
|
|
|
|
if (!res || *res == '\0') {
|
|
goto out;
|
|
}
|
|
|
|
/* count the variables(separators) contained in the "res".
|
|
* separator may contain multiple separator chars.
|
|
*/
|
|
num = 1;
|
|
for (s1=res; *s1 != '\0'; s1++) {
|
|
for (s2=info->separator; *s2 != '\0'; s2++) {
|
|
if (*s1 == *s2) {
|
|
num++;
|
|
}
|
|
}
|
|
}
|
|
num *= 2; /* need space for variable name and value */
|
|
|
|
val_list = (char **)calloc(num, sizeof(char *));
|
|
len_list = (int *)calloc(num, sizeof(int));
|
|
|
|
/* split and decode the query */
|
|
n = 0;
|
|
strtok_buf = NULL;
|
|
var = strtok_r(res, info->separator, &strtok_buf);
|
|
while (var) {
|
|
val = strchr(var, '=');
|
|
if (val) { /* have a value */
|
|
len_list[n] = url_decode_ex(var, val-var);
|
|
val_list[n] = var;
|
|
n++;
|
|
|
|
*val++ = '\0';
|
|
val_list[n] = val;
|
|
len_list[n] = url_decode_ex(val, strlen(val));
|
|
} else {
|
|
len_list[n] = url_decode_ex(var, strlen(var));
|
|
val_list[n] = var;
|
|
n++;
|
|
|
|
val_list[n] = const_cast<char*>("");
|
|
len_list[n] = 0;
|
|
}
|
|
n++;
|
|
var = strtok_r(NULL, info->separator, &strtok_buf);
|
|
}
|
|
num = n; /* make sure to process initilized vars only */
|
|
|
|
/* initialize converter */
|
|
if (info->num_from_encodings <= 0) {
|
|
from_encoding = mbfl_no_encoding_pass;
|
|
} else if (info->num_from_encodings == 1) {
|
|
from_encoding = info->from_encodings[0];
|
|
} else {
|
|
/* auto detect */
|
|
from_encoding = mbfl_no_encoding_invalid;
|
|
identd = mbfl_encoding_detector_new
|
|
((enum mbfl_no_encoding *)info->from_encodings,
|
|
info->num_from_encodings, MBSTRG(strict_detection));
|
|
if (identd) {
|
|
n = 0;
|
|
while (n < num) {
|
|
string.val = (unsigned char *)val_list[n];
|
|
string.len = len_list[n];
|
|
if (mbfl_encoding_detector_feed(identd, &string)) {
|
|
break;
|
|
}
|
|
n++;
|
|
}
|
|
from_encoding = mbfl_encoding_detector_judge(identd);
|
|
mbfl_encoding_detector_delete(identd);
|
|
}
|
|
if (from_encoding == mbfl_no_encoding_invalid) {
|
|
if (info->report_errors) {
|
|
raise_warning("Unable to detect encoding");
|
|
}
|
|
from_encoding = mbfl_no_encoding_pass;
|
|
}
|
|
}
|
|
|
|
convd = NULL;
|
|
if (from_encoding != mbfl_no_encoding_pass) {
|
|
convd = mbfl_buffer_converter_new(from_encoding, info->to_encoding, 0);
|
|
if (convd != NULL) {
|
|
mbfl_buffer_converter_illegal_mode
|
|
(convd, MBSTRG(current_filter_illegal_mode));
|
|
mbfl_buffer_converter_illegal_substchar
|
|
(convd, MBSTRG(current_filter_illegal_substchar));
|
|
} else {
|
|
if (info->report_errors) {
|
|
raise_warning("Unable to create converter");
|
|
}
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
/* convert encoding */
|
|
string.no_encoding = from_encoding;
|
|
|
|
n = 0;
|
|
while (n < num) {
|
|
string.val = (unsigned char *)val_list[n];
|
|
string.len = len_list[n];
|
|
if (convd != NULL &&
|
|
mbfl_buffer_converter_feed_result(convd, &string, &resvar) != NULL) {
|
|
var = (char *)resvar.val;
|
|
} else {
|
|
var = val_list[n];
|
|
}
|
|
n++;
|
|
string.val = (unsigned char *)val_list[n];
|
|
string.len = len_list[n];
|
|
if (convd != NULL &&
|
|
mbfl_buffer_converter_feed_result(convd, &string, &resval) != NULL) {
|
|
val = (char *)resval.val;
|
|
val_len = resval.len;
|
|
} else {
|
|
val = val_list[n];
|
|
val_len = len_list[n];
|
|
}
|
|
n++;
|
|
|
|
arg.set(String(var, CopyString), String(val, val_len, CopyString));
|
|
|
|
if (convd != NULL){
|
|
mbfl_string_clear(&resvar);
|
|
mbfl_string_clear(&resval);
|
|
}
|
|
}
|
|
|
|
out:
|
|
if (convd != NULL) {
|
|
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
|
|
mbfl_buffer_converter_delete(convd);
|
|
}
|
|
if (val_list != NULL) {
|
|
free((void *)val_list);
|
|
}
|
|
if (len_list != NULL) {
|
|
free((void *)len_list);
|
|
}
|
|
|
|
return from_encoding;
|
|
}
|
|
|
|
bool f_mb_parse_str(CStrRef encoded_string, VRefParam result /* = null */) {
|
|
php_mb_encoding_handler_info_t info;
|
|
info.data_type = PARSE_STRING;
|
|
info.separator = ";&";
|
|
info.force_register_globals = false;
|
|
info.report_errors = 1;
|
|
info.to_encoding = MBSTRG(current_internal_encoding);
|
|
info.to_language = MBSTRG(current_language);
|
|
info.from_encodings = MBSTRG(http_input_list);
|
|
info.num_from_encodings = MBSTRG(http_input_list_size);
|
|
info.from_language = MBSTRG(current_language);
|
|
|
|
char *encstr = strndup(encoded_string.data(), encoded_string.size());
|
|
mbfl_no_encoding detected =
|
|
_php_mb_encoding_handler_ex(&info, result, encstr);
|
|
free(encstr);
|
|
|
|
MBSTRG(http_input_identify) = detected;
|
|
return detected != mbfl_no_encoding_invalid;
|
|
}
|
|
|
|
Variant f_mb_preferred_mime_name(CStrRef encoding) {
|
|
mbfl_no_encoding no_encoding = mbfl_name2no_encoding(encoding.data());
|
|
if (no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
|
|
const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
|
|
if (preferred_name == NULL || *preferred_name == '\0') {
|
|
raise_warning("No MIME preferred name corresponding to \"%s\"",
|
|
encoding.data());
|
|
return false;
|
|
}
|
|
|
|
return String(preferred_name, CopyString);
|
|
}
|
|
|
|
static Variant php_mb_substr(CStrRef str, int from, int len,
|
|
CStrRef encoding, bool substr) {
|
|
mbfl_string string;
|
|
mbfl_string_init(&string);
|
|
string.no_language = MBSTRG(current_language);
|
|
string.no_encoding = MBSTRG(current_internal_encoding);
|
|
string.val = (unsigned char *)str.data();
|
|
string.len = str.size();
|
|
|
|
if (!encoding.empty()) {
|
|
string.no_encoding = mbfl_name2no_encoding(encoding.data());
|
|
if (string.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
int size;
|
|
if (substr) {
|
|
size = mbfl_strlen(&string);
|
|
} else {
|
|
size = str.size();
|
|
}
|
|
if (len == 0x7FFFFFFF) {
|
|
len = size;
|
|
}
|
|
|
|
/* if "from" position is negative, count start position from the end
|
|
* of the string
|
|
*/
|
|
if (from < 0) {
|
|
from = size + from;
|
|
if (from < 0) {
|
|
from = 0;
|
|
}
|
|
}
|
|
|
|
/* if "length" position is negative, set it to the length
|
|
* needed to stop that many chars from the end of the string
|
|
*/
|
|
if (len < 0) {
|
|
len = (size - from) + len;
|
|
if (len < 0) {
|
|
len = 0;
|
|
}
|
|
}
|
|
|
|
if (from > size) {
|
|
if (!substr) {
|
|
return false;
|
|
}
|
|
from = size;
|
|
}
|
|
|
|
mbfl_string result;
|
|
mbfl_string *ret;
|
|
if (substr) {
|
|
ret = mbfl_substr(&string, &result, from, len);
|
|
} else {
|
|
ret = mbfl_strcut(&string, &result, from, len);
|
|
}
|
|
if (ret != NULL) {
|
|
return String((const char*)ret->val, ret->len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_substr(CStrRef str, int start, int length /* = 0x7FFFFFFF */,
|
|
CStrRef encoding /* = null_string */) {
|
|
return php_mb_substr(str, start, length, encoding, true);
|
|
}
|
|
|
|
Variant f_mb_strcut(CStrRef str, int start, int length /* = 0x7FFFFFFF */,
|
|
CStrRef encoding /* = null_string */) {
|
|
return php_mb_substr(str, start, length, encoding, false);
|
|
}
|
|
|
|
Variant f_mb_strimwidth(CStrRef str, int start, int width,
|
|
CStrRef trimmarker /* = null_string */,
|
|
CStrRef encoding /* = null_string */) {
|
|
mbfl_string string, result, marker, *ret;
|
|
|
|
mbfl_string_init(&string);
|
|
mbfl_string_init(&marker);
|
|
string.no_language = MBSTRG(current_language);
|
|
string.no_encoding = MBSTRG(current_internal_encoding);
|
|
marker.no_language = MBSTRG(current_language);
|
|
marker.no_encoding = MBSTRG(current_internal_encoding);
|
|
marker.val = NULL;
|
|
marker.len = 0;
|
|
|
|
if (!encoding.empty()) {
|
|
string.no_encoding = marker.no_encoding =
|
|
mbfl_name2no_encoding(encoding.data());
|
|
if (string.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
string.val = (unsigned char *)str.data();
|
|
string.len = str.size();
|
|
|
|
if (start < 0 || start > str.size()) {
|
|
raise_warning("Start position is out of reange");
|
|
return false;
|
|
}
|
|
|
|
if (width < 0) {
|
|
raise_warning("Width is negative value");
|
|
return false;
|
|
}
|
|
|
|
marker.val = (unsigned char *)trimmarker.data();
|
|
marker.len = trimmarker.size();
|
|
|
|
ret = mbfl_strimwidth(&string, &marker, &result, start, width);
|
|
if (ret != NULL) {
|
|
return String((const char *)ret->val, ret->len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_stripos(CStrRef haystack, CStrRef needle, int offset /* = 0 */,
|
|
CStrRef encoding /* = null_string */) {
|
|
const char *from_encoding;
|
|
if (encoding.empty()) {
|
|
from_encoding =
|
|
mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
|
|
} else {
|
|
from_encoding = encoding.data();
|
|
}
|
|
|
|
if (needle.empty()) {
|
|
raise_warning("Empty delimiter");
|
|
return false;
|
|
}
|
|
|
|
int n = php_mb_stripos(0, haystack.data(), haystack.size(),
|
|
needle.data(), needle.size(), offset, from_encoding);
|
|
if (n >= 0) {
|
|
return n;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_strripos(CStrRef haystack, CStrRef needle, int offset /* = 0 */,
|
|
CStrRef encoding /* = null_string */) {
|
|
const char *from_encoding;
|
|
if (encoding.empty()) {
|
|
from_encoding =
|
|
mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
|
|
} else {
|
|
from_encoding = encoding.data();
|
|
}
|
|
|
|
int n = php_mb_stripos(1, haystack.data(), haystack.size(),
|
|
needle.data(), needle.size(), offset, from_encoding);
|
|
if (n >= 0) {
|
|
return n;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_stristr(CStrRef haystack, CStrRef needle, bool part /* = false */,
|
|
CStrRef encoding /* = null_string */) {
|
|
mbfl_string mbs_haystack;
|
|
mbfl_string_init(&mbs_haystack);
|
|
mbs_haystack.no_language = MBSTRG(current_language);
|
|
mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_haystack.val = (unsigned char *)haystack.data();
|
|
mbs_haystack.len = haystack.size();
|
|
|
|
mbfl_string mbs_needle;
|
|
mbfl_string_init(&mbs_needle);
|
|
mbs_needle.no_language = MBSTRG(current_language);
|
|
mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_needle.val = (unsigned char *)needle.data();
|
|
mbs_needle.len = needle.size();
|
|
if (!mbs_needle.len) {
|
|
raise_warning("Empty delimiter.");
|
|
return false;
|
|
}
|
|
|
|
const char *from_encoding;
|
|
if (encoding.empty()) {
|
|
from_encoding =
|
|
mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
|
|
} else {
|
|
from_encoding = encoding.data();
|
|
}
|
|
mbs_haystack.no_encoding = mbs_needle.no_encoding =
|
|
mbfl_name2no_encoding(from_encoding);
|
|
if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", from_encoding);
|
|
return false;
|
|
}
|
|
|
|
int n = php_mb_stripos(0, (const char*)mbs_haystack.val, mbs_haystack.len,
|
|
(const char *)mbs_needle.val, mbs_needle.len,
|
|
0, from_encoding);
|
|
if (n < 0) {
|
|
return false;
|
|
}
|
|
|
|
int mblen = mbfl_strlen(&mbs_haystack);
|
|
mbfl_string result, *ret = NULL;
|
|
if (part) {
|
|
ret = mbfl_substr(&mbs_haystack, &result, 0, n);
|
|
} else {
|
|
int len = (mblen - n);
|
|
ret = mbfl_substr(&mbs_haystack, &result, n, len);
|
|
}
|
|
|
|
if (ret != NULL) {
|
|
return String((const char*)ret->val, ret->len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_strlen(CStrRef str, CStrRef encoding /* = null_string */) {
|
|
mbfl_string string;
|
|
mbfl_string_init(&string);
|
|
string.val = (unsigned char *)str.data();
|
|
string.len = str.size();
|
|
string.no_language = MBSTRG(current_language);
|
|
|
|
if (encoding.empty()) {
|
|
string.no_encoding = MBSTRG(current_internal_encoding);
|
|
} else {
|
|
string.no_encoding = mbfl_name2no_encoding(encoding.data());
|
|
if (string.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
int n = mbfl_strlen(&string);
|
|
if (n >= 0) {
|
|
return n;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_strpos(CStrRef haystack, CStrRef needle, int offset /* = 0 */,
|
|
CStrRef encoding /* = null_string */) {
|
|
mbfl_string mbs_haystack;
|
|
mbfl_string_init(&mbs_haystack);
|
|
mbs_haystack.no_language = MBSTRG(current_language);
|
|
mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_haystack.val = (unsigned char *)haystack.data();
|
|
mbs_haystack.len = haystack.size();
|
|
|
|
mbfl_string mbs_needle;
|
|
mbfl_string_init(&mbs_needle);
|
|
mbs_needle.no_language = MBSTRG(current_language);
|
|
mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_needle.val = (unsigned char *)needle.data();
|
|
mbs_needle.len = needle.size();
|
|
|
|
if (!encoding.empty()) {
|
|
mbs_haystack.no_encoding = mbs_needle.no_encoding =
|
|
mbfl_name2no_encoding(encoding.data());
|
|
if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (offset < 0 || offset > mbfl_strlen(&mbs_haystack)) {
|
|
raise_warning("Offset not contained in string.");
|
|
return false;
|
|
}
|
|
if (mbs_needle.len == 0) {
|
|
raise_warning("Empty delimiter.");
|
|
return false;
|
|
}
|
|
|
|
int reverse = 0;
|
|
int n = mbfl_strpos(&mbs_haystack, &mbs_needle, offset, reverse);
|
|
if (n >= 0) {
|
|
return n;
|
|
}
|
|
|
|
switch (-n) {
|
|
case 1:
|
|
break;
|
|
case 2:
|
|
raise_warning("Needle has not positive length.");
|
|
break;
|
|
case 4:
|
|
raise_warning("Unknown encoding or conversion error.");
|
|
break;
|
|
case 8:
|
|
raise_warning("Argument is empty.");
|
|
break;
|
|
default:
|
|
raise_warning("Unknown error in mb_strpos.");
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_strrpos(CStrRef haystack, CStrRef needle,
|
|
CVarRef offset /* = 0LL */,
|
|
CStrRef encoding /* = null_string */) {
|
|
mbfl_string mbs_haystack;
|
|
mbfl_string_init(&mbs_haystack);
|
|
mbs_haystack.no_language = MBSTRG(current_language);
|
|
mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_haystack.val = (unsigned char *)haystack.data();
|
|
mbs_haystack.len = haystack.size();
|
|
|
|
mbfl_string mbs_needle;
|
|
mbfl_string_init(&mbs_needle);
|
|
mbs_needle.no_language = MBSTRG(current_language);
|
|
mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_needle.val = (unsigned char *)needle.data();
|
|
mbs_needle.len = needle.size();
|
|
|
|
const char *enc_name = encoding.data();
|
|
long noffset = 0;
|
|
String soffset = offset.toString();
|
|
if (offset.isString()) {
|
|
enc_name = soffset.data();
|
|
|
|
int str_flg = 1;
|
|
if (enc_name != NULL) {
|
|
switch (*enc_name) {
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
case '5': case '6': case '7': case '8': case '9':
|
|
case ' ': case '-': case '.':
|
|
break;
|
|
default :
|
|
str_flg = 0;
|
|
break;
|
|
}
|
|
}
|
|
if (str_flg) {
|
|
noffset = offset.toInt32();
|
|
enc_name = encoding.data();
|
|
}
|
|
} else {
|
|
noffset = offset.toInt32();
|
|
}
|
|
|
|
if (!enc_name && !*enc_name) {
|
|
mbs_haystack.no_encoding = mbs_needle.no_encoding =
|
|
mbfl_name2no_encoding(enc_name);
|
|
if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", enc_name);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (mbs_haystack.len <= 0) {
|
|
return false;
|
|
}
|
|
if (mbs_needle.len <= 0) {
|
|
return false;
|
|
}
|
|
|
|
if ((noffset > 0 && noffset > mbfl_strlen(&mbs_haystack)) ||
|
|
(noffset < 0 && -noffset > mbfl_strlen(&mbs_haystack))) {
|
|
raise_notice("Offset is greater than the length of haystack string");
|
|
return false;
|
|
}
|
|
|
|
int n = mbfl_strpos(&mbs_haystack, &mbs_needle, noffset, 1);
|
|
if (n >= 0) {
|
|
return n;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_strrchr(CStrRef haystack, CStrRef needle, bool part /* = false */,
|
|
CStrRef encoding /* = null_string */) {
|
|
mbfl_string mbs_haystack;
|
|
mbfl_string_init(&mbs_haystack);
|
|
mbs_haystack.no_language = MBSTRG(current_language);
|
|
mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_haystack.val = (unsigned char *)haystack.data();
|
|
mbs_haystack.len = haystack.size();
|
|
|
|
mbfl_string mbs_needle;
|
|
mbfl_string_init(&mbs_needle);
|
|
mbs_needle.no_language = MBSTRG(current_language);
|
|
mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_needle.val = (unsigned char *)needle.data();
|
|
mbs_needle.len = needle.size();
|
|
|
|
if (!encoding.empty()) {
|
|
mbs_haystack.no_encoding = mbs_needle.no_encoding =
|
|
mbfl_name2no_encoding(encoding.data());
|
|
if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (mbs_haystack.len <= 0) {
|
|
return false;
|
|
}
|
|
if (mbs_needle.len <= 0) {
|
|
return false;
|
|
}
|
|
|
|
mbfl_string result, *ret = NULL;
|
|
int n = mbfl_strpos(&mbs_haystack, &mbs_needle, 0, 1);
|
|
if (n >= 0) {
|
|
int mblen = mbfl_strlen(&mbs_haystack);
|
|
if (part) {
|
|
ret = mbfl_substr(&mbs_haystack, &result, 0, n);
|
|
} else {
|
|
int len = (mblen - n);
|
|
ret = mbfl_substr(&mbs_haystack, &result, n, len);
|
|
}
|
|
}
|
|
|
|
if (ret != NULL) {
|
|
return String((const char*)ret->val, ret->len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_strrichr(CStrRef haystack, CStrRef needle, bool part /* = false */,
|
|
CStrRef encoding /* = null_string */) {
|
|
mbfl_string mbs_haystack;
|
|
mbfl_string_init(&mbs_haystack);
|
|
mbs_haystack.no_language = MBSTRG(current_language);
|
|
mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_haystack.val = (unsigned char *)haystack.data();
|
|
mbs_haystack.len = haystack.size();
|
|
|
|
mbfl_string mbs_needle;
|
|
mbfl_string_init(&mbs_needle);
|
|
mbs_needle.no_language = MBSTRG(current_language);
|
|
mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_needle.val = (unsigned char *)needle.data();
|
|
mbs_needle.len = needle.size();
|
|
|
|
const char *from_encoding;
|
|
if (encoding.empty()) {
|
|
from_encoding =
|
|
mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
|
|
} else {
|
|
from_encoding = encoding.data();
|
|
}
|
|
mbs_haystack.no_encoding = mbs_needle.no_encoding =
|
|
mbfl_name2no_encoding(from_encoding);
|
|
if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", from_encoding);
|
|
return false;
|
|
}
|
|
|
|
int n = php_mb_stripos(1, (const char*)mbs_haystack.val, mbs_haystack.len,
|
|
(const char*)mbs_needle.val, mbs_needle.len,
|
|
0, from_encoding);
|
|
if (n < 0) {
|
|
return false;
|
|
}
|
|
|
|
mbfl_string result, *ret = NULL;
|
|
int mblen = mbfl_strlen(&mbs_haystack);
|
|
if (part) {
|
|
ret = mbfl_substr(&mbs_haystack, &result, 0, n);
|
|
} else {
|
|
int len = (mblen - n);
|
|
ret = mbfl_substr(&mbs_haystack, &result, n, len);
|
|
}
|
|
|
|
if (ret != NULL) {
|
|
return String((const char*)ret->val, ret->len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_strstr(CStrRef haystack, CStrRef needle, bool part /* = false */,
|
|
CStrRef encoding /* = null_string */) {
|
|
mbfl_string mbs_haystack;
|
|
mbfl_string_init(&mbs_haystack);
|
|
mbs_haystack.no_language = MBSTRG(current_language);
|
|
mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_haystack.val = (unsigned char *)haystack.data();
|
|
mbs_haystack.len = haystack.size();
|
|
|
|
mbfl_string mbs_needle;
|
|
mbfl_string_init(&mbs_needle);
|
|
mbs_needle.no_language = MBSTRG(current_language);
|
|
mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_needle.val = (unsigned char *)needle.data();
|
|
mbs_needle.len = needle.size();
|
|
|
|
if (!encoding.empty()) {
|
|
mbs_haystack.no_encoding = mbs_needle.no_encoding =
|
|
mbfl_name2no_encoding(encoding.data());
|
|
if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (mbs_needle.len <= 0) {
|
|
raise_warning("Empty delimiter.");
|
|
return false;
|
|
}
|
|
|
|
mbfl_string result, *ret = NULL;
|
|
int n = mbfl_strpos(&mbs_haystack, &mbs_needle, 0, 0);
|
|
if (n >= 0) {
|
|
int mblen = mbfl_strlen(&mbs_haystack);
|
|
if (part) {
|
|
ret = mbfl_substr(&mbs_haystack, &result, 0, n);
|
|
} else {
|
|
int len = (mblen - n);
|
|
ret = mbfl_substr(&mbs_haystack, &result, n, len);
|
|
}
|
|
}
|
|
|
|
if (ret != NULL) {
|
|
return String((const char*)ret->val, ret->len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_strtolower(CStrRef str, CStrRef encoding /* = null_string */) {
|
|
const char *from_encoding;
|
|
if (encoding.empty()) {
|
|
from_encoding =
|
|
mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
|
|
} else {
|
|
from_encoding = encoding.data();
|
|
}
|
|
|
|
unsigned int ret_len;
|
|
char *newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER,
|
|
str.data(), str.size(),
|
|
&ret_len, from_encoding);
|
|
if (newstr) {
|
|
return String(newstr, ret_len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_strtoupper(CStrRef str, CStrRef encoding /* = null_string */) {
|
|
const char *from_encoding;
|
|
if (encoding.empty()) {
|
|
from_encoding =
|
|
mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
|
|
} else {
|
|
from_encoding = encoding.data();
|
|
}
|
|
|
|
unsigned int ret_len;
|
|
char *newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER,
|
|
str.data(), str.size(),
|
|
&ret_len, from_encoding);
|
|
if (newstr) {
|
|
return String(newstr, ret_len, AttachString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_strwidth(CStrRef str, CStrRef encoding /* = null_string */) {
|
|
mbfl_string string;
|
|
mbfl_string_init(&string);
|
|
string.no_language = MBSTRG(current_language);
|
|
string.no_encoding = MBSTRG(current_internal_encoding);
|
|
string.val = (unsigned char *)str.data();
|
|
string.len = str.size();
|
|
|
|
if (!encoding.empty()) {
|
|
string.no_encoding = mbfl_name2no_encoding(encoding.data());
|
|
if (string.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
int n = mbfl_strwidth(&string);
|
|
if (n >= 0) {
|
|
return n;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
Variant f_mb_substitute_character(CVarRef substrchar /* = null_variant */) {
|
|
if (substrchar.isNull()) {
|
|
switch (MBSTRG(current_filter_illegal_mode)) {
|
|
case MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE:
|
|
return "none";
|
|
case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
|
|
return "long";
|
|
case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
|
|
return "entity";
|
|
default:
|
|
return MBSTRG(current_filter_illegal_substchar);
|
|
}
|
|
}
|
|
|
|
if (substrchar.isString()) {
|
|
String s = substrchar.toString();
|
|
if (strcasecmp("none", s.data()) == 0) {
|
|
MBSTRG(current_filter_illegal_mode) =
|
|
MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
|
|
return true;
|
|
}
|
|
if (strcasecmp("long", s.data()) == 0) {
|
|
MBSTRG(current_filter_illegal_mode) =
|
|
MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
|
|
return true;
|
|
}
|
|
if (strcasecmp("entity", s.data()) == 0) {
|
|
MBSTRG(current_filter_illegal_mode) =
|
|
MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
int64_t n = substrchar.toInt64();
|
|
if (n < 0xffff && n > 0) {
|
|
MBSTRG(current_filter_illegal_mode) =
|
|
MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
|
|
MBSTRG(current_filter_illegal_substchar) = n;
|
|
} else {
|
|
raise_warning("Unknown character.");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
Variant f_mb_substr_count(CStrRef haystack, CStrRef needle,
|
|
CStrRef encoding /* = null_string */) {
|
|
mbfl_string mbs_haystack;
|
|
mbfl_string_init(&mbs_haystack);
|
|
mbs_haystack.no_language = MBSTRG(current_language);
|
|
mbs_haystack.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_haystack.val = (unsigned char *)haystack.data();
|
|
mbs_haystack.len = haystack.size();
|
|
|
|
mbfl_string mbs_needle;
|
|
mbfl_string_init(&mbs_needle);
|
|
mbs_needle.no_language = MBSTRG(current_language);
|
|
mbs_needle.no_encoding = MBSTRG(current_internal_encoding);
|
|
mbs_needle.val = (unsigned char *)needle.data();
|
|
mbs_needle.len = needle.size();
|
|
|
|
if (!encoding.empty()) {
|
|
mbs_haystack.no_encoding = mbs_needle.no_encoding =
|
|
mbfl_name2no_encoding(encoding.data());
|
|
if (mbs_haystack.no_encoding == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (mbs_needle.len <= 0) {
|
|
raise_warning("Empty substring.");
|
|
return false;
|
|
}
|
|
|
|
int n = mbfl_substr_count(&mbs_haystack, &mbs_needle);
|
|
if (n >= 0) {
|
|
return n;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// regex helpers
|
|
|
|
typedef struct _php_mb_regex_enc_name_map_t {
|
|
const char *names;
|
|
OnigEncoding code;
|
|
} php_mb_regex_enc_name_map_t;
|
|
|
|
static php_mb_regex_enc_name_map_t enc_name_map[] ={
|
|
{
|
|
"EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
|
|
ONIG_ENCODING_EUC_JP
|
|
},
|
|
{
|
|
"UTF-8\0UTF8\0",
|
|
ONIG_ENCODING_UTF8
|
|
},
|
|
{
|
|
"UTF-16\0UTF-16BE\0",
|
|
ONIG_ENCODING_UTF16_BE
|
|
},
|
|
{
|
|
"UTF-16LE\0",
|
|
ONIG_ENCODING_UTF16_LE
|
|
},
|
|
{
|
|
"UCS-4\0UTF-32\0UTF-32BE\0",
|
|
ONIG_ENCODING_UTF32_BE
|
|
},
|
|
{
|
|
"UCS-4LE\0UTF-32LE\0",
|
|
ONIG_ENCODING_UTF32_LE
|
|
},
|
|
{
|
|
"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
|
|
ONIG_ENCODING_SJIS
|
|
},
|
|
{
|
|
"BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
|
|
ONIG_ENCODING_BIG5
|
|
},
|
|
{
|
|
"EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
|
|
ONIG_ENCODING_EUC_CN
|
|
},
|
|
{
|
|
"EUC-TW\0EUCTW\0EUC_TW\0",
|
|
ONIG_ENCODING_EUC_TW
|
|
},
|
|
{
|
|
"EUC-KR\0EUCKR\0EUC_KR\0",
|
|
ONIG_ENCODING_EUC_KR
|
|
},
|
|
{
|
|
"KOI8R\0KOI8-R\0KOI-8R\0",
|
|
ONIG_ENCODING_KOI8_R
|
|
},
|
|
{
|
|
"ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
|
|
ONIG_ENCODING_ISO_8859_1
|
|
},
|
|
{
|
|
"ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
|
|
ONIG_ENCODING_ISO_8859_2
|
|
},
|
|
{
|
|
"ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
|
|
ONIG_ENCODING_ISO_8859_3
|
|
},
|
|
{
|
|
"ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
|
|
ONIG_ENCODING_ISO_8859_4
|
|
},
|
|
{
|
|
"ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
|
|
ONIG_ENCODING_ISO_8859_5
|
|
},
|
|
{
|
|
"ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
|
|
ONIG_ENCODING_ISO_8859_6
|
|
},
|
|
{
|
|
"ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
|
|
ONIG_ENCODING_ISO_8859_7
|
|
},
|
|
{
|
|
"ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
|
|
ONIG_ENCODING_ISO_8859_8
|
|
},
|
|
{
|
|
"ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
|
|
ONIG_ENCODING_ISO_8859_9
|
|
},
|
|
{
|
|
"ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
|
|
ONIG_ENCODING_ISO_8859_10
|
|
},
|
|
{
|
|
"ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
|
|
ONIG_ENCODING_ISO_8859_11
|
|
},
|
|
{
|
|
"ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
|
|
ONIG_ENCODING_ISO_8859_13
|
|
},
|
|
{
|
|
"ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
|
|
ONIG_ENCODING_ISO_8859_14
|
|
},
|
|
{
|
|
"ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
|
|
ONIG_ENCODING_ISO_8859_15
|
|
},
|
|
{
|
|
"ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
|
|
ONIG_ENCODING_ISO_8859_16
|
|
},
|
|
{
|
|
"ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
|
|
ONIG_ENCODING_ASCII
|
|
},
|
|
{ NULL, ONIG_ENCODING_UNDEF }
|
|
};
|
|
|
|
static OnigEncoding php_mb_regex_name2mbctype(const char *pname) {
|
|
const char *p;
|
|
php_mb_regex_enc_name_map_t *mapping;
|
|
|
|
if (pname == NULL) {
|
|
return ONIG_ENCODING_UNDEF;
|
|
}
|
|
|
|
for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
|
|
for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
|
|
if (strcasecmp(p, pname) == 0) {
|
|
return mapping->code;
|
|
}
|
|
}
|
|
}
|
|
|
|
return ONIG_ENCODING_UNDEF;
|
|
}
|
|
|
|
static const char *php_mb_regex_mbctype2name(OnigEncoding mbctype) {
|
|
php_mb_regex_enc_name_map_t *mapping;
|
|
|
|
for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
|
|
if (mapping->code == mbctype) {
|
|
return mapping->names;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* regex cache
|
|
*/
|
|
static php_mb_regex_t *php_mbregex_compile_pattern(CStrRef pattern,
|
|
OnigOptionType options,
|
|
OnigEncoding enc,
|
|
OnigSyntaxType *syntax) {
|
|
int err_code = 0;
|
|
OnigErrorInfo err_info;
|
|
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
|
|
php_mb_regex_t *rc = NULL;
|
|
|
|
std::string spattern = std::string(pattern.data(), pattern.size());
|
|
RegexCache &cache = MBSTRG(ht_rc);
|
|
RegexCache::const_iterator it =
|
|
cache.find(spattern);
|
|
if (it != cache.end()) {
|
|
rc = it->second;
|
|
}
|
|
|
|
if (!rc || rc->options != options || rc->enc != enc ||
|
|
rc->syntax != syntax) {
|
|
if (rc) {
|
|
onig_free(rc);
|
|
rc = NULL;
|
|
}
|
|
if ((err_code = onig_new(&rc, (OnigUChar *)pattern.data(),
|
|
(OnigUChar *)(pattern.data() + pattern.size()),
|
|
options,enc, syntax, &err_info)) != ONIG_NORMAL) {
|
|
onig_error_code_to_str(err_str, err_code, err_info);
|
|
raise_warning("mbregex compile err: %s", err_str);
|
|
return NULL;
|
|
}
|
|
MBSTRG(ht_rc)[spattern] = rc;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
static size_t _php_mb_regex_get_option_string(char *str, size_t len,
|
|
OnigOptionType option,
|
|
OnigSyntaxType *syntax) {
|
|
size_t len_left = len;
|
|
size_t len_req = 0;
|
|
char *p = str;
|
|
char c;
|
|
|
|
if ((option & ONIG_OPTION_IGNORECASE) != 0) {
|
|
if (len_left > 0) {
|
|
--len_left;
|
|
*(p++) = 'i';
|
|
}
|
|
++len_req;
|
|
}
|
|
|
|
if ((option & ONIG_OPTION_EXTEND) != 0) {
|
|
if (len_left > 0) {
|
|
--len_left;
|
|
*(p++) = 'x';
|
|
}
|
|
++len_req;
|
|
}
|
|
|
|
if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
|
|
(ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
|
|
if (len_left > 0) {
|
|
--len_left;
|
|
*(p++) = 'p';
|
|
}
|
|
++len_req;
|
|
} else {
|
|
if ((option & ONIG_OPTION_MULTILINE) != 0) {
|
|
if (len_left > 0) {
|
|
--len_left;
|
|
*(p++) = 'm';
|
|
}
|
|
++len_req;
|
|
}
|
|
|
|
if ((option & ONIG_OPTION_SINGLELINE) != 0) {
|
|
if (len_left > 0) {
|
|
--len_left;
|
|
*(p++) = 's';
|
|
}
|
|
++len_req;
|
|
}
|
|
}
|
|
if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
|
|
if (len_left > 0) {
|
|
--len_left;
|
|
*(p++) = 'l';
|
|
}
|
|
++len_req;
|
|
}
|
|
if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
|
|
if (len_left > 0) {
|
|
--len_left;
|
|
*(p++) = 'n';
|
|
}
|
|
++len_req;
|
|
}
|
|
|
|
c = 0;
|
|
|
|
if (syntax == ONIG_SYNTAX_JAVA) {
|
|
c = 'j';
|
|
} else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
|
|
c = 'u';
|
|
} else if (syntax == ONIG_SYNTAX_GREP) {
|
|
c = 'g';
|
|
} else if (syntax == ONIG_SYNTAX_EMACS) {
|
|
c = 'c';
|
|
} else if (syntax == ONIG_SYNTAX_RUBY) {
|
|
c = 'r';
|
|
} else if (syntax == ONIG_SYNTAX_PERL) {
|
|
c = 'z';
|
|
} else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
|
|
c = 'b';
|
|
} else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
|
|
c = 'd';
|
|
}
|
|
|
|
if (c != 0) {
|
|
if (len_left > 0) {
|
|
--len_left;
|
|
*(p++) = c;
|
|
}
|
|
++len_req;
|
|
}
|
|
|
|
if (len_left > 0) {
|
|
--len_left;
|
|
*(p++) = '\0';
|
|
}
|
|
++len_req;
|
|
if (len < len_req) {
|
|
return len_req;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void _php_mb_regex_init_options(const char *parg, int narg,
|
|
OnigOptionType *option,
|
|
OnigSyntaxType **syntax, int *eval) {
|
|
int n;
|
|
char c;
|
|
int optm = 0;
|
|
|
|
*syntax = ONIG_SYNTAX_RUBY;
|
|
if (parg != NULL) {
|
|
n = 0;
|
|
while (n < narg) {
|
|
c = parg[n++];
|
|
switch (c) {
|
|
case 'i': optm |= ONIG_OPTION_IGNORECASE; break;
|
|
case 'x': optm |= ONIG_OPTION_EXTEND; break;
|
|
case 'm': optm |= ONIG_OPTION_MULTILINE; break;
|
|
case 's': optm |= ONIG_OPTION_SINGLELINE; break;
|
|
case 'p': optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE; break;
|
|
case 'l': optm |= ONIG_OPTION_FIND_LONGEST; break;
|
|
case 'n': optm |= ONIG_OPTION_FIND_NOT_EMPTY; break;
|
|
case 'j': *syntax = ONIG_SYNTAX_JAVA; break;
|
|
case 'u': *syntax = ONIG_SYNTAX_GNU_REGEX; break;
|
|
case 'g': *syntax = ONIG_SYNTAX_GREP; break;
|
|
case 'c': *syntax = ONIG_SYNTAX_EMACS; break;
|
|
case 'r': *syntax = ONIG_SYNTAX_RUBY; break;
|
|
case 'z': *syntax = ONIG_SYNTAX_PERL; break;
|
|
case 'b': *syntax = ONIG_SYNTAX_POSIX_BASIC; break;
|
|
case 'd': *syntax = ONIG_SYNTAX_POSIX_EXTENDED; break;
|
|
case 'e':
|
|
if (eval != NULL) *eval = 1;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
if (option != NULL) *option|=optm;
|
|
}
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// regex functions
|
|
|
|
bool f_mb_ereg_match(CStrRef pattern, CStrRef str,
|
|
CStrRef option /* = null_string */) {
|
|
OnigSyntaxType *syntax;
|
|
OnigOptionType noption = 0;
|
|
if (!option.empty()) {
|
|
_php_mb_regex_init_options(option.data(), option.size(), &noption,
|
|
&syntax, NULL);
|
|
} else {
|
|
noption |= MBSTRG(regex_default_options);
|
|
syntax = MBSTRG(regex_default_syntax);
|
|
}
|
|
|
|
php_mb_regex_t *re;
|
|
if ((re = php_mbregex_compile_pattern
|
|
(pattern, noption, MBSTRG(current_mbctype), syntax)) == NULL) {
|
|
return false;
|
|
}
|
|
|
|
/* match */
|
|
int err = onig_match(re, (OnigUChar *)str.data(),
|
|
(OnigUChar *)(str.data() + str.size()),
|
|
(OnigUChar *)str.data(), NULL, 0);
|
|
return err >= 0;
|
|
}
|
|
|
|
static Variant _php_mb_regex_ereg_replace_exec(CVarRef pattern,
|
|
CStrRef replacement,
|
|
CStrRef str,
|
|
CStrRef option,
|
|
OnigOptionType options) {
|
|
const char *p;
|
|
php_mb_regex_t *re;
|
|
OnigSyntaxType *syntax;
|
|
OnigRegion *regs = NULL;
|
|
StringBuffer out_buf;
|
|
int i, err, eval, n;
|
|
OnigUChar *pos;
|
|
OnigUChar *string_lim;
|
|
char pat_buf[2];
|
|
|
|
const mbfl_encoding *enc;
|
|
|
|
{
|
|
const char *current_enc_name;
|
|
current_enc_name = php_mb_regex_mbctype2name(MBSTRG(current_mbctype));
|
|
if (current_enc_name == NULL ||
|
|
(enc = mbfl_name2encoding(current_enc_name)) == NULL) {
|
|
raise_warning("Unknown error");
|
|
return false;
|
|
}
|
|
}
|
|
eval = 0;
|
|
{
|
|
if (!option.empty()) {
|
|
_php_mb_regex_init_options(option.data(), option.size(),
|
|
&options, &syntax, &eval);
|
|
} else {
|
|
options |= MBSTRG(regex_default_options);
|
|
syntax = MBSTRG(regex_default_syntax);
|
|
}
|
|
}
|
|
|
|
String spattern;
|
|
if (pattern.isString()) {
|
|
spattern = pattern.toString();
|
|
} else {
|
|
/* FIXME: this code is not multibyte aware! */
|
|
pat_buf[0] = pattern.toByte();
|
|
pat_buf[1] = '\0';
|
|
spattern = String(pat_buf, 1, CopyString);
|
|
}
|
|
/* create regex pattern buffer */
|
|
re = php_mbregex_compile_pattern(spattern, options,
|
|
MBSTRG(current_mbctype), syntax);
|
|
if (re == NULL) {
|
|
return false;
|
|
}
|
|
|
|
if (eval) {
|
|
throw NotSupportedException("ereg_replace", "dynamic coding");
|
|
}
|
|
|
|
/* do the actual work */
|
|
err = 0;
|
|
pos = (OnigUChar*)str.data();
|
|
string_lim = (OnigUChar*)(str.data() + str.size());
|
|
regs = onig_region_new();
|
|
while (err >= 0) {
|
|
err = onig_search(re, (OnigUChar *)str.data(), (OnigUChar *)string_lim,
|
|
pos, (OnigUChar *)string_lim, regs, 0);
|
|
if (err <= -2) {
|
|
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
|
|
onig_error_code_to_str(err_str, err);
|
|
raise_warning("mbregex search failure: %s", err_str);
|
|
break;
|
|
}
|
|
if (err >= 0) {
|
|
#if moriyoshi_0
|
|
if (regs->beg[0] == regs->end[0]) {
|
|
raise_warning("Empty regular expression");
|
|
break;
|
|
}
|
|
#endif
|
|
/* copy the part of the string before the match */
|
|
out_buf.append((const char *)pos,
|
|
(OnigUChar *)(str.data() + regs->beg[0]) - pos);
|
|
/* copy replacement and backrefs */
|
|
i = 0;
|
|
p = replacement.data();
|
|
while (i < replacement.size()) {
|
|
int fwd = (int)php_mb_mbchar_bytes_ex(p, enc);
|
|
n = -1;
|
|
if ((replacement.size() - i) >= 2 && fwd == 1 &&
|
|
p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
|
|
n = p[1] - '0';
|
|
}
|
|
if (n >= 0 && n < regs->num_regs) {
|
|
if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] &&
|
|
regs->end[n] <= str.size()) {
|
|
out_buf.append(str.data() + regs->beg[n],
|
|
regs->end[n] - regs->beg[n]);
|
|
}
|
|
p += 2;
|
|
i += 2;
|
|
} else {
|
|
out_buf.append(p, fwd);
|
|
p += fwd;
|
|
i += fwd;
|
|
}
|
|
}
|
|
n = regs->end[0];
|
|
if ((pos - (OnigUChar *)str.data()) < n) {
|
|
pos = (OnigUChar *)(str.data() + n);
|
|
} else {
|
|
if (pos < string_lim) {
|
|
out_buf.append((const char *)pos, 1);
|
|
}
|
|
pos++;
|
|
}
|
|
} else { /* nomatch */
|
|
/* stick that last bit of string on our output */
|
|
if (string_lim - pos > 0) {
|
|
out_buf.append((const char *)pos, string_lim - pos);
|
|
}
|
|
}
|
|
onig_region_free(regs, 0);
|
|
}
|
|
|
|
if (regs != NULL) {
|
|
onig_region_free(regs, 1);
|
|
}
|
|
|
|
if (err <= -2) {
|
|
return false;
|
|
}
|
|
return out_buf.detach();
|
|
}
|
|
|
|
Variant f_mb_ereg_replace(CVarRef pattern, CStrRef replacement, CStrRef str,
|
|
CStrRef option /* = null_string */) {
|
|
return _php_mb_regex_ereg_replace_exec(pattern, replacement,
|
|
str, option, 0);
|
|
}
|
|
|
|
Variant f_mb_eregi_replace(CVarRef pattern, CStrRef replacement, CStrRef str,
|
|
CStrRef option /* = null_string */) {
|
|
return _php_mb_regex_ereg_replace_exec(pattern, replacement,
|
|
str, option, ONIG_OPTION_IGNORECASE);
|
|
}
|
|
|
|
int64_t f_mb_ereg_search_getpos() {
|
|
return MBSTRG(search_pos);
|
|
}
|
|
|
|
bool f_mb_ereg_search_setpos(int position) {
|
|
if (position < 0 || position >= (int)MBSTRG(search_str).size()) {
|
|
raise_warning("Position is out of range");
|
|
MBSTRG(search_pos) = 0;
|
|
return false;
|
|
}
|
|
MBSTRG(search_pos) = position;
|
|
return true;
|
|
}
|
|
|
|
Variant f_mb_ereg_search_getregs() {
|
|
OnigRegion *search_regs = MBSTRG(search_regs);
|
|
if (search_regs && !MBSTRG(search_str).empty()) {
|
|
Array ret;
|
|
OnigUChar *str = (OnigUChar *)MBSTRG(search_str).data();
|
|
int len = MBSTRG(search_str).size();
|
|
int n = search_regs->num_regs;
|
|
for (int i = 0; i < n; i++) {
|
|
int beg = search_regs->beg[i];
|
|
int end = search_regs->end[i];
|
|
if (beg >= 0 && beg <= end && end <= len) {
|
|
ret.append(String((const char *)(str + beg), end - beg, CopyString));
|
|
} else {
|
|
ret.append(false);
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool f_mb_ereg_search_init(CStrRef str, CStrRef pattern /* = null_string */,
|
|
CStrRef option /* = null_string */) {
|
|
OnigOptionType noption = MBSTRG(regex_default_options);
|
|
OnigSyntaxType *syntax = MBSTRG(regex_default_syntax);
|
|
if (!option.empty()) {
|
|
noption = 0;
|
|
_php_mb_regex_init_options(option.data(), option.size(),
|
|
&noption, &syntax, NULL);
|
|
}
|
|
if (!pattern.empty()) {
|
|
if ((MBSTRG(search_re) = php_mbregex_compile_pattern
|
|
(pattern, noption, MBSTRG(current_mbctype), syntax)) == NULL) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
MBSTRG(search_str) = std::string(str.data(), str.size());
|
|
MBSTRG(search_pos) = 0;
|
|
|
|
if (MBSTRG(search_regs) != NULL) {
|
|
onig_region_free(MBSTRG(search_regs), 1);
|
|
MBSTRG(search_regs) = (OnigRegion *)NULL;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/* regex search */
|
|
static Variant _php_mb_regex_ereg_search_exec(CStrRef pattern, CStrRef option,
|
|
int mode) {
|
|
int n, i, err, pos, len, beg, end;
|
|
OnigUChar *str;
|
|
OnigSyntaxType *syntax = NULL;
|
|
OnigOptionType noption;
|
|
|
|
noption = MBSTRG(regex_default_options);
|
|
if (!option.empty()) {
|
|
noption = 0;
|
|
_php_mb_regex_init_options(option.data(), option.size(),
|
|
&noption, &syntax, NULL);
|
|
}
|
|
if (!pattern.empty()) {
|
|
if ((MBSTRG(search_re) = php_mbregex_compile_pattern
|
|
(pattern, noption, MBSTRG(current_mbctype), syntax)) == NULL) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
pos = MBSTRG(search_pos);
|
|
str = NULL;
|
|
len = 0;
|
|
if (!MBSTRG(search_str).empty()) {
|
|
str = (OnigUChar *)MBSTRG(search_str).data();
|
|
len = MBSTRG(search_str).size();
|
|
}
|
|
|
|
if (MBSTRG(search_re) == NULL) {
|
|
raise_warning("No regex given");
|
|
return false;
|
|
}
|
|
|
|
if (str == NULL) {
|
|
raise_warning("No string given");
|
|
return false;
|
|
}
|
|
|
|
if (MBSTRG(search_regs)) {
|
|
onig_region_free(MBSTRG(search_regs), 1);
|
|
}
|
|
MBSTRG(search_regs) = onig_region_new();
|
|
|
|
err = onig_search(MBSTRG(search_re), str, str + len, str + pos, str + len,
|
|
MBSTRG(search_regs), 0);
|
|
Variant ret;
|
|
if (err == ONIG_MISMATCH) {
|
|
MBSTRG(search_pos) = len;
|
|
ret = false;
|
|
} else if (err <= -2) {
|
|
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
|
|
onig_error_code_to_str(err_str, err);
|
|
raise_warning("mbregex search failure in mbregex_search(): %s", err_str);
|
|
ret = false;
|
|
} else {
|
|
if (MBSTRG(search_regs)->beg[0] == MBSTRG(search_regs)->end[0]) {
|
|
raise_warning("Empty regular expression");
|
|
}
|
|
switch (mode) {
|
|
case 1:
|
|
{
|
|
beg = MBSTRG(search_regs)->beg[0];
|
|
end = MBSTRG(search_regs)->end[0];
|
|
ret.append(beg);
|
|
ret.append(end - beg);
|
|
}
|
|
break;
|
|
case 2:
|
|
n = MBSTRG(search_regs)->num_regs;
|
|
for (i = 0; i < n; i++) {
|
|
beg = MBSTRG(search_regs)->beg[i];
|
|
end = MBSTRG(search_regs)->end[i];
|
|
if (beg >= 0 && beg <= end && end <= len) {
|
|
ret.append(String((const char *)(str + beg), end - beg, CopyString));
|
|
} else {
|
|
ret.append(false);
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
ret = true;
|
|
break;
|
|
}
|
|
end = MBSTRG(search_regs)->end[0];
|
|
if (pos < end) {
|
|
MBSTRG(search_pos) = end;
|
|
} else {
|
|
MBSTRG(search_pos) = pos + 1;
|
|
}
|
|
}
|
|
|
|
if (err < 0) {
|
|
onig_region_free(MBSTRG(search_regs), 1);
|
|
MBSTRG(search_regs) = (OnigRegion *)NULL;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
Variant f_mb_ereg_search(CStrRef pattern /* = null_string */,
|
|
CStrRef option /* = null_string */) {
|
|
return _php_mb_regex_ereg_search_exec(pattern, option, 0);
|
|
}
|
|
|
|
Variant f_mb_ereg_search_pos(CStrRef pattern /* = null_string */,
|
|
CStrRef option /* = null_string */) {
|
|
return _php_mb_regex_ereg_search_exec(pattern, option, 1);
|
|
}
|
|
|
|
Variant f_mb_ereg_search_regs(CStrRef pattern /* = null_string */,
|
|
CStrRef option /* = null_string */) {
|
|
return _php_mb_regex_ereg_search_exec(pattern, option, 2);
|
|
}
|
|
|
|
static Variant _php_mb_regex_ereg_exec(CVarRef pattern, CStrRef str,
|
|
Variant ®s, int icase) {
|
|
php_mb_regex_t *re;
|
|
OnigRegion *regions = NULL;
|
|
int i, match_len, beg, end;
|
|
OnigOptionType options;
|
|
|
|
options = MBSTRG(regex_default_options);
|
|
if (icase) {
|
|
options |= ONIG_OPTION_IGNORECASE;
|
|
}
|
|
|
|
/* compile the regular expression from the supplied regex */
|
|
String spattern;
|
|
if (!pattern.isString()) {
|
|
/* we convert numbers to integers and treat them as a string */
|
|
if (pattern.is(KindOfDouble)) {
|
|
spattern = String(pattern.toInt64()); /* get rid of decimal places */
|
|
} else {
|
|
spattern = pattern.toString();
|
|
}
|
|
} else {
|
|
spattern = pattern.toString();
|
|
}
|
|
re = php_mbregex_compile_pattern(spattern, options, MBSTRG(current_mbctype),
|
|
MBSTRG(regex_default_syntax));
|
|
if (re == NULL) {
|
|
return false;
|
|
}
|
|
|
|
regions = onig_region_new();
|
|
|
|
/* actually execute the regular expression */
|
|
if (onig_search(re, (OnigUChar *)str.data(),
|
|
(OnigUChar *)(str.data() + str.size()),
|
|
(OnigUChar *)str.data(),
|
|
(OnigUChar *)(str.data() + str.size()),
|
|
regions, 0) < 0) {
|
|
onig_region_free(regions, 1);
|
|
return false;
|
|
}
|
|
|
|
const char *s = str.data();
|
|
int string_len = str.size();
|
|
match_len = regions->end[0] - regions->beg[0];
|
|
regs = Array::Create();
|
|
for (i = 0; i < regions->num_regs; i++) {
|
|
beg = regions->beg[i];
|
|
end = regions->end[i];
|
|
if (beg >= 0 && beg < end && end <= string_len) {
|
|
regs.append(String(s + beg, end - beg, CopyString));
|
|
} else {
|
|
regs.append(false);
|
|
}
|
|
}
|
|
|
|
if (match_len == 0) {
|
|
match_len = 1;
|
|
}
|
|
if (regions != NULL) {
|
|
onig_region_free(regions, 1);
|
|
}
|
|
return match_len;
|
|
}
|
|
|
|
Variant f_mb_ereg(CVarRef pattern, CStrRef str, VRefParam regs /* = null */) {
|
|
return _php_mb_regex_ereg_exec(pattern, str, regs, 0);
|
|
}
|
|
|
|
Variant f_mb_eregi(CVarRef pattern, CStrRef str, VRefParam regs /* = null */) {
|
|
return _php_mb_regex_ereg_exec(pattern, str, regs, 1);
|
|
}
|
|
|
|
Variant f_mb_regex_encoding(CStrRef encoding /* = null_string */) {
|
|
if (encoding.empty()) {
|
|
const char *retval = php_mb_regex_mbctype2name(MBSTRG(current_mbctype));
|
|
if (retval != NULL) {
|
|
return String(retval, CopyString);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
OnigEncoding mbctype = php_mb_regex_name2mbctype(encoding.data());
|
|
if (mbctype == ONIG_ENCODING_UNDEF) {
|
|
raise_warning("Unknown encoding \"%s\"", encoding.data());
|
|
return false;
|
|
}
|
|
|
|
MBSTRG(current_mbctype) = mbctype;
|
|
return true;
|
|
}
|
|
|
|
static void php_mb_regex_set_options(OnigOptionType options,
|
|
OnigSyntaxType *syntax,
|
|
OnigOptionType *prev_options,
|
|
OnigSyntaxType **prev_syntax) {
|
|
if (prev_options != NULL) {
|
|
*prev_options = MBSTRG(regex_default_options);
|
|
}
|
|
if (prev_syntax != NULL) {
|
|
*prev_syntax = MBSTRG(regex_default_syntax);
|
|
}
|
|
MBSTRG(regex_default_options) = options;
|
|
MBSTRG(regex_default_syntax) = syntax;
|
|
}
|
|
|
|
String f_mb_regex_set_options(CStrRef options /* = null_string */) {
|
|
OnigOptionType opt;
|
|
OnigSyntaxType *syntax;
|
|
char buf[16];
|
|
|
|
if (!options.empty()) {
|
|
opt = 0;
|
|
syntax = NULL;
|
|
_php_mb_regex_init_options(options.data(), options.size(),
|
|
&opt, &syntax, NULL);
|
|
php_mb_regex_set_options(opt, syntax, NULL, NULL);
|
|
} else {
|
|
opt = MBSTRG(regex_default_options);
|
|
syntax = MBSTRG(regex_default_syntax);
|
|
}
|
|
_php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
|
|
return String(buf, CopyString);
|
|
}
|
|
|
|
Variant f_mb_split(CStrRef pattern, CStrRef str, int count /* = -1 */) {
|
|
php_mb_regex_t *re;
|
|
OnigRegion *regs = NULL;
|
|
|
|
int n, err;
|
|
if (count == 0) {
|
|
count = 1;
|
|
}
|
|
|
|
/* create regex pattern buffer */
|
|
if ((re = php_mbregex_compile_pattern(pattern,
|
|
MBSTRG(regex_default_options),
|
|
MBSTRG(current_mbctype),
|
|
MBSTRG(regex_default_syntax)))
|
|
== NULL) {
|
|
return false;
|
|
}
|
|
|
|
Array ret;
|
|
OnigUChar *pos0 = (OnigUChar *)str.data();
|
|
OnigUChar *pos_end = (OnigUChar *)(str.data() + str.size());
|
|
OnigUChar *pos = pos0;
|
|
err = 0;
|
|
regs = onig_region_new();
|
|
/* churn through str, generating array entries as we go */
|
|
while ((--count != 0) &&
|
|
(err = onig_search(re, pos0, pos_end, pos, pos_end, regs, 0)) >= 0) {
|
|
if (regs->beg[0] == regs->end[0]) {
|
|
raise_warning("Empty regular expression");
|
|
break;
|
|
}
|
|
|
|
/* add it to the array */
|
|
if (regs->beg[0] < str.size() && regs->beg[0] >= (pos - pos0)) {
|
|
ret.append(String((const char *)pos,
|
|
((OnigUChar *)(str.data() + regs->beg[0]) - pos),
|
|
CopyString));
|
|
} else {
|
|
err = -2;
|
|
break;
|
|
}
|
|
/* point at our new starting point */
|
|
n = regs->end[0];
|
|
if ((pos - pos0) < n) {
|
|
pos = pos0 + n;
|
|
}
|
|
if (count < 0) {
|
|
count = 0;
|
|
}
|
|
onig_region_free(regs, 0);
|
|
}
|
|
|
|
onig_region_free(regs, 1);
|
|
|
|
/* see if we encountered an error */
|
|
if (err <= -2) {
|
|
OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
|
|
onig_error_code_to_str(err_str, err);
|
|
raise_warning("mbregex search failure in mbsplit(): %s", err_str);
|
|
return false;
|
|
}
|
|
|
|
/* otherwise we just have one last element to add to the array */
|
|
n = pos_end - pos;
|
|
if (n > 0) {
|
|
ret.append(String((const char *)pos, n, CopyString));
|
|
} else {
|
|
ret.append("");
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
|
|
if (str[pos] == '\r' && str[pos + 1] == '\n' && \
|
|
(str[pos + 2] == ' ' || str[pos + 2] == '\t')) { \
|
|
pos += 2; \
|
|
while (str[pos + 1] == ' ' || str[pos + 1] == '\t') { \
|
|
pos++; \
|
|
} \
|
|
continue; \
|
|
}
|
|
|
|
static int _php_mbstr_parse_mail_headers(Array &ht, const char *str,
|
|
size_t str_len) {
|
|
const char *ps;
|
|
size_t icnt;
|
|
int state = 0;
|
|
int crlf_state = -1;
|
|
|
|
StringBuffer token;
|
|
String fld_name, fld_val;
|
|
|
|
ps = str;
|
|
icnt = str_len;
|
|
|
|
/*
|
|
* C o n t e n t - T y p e : t e x t / h t m l \r\n
|
|
* ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
|
|
* state 0 1 2 3
|
|
*
|
|
* C o n t e n t - T y p e : t e x t / h t m l \r\n
|
|
* ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
|
|
* crlf_state -1 0 1 -1
|
|
*
|
|
*/
|
|
|
|
while (icnt > 0) {
|
|
switch (*ps) {
|
|
case ':':
|
|
if (crlf_state == 1) {
|
|
token.append('\r');
|
|
}
|
|
|
|
if (state == 0 || state == 1) {
|
|
fld_name = token.detach();
|
|
|
|
state = 2;
|
|
} else {
|
|
token.append(*ps);
|
|
}
|
|
|
|
crlf_state = 0;
|
|
break;
|
|
|
|
case '\n':
|
|
if (crlf_state == -1) {
|
|
goto out;
|
|
}
|
|
crlf_state = -1;
|
|
break;
|
|
|
|
case '\r':
|
|
if (crlf_state == 1) {
|
|
token.append('\r');
|
|
} else {
|
|
crlf_state = 1;
|
|
}
|
|
break;
|
|
|
|
case ' ': case '\t':
|
|
if (crlf_state == -1) {
|
|
if (state == 3) {
|
|
/* continuing from the previous line */
|
|
state = 4;
|
|
} else {
|
|
/* simply skipping this new line */
|
|
state = 5;
|
|
}
|
|
} else {
|
|
if (crlf_state == 1) {
|
|
token.append('\r');
|
|
}
|
|
if (state == 1 || state == 3) {
|
|
token.append(*ps);
|
|
}
|
|
}
|
|
crlf_state = 0;
|
|
break;
|
|
|
|
default:
|
|
switch (state) {
|
|
case 0:
|
|
token.clear();
|
|
state = 1;
|
|
break;
|
|
|
|
case 2:
|
|
if (crlf_state != -1) {
|
|
token.clear();
|
|
state = 3;
|
|
break;
|
|
}
|
|
/* break is missing intentionally */
|
|
|
|
case 3:
|
|
if (crlf_state == -1) {
|
|
fld_val = token.detach();
|
|
if (!fld_name.empty() && !fld_val.empty()) {
|
|
/* FIXME: some locale free implementation is
|
|
* really required here,,, */
|
|
ht.set(StringUtil::ToUpper(fld_name), fld_val);
|
|
}
|
|
state = 1;
|
|
}
|
|
break;
|
|
|
|
case 4:
|
|
token.append(' ');
|
|
state = 3;
|
|
break;
|
|
}
|
|
|
|
if (crlf_state == 1) {
|
|
token.append('\r');
|
|
}
|
|
|
|
token.append(*ps);
|
|
|
|
crlf_state = 0;
|
|
break;
|
|
}
|
|
ps++, icnt--;
|
|
}
|
|
out:
|
|
if (state == 2) {
|
|
token.clear();
|
|
state = 3;
|
|
}
|
|
if (state == 3) {
|
|
fld_val = token.detach();
|
|
if (!fld_name.empty() && !fld_val.empty()) {
|
|
/* FIXME: some locale free implementation is
|
|
* really required here,,, */
|
|
ht.set(StringUtil::ToUpper(fld_name), fld_val);
|
|
}
|
|
}
|
|
return state;
|
|
}
|
|
|
|
static int php_mail(const char *to, const char *subject, const char *message,
|
|
const char *headers, const char *extra_cmd) {
|
|
const char *sendmail_path = "/usr/sbin/sendmail -t -i";
|
|
String sendmail_cmd = sendmail_path;
|
|
if (extra_cmd != NULL) {
|
|
sendmail_cmd += " ";
|
|
sendmail_cmd += extra_cmd;
|
|
}
|
|
|
|
/* Since popen() doesn't indicate if the internal fork() doesn't work
|
|
* (e.g. the shell can't be executed) we explicitely set it to 0 to be
|
|
* sure we don't catch any older errno value. */
|
|
errno = 0;
|
|
FILE *sendmail = popen(sendmail_cmd.data(), "w");
|
|
if (sendmail == NULL) {
|
|
raise_warning("Could not execute mail delivery program '%s'",
|
|
sendmail_path);
|
|
return 0;
|
|
}
|
|
|
|
if (EACCES == errno) {
|
|
raise_warning("Permission denied: unable to execute shell to run "
|
|
"mail delivery binary '%s'", sendmail_path);
|
|
pclose(sendmail);
|
|
return 0;
|
|
}
|
|
|
|
fprintf(sendmail, "To: %s\n", to);
|
|
fprintf(sendmail, "Subject: %s\n", subject);
|
|
if (headers != NULL) {
|
|
fprintf(sendmail, "%s\n", headers);
|
|
}
|
|
fprintf(sendmail, "\n%s\n", message);
|
|
int ret = pclose(sendmail);
|
|
#if defined(EX_TEMPFAIL)
|
|
if ((ret != EX_OK) && (ret != EX_TEMPFAIL)) return 0;
|
|
#elif defined(EX_OK)
|
|
if (ret != EX_OK) return 0;
|
|
#else
|
|
if (ret != 0) return 0;
|
|
#endif
|
|
return 1;
|
|
}
|
|
|
|
bool f_mb_send_mail(CStrRef to, CStrRef subject, CStrRef message,
|
|
CStrRef headers /* = null_string */,
|
|
CStrRef extra_cmd /* = null_string */) {
|
|
/* initialize */
|
|
/* automatic allocateable buffer for additional header */
|
|
mbfl_memory_device device;
|
|
mbfl_memory_device_init(&device, 0, 0);
|
|
mbfl_string orig_str, conv_str;
|
|
mbfl_string_init(&orig_str);
|
|
mbfl_string_init(&conv_str);
|
|
|
|
/* character-set, transfer-encoding */
|
|
mbfl_no_encoding
|
|
tran_cs, /* transfar text charset */
|
|
head_enc, /* header transfar encoding */
|
|
body_enc; /* body transfar encoding */
|
|
tran_cs = mbfl_no_encoding_utf8;
|
|
head_enc = mbfl_no_encoding_base64;
|
|
body_enc = mbfl_no_encoding_base64;
|
|
const mbfl_language *lang = mbfl_no2language(MBSTRG(current_language));
|
|
if (lang != NULL) {
|
|
tran_cs = lang->mail_charset;
|
|
head_enc = lang->mail_header_encoding;
|
|
body_enc = lang->mail_body_encoding;
|
|
}
|
|
|
|
Array ht_headers;
|
|
if (!headers.empty()) {
|
|
_php_mbstr_parse_mail_headers(ht_headers, headers.data(), headers.size());
|
|
}
|
|
|
|
struct {
|
|
int cnt_type:1;
|
|
int cnt_trans_enc:1;
|
|
} suppressed_hdrs = { 0, 0 };
|
|
|
|
static const StaticString s_CONTENT_TYPE("CONTENT-TYPE");
|
|
String s = ht_headers[s_CONTENT_TYPE];
|
|
if (!s.isNull()) {
|
|
char *tmp;
|
|
char *param_name;
|
|
char *charset = NULL;
|
|
|
|
char *p = const_cast<char*>(strchr(s.data(), ';'));
|
|
if (p != NULL) {
|
|
/* skipping the padded spaces */
|
|
do {
|
|
++p;
|
|
} while (*p == ' ' || *p == '\t');
|
|
|
|
if (*p != '\0') {
|
|
if ((param_name = strtok_r(p, "= ", &tmp)) != NULL) {
|
|
if (strcasecmp(param_name, "charset") == 0) {
|
|
mbfl_no_encoding _tran_cs = tran_cs;
|
|
|
|
charset = strtok_r(NULL, "= ", &tmp);
|
|
if (charset != NULL) {
|
|
_tran_cs = mbfl_name2no_encoding(charset);
|
|
}
|
|
|
|
if (_tran_cs == mbfl_no_encoding_invalid) {
|
|
raise_warning("Unsupported charset \"%s\" - "
|
|
"will be regarded as ascii", charset);
|
|
_tran_cs = mbfl_no_encoding_ascii;
|
|
}
|
|
tran_cs = _tran_cs;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
suppressed_hdrs.cnt_type = 1;
|
|
}
|
|
|
|
static const StaticString
|
|
s_CONTENT_TRANSFER_ENCODING("CONTENT-TRANSFER-ENCODING");
|
|
s = ht_headers[s_CONTENT_TRANSFER_ENCODING];
|
|
if (!s.isNull()) {
|
|
mbfl_no_encoding _body_enc = mbfl_name2no_encoding(s.data());
|
|
switch (_body_enc) {
|
|
case mbfl_no_encoding_base64:
|
|
case mbfl_no_encoding_7bit:
|
|
case mbfl_no_encoding_8bit:
|
|
body_enc = _body_enc;
|
|
break;
|
|
|
|
default:
|
|
raise_warning("Unsupported transfer encoding \"%s\" - "
|
|
"will be regarded as 8bit", s.data());
|
|
body_enc = mbfl_no_encoding_8bit;
|
|
break;
|
|
}
|
|
suppressed_hdrs.cnt_trans_enc = 1;
|
|
}
|
|
|
|
/* To: */
|
|
char *to_r = NULL;
|
|
int err = 0;
|
|
if (!to.empty()) {
|
|
int to_len = to.size();
|
|
if (to_len > 0) {
|
|
to_r = strndup(to.data(), to_len);
|
|
for (; to_len; to_len--) {
|
|
if (!isspace((unsigned char)to_r[to_len - 1])) {
|
|
break;
|
|
}
|
|
to_r[to_len - 1] = '\0';
|
|
}
|
|
for (int i = 0; to_r[i]; i++) {
|
|
if (iscntrl((unsigned char)to_r[i])) {
|
|
/**
|
|
* According to RFC 822, section 3.1.1 long headers may be
|
|
* separated into parts using CRLF followed at least one
|
|
* linear-white-space character ('\t' or ' ').
|
|
* To prevent these separators from being replaced with a space,
|
|
* we use the SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
|
|
*/
|
|
SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
|
|
to_r[i] = ' ';
|
|
}
|
|
}
|
|
} else {
|
|
to_r = (char*)to.data();
|
|
}
|
|
} else {
|
|
raise_warning("Missing To: field");
|
|
err = 1;
|
|
}
|
|
|
|
/* Subject: */
|
|
String encoded_subject;
|
|
if (!subject.isNull()) {
|
|
orig_str.no_language = MBSTRG(current_language);
|
|
orig_str.val = (unsigned char *)subject.data();
|
|
orig_str.len = subject.size();
|
|
orig_str.no_encoding = MBSTRG(current_internal_encoding);
|
|
if (orig_str.no_encoding == mbfl_no_encoding_invalid
|
|
|| orig_str.no_encoding == mbfl_no_encoding_pass) {
|
|
orig_str.no_encoding = mbfl_identify_encoding_no
|
|
(&orig_str, MBSTRG(current_detect_order_list),
|
|
MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
|
|
}
|
|
mbfl_string *pstr = mbfl_mime_header_encode
|
|
(&orig_str, &conv_str, tran_cs, head_enc,
|
|
"\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
|
|
if (pstr != NULL) {
|
|
encoded_subject = String((const char *)pstr->val, pstr->len,
|
|
AttachString);
|
|
}
|
|
} else {
|
|
raise_warning("Missing Subject: field");
|
|
err = 1;
|
|
}
|
|
|
|
/* message body */
|
|
String encoded_message;
|
|
if (!message.empty()) {
|
|
orig_str.no_language = MBSTRG(current_language);
|
|
orig_str.val = (unsigned char*)message.data();
|
|
orig_str.len = message.size();
|
|
orig_str.no_encoding = MBSTRG(current_internal_encoding);
|
|
|
|
if (orig_str.no_encoding == mbfl_no_encoding_invalid
|
|
|| orig_str.no_encoding == mbfl_no_encoding_pass) {
|
|
orig_str.no_encoding = mbfl_identify_encoding_no
|
|
(&orig_str, MBSTRG(current_detect_order_list),
|
|
MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
|
|
}
|
|
|
|
mbfl_string *pstr = NULL;
|
|
{
|
|
mbfl_string tmpstr;
|
|
if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
|
|
tmpstr.no_encoding = mbfl_no_encoding_8bit;
|
|
pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
|
|
free(tmpstr.val);
|
|
}
|
|
}
|
|
if (pstr != NULL) {
|
|
encoded_message = String((const char *)pstr->val, pstr->len,
|
|
AttachString);
|
|
}
|
|
} else {
|
|
/* this is not really an error, so it is allowed. */
|
|
raise_warning("Empty message body");
|
|
}
|
|
|
|
/* other headers */
|
|
#define PHP_MBSTR_MAIL_MIME_HEADER1 "Mime-Version: 1.0"
|
|
#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
|
|
#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
|
|
#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
|
|
if (!headers.empty()) {
|
|
const char *p = headers.data();
|
|
int n = headers.size();
|
|
mbfl_memory_device_strncat(&device, p, n);
|
|
if (n > 0 && p[n - 1] != '\n') {
|
|
mbfl_memory_device_strncat(&device, "\n", 1);
|
|
}
|
|
}
|
|
|
|
mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1,
|
|
sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
|
|
mbfl_memory_device_strncat(&device, "\n", 1);
|
|
|
|
if (!suppressed_hdrs.cnt_type) {
|
|
mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2,
|
|
sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
|
|
|
|
char *p = (char *)mbfl_no2preferred_mime_name(tran_cs);
|
|
if (p != NULL) {
|
|
mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3,
|
|
sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
|
|
mbfl_memory_device_strcat(&device, p);
|
|
}
|
|
mbfl_memory_device_strncat(&device, "\n", 1);
|
|
}
|
|
if (!suppressed_hdrs.cnt_trans_enc) {
|
|
mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4,
|
|
sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
|
|
const char *p = (char *)mbfl_no2preferred_mime_name(body_enc);
|
|
if (p == NULL) {
|
|
p = "7bit";
|
|
}
|
|
mbfl_memory_device_strcat(&device, p);
|
|
mbfl_memory_device_strncat(&device, "\n", 1);
|
|
}
|
|
|
|
mbfl_memory_device_unput(&device);
|
|
mbfl_memory_device_output('\0', &device);
|
|
|
|
char *all_headers = (char *)device.buffer;
|
|
|
|
String cmd = f_escapeshellcmd(extra_cmd);
|
|
bool ret = (!err && php_mail(to_r, encoded_subject.data(),
|
|
encoded_message.data(),
|
|
all_headers, cmd.data()));
|
|
mbfl_memory_device_clear(&device);
|
|
return ret;
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
}
|