84b9d9a3a2
In HHVM (and HPHPc before it) we've been piggybacking resources on the KindOfObject machinery. At the language level, resource is considered to be a different type than object, and there are a number of differences in behavior between objects and resources (e.g. resources don't allow for dynamic properties, resources don't work with the clone operator, the "(object)" cast behaves differently for resources vs. objects, etc). Piggybacking resources on the KindOfObject machinery has some downsides. Code that deals with KindOfObject values often needs to check if the value is a resource and go down a different code path. This makes things harder to maintain and harder to keep parity with Zend. Also, these extra branches hurt performance a little, and they make it harder for the JIT to do a good job in some cases when it's generating machine code that operates on objects. This diff prepares the code base for a new KindOfResource type by adding a new "Resource" smart pointer type (currently a typedef for the Object smart pointer type) and it updates the C++ code and the idl files appropriately. This diff is essentially a cosmetic change and should not impact run time behavior. In the next diff (part 2) we'll actually add a new KindOfResource type, detach ResourceData from the ObjectData inheritance hierarchy, and provide a real implementation for the Resource smart pointer type (instead of just aliasing the Object smart pointer type).
952 linhas
29 KiB
C++
952 linhas
29 KiB
C++
/*
|
|
+----------------------------------------------------------------------+
|
|
| HipHop for PHP |
|
|
+----------------------------------------------------------------------+
|
|
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
|
|
| Copyright (c) 1997-2010 The PHP Group |
|
|
+----------------------------------------------------------------------+
|
|
| This source file is subject to version 3.01 of the PHP license, |
|
|
| that is bundled with this package in the file LICENSE, and is |
|
|
| available through the world-wide-web at the following url: |
|
|
| http://www.php.net/license/3_01.txt |
|
|
| If you did not receive a copy of the PHP license and are unable to |
|
|
| obtain it through the world-wide-web, please send a note to |
|
|
| license@php.net so we can mail you a copy immediately. |
|
|
+----------------------------------------------------------------------+
|
|
*/
|
|
|
|
#include "hphp/runtime/ext/ext_xml.h"
|
|
#include "hphp/runtime/base/zend/zend_functions.h"
|
|
#include "hphp/runtime/base/zend/zend_string.h"
|
|
#include "hphp/runtime/vm/jit/translator.h"
|
|
#include "hphp/runtime/vm/jit/translator-inline.h"
|
|
#include <expat.h>
|
|
|
|
namespace HPHP {
|
|
IMPLEMENT_DEFAULT_EXTENSION(xml);
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
class XmlParser : public SweepableResourceData {
|
|
public:
|
|
DECLARE_OBJECT_ALLOCATION(XmlParser)
|
|
XmlParser();
|
|
virtual ~XmlParser();
|
|
void cleanupImpl();
|
|
static StaticString s_class_name;
|
|
virtual CStrRef o_getClassNameHook() const;
|
|
|
|
int case_folding;
|
|
XML_Parser parser;
|
|
XML_Char *target_encoding;
|
|
|
|
Variant startElementHandler;
|
|
Variant endElementHandler;
|
|
Variant characterDataHandler;
|
|
Variant processingInstructionHandler;
|
|
Variant defaultHandler;
|
|
Variant unparsedEntityDeclHandler;
|
|
Variant notationDeclHandler;
|
|
Variant externalEntityRefHandler;
|
|
Variant unknownEncodingHandler;
|
|
Variant startNamespaceDeclHandler;
|
|
Variant endNamespaceDeclHandler;
|
|
|
|
Variant object;
|
|
|
|
Variant data;
|
|
Variant info;
|
|
int level;
|
|
int toffset;
|
|
int curtag;
|
|
Variant ctag;
|
|
char **ltags;
|
|
int lastwasopen;
|
|
int skipwhite;
|
|
int isparsing;
|
|
};
|
|
|
|
IMPLEMENT_OBJECT_ALLOCATION_NO_DEFAULT_SWEEP(XmlParser);
|
|
|
|
XmlParser::XmlParser() : case_folding(0), parser(NULL),
|
|
target_encoding(NULL), level(0), toffset(0), curtag(0),
|
|
ltags(NULL), lastwasopen(0), skipwhite(0), isparsing(0) {
|
|
}
|
|
|
|
/** Releases the expat parser and the tag stack via cleanupImpl(). */
XmlParser::~XmlParser() {
  this->cleanupImpl();
}
|
|
|
|
void XmlParser::cleanupImpl() {
|
|
if (parser) {
|
|
XML_ParserFree(parser);
|
|
parser = NULL;
|
|
}
|
|
if (ltags) {
|
|
int inx;
|
|
for (inx = 0; inx < level; inx++)
|
|
free(ltags[inx]);
|
|
free(ltags);
|
|
ltags = NULL;
|
|
}
|
|
}
|
|
|
|
void XmlParser::sweep() {
|
|
cleanupImpl();
|
|
}
|
|
|
|
StaticString XmlParser::s_class_name("xml");
|
|
|
|
CStrRef XmlParser::o_getClassNameHook() const {
|
|
return s_class_name;
|
|
}
|
|
|
|
typedef struct {
|
|
XML_Char *name;
|
|
char (*decoding_function)(unsigned short);
|
|
unsigned short (*encoding_function)(unsigned char);
|
|
} xml_encoding;
|
|
|
|
/** Option identifiers accepted by xml_parser_get_option()/set_option(). */
enum php_xml_option {
  PHP_XML_OPTION_CASE_FOLDING    = 1,
  PHP_XML_OPTION_TARGET_ENCODING = 2,
  PHP_XML_OPTION_SKIP_TAGSTART   = 3,
  PHP_XML_OPTION_SKIP_WHITE      = 4
};
|
|
|
|
// Encoding used when the caller does not name one.
static XML_Char *xml_globals_default_encoding = (XML_Char*)"UTF-8";

// Initial number of slots in the tag stack allocated for
// xml_parse_into_struct().
// XXX this should be dynamic
#define XML_MAXLEVEL 255

// Accessor for the xml_globals_* variables: XML(foo) -> xml_globals_foo.
#define XML(v) (xml_globals_ ## v)
|
|
|
|
/** Widens an ISO-8859-1 byte to its code unit (the value is unchanged). */
static inline unsigned short xml_encode_iso_8859_1(unsigned char c) {
  const unsigned short widened = c;
  return widened;
}
|
|
|
|
/** Narrows a code unit to ISO-8859-1; values above 0xff become '?'. */
static inline char xml_decode_iso_8859_1(unsigned short c) {
  if (c > 0xff) {
    return '?';
  }
  return (char)c;
}
|
|
|
|
/** Widens a US-ASCII byte to its code unit (the value is unchanged). */
static inline unsigned short xml_encode_us_ascii(unsigned char c) {
  const unsigned short widened = c;
  return widened;
}
|
|
|
|
/** Narrows a code unit to US-ASCII; values above 0x7f become '?'. */
static inline char xml_decode_us_ascii(unsigned short c) {
  if (c > 0x7f) {
    return '?';
  }
  return (char)c;
}
|
|
|
|
// Supported encodings, searched by xml_get_encoding(). The row whose
// name is NULL terminates the table. UTF-8 has no converters, so its
// data is passed through as-is.
xml_encoding xml_encodings[] = {
  { (XML_Char*)"ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
  { (XML_Char*)"US-ASCII",   xml_decode_us_ascii,   xml_encode_us_ascii   },
  { (XML_Char*)"UTF-8",      NULL,                  NULL                  },
  { (XML_Char*)NULL,         NULL,                  NULL                  }
};
|
|
|
|
/** Allocation hook handed to expat; forwards to malloc(). */
static void *php_xml_malloc_wrapper(size_t sz) {
  void *block = malloc(sz);
  return block;
}
|
|
|
|
/** Reallocation hook handed to expat; forwards to realloc(). */
static void *php_xml_realloc_wrapper(void *ptr, size_t sz) {
  void *block = realloc(ptr, sz);
  return block;
}
|
|
|
|
/** Deallocation hook handed to expat; NULL is ignored. */
static void php_xml_free_wrapper(void *ptr) {
  if (ptr == NULL) {
    return;
  }
  free(ptr);
}
|
|
|
|
// Allocator suite passed to XML_ParserCreate_MM(): malloc, realloc, free.
static XML_Memory_Handling_Suite php_xml_mem_hdlrs = {
  php_xml_malloc_wrapper,   // allocate
  php_xml_realloc_wrapper,  // resize
  php_xml_free_wrapper      // release
};
|
|
|
|
/**
 * Looks up `name` (case-insensitively) in xml_encodings.
 *
 * @return the matching table row, or NULL when the name is not listed.
 */
static xml_encoding *xml_get_encoding(const XML_Char *name) {
  for (xml_encoding *row = xml_encodings; row->name != NULL; ++row) {
    const bool matches =
      strcasecmp((const char*)name, (const char*)row->name) == 0;
    if (matches) {
      return row;
    }
  }
  return NULL;
}
|
|
|
|
static int _xml_xmlcharlen(const XML_Char *s) {
|
|
int len = 0;
|
|
|
|
while (*s) {
|
|
len++;
|
|
s++;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
char *xml_utf8_decode(const XML_Char *s, int len, int *newlen,
|
|
const XML_Char *encoding) {
|
|
int pos = len;
|
|
char *newbuf = (char*)malloc(len+1);
|
|
unsigned short c;
|
|
char (*decoder)(unsigned short) = NULL;
|
|
xml_encoding *enc = xml_get_encoding(encoding);
|
|
|
|
*newlen = 0;
|
|
if (enc) {
|
|
decoder = enc->decoding_function;
|
|
}
|
|
if (decoder == NULL) {
|
|
/* If the target encoding was unknown, or no decoder function
|
|
* was specified, return the UTF-8-encoded data as-is.
|
|
*/
|
|
memcpy(newbuf, s, len);
|
|
*newlen = len;
|
|
newbuf[*newlen] = '\0';
|
|
return newbuf;
|
|
}
|
|
while (pos > 0) {
|
|
c = (unsigned char)(*s);
|
|
if (c >= 0xf0) { /* four bytes encoded, 21 bits */
|
|
if(pos-4 >= 0) {
|
|
c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
|
|
} else {
|
|
c = '?';
|
|
}
|
|
s += 4;
|
|
pos -= 4;
|
|
} else if (c >= 0xe0) { /* three bytes encoded, 16 bits */
|
|
if(pos-3 >= 0) {
|
|
c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63);
|
|
} else {
|
|
c = '?';
|
|
}
|
|
s += 3;
|
|
pos -= 3;
|
|
} else if (c >= 0xc0) { /* two bytes encoded, 11 bits */
|
|
if(pos-2 >= 0) {
|
|
c = ((s[0]&63)<<6) | (s[1]&63);
|
|
} else {
|
|
c = '?';
|
|
}
|
|
s += 2;
|
|
pos -= 2;
|
|
} else {
|
|
s++;
|
|
pos--;
|
|
}
|
|
newbuf[*newlen] = decoder ? decoder(c) : c;
|
|
++*newlen;
|
|
}
|
|
if (*newlen < len) {
|
|
newbuf = (char*)realloc(newbuf, *newlen + 1);
|
|
}
|
|
newbuf[*newlen] = '\0';
|
|
return newbuf;
|
|
}
|
|
|
|
/**
 * Wraps expat text in a Variant after converting it to `encoding`.
 *
 * @param s   text from expat; NULL yields boolean false.
 * @param len byte count, or 0 to measure `s` up to its terminator.
 */
static Variant _xml_xmlchar_zval(const XML_Char *s, int len,
                                 const XML_Char *encoding) {
  if (s == NULL) {
    return false;
  }
  const int input_len = (len == 0) ? _xml_xmlcharlen(s) : len;

  int decoded_len;
  char *decoded = xml_utf8_decode(s, input_len, &decoded_len, encoding);
  // The String takes ownership of the malloc()ed buffer.
  return String(decoded, decoded_len, AttachString);
}
|
|
|
|
/**
 * Converts a tag or attribute name to the parser's target encoding and,
 * when case folding is on, to upper case.
 *
 * @return a heap buffer the caller must free().
 */
static char *_xml_decode_tag(XmlParser *parser, const char *tag) {
  int decoded_len;
  char *decoded = xml_utf8_decode((const XML_Char*)tag, strlen(tag),
                                  &decoded_len, parser->target_encoding);
  if (!parser->case_folding) {
    return decoded;
  }
  // string_to_upper() hands back a new buffer; drop the intermediate one.
  char *folded = string_to_upper(decoded, decoded_len);
  free(decoded);
  return folded;
}
|
|
|
|
/**
 * Shared implementation of xml_parser_create() and xml_parser_create_ns().
 *
 * @param encoding_param source encoding: null uses the default; an empty
 *                       string uses the default for output and passes NULL
 *                       to expat as the source encoding; otherwise one of
 *                       ISO-8859-1, UTF-8 or US-ASCII (case-insensitive).
 * @param ns_param       namespace separator; when empty and `ns_support`
 *                       is set, ":" is used.
 * @param ns_support     non-zero for the namespace-aware variant.
 * @return the new parser resource, or false (with a warning) for an
 *         unsupported encoding name.
 */
static Variant php_xml_parser_create_impl(CStrRef encoding_param,
                                          CStrRef ns_param, int ns_support) {
  XML_Char *encoding = XML(default_encoding);
  bool let_expat_detect = false;

  if (!encoding_param.isNull()) {
    /* The supported encoding types are hardcoded here because
     * we are limited to the encodings supported by expat/xmltok.
     */
    if (encoding_param.size() == 0) {
      let_expat_detect = true;
    } else {
      const char *requested = encoding_param.data();
      if (strcasecmp(requested, "ISO-8859-1") == 0) {
        encoding = (XML_Char*)"ISO-8859-1";
      } else if (strcasecmp(requested, "UTF-8") == 0) {
        encoding = (XML_Char*)"UTF-8";
      } else if (strcasecmp(requested, "US-ASCII") == 0) {
        encoding = (XML_Char*)"US-ASCII";
      } else {
        raise_warning("unsupported source encoding \"%s\"",
                      encoding_param.c_str());
        return false;
      }
    }
  }

  String separator(ns_param);
  if (ns_support && ns_param.empty()) {
    separator = ":";
  }
  const XML_Char *ns_separator =
    separator.empty() ? NULL : (const XML_Char*)separator.data();

  XmlParser *parser = NEWOBJ(XmlParser)();
  parser->parser = XML_ParserCreate_MM(let_expat_detect ? NULL : encoding,
                                       &php_xml_mem_hdlrs, ns_separator);

  parser->target_encoding = encoding;
  parser->case_folding = 1;
  setNull(parser->object);
  parser->isparsing = 0;

  // The expat callbacks get the wrapper back through the user-data slot.
  XML_SetUserData(parser->parser, parser);

  return Resource(parser);
}
|
|
|
|
/** Copies a C string into a new String. */
static String _xml_string_zval(const char *str) {
  String copied(str, CopyString);
  return copied;
}
|
|
|
|
/**
 * Invokes a user-supplied callback for an XML event.
 *
 * A string handler is called as a method on the parser's bound object when
 * one is set, otherwise as a plain function. A two-element array handler
 * ([class-or-object, method]) is dispatched through vm_call_user_func().
 * Any other shape raises "Handler is invalid".
 *
 * @return the callback's return value; null when there is no parser, the
 *         handler is falsy, or the handler is invalid.
 */
static Variant xml_call_handler(XmlParser *parser, CVarRef handler,
                                CArrRef args) {
  if (!parser || !handler.toBoolean()) {
    return uninit_null();
  }

  Variant retval;
  if (handler.isString()) {
    if (!parser->object.isObject()) {
      retval = invoke(handler.toString().c_str(), args, -1);
    } else {
      retval = parser->object.toObject()->
        o_invoke(handler.toString(), args);
    }
  } else if (handler.isArray() && handler.getArrayData()->size() == 2 &&
             (handler[0].isString() || handler[0].isObject()) &&
             handler[1].isString()) {
    // Keep the result: callers such as the external-entity-ref handler
    // act on it, and dropping it made array callbacks always report null.
    retval = vm_call_user_func(handler, args);
  } else {
    raise_warning("Handler is invalid");
  }
  return retval;
}
|
|
|
|
/**
 * Records the current entry index under `name` in the parser's index
 * array and advances the index. Does nothing when no index array is bound.
 */
static void _xml_add_to_info(XmlParser *parser, char *name) {
  if (parser->info.isNull()) {
    return;
  }

  String key(name, CopyString);
  const bool already_listed = parser->info.toArray().exists(key);
  if (!already_listed) {
    parser->info.set(key, Array::Create());
  }
  parser->info.lvalAt(key).append(parser->curtag);
  parser->curtag++;
}
|
|
|
|
// Array keys and "type" values used by the element and character-data
// handlers when they build entries for the bound output array.
static const StaticString s_type("type");
static const StaticString s_complete("complete");
static const StaticString s_tag("tag");
static const StaticString s_close("close");
static const StaticString s_level("level");
static const StaticString s_value("value");
static const StaticString s_cdata("cdata");
static const StaticString s_open("open");
static const StaticString s_attributes("attributes");
|
|
|
|
void _xml_endElementHandler(void *userData, const XML_Char *name) {
|
|
XmlParser *parser = (XmlParser *)userData;
|
|
char *tag_name;
|
|
|
|
if (parser) {
|
|
Variant retval;
|
|
Array args = Array::Create();
|
|
|
|
tag_name = _xml_decode_tag(parser, (const char*)name);
|
|
|
|
if (parser->endElementHandler.toBoolean()) {
|
|
args.append(parser);
|
|
args.append(_xml_string_zval(tag_name));
|
|
xml_call_handler(parser, parser->endElementHandler, args);
|
|
}
|
|
|
|
if (!parser->data.isNull()) {
|
|
if (parser->lastwasopen) {
|
|
parser->ctag.set(s_type, s_complete);
|
|
} else {
|
|
ArrayInit tag(3);
|
|
_xml_add_to_info(parser,((char*)tag_name) + parser->toffset);
|
|
tag.set(s_tag, String(((char*)tag_name) + parser->toffset, CopyString));
|
|
tag.set(s_type, s_close);
|
|
tag.set(s_level, parser->level);
|
|
parser->data.append(tag.create());
|
|
}
|
|
parser->lastwasopen = 0;
|
|
}
|
|
|
|
free(tag_name);
|
|
|
|
if (parser->ltags) {
|
|
free(parser->ltags[parser->level-1]);
|
|
}
|
|
|
|
parser->level--;
|
|
}
|
|
}
|
|
|
|
void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) {
|
|
XmlParser *parser = (XmlParser *)userData;
|
|
|
|
if (parser) {
|
|
Variant retval;
|
|
Array args = Array::Create();
|
|
|
|
if (parser->characterDataHandler.toBoolean()) {
|
|
args.append(parser);
|
|
args.append(_xml_xmlchar_zval(s, len, parser->target_encoding));
|
|
xml_call_handler(parser, parser->characterDataHandler, args);
|
|
}
|
|
|
|
if (!parser->data.isNull()) {
|
|
int i;
|
|
int doprint = 0;
|
|
|
|
char *decoded_value;
|
|
int decoded_len;
|
|
decoded_value = xml_utf8_decode(s,len,&decoded_len,
|
|
parser->target_encoding);
|
|
for (i = 0; i < decoded_len; i++) {
|
|
switch (decoded_value[i]) {
|
|
case ' ':
|
|
case '\t':
|
|
case '\n':
|
|
continue;
|
|
default:
|
|
doprint = 1;
|
|
break;
|
|
}
|
|
if (doprint) {
|
|
break;
|
|
}
|
|
}
|
|
if (doprint || (! parser->skipwhite)) {
|
|
if (parser->lastwasopen) {
|
|
String myval;
|
|
// check if value exists, if yes append to that
|
|
if (parser->ctag.toArray().exists(s_value))
|
|
{
|
|
myval = parser->ctag.rvalAt(s_value).toString();
|
|
myval += String(decoded_value, decoded_len, AttachString);
|
|
parser->ctag.set(s_value, myval);
|
|
} else {
|
|
parser->ctag.set(s_value,
|
|
String(decoded_value,decoded_len,AttachString));
|
|
}
|
|
} else {
|
|
Array tag;
|
|
Variant curtag;
|
|
String myval;
|
|
String mytype;
|
|
curtag.assignRef(parser->data.getArrayData()->endRef());
|
|
if (curtag.toArray().exists(s_type)) {
|
|
mytype = curtag.rvalAt(s_type).toString();
|
|
if (!strcmp(mytype.data(), "cdata") &&
|
|
curtag.toArray().exists(s_value)) {
|
|
myval = curtag.rvalAt(s_value).toString();
|
|
myval += String(decoded_value, decoded_len, AttachString);
|
|
curtag.set(s_value, myval);
|
|
return;
|
|
}
|
|
}
|
|
tag = Array::Create();
|
|
_xml_add_to_info(parser, parser->ltags[parser->level-1] +
|
|
parser->toffset);
|
|
tag.set(s_tag, String(parser->ltags[parser->level-1] +
|
|
parser->toffset, CopyString));
|
|
tag.set(s_value, String(decoded_value, AttachString));
|
|
tag.set(s_type, s_cdata);
|
|
tag.set(s_level, parser->level);
|
|
parser->data.append(tag);
|
|
}
|
|
} else {
|
|
free(decoded_value);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void _xml_defaultHandler(void *userData, const XML_Char *s, int len) {
|
|
XmlParser *parser = (XmlParser *)userData;
|
|
|
|
if (parser && parser->defaultHandler.toBoolean()) {
|
|
xml_call_handler(parser, parser->defaultHandler, CREATE_VECTOR2(
|
|
parser, _xml_xmlchar_zval(s, len, parser->target_encoding)));
|
|
}
|
|
}
|
|
|
|
void _xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes) {
|
|
XmlParser *parser = (XmlParser *)userData;
|
|
const char **attrs = (const char **) attributes;
|
|
Variant retval;
|
|
Array args = Array::Create();
|
|
|
|
if (parser) {
|
|
parser->level++;
|
|
|
|
char* tag_name = _xml_decode_tag(parser, (const char*)name);
|
|
|
|
if (parser->startElementHandler.toBoolean()) {
|
|
args.append(parser);
|
|
args.append(_xml_string_zval(tag_name));
|
|
args.append(Array::Create());
|
|
|
|
while (attributes && *attributes) {
|
|
char* att = _xml_decode_tag(parser, (const char*)attributes[0]);
|
|
int val_len;
|
|
char* val = xml_utf8_decode(attributes[1],
|
|
strlen((const char*)attributes[1]),
|
|
&val_len, parser->target_encoding);
|
|
args.lvalAt(2).set(String(att, AttachString),
|
|
String(val, val_len, AttachString));
|
|
attributes += 2;
|
|
}
|
|
|
|
xml_call_handler(parser, parser->startElementHandler, args);
|
|
}
|
|
|
|
if (!parser->data.isNull()) {
|
|
Array tag, atr;
|
|
int atcnt = 0;
|
|
tag = Array::Create();
|
|
atr = Array::Create();
|
|
|
|
_xml_add_to_info(parser,((char *) tag_name) + parser->toffset);
|
|
|
|
tag.set(s_tag,String(((char *)tag_name)+parser->toffset,CopyString));
|
|
tag.set(s_type, s_open);
|
|
tag.set(s_level, parser->level);
|
|
|
|
parser->ltags[parser->level-1] = strdup(tag_name);
|
|
parser->lastwasopen = 1;
|
|
|
|
attributes = (const XML_Char **) attrs;
|
|
|
|
while (attributes && *attributes) {
|
|
char* att = _xml_decode_tag(parser, (const char*)attributes[0]);
|
|
int val_len;
|
|
char* val = xml_utf8_decode(attributes[1],
|
|
strlen((const char*)attributes[1]),
|
|
&val_len, parser->target_encoding);
|
|
atr.set(String(att, AttachString), String(val, val_len, AttachString));
|
|
atcnt++;
|
|
attributes += 2;
|
|
}
|
|
|
|
if (atcnt) {
|
|
tag.set(s_attributes,atr);
|
|
}
|
|
parser->data.append(tag);
|
|
parser->ctag.assignRef(parser->data.getArrayData()->endRef());
|
|
}
|
|
|
|
free(tag_name);
|
|
}
|
|
}
|
|
|
|
void _xml_processingInstructionHandler(void *userData, const XML_Char *target,
|
|
const XML_Char *data) {
|
|
XmlParser *parser = (XmlParser *)userData;
|
|
if (parser && parser->processingInstructionHandler.toBoolean()) {
|
|
Array args = Array::Create();
|
|
args.append(parser);
|
|
args.append(_xml_xmlchar_zval(target, 0, parser->target_encoding));
|
|
args.append(_xml_xmlchar_zval(data, 0, parser->target_encoding));
|
|
xml_call_handler(parser, parser->processingInstructionHandler, args);
|
|
}
|
|
}
|
|
|
|
/**
 * expat external-entity-reference callback.
 *
 * Forwards (parser, open entity names, base, system id, public id) to the
 * user's handler and returns its result as an integer.
 *
 * @return 0 when no handler is set (should this be configurable?).
 */
int _xml_externalEntityRefHandler(XML_ParserStruct* /* void* */ parserPtr,
                                  const XML_Char *openEntityNames,
                                  const XML_Char *base,
                                  const XML_Char *systemId,
                                  const XML_Char *publicId) {
  XmlParser *parser = (XmlParser*)XML_GetUserData((XML_Parser)parserPtr);
  if (!parser || !parser->externalEntityRefHandler.toBoolean()) {
    return 0;
  }

  const XML_Char *enc = parser->target_encoding;
  Array args = Array::Create();
  args.append(parser);
  args.append(_xml_xmlchar_zval(openEntityNames, 0, enc));
  args.append(_xml_xmlchar_zval(base, 0, enc));
  args.append(_xml_xmlchar_zval(systemId, 0, enc));
  args.append(_xml_xmlchar_zval(publicId, 0, enc));

  int verdict = xml_call_handler(parser,
                                 parser->externalEntityRefHandler,
                                 args).toInt64();
  return verdict;
}
|
|
|
|
void _xml_notationDeclHandler(void *userData,
|
|
const XML_Char *notationName,
|
|
const XML_Char *base,
|
|
const XML_Char *systemId,
|
|
const XML_Char *publicId) {
|
|
XmlParser *parser = (XmlParser *)userData;
|
|
|
|
if (parser && parser->notationDeclHandler.toBoolean()) {
|
|
Array args = Array::Create();
|
|
args.append(parser);
|
|
args.append(_xml_xmlchar_zval(notationName, 0, parser->target_encoding));
|
|
args.append(_xml_xmlchar_zval(base, 0, parser->target_encoding));
|
|
args.append(_xml_xmlchar_zval(systemId, 0, parser->target_encoding));
|
|
args.append(_xml_xmlchar_zval(publicId, 0, parser->target_encoding));
|
|
xml_call_handler(parser, parser->notationDeclHandler, args);
|
|
}
|
|
}
|
|
|
|
void _xml_startNamespaceDeclHandler(void *userData,const XML_Char *prefix,
|
|
const XML_Char *uri) {
|
|
XmlParser *parser = (XmlParser *)userData;
|
|
|
|
if (parser && parser->startNamespaceDeclHandler.toBoolean()) {
|
|
Array args = Array::Create();
|
|
|
|
args.append(parser);
|
|
args.append(_xml_xmlchar_zval(prefix, 0, parser->target_encoding));
|
|
args.append(_xml_xmlchar_zval(uri, 0, parser->target_encoding));
|
|
xml_call_handler(parser, parser->startNamespaceDeclHandler, args);
|
|
}
|
|
}
|
|
|
|
void _xml_endNamespaceDeclHandler(void *userData, const XML_Char *prefix) {
|
|
XmlParser *parser = (XmlParser *)userData;
|
|
|
|
if (parser && parser->endNamespaceDeclHandler.toBoolean()) {
|
|
Array args = Array::Create();
|
|
args.append(parser);
|
|
args.append(_xml_xmlchar_zval(prefix, 0, parser->target_encoding));
|
|
xml_call_handler(parser, parser->endNamespaceDeclHandler, args);
|
|
}
|
|
}
|
|
|
|
void _xml_unparsedEntityDeclHandler(void *userData,
|
|
const XML_Char *entityName,
|
|
const XML_Char *base,
|
|
const XML_Char *systemId,
|
|
const XML_Char *publicId,
|
|
const XML_Char *notationName) {
|
|
XmlParser *parser = (XmlParser *)userData;
|
|
|
|
if (parser && parser->unparsedEntityDeclHandler.toBoolean()) {
|
|
Array args = Array::Create();
|
|
args.append(parser);
|
|
args.append(_xml_xmlchar_zval(entityName, 0, parser->target_encoding));
|
|
args.append(_xml_xmlchar_zval(base, 0, parser->target_encoding));
|
|
args.append(_xml_xmlchar_zval(systemId, 0, parser->target_encoding));
|
|
args.append(_xml_xmlchar_zval(publicId, 0, parser->target_encoding));
|
|
args.append(_xml_xmlchar_zval(notationName, 0, parser->target_encoding));
|
|
xml_call_handler(parser, parser->unparsedEntityDeclHandler, args);
|
|
}
|
|
}
|
|
|
|
/**
 * Stores `data` in `*handler` if it has an accepted shape: boolean false
 * (clears the handler), a string, or a two-element array whose first item
 * is a string or object and whose second item is a string. Anything else
 * raises "Handler is invalid" and leaves `*handler` untouched.
 */
static void xml_set_handler(Variant * handler, CVarRef data) {
  bool acceptable = same(data, false) || data.isString();
  if (!acceptable && data.isArray() && data.getArrayData()->size() == 2) {
    acceptable = (data[0].isString() || data[0].isObject()) &&
                 data[1].isString();
  }

  if (!acceptable) {
    raise_warning("Handler is invalid");
    return;
  }
  *handler = data;
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
/** xml_parser_create(): builds a parser without namespace support. */
Object f_xml_parser_create(CStrRef encoding /* = null_string */) {
  Variant created = php_xml_parser_create_impl(encoding, null_string, 0);
  return created.toObject();
}
|
|
|
|
/** xml_parser_create_ns(): builds a parser with namespace support. */
Object f_xml_parser_create_ns(CStrRef encoding /* = null_string */,
                              CStrRef separator /* = null_string */) {
  Variant created = php_xml_parser_create_impl(encoding, separator, 1);
  return created.toObject();
}
|
|
|
|
/**
 * xml_parser_free(): refuses (warning, false) while the parser is in the
 * middle of a parse; otherwise reports true. Nothing is released here.
 */
bool f_xml_parser_free(CResRef parser) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  const bool busy = (xml->isparsing == 1);
  if (busy) {
    raise_warning("Parser cannot be freed while it is parsing.");
    return false;
  }
  return true;
}
|
|
|
|
/**
 * xml_parse(): feeds `data` to expat and returns XML_Parse()'s status.
 * `is_final` marks the last chunk of the document.
 */
int64_t f_xml_parse(CResRef parser, CStrRef data, bool is_final /* = true */) {
  // XML_Parse can reenter the VM, and it will do so after we've lost
  // the frame pointer by calling through the system's copy of XML_Parse
  // in libexpat.so.
  SYNC_VM_REGS_SCOPED();

  XmlParser *xml = parser.getTyped<XmlParser>();
  const long last_chunk = is_final ? 1 : 0;

  xml->isparsing = 1;
  int status = XML_Parse(xml->parser, (const XML_Char*)data.data(),
                         data.size(), last_chunk);
  xml->isparsing = 0;
  return status;
}
|
|
|
|
/**
 * xml_parse_into_struct(): parses `data` in one go and fills `values`
 * (one entry per open/close/complete/cdata event) and `index` (tag name
 * -> list of positions in `values`).
 *
 * Both outputs are reset to empty arrays and bound by reference to the
 * parser, and the built-in default, element and character-data callbacks
 * are installed on the expat parser.
 *
 * @return XML_Parse()'s status.
 */
int64_t f_xml_parse_into_struct(CResRef parser, CStrRef data, VRefParam values,
                                VRefParam index /* = null */) {
  // XML_Parse can reenter the VM through user handlers, exactly as in
  // f_xml_parse(), so the VM registers need syncing here as well.
  SYNC_VM_REGS_SCOPED();

  XmlParser * p = parser.getTyped<XmlParser>();
  values = Array::Create();
  p->data.assignRef(values);
  index = Array::Create();
  p->info.assignRef(index);

  // Release a tag stack left over from an earlier call on this parser
  // before replacing it; otherwise it (and its entries) would leak.
  if (p->ltags) {
    for (int i = 0; i < p->level; i++) {
      free(p->ltags[i]);
    }
    free(p->ltags);
    p->ltags = NULL;
  }
  p->level = 0;
  p->ltags = (char**)malloc(XML_MAXLEVEL * sizeof(char*));

  XML_SetDefaultHandler(p->parser, _xml_defaultHandler);
  XML_SetElementHandler(p->parser, _xml_startElementHandler,
                        _xml_endElementHandler);
  XML_SetCharacterDataHandler(p->parser, _xml_characterDataHandler);

  p->isparsing = 1;
  int ret = XML_Parse(p->parser, (const XML_Char*)data.data(), data.size(), 1);
  p->isparsing = 0;

  return ret;
}
|
|
|
|
/**
 * xml_parser_get_option(): returns the case-folding flag or the target
 * encoding name. Any other option raises "Unknown option" and yields false.
 */
Variant f_xml_parser_get_option(CResRef parser, int option) {
  XmlParser *xml = parser.getTyped<XmlParser>();

  if (option == PHP_XML_OPTION_CASE_FOLDING) {
    return xml->case_folding;
  }
  if (option == PHP_XML_OPTION_TARGET_ENCODING) {
    return String((const char*)xml->target_encoding, CopyString);
  }

  raise_warning("Unknown option");
  return false;
}
|
|
|
|
/**
 * xml_parser_set_option(): updates case folding, the skip-tagstart
 * offset, the skip-white flag or the target encoding.
 *
 * @return true on success; false (with a warning) for an unsupported
 *         target encoding or an unknown option.
 */
bool f_xml_parser_set_option(CResRef parser, int option, CVarRef value) {
  XmlParser *xml = parser.getTyped<XmlParser>();

  switch (option) {
  case PHP_XML_OPTION_CASE_FOLDING:
    xml->case_folding = value.toInt64();
    return true;

  case PHP_XML_OPTION_SKIP_TAGSTART:
    xml->toffset = value.toInt64();
    return true;

  case PHP_XML_OPTION_SKIP_WHITE:
    xml->skipwhite = value.toInt64();
    return true;

  case PHP_XML_OPTION_TARGET_ENCODING: {
    xml_encoding *enc =
      xml_get_encoding((const XML_Char*)value.toString().data());
    if (enc == NULL) {
      raise_warning("Unsupported target encoding \"%s\"",
                    value.toString().data());
      return false;
    }
    // Point at the table's own copy of the name, not the caller's string.
    xml->target_encoding = enc->name;
    return true;
  }

  default:
    break;
  }

  raise_warning("Unknown option");
  return false;
}
|
|
|
|
/**
 * xml_set_character_data_handler(): stores the callback (if its shape is
 * accepted) and installs the character-data trampoline. Always true.
 */
bool f_xml_set_character_data_handler(CResRef parser, CVarRef handler) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  Variant *slot = &xml->characterDataHandler;
  xml_set_handler(slot, handler);
  XML_SetCharacterDataHandler(xml->parser, _xml_characterDataHandler);
  return true;
}
|
|
|
|
/**
 * xml_set_default_handler(): stores the callback (if its shape is
 * accepted) and installs the default trampoline. Always true.
 */
bool f_xml_set_default_handler(CResRef parser, CVarRef handler) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  Variant *slot = &xml->defaultHandler;
  xml_set_handler(slot, handler);
  XML_SetDefaultHandler(xml->parser, _xml_defaultHandler);
  return true;
}
|
|
|
|
/**
 * xml_set_element_handler(): stores the start and end callbacks (each if
 * its shape is accepted) and installs both element trampolines.
 * Always true.
 */
bool f_xml_set_element_handler(CResRef parser, CVarRef start_element_handler,
                               CVarRef end_element_handler) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  xml_set_handler(&xml->startElementHandler, start_element_handler);
  xml_set_handler(&xml->endElementHandler, end_element_handler);
  XML_SetElementHandler(xml->parser,
                        _xml_startElementHandler,
                        _xml_endElementHandler);
  return true;
}
|
|
|
|
/**
 * xml_set_processing_instruction_handler(): stores the callback (if its
 * shape is accepted) and installs the processing-instruction trampoline.
 * Always true.
 */
bool f_xml_set_processing_instruction_handler(CResRef parser, CVarRef handler){
  XmlParser *xml = parser.getTyped<XmlParser>();
  Variant *slot = &xml->processingInstructionHandler;
  xml_set_handler(slot, handler);
  XML_SetProcessingInstructionHandler(xml->parser,
                                      _xml_processingInstructionHandler);
  return true;
}
|
|
|
|
/**
 * xml_set_start_namespace_decl_handler(): stores the callback (if its
 * shape is accepted) and installs the start-namespace trampoline.
 * Always true.
 */
bool f_xml_set_start_namespace_decl_handler(CResRef parser, CVarRef handler) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  Variant *slot = &xml->startNamespaceDeclHandler;
  xml_set_handler(slot, handler);
  XML_SetStartNamespaceDeclHandler(xml->parser,
                                   _xml_startNamespaceDeclHandler);
  return true;
}
|
|
|
|
/**
 * xml_set_end_namespace_decl_handler(): stores the callback (if its shape
 * is accepted) and installs the end-namespace trampoline. Always true.
 */
bool f_xml_set_end_namespace_decl_handler(CResRef parser, CVarRef handler) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  Variant *slot = &xml->endNamespaceDeclHandler;
  xml_set_handler(slot, handler);
  XML_SetEndNamespaceDeclHandler(xml->parser, _xml_endNamespaceDeclHandler);
  return true;
}
|
|
|
|
/**
 * xml_set_unparsed_entity_decl_handler(): stores the callback (if its
 * shape is accepted) and installs the unparsed-entity trampoline.
 * Always true.
 */
bool f_xml_set_unparsed_entity_decl_handler(CResRef parser, CVarRef handler) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  Variant *slot = &xml->unparsedEntityDeclHandler;
  xml_set_handler(slot, handler);
  XML_SetUnparsedEntityDeclHandler(xml->parser,
                                   _xml_unparsedEntityDeclHandler);
  return true;
}
|
|
|
|
/**
 * xml_set_external_entity_ref_handler(): stores the callback (if its
 * shape is accepted) and installs the external-entity trampoline.
 * Always true.
 */
bool f_xml_set_external_entity_ref_handler(CResRef parser, CVarRef handler) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  Variant *slot = &xml->externalEntityRefHandler;
  xml_set_handler(slot, handler);
  XML_SetExternalEntityRefHandler(xml->parser,
                                  _xml_externalEntityRefHandler);
  return true;
}
|
|
|
|
/**
 * xml_set_notation_decl_handler(): stores the callback (if its shape is
 * accepted) and installs the notation-declaration trampoline. Always true.
 */
bool f_xml_set_notation_decl_handler(CResRef parser, CVarRef handler) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  Variant *slot = &xml->notationDeclHandler;
  xml_set_handler(slot, handler);
  XML_SetNotationDeclHandler(xml->parser, _xml_notationDeclHandler);
  return true;
}
|
|
|
|
/**
 * xml_set_object(): binds `object` (by reference) as the receiver used
 * for string-named handlers. Always true.
 */
bool f_xml_set_object(CResRef parser, VRefParam object) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  xml->object.assignRef(object);
  return true;
}
|
|
|
|
/** xml_get_current_byte_index(): expat's current byte index. */
int64_t f_xml_get_current_byte_index(CResRef parser) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  const int64_t byte_index = XML_GetCurrentByteIndex(xml->parser);
  return byte_index;
}
|
|
|
|
/** xml_get_current_column_number(): expat's current column number. */
int64_t f_xml_get_current_column_number(CResRef parser) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  const int64_t column = XML_GetCurrentColumnNumber(xml->parser);
  return column;
}
|
|
|
|
/** xml_get_current_line_number(): expat's current line number. */
int64_t f_xml_get_current_line_number(CResRef parser) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  const int64_t line = XML_GetCurrentLineNumber(xml->parser);
  return line;
}
|
|
|
|
/** xml_get_error_code(): expat's error code for this parser. */
int64_t f_xml_get_error_code(CResRef parser) {
  XmlParser *xml = parser.getTyped<XmlParser>();
  const int64_t code = XML_GetErrorCode(xml->parser);
  return code;
}
|
|
|
|
/**
 * xml_error_string(): expat's message for `code`, wrapped without
 * copying (AttachLiteral).
 */
String f_xml_error_string(int code) {
  const XML_Error expat_code = (XML_Error)/*(int)*/code;
  char *message = (char *)XML_ErrorString(expat_code);
  return String(message, AttachLiteral);
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
/**
 * utf8_decode(): converts UTF-8 to ISO-8859-1.
 *
 * Each UTF-8 sequence produces one output byte. Code points above 0xff,
 * and sequences cut off by the end of the input, come out as '?'.
 */
String f_utf8_decode(CStrRef data) {
  // The output never has more bytes than the input.
  String str = String(data.size(), ReserveString);
  char *newbuf = str.mutableSlice().ptr;
  int newlen = 0;
  const char *s = data.data();
  for (int pos = data.size(); pos > 0; ) {
    // Wide enough for the 21 bits of a four-byte sequence. A 16-bit
    // accumulator would drop the high bits and let e.g. U+10041 through
    // as 'A' instead of '?'.
    unsigned int c = (unsigned char)(*s);
    if (c >= 0xf0) { /* four bytes encoded, 21 bits */
      if (pos-4 >= 0) {
        c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
      } else {
        c = '?';
      }
      s += 4;
      pos -= 4;
    } else if (c >= 0xe0) { /* three bytes encoded, 16 bits */
      if (pos-3 >= 0) {
        // Only the low four bits of the lead byte carry payload.
        c = ((s[0]&15)<<12) | ((s[1]&63)<<6) | (s[2]&63);
      } else {
        c = '?';
      }
      s += 3;
      pos -= 3;
    } else if (c >= 0xc0) { /* two bytes encoded, 11 bits */
      if (pos-2 >= 0) {
        c = ((s[0]&63)<<6) | (s[1]&63);
      } else {
        c = '?';
      }
      s += 2;
      pos -= 2;
    } else {
      s++;
      pos--;
    }
    newbuf[newlen] = (char)(c > 0xff ? '?' : c);
    ++newlen;
  }
  return str.setSize(newlen);
}
|
|
|
|
/**
 * utf8_encode(): converts ISO-8859-1 to UTF-8.
 *
 * Bytes below 0x80 are copied; every other byte becomes a two-byte UTF-8
 * sequence.
 */
String f_utf8_encode(CStrRef data) {
  // Each input byte is a code point <= 0xff, which needs at most two
  // bytes of UTF-8, so twice the input size is always enough.
  String str = String(data.size() * 2, ReserveString);
  char *newbuf = str.mutableSlice().ptr;
  int newlen = 0;
  const char *s = data.data();
  for (int pos = data.size(); pos > 0; pos--, s++) {
    unsigned int c = (unsigned char)(*s);
    if (c < 0x80) {
      newbuf[newlen++] = (char) c;
    } else {
      newbuf[newlen++] = (char)(0xc0 | (c >> 6));
      newbuf[newlen++] = (char)(0x80 | (c & 0x3f));
    }
  }
  return str.setSize(newlen);
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
}
|