Initial rough draft of xhpast2, a replacement for xhpast

The goal of this diff is to clean up the current work-in-progress and check
it in so that others may contribute if they have time.

The program currently outputs the HHVM tree as well as the xhpast-like
JSON, for ease of debugging. A small helper program, jsonpretty.py, makes
the trees more readable, e.g.

_build/dbg/hphp/util/parser/xhpast/xhpast2 <some php file> | hphp/util/parser/xhpast/xhpast2/jsonpretty.py
Esse commit está contido em:
Sanjeev Singh
2013-06-07 18:51:36 -07:00
commit de Sara Golemon
commit 75397a10c7
12 arquivos alterados com 3588 adições e 1517 exclusões
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
+201 -244
Ver Arquivo
@@ -27,12 +27,15 @@
#define RESET_YYCURSOR yyg->yy_hold_char = *YYCURSOR; *YYCURSOR = '\0';
// macros for rules
#define SETTOKEN _scanner->setToken(yytext, yyleng)
#define STEPPOS _scanner->stepPos(yytext, yyleng)
#define RETTOKEN(t) do {_scanner->setToken(yytext, yyleng, t); return t;} \
while (0)
#define RETSTEP(t) do {_scanner->stepPos(yytext, yyleng, t); return t;} \
while (0)
#define SETTOKEN(t) _scanner->setToken(yytext, yyleng, t)
#define STEPPOS(t) _scanner->stepPos(yytext, yyleng, t)
#define HH_ONLY_KEYWORD(tok) do { \
SETTOKEN; \
return _scanner->hipHopSyntaxEnabled() ? tok : T_STRING; \
#define HH_ONLY_KEYWORD(tok) do { \
RETTOKEN(_scanner->hipHopSyntaxEnabled() ? tok : T_STRING); \
} while (0)
#define IS_LABEL_START(c) \
@@ -211,65 +214,64 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
%%
<ST_IN_SCRIPTING>"exit" { SETTOKEN; return T_EXIT;}
<ST_IN_SCRIPTING>"die" { SETTOKEN; return T_EXIT;}
<ST_IN_SCRIPTING>"function" { SETTOKEN; return T_FUNCTION;}
<ST_IN_SCRIPTING>"const" { SETTOKEN; return T_CONST;}
<ST_IN_SCRIPTING>"return" { SETTOKEN; return T_RETURN;}
<ST_IN_SCRIPTING>"yield" { SETTOKEN; return T_YIELD;}
<ST_IN_SCRIPTING>"try" { SETTOKEN; return T_TRY;}
<ST_IN_SCRIPTING>"catch" { SETTOKEN; return T_CATCH;}
<ST_IN_SCRIPTING>"finally" { SETTOKEN; return T_FINALLY;}
<ST_IN_SCRIPTING>"throw" { SETTOKEN; return T_THROW;}
<ST_IN_SCRIPTING>"if" { SETTOKEN; return T_IF;}
<ST_IN_SCRIPTING>"elseif" { SETTOKEN; return T_ELSEIF;}
<ST_IN_SCRIPTING>"endif" { SETTOKEN; return T_ENDIF;}
<ST_IN_SCRIPTING>"else" { SETTOKEN; return T_ELSE;}
<ST_IN_SCRIPTING>"while" { SETTOKEN; return T_WHILE;}
<ST_IN_SCRIPTING>"endwhile" { SETTOKEN; return T_ENDWHILE;}
<ST_IN_SCRIPTING>"do" { SETTOKEN; return T_DO;}
<ST_IN_SCRIPTING>"for" { SETTOKEN; return T_FOR;}
<ST_IN_SCRIPTING>"endfor" { SETTOKEN; return T_ENDFOR;}
<ST_IN_SCRIPTING>"foreach" { SETTOKEN; return T_FOREACH;}
<ST_IN_SCRIPTING>"endforeach" { SETTOKEN; return T_ENDFOREACH;}
<ST_IN_SCRIPTING>"declare" { SETTOKEN; return T_DECLARE;}
<ST_IN_SCRIPTING>"enddeclare" { SETTOKEN; return T_ENDDECLARE;}
<ST_IN_SCRIPTING>"instanceof" { SETTOKEN; return T_INSTANCEOF;}
<ST_IN_SCRIPTING>"as" { SETTOKEN; return T_AS;}
<ST_IN_SCRIPTING>"switch" { SETTOKEN; return T_SWITCH;}
<ST_IN_SCRIPTING>"endswitch" { SETTOKEN; return T_ENDSWITCH;}
<ST_IN_SCRIPTING>"case" { SETTOKEN; return T_CASE;}
<ST_IN_SCRIPTING>"default" { SETTOKEN; return T_DEFAULT;}
<ST_IN_SCRIPTING>"break" { SETTOKEN; return T_BREAK;}
<ST_IN_SCRIPTING>"continue" { SETTOKEN; return T_CONTINUE;}
<ST_IN_SCRIPTING>"goto" { SETTOKEN; return T_GOTO;}
<ST_IN_SCRIPTING>"echo" { SETTOKEN; return T_ECHO;}
<ST_IN_SCRIPTING>"print" { SETTOKEN; return T_PRINT;}
<ST_IN_SCRIPTING>"class" { SETTOKEN; return T_CLASS;}
<ST_IN_SCRIPTING>"interface" { SETTOKEN; return T_INTERFACE;}
<ST_IN_SCRIPTING>"trait" { SETTOKEN; return T_TRAIT;}
<ST_IN_SCRIPTING>"insteadof" { SETTOKEN; return T_INSTEADOF;}
<ST_IN_SCRIPTING>"extends" { SETTOKEN; return T_EXTENDS;}
<ST_IN_SCRIPTING>"implements" { SETTOKEN; return T_IMPLEMENTS;}
<ST_IN_SCRIPTING>"attribute" { SETTOKEN; return T_XHP_ATTRIBUTE;}
<ST_IN_SCRIPTING>"category" { SETTOKEN; return T_XHP_CATEGORY;}
<ST_IN_SCRIPTING>"children" { SETTOKEN; return T_XHP_CHILDREN;}
<ST_IN_SCRIPTING>"required" { SETTOKEN; return T_XHP_REQUIRED;}
<ST_IN_SCRIPTING>"enum" { SETTOKEN; return T_XHP_ENUM;}
<ST_IN_SCRIPTING>"exit" { RETTOKEN(T_EXIT);}
<ST_IN_SCRIPTING>"die" { RETTOKEN(T_EXIT);}
<ST_IN_SCRIPTING>"function" { RETTOKEN(T_FUNCTION);}
<ST_IN_SCRIPTING>"const" { RETTOKEN(T_CONST);}
<ST_IN_SCRIPTING>"return" { RETTOKEN(T_RETURN); }
<ST_IN_SCRIPTING>"yield" { RETTOKEN(T_YIELD);}
<ST_IN_SCRIPTING>"try" { RETTOKEN(T_TRY);}
<ST_IN_SCRIPTING>"catch" { RETTOKEN(T_CATCH);}
<ST_IN_SCRIPTING>"finally" { RETTOKEN(T_FINALLY);}
<ST_IN_SCRIPTING>"throw" { RETTOKEN(T_THROW);}
<ST_IN_SCRIPTING>"if" { RETTOKEN(T_IF);}
<ST_IN_SCRIPTING>"elseif" { RETTOKEN(T_ELSEIF);}
<ST_IN_SCRIPTING>"endif" { RETTOKEN(T_ENDIF);}
<ST_IN_SCRIPTING>"else" { RETTOKEN(T_ELSE);}
<ST_IN_SCRIPTING>"while" { RETTOKEN(T_WHILE);}
<ST_IN_SCRIPTING>"endwhile" { RETTOKEN(T_ENDWHILE);}
<ST_IN_SCRIPTING>"do" { RETTOKEN(T_DO);}
<ST_IN_SCRIPTING>"for" { RETTOKEN(T_FOR);}
<ST_IN_SCRIPTING>"endfor" { RETTOKEN(T_ENDFOR);}
<ST_IN_SCRIPTING>"foreach" { RETTOKEN(T_FOREACH);}
<ST_IN_SCRIPTING>"endforeach" { RETTOKEN(T_ENDFOREACH);}
<ST_IN_SCRIPTING>"declare" { RETTOKEN(T_DECLARE);}
<ST_IN_SCRIPTING>"enddeclare" { RETTOKEN(T_ENDDECLARE);}
<ST_IN_SCRIPTING>"instanceof" { RETTOKEN(T_INSTANCEOF);}
<ST_IN_SCRIPTING>"as" { RETTOKEN(T_AS);}
<ST_IN_SCRIPTING>"switch" { RETTOKEN(T_SWITCH);}
<ST_IN_SCRIPTING>"endswitch" { RETTOKEN(T_ENDSWITCH);}
<ST_IN_SCRIPTING>"case" { RETTOKEN(T_CASE);}
<ST_IN_SCRIPTING>"default" { RETTOKEN(T_DEFAULT);}
<ST_IN_SCRIPTING>"break" { RETTOKEN(T_BREAK);}
<ST_IN_SCRIPTING>"continue" { RETTOKEN(T_CONTINUE);}
<ST_IN_SCRIPTING>"goto" { RETTOKEN(T_GOTO);}
<ST_IN_SCRIPTING>"echo" { RETTOKEN(T_ECHO);}
<ST_IN_SCRIPTING>"print" { RETTOKEN(T_PRINT);}
<ST_IN_SCRIPTING>"class" { RETTOKEN(T_CLASS);}
<ST_IN_SCRIPTING>"interface" { RETTOKEN(T_INTERFACE);}
<ST_IN_SCRIPTING>"trait" { RETTOKEN(T_TRAIT);}
<ST_IN_SCRIPTING>"insteadof" { RETTOKEN(T_INSTEADOF);}
<ST_IN_SCRIPTING>"extends" { RETTOKEN(T_EXTENDS);}
<ST_IN_SCRIPTING>"implements" { RETTOKEN(T_IMPLEMENTS);}
<ST_IN_SCRIPTING>"attribute" { RETTOKEN(T_XHP_ATTRIBUTE);}
<ST_IN_SCRIPTING>"category" { RETTOKEN(T_XHP_CATEGORY);}
<ST_IN_SCRIPTING>"children" { RETTOKEN(T_XHP_CHILDREN);}
<ST_IN_SCRIPTING>"required" { RETTOKEN(T_XHP_REQUIRED);}
<ST_IN_SCRIPTING>"enum" { RETTOKEN(T_XHP_ENUM);}
<ST_IN_SCRIPTING>"->" {
STEPPOS;
STEPPOS(T_OBJECT_OPERATOR);
yy_push_state(ST_LOOKING_FOR_PROPERTY, yyscanner);
return T_OBJECT_OPERATOR;
}
<ST_LOOKING_FOR_PROPERTY>"->" {
STEPPOS;
return T_OBJECT_OPERATOR;
RETSTEP(T_OBJECT_OPERATOR);
}
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
SETTOKEN;
SETTOKEN(T_STRING);
yy_pop_state(yyscanner);
return T_STRING;
}
@@ -279,133 +281,116 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
yy_pop_state(yyscanner);
}
<ST_IN_SCRIPTING>"::" { STEPPOS;return T_PAAMAYIM_NEKUDOTAYIM;}
<ST_IN_SCRIPTING>"\\" { SETTOKEN;return T_NS_SEPARATOR;}
<ST_IN_SCRIPTING>"new" { SETTOKEN;return T_NEW;}
<ST_IN_SCRIPTING>"clone" { SETTOKEN;return T_CLONE;}
<ST_IN_SCRIPTING>"var" { SETTOKEN;return T_VAR;}
<ST_IN_SCRIPTING>"::" { RETSTEP(T_PAAMAYIM_NEKUDOTAYIM);}
<ST_IN_SCRIPTING>"\\" { RETTOKEN(T_NS_SEPARATOR);}
<ST_IN_SCRIPTING>"new" { RETTOKEN(T_NEW);}
<ST_IN_SCRIPTING>"clone" { RETTOKEN(T_CLONE);}
<ST_IN_SCRIPTING>"var" { RETTOKEN(T_VAR);}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
if (_scanner->lastToken() != T_FUNCTION) {
STEPPOS;
return T_INT_CAST;
RETSTEP(T_INT_CAST);
}
yyless(1);
STEPPOS;
return '(';
RETSTEP('(');
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
if (_scanner->lastToken() != T_FUNCTION) {
STEPPOS;
return T_DOUBLE_CAST;
RETSTEP(T_DOUBLE_CAST);
}
yyless(1);
STEPPOS;
return '(';
RETSTEP('(');
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
if (_scanner->lastToken() != T_FUNCTION) {
STEPPOS;
return T_STRING_CAST;
RETSTEP(T_STRING_CAST);
}
yyless(1);
STEPPOS;
return '(';
RETSTEP('(');
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
if (_scanner->lastToken() != T_FUNCTION) {
STEPPOS;
return T_ARRAY_CAST;
RETSTEP(T_ARRAY_CAST);
}
yyless(1);
STEPPOS;
return '(';
RETSTEP('(');
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
if (_scanner->lastToken() != T_FUNCTION) {
STEPPOS;
return T_OBJECT_CAST;
RETSTEP(T_OBJECT_CAST);
}
yyless(1);
STEPPOS;
return '(';
RETSTEP('(');
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
if (_scanner->lastToken() != T_FUNCTION) {
STEPPOS;
return T_BOOL_CAST;
RETSTEP(T_BOOL_CAST);
}
yyless(1);
STEPPOS;
return '(';
RETSTEP('(');
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
if (_scanner->lastToken() != T_FUNCTION) {
STEPPOS;
return T_UNSET_CAST;
RETSTEP(T_UNSET_CAST);
}
yyless(1);
STEPPOS;
return '(';
RETSTEP('(');
}
<ST_IN_SCRIPTING>"eval" { SETTOKEN; return T_EVAL;}
<ST_IN_SCRIPTING>"include" { SETTOKEN; return T_INCLUDE;}
<ST_IN_SCRIPTING>"include_once" { SETTOKEN; return T_INCLUDE_ONCE;}
<ST_IN_SCRIPTING>"require" { SETTOKEN; return T_REQUIRE;}
<ST_IN_SCRIPTING>"require_once" { SETTOKEN; return T_REQUIRE_ONCE;}
<ST_IN_SCRIPTING>"namespace" { SETTOKEN; return T_NAMESPACE;}
<ST_IN_SCRIPTING>"use" { SETTOKEN; return T_USE;}
<ST_IN_SCRIPTING>"global" { SETTOKEN; return T_GLOBAL;}
<ST_IN_SCRIPTING>"isset" { SETTOKEN; return T_ISSET;}
<ST_IN_SCRIPTING>"empty" { SETTOKEN; return T_EMPTY;}
<ST_IN_SCRIPTING>"__halt_compiler" { SETTOKEN; return T_HALT_COMPILER;}
<ST_IN_SCRIPTING>"__compiler_halt_offset__" {
SETTOKEN;
return T_COMPILER_HALT_OFFSET;
}
<ST_IN_SCRIPTING>"static" { SETTOKEN; return T_STATIC;}
<ST_IN_SCRIPTING>"abstract" { SETTOKEN; return T_ABSTRACT;}
<ST_IN_SCRIPTING>"final" { SETTOKEN; return T_FINAL;}
<ST_IN_SCRIPTING>"private" { SETTOKEN; return T_PRIVATE;}
<ST_IN_SCRIPTING>"protected" { SETTOKEN; return T_PROTECTED;}
<ST_IN_SCRIPTING>"public" { SETTOKEN; return T_PUBLIC;}
<ST_IN_SCRIPTING>"unset" { SETTOKEN; return T_UNSET;}
<ST_IN_SCRIPTING>"=>" { STEPPOS; return T_DOUBLE_ARROW;}
<ST_IN_SCRIPTING>"list" { SETTOKEN; return T_LIST;}
<ST_IN_SCRIPTING>"array" { SETTOKEN; return T_ARRAY;}
<ST_IN_SCRIPTING>"++" { STEPPOS; return T_INC;}
<ST_IN_SCRIPTING>"--" { STEPPOS; return T_DEC;}
<ST_IN_SCRIPTING>"===" { STEPPOS; return T_IS_IDENTICAL;}
<ST_IN_SCRIPTING>"!==" { STEPPOS; return T_IS_NOT_IDENTICAL;}
<ST_IN_SCRIPTING>"==" { STEPPOS; return T_IS_EQUAL;}
<ST_IN_SCRIPTING>"!="|"<>" { STEPPOS; return T_IS_NOT_EQUAL;}
<ST_IN_SCRIPTING>"<=" { STEPPOS; return T_IS_SMALLER_OR_EQUAL;}
<ST_IN_SCRIPTING>">=" { STEPPOS; return T_IS_GREATER_OR_EQUAL;}
<ST_IN_SCRIPTING>"+=" { STEPPOS; return T_PLUS_EQUAL;}
<ST_IN_SCRIPTING>"-=" { STEPPOS; return T_MINUS_EQUAL;}
<ST_IN_SCRIPTING>"*=" { STEPPOS; return T_MUL_EQUAL;}
<ST_IN_SCRIPTING>"/=" { STEPPOS; return T_DIV_EQUAL;}
<ST_IN_SCRIPTING>".=" { STEPPOS; return T_CONCAT_EQUAL;}
<ST_IN_SCRIPTING>"%=" { STEPPOS; return T_MOD_EQUAL;}
<ST_IN_SCRIPTING>"<<=" { STEPPOS; return T_SL_EQUAL;}
<ST_IN_SCRIPTING>">>=" { STEPPOS; return T_SR_EQUAL;}
<ST_IN_SCRIPTING>"&=" { STEPPOS; return T_AND_EQUAL;}
<ST_IN_SCRIPTING>"|=" { STEPPOS; return T_OR_EQUAL;}
<ST_IN_SCRIPTING>"^=" { STEPPOS; return T_XOR_EQUAL;}
<ST_IN_SCRIPTING>"||" { STEPPOS; return T_BOOLEAN_OR;}
<ST_IN_SCRIPTING>"&&" { STEPPOS; return T_BOOLEAN_AND;}
<ST_IN_SCRIPTING>"OR" { SETTOKEN; return T_LOGICAL_OR;}
<ST_IN_SCRIPTING>"AND" { SETTOKEN; return T_LOGICAL_AND;}
<ST_IN_SCRIPTING>"XOR" { SETTOKEN; return T_LOGICAL_XOR;}
<ST_IN_SCRIPTING>"<<" { STEPPOS; return T_SL;}
<ST_IN_SCRIPTING>"..." { SETTOKEN; return T_VARARG; }
<ST_IN_SCRIPTING>"eval" { RETTOKEN(T_EVAL);}
<ST_IN_SCRIPTING>"include" { RETTOKEN(T_INCLUDE);}
<ST_IN_SCRIPTING>"include_once" { RETTOKEN(T_INCLUDE_ONCE);}
<ST_IN_SCRIPTING>"require" { RETTOKEN(T_REQUIRE);}
<ST_IN_SCRIPTING>"require_once" { RETTOKEN(T_REQUIRE_ONCE);}
<ST_IN_SCRIPTING>"namespace" { RETTOKEN(T_NAMESPACE);}
<ST_IN_SCRIPTING>"use" { RETTOKEN(T_USE);}
<ST_IN_SCRIPTING>"global" { RETTOKEN(T_GLOBAL);}
<ST_IN_SCRIPTING>"isset" { RETTOKEN(T_ISSET);}
<ST_IN_SCRIPTING>"empty" { RETTOKEN(T_EMPTY);}
<ST_IN_SCRIPTING>"__halt_compiler" { RETTOKEN(T_HALT_COMPILER);}
<ST_IN_SCRIPTING>"__compiler_halt_offset__" { RETTOKEN(T_COMPILER_HALT_OFFSET);}
<ST_IN_SCRIPTING>"static" { RETTOKEN(T_STATIC);}
<ST_IN_SCRIPTING>"abstract" { RETTOKEN(T_ABSTRACT);}
<ST_IN_SCRIPTING>"final" { RETTOKEN(T_FINAL);}
<ST_IN_SCRIPTING>"private" { RETTOKEN(T_PRIVATE);}
<ST_IN_SCRIPTING>"protected" { RETTOKEN(T_PROTECTED);}
<ST_IN_SCRIPTING>"public" { RETTOKEN(T_PUBLIC);}
<ST_IN_SCRIPTING>"unset" { RETTOKEN(T_UNSET);}
<ST_IN_SCRIPTING>"=>" { RETSTEP(T_DOUBLE_ARROW);}
<ST_IN_SCRIPTING>"list" { RETTOKEN(T_LIST);}
<ST_IN_SCRIPTING>"array" { RETTOKEN(T_ARRAY);}
<ST_IN_SCRIPTING>"++" { RETSTEP(T_INC);}
<ST_IN_SCRIPTING>"--" { RETSTEP(T_DEC);}
<ST_IN_SCRIPTING>"===" { RETSTEP(T_IS_IDENTICAL);}
<ST_IN_SCRIPTING>"!==" { RETSTEP(T_IS_NOT_IDENTICAL);}
<ST_IN_SCRIPTING>"==" { RETSTEP(T_IS_EQUAL);}
<ST_IN_SCRIPTING>"!="|"<>" { RETSTEP(T_IS_NOT_EQUAL);}
<ST_IN_SCRIPTING>"<=" { RETSTEP(T_IS_SMALLER_OR_EQUAL);}
<ST_IN_SCRIPTING>">=" { RETSTEP(T_IS_GREATER_OR_EQUAL);}
<ST_IN_SCRIPTING>"+=" { RETSTEP(T_PLUS_EQUAL);}
<ST_IN_SCRIPTING>"-=" { RETSTEP(T_MINUS_EQUAL);}
<ST_IN_SCRIPTING>"*=" { RETSTEP(T_MUL_EQUAL);}
<ST_IN_SCRIPTING>"/=" { RETSTEP(T_DIV_EQUAL);}
<ST_IN_SCRIPTING>".=" { RETSTEP(T_CONCAT_EQUAL);}
<ST_IN_SCRIPTING>"%=" { RETSTEP(T_MOD_EQUAL);}
<ST_IN_SCRIPTING>"<<=" { RETSTEP(T_SL_EQUAL);}
<ST_IN_SCRIPTING>">>=" { RETSTEP(T_SR_EQUAL);}
<ST_IN_SCRIPTING>"&=" { RETSTEP(T_AND_EQUAL);}
<ST_IN_SCRIPTING>"|=" { RETSTEP(T_OR_EQUAL);}
<ST_IN_SCRIPTING>"^=" { RETSTEP(T_XOR_EQUAL);}
<ST_IN_SCRIPTING>"||" { RETSTEP(T_BOOLEAN_OR);}
<ST_IN_SCRIPTING>"&&" { RETSTEP(T_BOOLEAN_AND);}
<ST_IN_SCRIPTING>"OR" { RETTOKEN(T_LOGICAL_OR);}
<ST_IN_SCRIPTING>"AND" { RETTOKEN(T_LOGICAL_AND);}
<ST_IN_SCRIPTING>"XOR" { RETTOKEN(T_LOGICAL_XOR);}
<ST_IN_SCRIPTING>"<<" { RETSTEP(T_SL);}
<ST_IN_SCRIPTING>"..." { RETTOKEN(T_VARARG); }
<ST_IN_SCRIPTING>"shape" { HH_ONLY_KEYWORD(T_SHAPE); }
<ST_IN_SCRIPTING>"type" { HH_ONLY_KEYWORD(T_UNRESOLVED_TYPE); }
@@ -413,19 +398,17 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
<ST_IN_SCRIPTING>">>" {
if (_scanner->getLookaheadLtDepth() < 2) {
STEPPOS;
return T_SR;
RETSTEP(T_SR);
}
yyless(1);
STEPPOS;
return '>';
RETSTEP('>');
}
<ST_IN_SCRIPTING>"<"[a-zA-Z_\x7f-\xff] {
int ntt = getNextTokenType(_scanner->lastToken());
if (ntt & NextTokenType::XhpTag) {
yyless(1);
STEPPOS;
STEPPOS(T_XHP_TAG_LT);
yy_push_state(ST_XHP_IN_TAG, yyscanner);
return T_XHP_TAG_LT;
}
@@ -437,82 +420,75 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
break;
}
yyless(1);
STEPPOS;
if (_scanner->hipHopSyntaxEnabled() && (ntt & NextTokenType::TypeListMaybe)) {
// Return T_UNRESOLVED_LT; the scanner will inspect subsequent tokens
// to resolve this.
return T_UNRESOLVED_LT;
RETSTEP(T_UNRESOLVED_LT);
}
return '<';
RETSTEP('<');
}
<ST_IN_SCRIPTING>"<" {
STEPPOS;
if (_scanner->hipHopSyntaxEnabled()) {
int ntt = getNextTokenType(_scanner->lastToken());
if (ntt & NextTokenType::TypeListMaybe) {
// Return T_UNRESOLVED_LT; the scanner will inspect subsequent tokens
// to resolve this.
return T_UNRESOLVED_LT;
RETSTEP(T_UNRESOLVED_LT);
}
}
return '<';
RETSTEP('<');
}
<ST_LT_CHECK>"<"{XHPLABEL}(">"|"/>"|{WHITESPACE_AND_COMMENTS}(">"|"/>"|[a-zA-Z_\x7f-\xff])) {
BEGIN(ST_IN_SCRIPTING);
yyless(1);
STEPPOS;
STEPPOS(T_XHP_TAG_LT);
yy_push_state(ST_XHP_IN_TAG, yyscanner);
return T_XHP_TAG_LT;
}
<ST_LT_CHECK>"<" {
BEGIN(ST_IN_SCRIPTING);
STEPPOS;
return '<';
RETSTEP('<');
}
<ST_IN_SCRIPTING>":"{XHPLABEL} {
int ntt = getNextTokenType(_scanner->lastToken());
if (ntt & NextTokenType::XhpClassName) {
yytext++; yyleng--; // skipping the first colon
SETTOKEN;
return T_XHP_LABEL;
RETTOKEN(T_XHP_LABEL);
}
yyless(1);
STEPPOS;
return ':';
RETSTEP(':');
}
<ST_IN_SCRIPTING>"%"{XHPLABEL} {
int ntt = getNextTokenType(_scanner->lastToken());
if (ntt & NextTokenType::XhpCategoryName) {
yytext++; yyleng--; // skipping "%"
SETTOKEN;
return T_XHP_CATEGORY_LABEL;
RETTOKEN(T_XHP_CATEGORY_LABEL);
}
yyless(1);
STEPPOS;
return '%';
RETSTEP('%');
}
<ST_IN_SCRIPTING>{TOKENS} {STEPPOS; return yytext[0];}
<ST_IN_SCRIPTING>{TOKENS} {RETSTEP(yytext[0]);}
<ST_IN_SCRIPTING>"{" {
STEPPOS;
STEPPOS('{');
yy_push_state(ST_IN_SCRIPTING, yyscanner);
return '{';
}
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
STEPPOS;
STEPPOS(T_DOLLAR_OPEN_CURLY_BRACES);
yy_push_state(ST_LOOKING_FOR_VARNAME, yyscanner);
return T_DOLLAR_OPEN_CURLY_BRACES;
}
<ST_IN_SCRIPTING>"}" {
STEPPOS;
STEPPOS('}');
// We need to be robust against a '}' in PHP code with
// no corresponding '{'
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
@@ -521,7 +497,7 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
}
<ST_LOOKING_FOR_VARNAME>{LABEL} {
SETTOKEN;
SETTOKEN(T_STRING_VARNAME);
// Change state to IN_SCRIPTING; current state will be popped
// when we encounter '}'
BEGIN(ST_IN_SCRIPTING);
@@ -536,86 +512,81 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
}
<ST_IN_SCRIPTING,ST_XHP_IN_TAG>{LNUM} {
SETTOKEN;
errno = 0;
long ret = strtoll(yytext, NULL, 0);
if (errno == ERANGE || ret < 0) {
_scanner->error("Dec number is too big: %s", yytext);
if (_scanner->isHackMode()) {
return T_HACK_ERROR;
RETTOKEN(T_HACK_ERROR);
}
}
return T_LNUMBER;
RETTOKEN(T_LNUMBER);
}
<ST_IN_SCRIPTING,ST_XHP_IN_TAG>{HNUM} {
SETTOKEN;
errno = 0;
long ret = strtoull(yytext, NULL, 16);
if (errno == ERANGE || ret < 0) {
_scanner->error("Hex number is too big: %s", yytext);
if (_scanner->isHackMode()) {
return T_HACK_ERROR;
RETTOKEN(T_HACK_ERROR);
}
}
return T_LNUMBER;
RETTOKEN(T_LNUMBER);
}
<ST_VAR_OFFSET>0|([1-9][0-9]*) { /* Offset could be treated as a long */
SETTOKEN;
errno = 0;
long ret = strtoll(yytext, NULL, 0);
if (ret == LLONG_MAX && errno == ERANGE) {
_scanner->error("Offset number is too big: %s", yytext);
if (_scanner->isHackMode()) {
return T_HACK_ERROR;
RETTOKEN(T_HACK_ERROR);
}
}
return T_NUM_STRING;
RETTOKEN(T_NUM_STRING);
}
<ST_VAR_OFFSET>{LNUM}|{HNUM} { /* Offset must be treated as a string */
SETTOKEN;
return T_NUM_STRING;
RETTOKEN(T_NUM_STRING);
}
<ST_IN_SCRIPTING,ST_XHP_IN_TAG>{DNUM}|{EXPONENT_DNUM} {
SETTOKEN;
return T_DNUMBER;
RETTOKEN(T_DNUMBER);
}
<ST_IN_SCRIPTING>"__CLASS__" { SETTOKEN; return T_CLASS_C; }
<ST_IN_SCRIPTING>"__TRAIT__" { SETTOKEN; return T_TRAIT_C; }
<ST_IN_SCRIPTING>"__FUNCTION__" { SETTOKEN; return T_FUNC_C; }
<ST_IN_SCRIPTING>"__METHOD__" { SETTOKEN; return T_METHOD_C;}
<ST_IN_SCRIPTING>"__LINE__" { SETTOKEN; return T_LINE; }
<ST_IN_SCRIPTING>"__FILE__" { SETTOKEN; return T_FILE; }
<ST_IN_SCRIPTING>"__DIR__" { SETTOKEN; return T_DIR; }
<ST_IN_SCRIPTING>"__NAMESPACE__" { SETTOKEN; return T_NS_C; }
<ST_IN_SCRIPTING>"__CLASS__" { RETTOKEN(T_CLASS_C); }
<ST_IN_SCRIPTING>"__TRAIT__" { RETTOKEN(T_TRAIT_C); }
<ST_IN_SCRIPTING>"__FUNCTION__" { RETTOKEN(T_FUNC_C); }
<ST_IN_SCRIPTING>"__METHOD__" { RETTOKEN(T_METHOD_C);}
<ST_IN_SCRIPTING>"__LINE__" { RETTOKEN(T_LINE); }
<ST_IN_SCRIPTING>"__FILE__" { RETTOKEN(T_FILE); }
<ST_IN_SCRIPTING>"__DIR__" { RETTOKEN(T_DIR); }
<ST_IN_SCRIPTING>"__NAMESPACE__" { RETTOKEN(T_NS_C); }
<INITIAL>"#"[^\n]*"\n" {
_scanner->setHashBang(yytext, yyleng);
_scanner->setHashBang(yytext, yyleng, T_INLINE_HTML);
BEGIN(ST_IN_SCRIPTING);
yy_push_state(ST_AFTER_HASHBANG, yyscanner);
return T_INLINE_HTML;
}
<INITIAL>(([^<#]|"<"[^?%s<]){1,400})|"<s"|"<" {
SETTOKEN;
SETTOKEN(T_INLINE_HTML);
BEGIN(ST_IN_SCRIPTING);
yy_push_state(ST_IN_HTML, yyscanner);
return T_INLINE_HTML;
}
<ST_IN_HTML,ST_AFTER_HASHBANG>(([^<]|"<"[^?%s<]){1,400})|"<s"|"<" {
SETTOKEN;
SETTOKEN(T_INLINE_HTML);
BEGIN(ST_IN_HTML);
return T_INLINE_HTML;
}
<INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<?"|("<?php"([ \t]|{NEWLINE}))|"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"\'php\'"){WHITESPACE}*">" {
SETTOKEN;
if (_scanner->shortTags() || yyleng > 2) {
SETTOKEN(T_OPEN_TAG);
if (YY_START == INITIAL) {
BEGIN(ST_IN_SCRIPTING);
} else {
@@ -623,6 +594,7 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
}
return T_OPEN_TAG;
} else {
SETTOKEN(T_INLINE_HTML);
if (YY_START == INITIAL) {
BEGIN(ST_IN_SCRIPTING);
yy_push_state(ST_IN_HTML, yyscanner);
@@ -634,7 +606,6 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
}
<INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<%="|"<?=" {
SETTOKEN;
if ((yytext[1]=='%' && _scanner->aspTags()) ||
(yytext[1]=='?' && _scanner->shortTags())) {
if (YY_START == INITIAL) {
@@ -642,7 +613,7 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
} else {
yy_pop_state(yyscanner);
}
return T_ECHO; //return T_OPEN_TAG_WITH_ECHO;
RETTOKEN(T_ECHO); //return T_OPEN_TAG_WITH_ECHO;
} else {
if (YY_START == INITIAL) {
BEGIN(ST_IN_SCRIPTING);
@@ -650,19 +621,18 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
} else if (YY_START == ST_AFTER_HASHBANG) {
BEGIN(ST_IN_HTML);
}
return T_INLINE_HTML;
RETTOKEN(T_INLINE_HTML);
}
}
<INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<%" {
SETTOKEN;
if (_scanner->aspTags()) {
if (YY_START == INITIAL) {
BEGIN(ST_IN_SCRIPTING);
} else {
yy_pop_state(yyscanner);
}
return T_OPEN_TAG;
RETTOKEN(T_OPEN_TAG);
} else {
if (YY_START == INITIAL) {
BEGIN(ST_IN_SCRIPTING);
@@ -670,7 +640,7 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
} else if (YY_START == ST_AFTER_HASHBANG) {
BEGIN(ST_IN_HTML);
}
return T_INLINE_HTML;
RETTOKEN(T_INLINE_HTML);
}
}
@@ -683,27 +653,27 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
_scanner->error("Hack mode: content before <?hh");
return T_HACK_ERROR;
}
STEPPOS;
STEPPOS(T_OPEN_TAG);
_scanner->setHackMode();
return T_OPEN_TAG;
}
<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
_scanner->setToken(yytext, yyleng, yytext+1, yyleng-1);
_scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE);
return T_VARIABLE;
}
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
yyless(yyleng - 3);
yy_push_state(ST_LOOKING_FOR_PROPERTY, yyscanner);
_scanner->setToken(yytext, yyleng, yytext+1, yyleng-1);
_scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE);
return T_VARIABLE;
}
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
yyless(yyleng - 1);
yy_push_state(ST_VAR_OFFSET, yyscanner);
_scanner->setToken(yytext, yyleng, yytext+1, yyleng-1);
_scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE);
return T_VARIABLE;
}
@@ -723,18 +693,15 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
line number */
yyless(0);
yy_pop_state(yyscanner);
STEPPOS;
return T_ENCAPSED_AND_WHITESPACE;
RETSTEP(T_ENCAPSED_AND_WHITESPACE);
}
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
SETTOKEN;
return T_STRING;
RETTOKEN(T_STRING);
}
<ST_IN_SCRIPTING,ST_XHP_IN_TAG>{WHITESPACE} {
STEPPOS;
return T_WHITESPACE;
RETSTEP(T_WHITESPACE);
}
<ST_IN_SCRIPTING,ST_XHP_IN_TAG>"#"|"//" {
@@ -755,14 +722,14 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
yymore();
break;
default:
STEPPOS;
STEPPOS(T_COMMENT);
yy_pop_state(yyscanner);
return T_COMMENT;
}
}
<ST_ONE_LINE_COMMENT>{NEWLINE} {
STEPPOS;
STEPPOS(T_COMMENT);
yy_pop_state(yyscanner);
return T_COMMENT;
}
@@ -773,7 +740,7 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
return T_HACK_ERROR;
}
if (_scanner->aspTags() || yytext[yyleng-2] != '%') {
_scanner->setToken(yytext, yyleng-2, yytext, yyleng-2);
_scanner->setToken(yytext, yyleng-2, yytext, yyleng-2, T_COMMENT);
yyless(yyleng-2);
yy_pop_state(yyscanner);
return T_COMMENT;
@@ -797,13 +764,13 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
}
<ST_DOC_COMMENT>"*/" {
SETTOKEN;
SETTOKEN(T_DOC_COMMENT);
yy_pop_state(yyscanner);
return T_DOC_COMMENT;
}
<ST_COMMENT>"*/" {
STEPPOS;
STEPPOS(T_COMMENT);
yy_pop_state(yyscanner);
return T_COMMENT;
}
@@ -817,7 +784,7 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
}
<ST_XHP_COMMENT>"-->" {
STEPPOS;
STEPPOS(T_COMMENT);
yy_pop_state(yyscanner);
return T_COMMENT;
}
@@ -831,38 +798,35 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
_scanner->error("Hack mode: ?> not allowed");
return T_HACK_ERROR;
}
STEPPOS;
yy_push_state(ST_IN_HTML, yyscanner);
if (_scanner->full()) {
return T_CLOSE_TAG;
RETSTEP(T_CLOSE_TAG);
} else {
return ';';
RETSTEP(';');
}
}
<ST_IN_SCRIPTING>"</script"{WHITESPACE}*">"{NEWLINE}? {
STEPPOS;
yy_push_state(ST_IN_HTML, yyscanner);
if (_scanner->full()) {
return T_CLOSE_TAG;
RETSTEP(T_CLOSE_TAG);
} else {
return ';';
RETSTEP(';');
}
}
<ST_IN_SCRIPTING>"%>"{NEWLINE}? {
if (_scanner->aspTags()) {
STEPPOS;
yy_push_state(ST_IN_HTML, yyscanner);
if (_scanner->full()) {
return T_CLOSE_TAG;
RETSTEP(T_CLOSE_TAG);
} else {
return ';';
RETSTEP(';');
}
} else {
yyless(1);
_scanner->setToken(yytext, 1, yytext, 1);
return yytext[0];
RETSTEP(yytext[0]);
}
}
@@ -917,19 +881,17 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
}
<ST_IN_SCRIPTING>[`] {
STEPPOS;
STEPPOS('`');
BEGIN(ST_BACKQUOTE);
return '`';
}
<ST_XHP_IN_TAG>{XHPLABEL} {
SETTOKEN;
return T_XHP_LABEL;
RETTOKEN(T_XHP_LABEL);
}
<ST_XHP_IN_TAG>"=" {
STEPPOS;
return yytext[0];
RETSTEP(yytext[0]);
}
<ST_XHP_IN_TAG>["][^"]*["] {
@@ -938,13 +900,13 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
}
<ST_XHP_IN_TAG>[{] {
STEPPOS;
STEPPOS('{');
yy_push_state(ST_IN_SCRIPTING, yyscanner);
return '{';
}
<ST_XHP_IN_TAG>">" {
STEPPOS;
STEPPOS(T_XHP_TAG_GT);
BEGIN(ST_XHP_CHILD);
return T_XHP_TAG_GT;
}
@@ -958,14 +920,14 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
<ST_XHP_IN_TAG>{ANY_CHAR} {
// This rule ensures we get a reasonable syntax error message
// when unexpected characters occur inside XHP tags
STEPPOS;
STEPPOS(yytext[0]);
_scanner->error("Unexpected character in input: '%c' (ASCII=%d)",
yytext[0], yytext[0]);
return yytext[0];
}
<ST_XHP_END_SINGLETON_TAG>">" {
STEPPOS;
STEPPOS(T_XHP_TAG_GT);
yy_pop_state(yyscanner);
return T_XHP_TAG_GT;
}
@@ -976,12 +938,11 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
}
<ST_XHP_CHILD>[^{<]+ {
SETTOKEN;
return T_XHP_TEXT;
RETTOKEN(T_XHP_TEXT);
}
<ST_XHP_CHILD>"{" {
STEPPOS;
STEPPOS('{');
yy_push_state(ST_IN_SCRIPTING, yyscanner);
return '{';
}
@@ -989,28 +950,25 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
<ST_XHP_CHILD>"</" {
BEGIN(ST_XHP_END_CLOSE_TAG);
yyless(1);
STEPPOS;
return T_XHP_TAG_LT;
RETSTEP(T_XHP_TAG_LT);
}
<ST_XHP_END_CLOSE_TAG>"/" {
STEPPOS;
return '/';
RETSTEP('/');
}
<ST_XHP_END_CLOSE_TAG>{XHPLABEL} {
SETTOKEN;
return T_XHP_LABEL;
RETTOKEN(T_XHP_LABEL);
}
<ST_XHP_END_CLOSE_TAG>">" {
STEPPOS;
STEPPOS(T_XHP_TAG_GT);
yy_pop_state(yyscanner);
return T_XHP_TAG_GT;
}
<ST_XHP_CHILD>"<" {
STEPPOS;
STEPPOS(T_XHP_TAG_LT);
yy_push_state(ST_XHP_IN_TAG, yyscanner);
return T_XHP_TAG_LT;
}
@@ -1187,8 +1145,7 @@ doc_scan_done:
<ST_END_HEREDOC>{LABEL} {
BEGIN(ST_IN_SCRIPTING);
STEPPOS;
return T_END_HEREDOC;
RETSTEP(T_END_HEREDOC);
}
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
+3 -1
Ver Arquivo
@@ -1,5 +1,7 @@
%{
#ifdef TEST_PARSER
#ifdef XHPAST2_PARSER
#include "hphp/util/parser/xhpast2/parser.h"
#elif TEST_PARSER
#include "hphp/util/parser/test/parser.h"
#else
#include "hphp/compiler/parser/parser.h"
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
+11 -6
Ver Arquivo
@@ -85,7 +85,8 @@ void ScannerToken::xhpDecode() {
Scanner::Scanner(const char *filename, int type, bool md5 /* = false */)
: m_filename(filename), m_stream(nullptr), m_source(nullptr), m_len(0), m_pos(0),
m_state(Start), m_type(type), m_yyscanner(nullptr), m_token(nullptr),
m_loc(nullptr), m_lastToken(-1), m_isHackMode(0), m_lookaheadLtDepth(0) {
m_loc(nullptr), m_lastToken(-1), m_isHackMode(0), m_lookaheadLtDepth(0),
m_listener(nullptr) {
m_stream = new std::ifstream(filename);
m_streamOwner = true;
if (m_stream->fail()) {
@@ -101,7 +102,8 @@ Scanner::Scanner(std::istream &stream, int type,
bool md5 /* = false */)
: m_filename(fileName), m_source(nullptr), m_len(0), m_pos(0),
m_state(Start), m_type(type), m_yyscanner(nullptr), m_token(nullptr),
m_loc(nullptr), m_lastToken(-1), m_isHackMode(0), m_lookaheadLtDepth(0) {
m_loc(nullptr), m_lastToken(-1), m_isHackMode(0), m_lookaheadLtDepth(0),
m_listener(nullptr) {
m_stream = &stream;
m_streamOwner = false;
if (md5) computeMd5();
@@ -113,7 +115,7 @@ Scanner::Scanner(const char *source, int len, int type,
: m_filename(fileName), m_stream(nullptr), m_source(source), m_len(len),
m_pos(0), m_state(Start), m_type(type), m_yyscanner(nullptr),
m_token(nullptr), m_loc(nullptr), m_lastToken(-1), m_isHackMode(0),
m_lookaheadLtDepth(0) {
m_lookaheadLtDepth(0), m_listener(nullptr) {
assert(m_source);
m_streamOwner = false;
if (md5) {
@@ -147,12 +149,12 @@ Scanner::~Scanner() {
}
}
void Scanner::setHashBang(const char *rawText, int rawLeng) {
void Scanner::setHashBang(const char *rawText, int rawLeng, int type) {
if (m_type & ReturnAllTokens) {
setToken(rawText, rawLeng);
} else {
m_token->setText("", 0);
incLoc(rawText, rawLeng);
incLoc(rawText, rawLeng, type);
}
}
@@ -487,9 +489,12 @@ void Scanner::warn(const char* fmt, ...) {
m_filename.c_str(), m_loc->line0, m_loc->char0);
}
void Scanner::incLoc(const char *rawText, int rawLeng) {
void Scanner::incLoc(const char *rawText, int rawLeng, int type) {
assert(rawText);
assert(rawLeng > 0);
if (m_listener) {
m_token->setID(m_listener->publish(rawText, rawLeng, type));
}
m_loc->cursor += rawLeng;
+25 -10
Ver Arquivo
@@ -29,8 +29,8 @@ typedef int TokenID;
class ScannerToken {
public:
ScannerToken() : m_num(0), m_check(false) {}
void reset() { m_num = 0; m_text.clear();}
ScannerToken() : m_num(0), m_check(false), m_id(-1) {}
void reset() { m_num = 0; m_text.clear(); m_id = -1; }
TokenID num() const { return m_num;}
void setNum(TokenID num) {
@@ -50,6 +50,7 @@ public:
void operator=(ScannerToken &other) {
m_num = other.m_num;
m_text = other.m_text;
m_id = other.m_id;
}
const std::string &text() const {
@@ -76,6 +77,12 @@ public:
void setCheck() {
m_check = true;
}
void setID(int id) {
m_id = id;
}
int ID() {
return m_id;
}
void xhpLabel(bool prefix = true);
bool htmlTrim(); // true if non-empty after trimming
@@ -85,6 +92,7 @@ protected:
TokenID m_num; // internal token id
std::string m_text;
bool m_check;
int m_id;
};
struct LookaheadToken {
@@ -167,6 +175,11 @@ struct TokenStore {
///////////////////////////////////////////////////////////////////////////////
struct TokenListener {
virtual int publish(const char *rawText, int rawLeng, int type) = 0;
virtual ~TokenListener() {}
};
class Scanner {
public:
enum Type {
@@ -182,6 +195,7 @@ public:
bool md5 = false);
Scanner(const char *source, int len, int type, const char *fileName = "",
bool md5 = false);
void setListener(TokenListener *listener) { m_listener = listener; }
~Scanner();
const std::string &getMd5() const {
@@ -221,26 +235,26 @@ public:
bool aspTags() const { return m_type & AllowAspTags;}
bool full() const { return m_type & ReturnAllTokens;}
int lastToken() const { return m_lastToken;}
void setToken(const char *rawText, int rawLeng) {
void setToken(const char *rawText, int rawLeng, int type = -1) {
m_token->setText(rawText, rawLeng);
incLoc(rawText, rawLeng);
incLoc(rawText, rawLeng, type);
}
void stepPos(const char *rawText, int rawLeng) {
void stepPos(const char *rawText, int rawLeng, int type = -1) {
if (m_type & ReturnAllTokens) {
m_token->setText(rawText, rawLeng);
}
incLoc(rawText, rawLeng);
incLoc(rawText, rawLeng, type);
}
void setToken(const char *rawText, int rawLeng,
const char *ytext, int yleng) {
const char *ytext, int yleng, int type = -1) {
if (m_type & ReturnAllTokens) {
m_token->setText(rawText, rawLeng);
} else {
m_token->setText(ytext, yleng);
}
incLoc(rawText, rawLeng);
incLoc(rawText, rawLeng, type);
}
void setHashBang(const char *rawText, int rawLeng);
void setHashBang(const char *rawText, int rawLeng, int type = -1);
// also used for YY_FATAL_ERROR in hphp.x
void error(const char* fmt, ...) ATTRIBUTE_PRINTF(2,3);
void warn(const char* fmt, ...) ATTRIBUTE_PRINTF(2,3);
@@ -335,11 +349,12 @@ private:
// fields for XHP parsing
int m_lastToken;
void incLoc(const char *rawText, int rawLeng);
void incLoc(const char *rawText, int rawLeng, int type);
bool m_isHackMode;
TokenStore m_lookahead;
int m_lookaheadLtDepth;
TokenListener *m_listener;
};
///////////////////////////////////////////////////////////////////////////////
+92
Ver Arquivo
@@ -0,0 +1,92 @@
xhpast2 Parser Design
This file discusses the design decisions made to produce xhpast-compatible
output from the HPHP parser. Most of the features of the design have
to do with the impedance mismatch between the HPHP parser and xhpast.
Specifically:
1. xhpast outputs a byte-accurate token stream but HPHP does not.
This is natural since HPHP is more concerned with executing PHP, not linting it.
Therefore it is necessary for us to modify the HPHP parser so we can intercept
and accumulate tokens as they are seen and associate them with the relevant
parse tree node. There is not a 1:1 correspondence between the xhpast and HPHP
tokenizers so some massaging is necessary.
2. HPHP nodes are generally at a semantically higher level than xhpast nodes.
xhpast nodes do not carry any attributes other than node type, pointers to
the range of tokens in the token stream corresponding to that node + a list of
children. HPHP parse tree nodes are more condensed than xhpast nodes, often
choosing to represent features of a node as attributes instead of children.
For example:
$x = &$a;
The HPHP parser callback is:
void onAssign(Token& out, Token& var, Token& expr, bool ref, bool rhsFirst = false)
Notice that '=' and '&' are not represented as tokens. The '=' is implicit in
the function call, and the optional '&' is represented as a bool.
The xhpast tree structure for the same expression is:
[n_BINARY_EXPRESSION ...
[n_VARIABLE ... // $x
[n_OPERATOR ... // =
[n_VARIABLE_REFERENCE ... // &
[n_VARIABLE ... // $a
As a result it is necessary to do a small bit of manual parsing to identify the
location of the = and & in the token stream and create nodes for them.
There are also situations where the opposite is true. For example strings with
embedded variables like "foo {$x} {$y}" generate additional nodes for $x and $y
but xhpast treats the entirety as a single string node. These cases are easier to
handle since we can simply prune or combine nodes we don't care about.
IDEAL DESIGN
In an ideal world, the HPHP parser would be augmented to provide a superset of the
information required for all other parsers that we have (including Hack and pfff),
then the other parsers would be trivial (or at least easy to derive from the HPHP
parser).
However, I didn't feel like making big intrusive changes to the HPHP parser. The ideal
design might make sense at some future point in time.
ACTUAL DESIGN
Given that I wanted to avoid intrusive changes to the HPHP parser, I elected to build
a framework that would most flexibly handle the differences listed above, plus any that
I had not yet discovered or that might arise in the future. Thus I elected to implement the
transformation as a batch process, that is, first build a clean HPHP AST + token stream,
then transform it to an xhpast-compatible AST. Specific changes include:
1. Adding a TokenListener facility to the parser to eavesdrop on tokens as they fly by. See
util/parser/scanner.h. In addition to eavesdropping on tokens we also want the token ids
that are returned by the scanner (this was not previously captured by HPHP tokens). This
has been accomplished by modifying scanner rules to also pass the token id, such as
T_WHITESPACE, whenever we notify the scanner that a token has been detected.
2. Constructing a new lightweight AST that purely captures the rules of the parser. See
util/parser/xhpast2/parser.h. Due to the higher semantic level of the HPHP parser, these
AST nodes need to contain arbitrary scalar attributes in addition to a list of children.
This has been implemented via the various "ExtraInfo" structs in that file. For example,
the extra arguments necessary for the onName parser callback are stored in the OnNameEI
struct.
The high-level flow is found in xhpast2.cpp and is pretty simple. The only thing that might
be non-obvious at first glance is that when you call parser.parse(), what is actually
invoked are the parse rules in hphp.y, which in turn call each parser callback method as
the rules fire.
Once the tree is built we transform it to xhpast nodes via outputXHPAST() (with heavy
lifting done by outputXHPASTImpl). The heart of outputXHPASTImpl is a giant switch that
processes each node type differently. It would have been more object-oriented to make a
class for each node and have each node know how to transform itself to xhpast but I was
concerned that some of the transformations might require peeking up and down the hierarchy
and break this nice abstraction anyway. Also, I didn't want to create an army of classes.
Still, I would not be averse to going in this direction if it can be done elegantly.
+160
Ver Arquivo
@@ -0,0 +1,160 @@
/*
* Copyright 2011 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cstdio>
#include <cstdlib>
#include <list>
#include <string>
#include "node_names.hpp"
// Convenience macros for building xhpast::Node trees. Every macro except
// NNEW forwards to a Node member function that returns the node it was
// called on, so those macro expressions evaluate to `n` and can be nested.

// Allocate a new xhpast::Node of type `t` with `new`. The caller receives a
// raw pointer; nothing in this header deletes it.
#define NNEW(t) \
(new xhpast::Node(t))
// Set the type of node `n` to `type`.
#define NTYPE(n, type) \
((n)->setType(type))
// Extend the token range of node `n` using node `end` (see Node::setEnd).
#define NMORE(n, end) \
((n)->setEnd(end))
// Set the type of node `n`, then extend its token range using node `end`.
#define NSPAN(n, type, end) \
(NMORE(NTYPE((n), type), end))
// Extend the token range of node `n` using node `begin` (see Node::setBegin).
#define NLMORE(n, begin) \
((n)->setBegin(begin))
// Extend node `n` on both sides: setBegin(l) followed by setEnd(r).
#define NEXPAND(l, n, r) \
((n)->setBegin(l)->setEnd(r))
namespace xhpast {
class Token;
typedef std::list<Token *> token_list_t;

/**
 * One entry of the token stream.
 *
 * All members are public and plain data; the class owns only its copy of
 * the token text.
 */
class Token {
 public:
  unsigned int type;   // numeric token type
  std::string value;   // copy of the token text
  // unsigned int lineno;
  unsigned int n;      // NOTE(review): presumably the token's position in
                       // the stream -- confirm against the code that
                       // constructs Tokens

  /**
   * @param type   numeric token type
   * @param value  NUL-terminated token text; it is copied. A null pointer
   *               is treated as the empty string (constructing a
   *               std::string from a null pointer is undefined behaviour).
   *               Taken as `const char *` so that callers holding const
   *               data do not need a cast; `char *` arguments still convert
   *               implicitly.
   * @param n      stored verbatim in `n`
   */
  Token(unsigned int type, const char *value, unsigned int n) :
      type(type),
      value(value ? value : ""),
      n(n) {
  }
};
class Node;
typedef std::list<Node *> node_list_t;

/**
 * A node of the xhpast-style tree.
 *
 * A node carries a numeric type, the inclusive range [l_tok, r_tok] of
 * token indices it covers, and an ordered list of children. A token index
 * of -1 means "not set yet".
 *
 * Children are held as raw pointers. This class never deletes them, so
 * their lifetime must be managed by whoever allocates the nodes.
 *
 * The mutating members return `this` so that calls can be chained.
 */
class Node {
 public:
  unsigned int type;
  int l_tok;            // leftmost covered token index, or -1
  int r_tok;            // rightmost covered token index, or -1
  node_list_t children;

  Node() : type(0), l_tok(-1), r_tok(-1) {}

  explicit Node(unsigned int type) : type(type), l_tok(-1), r_tok(-1) {}

  // Node covering exactly one token: both ends are set to `end_tok`.
  Node(unsigned int type, int end_tok) :
      type(type),
      l_tok(end_tok),
      r_tok(end_tok) {
  }

  Node(unsigned int type, int l_tok, int r_tok) :
      type(type),
      l_tok(l_tok),
      r_tok(r_tok) {
  }

  // Add `node` as the last child and extend this node's range to cover it.
  Node *appendChild(Node *node) {
    this->children.push_back(node);
    return this->setEnd(node);
  }

  // Add `node` as the first child and extend this node's range to cover it.
  Node *prependChild(Node *node) {
    this->children.push_front(node);
    return this->setBegin(node);
  }

  // Append every child of `node` (not `node` itself) to this node, extending
  // this node's range for each one. `node` keeps its child list, so the
  // children end up referenced by both nodes.
  Node *appendChildren(Node *node) {
    if (!node) {
      // Same policy as setEnd()/setBegin(): a null node is a fatal error.
      fprintf(stderr,
        "Trying to appendChildren() of a null node to one of type %u\n",
        this->type);
      exit(1);
    }
    // Bound the loop by the number of children present on entry. When
    // `node == this` the list grows while it is being walked, and a loop
    // that runs until end() would never terminate.
    node_list_t::size_type remaining = node->children.size();
    for (node_list_t::iterator ii = node->children.begin();
         remaining > 0; ++ii, --remaining) {
      this->children.push_back(*ii);
      this->setEnd(*ii);
    }
    return this;
  }

  // First child, or nullptr when there are no children.
  Node *firstChild() {
    return this->children.empty() ? nullptr : this->children.front();
  }

  Node *setType(unsigned int t) {
    this->type = t;
    return this;
  }

  // Extend the right edge of this node's range using `n`: take n's r_tok
  // when it is set and larger than ours (or ours is unset), and adopt n's
  // l_tok when ours is still unset. Exits the process if `n` is null.
  Node *setEnd(Node *n) {
    if (!n) {
      // `type` is unsigned, so it is printed with %u.
      fprintf(stderr,
        "Trying to setEnd() a null node to one of type %u\n",
        this->type);
      exit(1);
    }

    if (n->r_tok != -1 && (this->r_tok == -1 || n->r_tok > this->r_tok)) {
      this->r_tok = n->r_tok;
    }

    if (this->l_tok == -1) {
      this->l_tok = n->l_tok;
    }
    return this;
  }

  // Mirror image of setEnd(): take n's l_tok when it is set and smaller than
  // ours (or ours is unset), and adopt n's r_tok when ours is still unset.
  // Exits the process if `n` is null.
  Node *setBegin(Node *n) {
    if (!n) {
      fprintf(stderr,
        "Trying to setBegin() a null node to one of type %u\n",
        this->type);
      exit(1);
    }

    if (n->l_tok != -1 && (this->l_tok == -1 || n->l_tok < this->l_tok)) {
      this->l_tok = n->l_tok;
    }

    if (this->r_tok == -1) {
      this->r_tok = n->r_tok;
    }
    return this;
  }
};
}
+24
Ver Arquivo
@@ -0,0 +1,24 @@
#!/usr/bin/python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import fileinput
import sys
# Column at which the most recently opened '[' was written. It starts at -2
# so that the first '[' (which adds 2 before printing) lands at column 0.
indent = -2


def process(c):
    """Write the single character ``c`` to stdout, one '[' per line.

    Every '[' is preceded by a newline and ``indent`` spaces, with
    ``indent`` increased by 2 first; every ']' decreases ``indent`` by 2
    after it has been written. All other characters are copied through
    unchanged. The module-level ``indent`` is updated in place.
    """
    global indent
    if c == '[':
        indent += 2
        sys.stdout.write('\n')
        sys.stdout.write(' ' * indent)
    sys.stdout.write(c)
    if c == ']':
        indent -= 2
# Feed the input to process() one character at a time. fileinput.input()
# reads the files named on the command line, or stdin when none are given.
for line in fileinput.input():
    for c in line:
        process(c)
+128
Ver Arquivo
@@ -0,0 +1,128 @@
#ifndef incl_HPHP_UTIL_PARSER_XHPAST2_NODE_NAMES_H_
#define incl_HPHP_UTIL_PARSER_XHPAST2_NODE_NAMES_H_
// Numeric identifiers for the xhpast node types. The values are consecutive,
// from 9000 (n_PROGRAM) through 9122 (n_USE_TRAIT_RESOLUTION_LIST).
// NOTE(review): presumably these must stay in sync with the numbering that
// consumers of the original xhpast output expect -- confirm before
// renumbering or inserting entries.
#define n_PROGRAM 9000
#define n_SYMBOL_NAME 9001
#define n_HALT_COMPILER 9002
#define n_NAMESPACE 9003
#define n_STATEMENT 9004
#define n_EMPTY 9005
#define n_STATEMENT_LIST 9006
#define n_OPEN_TAG 9007
#define n_CLOSE_TAG 9008
#define n_USE_LIST 9009
#define n_USE 9010
#define n_CONSTANT_DECLARATION_LIST 9011
#define n_CONSTANT_DECLARATION 9012
#define n_STRING 9013
#define n_LABEL 9014
#define n_CONDITION_LIST 9015
#define n_CONTROL_CONDITION 9016
#define n_IF 9017
#define n_ELSEIF 9018
#define n_ELSE 9019
#define n_WHILE 9020
#define n_DO_WHILE 9021
#define n_FOR 9022
#define n_FOR_EXPRESSION 9023
#define n_SWITCH 9024
#define n_BREAK 9025
#define n_CONTINUE 9026
#define n_RETURN 9027
#define n_GLOBAL_DECLARATION_LIST 9028
#define n_GLOBAL_DECLARATION 9029
#define n_STATIC_DECLARATION_LIST 9030
#define n_STATIC_DECLARATION 9031
#define n_ECHO_LIST 9032
#define n_ECHO 9033
#define n_INLINE_HTML 9034
#define n_UNSET_LIST 9035
#define n_UNSET 9036
#define n_FOREACH 9037
#define n_FOREACH_EXPRESSION 9038
#define n_THROW 9039
#define n_GOTO 9040
#define n_TRY 9041
#define n_CATCH_LIST 9042
#define n_CATCH 9043
#define n_DECLARE 9044
#define n_DECLARE_DECLARATION_LIST 9045
#define n_DECLARE_DECLARATION 9046
#define n_VARIABLE 9047
#define n_REFERENCE 9048
#define n_VARIABLE_REFERENCE 9049
#define n_FUNCTION_DECLARATION 9050
#define n_CLASS_DECLARATION 9051
#define n_CLASS_ATTRIBUTES 9052
#define n_EXTENDS 9053
#define n_EXTENDS_LIST 9054
#define n_IMPLEMENTS_LIST 9055
#define n_INTERFACE_DECLARATION 9056
#define n_CASE 9057
#define n_DEFAULT 9058
#define n_DECLARATION_PARAMETER_LIST 9059
#define n_DECLARATION_PARAMETER 9060
#define n_TYPE_NAME 9061
#define n_VARIABLE_VARIABLE 9062
#define n_CLASS_MEMBER_DECLARATION_LIST 9063
#define n_CLASS_MEMBER_DECLARATION 9064
#define n_CLASS_CONSTANT_DECLARATION_LIST 9065
#define n_CLASS_CONSTANT_DECLARATION 9066
#define n_METHOD_DECLARATION 9067
#define n_METHOD_MODIFIER_LIST 9068
#define n_FUNCTION_MODIFIER_LIST 9069
#define n_CLASS_MEMBER_MODIFIER_LIST 9070
#define n_EXPRESSION_LIST 9071
#define n_LIST 9072
#define n_ASSIGNMENT 9073
#define n_NEW 9074
#define n_UNARY_PREFIX_EXPRESSION 9075
#define n_UNARY_POSTFIX_EXPRESSION 9076
#define n_BINARY_EXPRESSION 9077
#define n_TERNARY_EXPRESSION 9078
#define n_CAST_EXPRESSION 9079
#define n_CAST 9080
#define n_OPERATOR 9081
#define n_ARRAY_LITERAL 9082
#define n_EXIT_EXPRESSION 9083
#define n_BACKTICKS_EXPRESSION 9084
#define n_LEXICAL_VARIABLE_LIST 9085
#define n_NUMERIC_SCALAR 9086
#define n_STRING_SCALAR 9087
#define n_MAGIC_SCALAR 9088
#define n_CLASS_STATIC_ACCESS 9089
#define n_CLASS_NAME 9090
#define n_MAGIC_CLASS_KEYWORD 9091
#define n_OBJECT_PROPERTY_ACCESS 9092
#define n_ARRAY_VALUE_LIST 9093
#define n_ARRAY_VALUE 9094
#define n_CALL_PARAMETER_LIST 9095
#define n_VARIABLE_EXPRESSION 9096
#define n_INCLUDE_FILE 9097
#define n_HEREDOC 9098
#define n_FUNCTION_CALL 9099
#define n_INDEX_ACCESS 9100
#define n_ASSIGNMENT_LIST 9101
#define n_METHOD_CALL 9102
#define n_XHP_TAG 9103
#define n_XHP_TAG_OPEN 9104
#define n_XHP_TAG_CLOSE 9105
#define n_XHP_TEXT 9106
#define n_XHP_EXPRESSION 9107
#define n_XHP_ATTRIBUTE_LIST 9108
#define n_XHP_ATTRIBUTE 9109
#define n_XHP_LITERAL 9110
#define n_XHP_ATTRIBUTE_LITERAL 9111
#define n_XHP_ATTRIBUTE_EXPRESSION 9112
#define n_XHP_NODE_LIST 9113
#define n_CONCATENATION_LIST 9114
#define n_PARENTHETICAL_EXPRESSION 9115
#define n_YIELD 9116
#define n_YIELD_EXPRESSION 9117
#define n_TRAIT_DECLARATION 9118
#define n_USE_TRAIT_DECLARATION 9119
#define n_USE_TRAIT_LIST 9120
#define n_USE_TRAIT_RESOLUTION 9121
#define n_USE_TRAIT_RESOLUTION_LIST 9122
#endif
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
+135
Ver Arquivo
@@ -0,0 +1,135 @@
/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#include <iostream>
#include <cstdlib>
#include <string.h>
#include "hphp/util/parser/xhpast2/parser.h"
namespace HPHP { namespace HPHP_PARSER_NS {
// Set to true by main() when "--verify" is the first command-line argument.
// When it is set, a std::exception thrown while parsing makes main() print
// "not ok 1" instead of letting the exception propagate.
// NOTE(review): main() refers to this as HPHP::XHPAST2::g_verifyMode, so
// HPHP_PARSER_NS presumably expands to XHPAST2 -- confirm in parser.h.
bool g_verifyMode = false;
}}
/*
 * Recursively write `node` to stdout as a JSON array.
 *
 * A node whose left token is unset (-1) is written as "[type]" and its
 * children are not visited. Any other node is written as
 * "[type, l_tok, r_tok]", with a fourth element holding the comma-separated
 * children when the node has any.
 */
void print_node(xhpast::Node *node) {
  if (node->l_tok == -1) {
    printf("[%d]", node->type);
    return;
  }

  printf("[%d, %d, %d", node->type, node->l_tok, node->r_tok);
  if (!node->children.empty()) {
    printf(", [");
    bool needComma = false;
    for (xhpast::Node *child : node->children) {
      if (needComma) {
        printf(",");
      }
      print_node(child);
      needComma = true;
    }
    printf("]");
  }
  printf("]");
}
/*
* This program parses a file with the hphp php parser, and dumps
* every callback the parser makes to stdout.
*
* If a parse error occurs, it says why.
*/
int main(int argc, char** argv) try {
if (argc >= 2 && !strcmp(argv[1], "--verify")) {
HPHP::XHPAST2::g_verifyMode = true;
--argc, ++argv;
}
if (argc != 2) {
std::cerr << "usage: " << argv[0] << " [--verify] filename\n";
std::exit(1);
}
std::ifstream in(argv[1]);
if (!in.is_open()) {
std::cerr << argv[0] << ": couldn't open file: "
<< strerror(errno) << '\n';
}
std::cout << "1..1\n";
try {
using HPHP::Scanner;
using HPHP::XHPAST2::Parser;
Scanner scan(in, Scanner::AllowShortTags);
Parser parser(scan, argv[1]);
parser.parse();
parser.coalesceTree();
std::cout << parser.tree << std::endl;
xhpast::Node* root = parser.outputXHPAST();
std::vector<xhpast::Token *>* tokens = &(parser.m_listener.tokens);
printf("{");
printf("\"tree\":");
if (root) {
// Extend the right token for the root node to the end of the concrete
// token stream. This ensure all tokens appear in the tree. If we don't
// do this and the file ends in tokens which don't go to the parser (like
// comments and whitespace) they won't be represented in the tree.
root->r_tok = (tokens->size() - 1);
print_node(root);
} else {
printf("null");
}
printf(",");
printf("\"stream\":");
printf("[");
for (std::vector<xhpast::Token *>::iterator ii = tokens->begin();;) {
printf("[%d, %d]", (*ii)->type, (int)(*ii)->value.length());
if (++ii != tokens->end()) {
printf(",");
} else {
break;
}
}
printf("]");
printf("}\n");
} catch (const std::exception& e) {
if (HPHP::XHPAST2::g_verifyMode) {
std::cout << "not ";
} else {
throw;
}
}
std::cout << "ok 1\n";
}
catch (const std::runtime_error& e) {
std::cerr << argv[0] << ": " << e.what() << '\n';
return 1;
}