Initial rough draft of xhpast2, a replacement for xhpast
The goal of this diff is to clean up the current work-in-progress and check it in so that others can contribute if they have time. For ease of debugging, the program currently outputs the HHVM tree as well as the xhpast-like JSON. A small helper program, jsonpretty.py, makes the trees more readable; for example: _build/dbg/hphp/util/parser/xhpast/xhpast2 <some php file> | hphp/util/parser/xhpast/xhpast2/jsonpretty.py
Esse commit está contido em:
+807
-805
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
+201
-244
@@ -27,12 +27,15 @@
|
||||
#define RESET_YYCURSOR yyg->yy_hold_char = *YYCURSOR; *YYCURSOR = '\0';
|
||||
|
||||
// macros for rules
|
||||
#define SETTOKEN _scanner->setToken(yytext, yyleng)
|
||||
#define STEPPOS _scanner->stepPos(yytext, yyleng)
|
||||
#define RETTOKEN(t) do {_scanner->setToken(yytext, yyleng, t); return t;} \
|
||||
while (0)
|
||||
#define RETSTEP(t) do {_scanner->stepPos(yytext, yyleng, t); return t;} \
|
||||
while (0)
|
||||
#define SETTOKEN(t) _scanner->setToken(yytext, yyleng, t)
|
||||
#define STEPPOS(t) _scanner->stepPos(yytext, yyleng, t)
|
||||
|
||||
#define HH_ONLY_KEYWORD(tok) do { \
|
||||
SETTOKEN; \
|
||||
return _scanner->hipHopSyntaxEnabled() ? tok : T_STRING; \
|
||||
#define HH_ONLY_KEYWORD(tok) do { \
|
||||
RETTOKEN(_scanner->hipHopSyntaxEnabled() ? tok : T_STRING); \
|
||||
} while (0)
|
||||
|
||||
#define IS_LABEL_START(c) \
|
||||
@@ -211,65 +214,64 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
|
||||
%%
|
||||
|
||||
<ST_IN_SCRIPTING>"exit" { SETTOKEN; return T_EXIT;}
|
||||
<ST_IN_SCRIPTING>"die" { SETTOKEN; return T_EXIT;}
|
||||
<ST_IN_SCRIPTING>"function" { SETTOKEN; return T_FUNCTION;}
|
||||
<ST_IN_SCRIPTING>"const" { SETTOKEN; return T_CONST;}
|
||||
<ST_IN_SCRIPTING>"return" { SETTOKEN; return T_RETURN;}
|
||||
<ST_IN_SCRIPTING>"yield" { SETTOKEN; return T_YIELD;}
|
||||
<ST_IN_SCRIPTING>"try" { SETTOKEN; return T_TRY;}
|
||||
<ST_IN_SCRIPTING>"catch" { SETTOKEN; return T_CATCH;}
|
||||
<ST_IN_SCRIPTING>"finally" { SETTOKEN; return T_FINALLY;}
|
||||
<ST_IN_SCRIPTING>"throw" { SETTOKEN; return T_THROW;}
|
||||
<ST_IN_SCRIPTING>"if" { SETTOKEN; return T_IF;}
|
||||
<ST_IN_SCRIPTING>"elseif" { SETTOKEN; return T_ELSEIF;}
|
||||
<ST_IN_SCRIPTING>"endif" { SETTOKEN; return T_ENDIF;}
|
||||
<ST_IN_SCRIPTING>"else" { SETTOKEN; return T_ELSE;}
|
||||
<ST_IN_SCRIPTING>"while" { SETTOKEN; return T_WHILE;}
|
||||
<ST_IN_SCRIPTING>"endwhile" { SETTOKEN; return T_ENDWHILE;}
|
||||
<ST_IN_SCRIPTING>"do" { SETTOKEN; return T_DO;}
|
||||
<ST_IN_SCRIPTING>"for" { SETTOKEN; return T_FOR;}
|
||||
<ST_IN_SCRIPTING>"endfor" { SETTOKEN; return T_ENDFOR;}
|
||||
<ST_IN_SCRIPTING>"foreach" { SETTOKEN; return T_FOREACH;}
|
||||
<ST_IN_SCRIPTING>"endforeach" { SETTOKEN; return T_ENDFOREACH;}
|
||||
<ST_IN_SCRIPTING>"declare" { SETTOKEN; return T_DECLARE;}
|
||||
<ST_IN_SCRIPTING>"enddeclare" { SETTOKEN; return T_ENDDECLARE;}
|
||||
<ST_IN_SCRIPTING>"instanceof" { SETTOKEN; return T_INSTANCEOF;}
|
||||
<ST_IN_SCRIPTING>"as" { SETTOKEN; return T_AS;}
|
||||
<ST_IN_SCRIPTING>"switch" { SETTOKEN; return T_SWITCH;}
|
||||
<ST_IN_SCRIPTING>"endswitch" { SETTOKEN; return T_ENDSWITCH;}
|
||||
<ST_IN_SCRIPTING>"case" { SETTOKEN; return T_CASE;}
|
||||
<ST_IN_SCRIPTING>"default" { SETTOKEN; return T_DEFAULT;}
|
||||
<ST_IN_SCRIPTING>"break" { SETTOKEN; return T_BREAK;}
|
||||
<ST_IN_SCRIPTING>"continue" { SETTOKEN; return T_CONTINUE;}
|
||||
<ST_IN_SCRIPTING>"goto" { SETTOKEN; return T_GOTO;}
|
||||
<ST_IN_SCRIPTING>"echo" { SETTOKEN; return T_ECHO;}
|
||||
<ST_IN_SCRIPTING>"print" { SETTOKEN; return T_PRINT;}
|
||||
<ST_IN_SCRIPTING>"class" { SETTOKEN; return T_CLASS;}
|
||||
<ST_IN_SCRIPTING>"interface" { SETTOKEN; return T_INTERFACE;}
|
||||
<ST_IN_SCRIPTING>"trait" { SETTOKEN; return T_TRAIT;}
|
||||
<ST_IN_SCRIPTING>"insteadof" { SETTOKEN; return T_INSTEADOF;}
|
||||
<ST_IN_SCRIPTING>"extends" { SETTOKEN; return T_EXTENDS;}
|
||||
<ST_IN_SCRIPTING>"implements" { SETTOKEN; return T_IMPLEMENTS;}
|
||||
<ST_IN_SCRIPTING>"attribute" { SETTOKEN; return T_XHP_ATTRIBUTE;}
|
||||
<ST_IN_SCRIPTING>"category" { SETTOKEN; return T_XHP_CATEGORY;}
|
||||
<ST_IN_SCRIPTING>"children" { SETTOKEN; return T_XHP_CHILDREN;}
|
||||
<ST_IN_SCRIPTING>"required" { SETTOKEN; return T_XHP_REQUIRED;}
|
||||
<ST_IN_SCRIPTING>"enum" { SETTOKEN; return T_XHP_ENUM;}
|
||||
<ST_IN_SCRIPTING>"exit" { RETTOKEN(T_EXIT);}
|
||||
<ST_IN_SCRIPTING>"die" { RETTOKEN(T_EXIT);}
|
||||
<ST_IN_SCRIPTING>"function" { RETTOKEN(T_FUNCTION);}
|
||||
<ST_IN_SCRIPTING>"const" { RETTOKEN(T_CONST);}
|
||||
<ST_IN_SCRIPTING>"return" { RETTOKEN(T_RETURN); }
|
||||
<ST_IN_SCRIPTING>"yield" { RETTOKEN(T_YIELD);}
|
||||
<ST_IN_SCRIPTING>"try" { RETTOKEN(T_TRY);}
|
||||
<ST_IN_SCRIPTING>"catch" { RETTOKEN(T_CATCH);}
|
||||
<ST_IN_SCRIPTING>"finally" { RETTOKEN(T_FINALLY);}
|
||||
<ST_IN_SCRIPTING>"throw" { RETTOKEN(T_THROW);}
|
||||
<ST_IN_SCRIPTING>"if" { RETTOKEN(T_IF);}
|
||||
<ST_IN_SCRIPTING>"elseif" { RETTOKEN(T_ELSEIF);}
|
||||
<ST_IN_SCRIPTING>"endif" { RETTOKEN(T_ENDIF);}
|
||||
<ST_IN_SCRIPTING>"else" { RETTOKEN(T_ELSE);}
|
||||
<ST_IN_SCRIPTING>"while" { RETTOKEN(T_WHILE);}
|
||||
<ST_IN_SCRIPTING>"endwhile" { RETTOKEN(T_ENDWHILE);}
|
||||
<ST_IN_SCRIPTING>"do" { RETTOKEN(T_DO);}
|
||||
<ST_IN_SCRIPTING>"for" { RETTOKEN(T_FOR);}
|
||||
<ST_IN_SCRIPTING>"endfor" { RETTOKEN(T_ENDFOR);}
|
||||
<ST_IN_SCRIPTING>"foreach" { RETTOKEN(T_FOREACH);}
|
||||
<ST_IN_SCRIPTING>"endforeach" { RETTOKEN(T_ENDFOREACH);}
|
||||
<ST_IN_SCRIPTING>"declare" { RETTOKEN(T_DECLARE);}
|
||||
<ST_IN_SCRIPTING>"enddeclare" { RETTOKEN(T_ENDDECLARE);}
|
||||
<ST_IN_SCRIPTING>"instanceof" { RETTOKEN(T_INSTANCEOF);}
|
||||
<ST_IN_SCRIPTING>"as" { RETTOKEN(T_AS);}
|
||||
<ST_IN_SCRIPTING>"switch" { RETTOKEN(T_SWITCH);}
|
||||
<ST_IN_SCRIPTING>"endswitch" { RETTOKEN(T_ENDSWITCH);}
|
||||
<ST_IN_SCRIPTING>"case" { RETTOKEN(T_CASE);}
|
||||
<ST_IN_SCRIPTING>"default" { RETTOKEN(T_DEFAULT);}
|
||||
<ST_IN_SCRIPTING>"break" { RETTOKEN(T_BREAK);}
|
||||
<ST_IN_SCRIPTING>"continue" { RETTOKEN(T_CONTINUE);}
|
||||
<ST_IN_SCRIPTING>"goto" { RETTOKEN(T_GOTO);}
|
||||
<ST_IN_SCRIPTING>"echo" { RETTOKEN(T_ECHO);}
|
||||
<ST_IN_SCRIPTING>"print" { RETTOKEN(T_PRINT);}
|
||||
<ST_IN_SCRIPTING>"class" { RETTOKEN(T_CLASS);}
|
||||
<ST_IN_SCRIPTING>"interface" { RETTOKEN(T_INTERFACE);}
|
||||
<ST_IN_SCRIPTING>"trait" { RETTOKEN(T_TRAIT);}
|
||||
<ST_IN_SCRIPTING>"insteadof" { RETTOKEN(T_INSTEADOF);}
|
||||
<ST_IN_SCRIPTING>"extends" { RETTOKEN(T_EXTENDS);}
|
||||
<ST_IN_SCRIPTING>"implements" { RETTOKEN(T_IMPLEMENTS);}
|
||||
<ST_IN_SCRIPTING>"attribute" { RETTOKEN(T_XHP_ATTRIBUTE);}
|
||||
<ST_IN_SCRIPTING>"category" { RETTOKEN(T_XHP_CATEGORY);}
|
||||
<ST_IN_SCRIPTING>"children" { RETTOKEN(T_XHP_CHILDREN);}
|
||||
<ST_IN_SCRIPTING>"required" { RETTOKEN(T_XHP_REQUIRED);}
|
||||
<ST_IN_SCRIPTING>"enum" { RETTOKEN(T_XHP_ENUM);}
|
||||
|
||||
<ST_IN_SCRIPTING>"->" {
|
||||
STEPPOS;
|
||||
STEPPOS(T_OBJECT_OPERATOR);
|
||||
yy_push_state(ST_LOOKING_FOR_PROPERTY, yyscanner);
|
||||
return T_OBJECT_OPERATOR;
|
||||
}
|
||||
|
||||
<ST_LOOKING_FOR_PROPERTY>"->" {
|
||||
STEPPOS;
|
||||
return T_OBJECT_OPERATOR;
|
||||
RETSTEP(T_OBJECT_OPERATOR);
|
||||
}
|
||||
|
||||
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
|
||||
SETTOKEN;
|
||||
SETTOKEN(T_STRING);
|
||||
yy_pop_state(yyscanner);
|
||||
return T_STRING;
|
||||
}
|
||||
@@ -279,133 +281,116 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
yy_pop_state(yyscanner);
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"::" { STEPPOS;return T_PAAMAYIM_NEKUDOTAYIM;}
|
||||
<ST_IN_SCRIPTING>"\\" { SETTOKEN;return T_NS_SEPARATOR;}
|
||||
<ST_IN_SCRIPTING>"new" { SETTOKEN;return T_NEW;}
|
||||
<ST_IN_SCRIPTING>"clone" { SETTOKEN;return T_CLONE;}
|
||||
<ST_IN_SCRIPTING>"var" { SETTOKEN;return T_VAR;}
|
||||
<ST_IN_SCRIPTING>"::" { RETSTEP(T_PAAMAYIM_NEKUDOTAYIM);}
|
||||
<ST_IN_SCRIPTING>"\\" { RETTOKEN(T_NS_SEPARATOR);}
|
||||
<ST_IN_SCRIPTING>"new" { RETTOKEN(T_NEW);}
|
||||
<ST_IN_SCRIPTING>"clone" { RETTOKEN(T_CLONE);}
|
||||
<ST_IN_SCRIPTING>"var" { RETTOKEN(T_VAR);}
|
||||
|
||||
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
|
||||
if (_scanner->lastToken() != T_FUNCTION) {
|
||||
STEPPOS;
|
||||
return T_INT_CAST;
|
||||
RETSTEP(T_INT_CAST);
|
||||
}
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
return '(';
|
||||
RETSTEP('(');
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
|
||||
if (_scanner->lastToken() != T_FUNCTION) {
|
||||
STEPPOS;
|
||||
return T_DOUBLE_CAST;
|
||||
RETSTEP(T_DOUBLE_CAST);
|
||||
}
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
return '(';
|
||||
RETSTEP('(');
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
|
||||
if (_scanner->lastToken() != T_FUNCTION) {
|
||||
STEPPOS;
|
||||
return T_STRING_CAST;
|
||||
RETSTEP(T_STRING_CAST);
|
||||
}
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
return '(';
|
||||
RETSTEP('(');
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
|
||||
if (_scanner->lastToken() != T_FUNCTION) {
|
||||
STEPPOS;
|
||||
return T_ARRAY_CAST;
|
||||
RETSTEP(T_ARRAY_CAST);
|
||||
}
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
return '(';
|
||||
RETSTEP('(');
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
|
||||
if (_scanner->lastToken() != T_FUNCTION) {
|
||||
STEPPOS;
|
||||
return T_OBJECT_CAST;
|
||||
RETSTEP(T_OBJECT_CAST);
|
||||
}
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
return '(';
|
||||
RETSTEP('(');
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
|
||||
if (_scanner->lastToken() != T_FUNCTION) {
|
||||
STEPPOS;
|
||||
return T_BOOL_CAST;
|
||||
RETSTEP(T_BOOL_CAST);
|
||||
}
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
return '(';
|
||||
RETSTEP('(');
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
|
||||
if (_scanner->lastToken() != T_FUNCTION) {
|
||||
STEPPOS;
|
||||
return T_UNSET_CAST;
|
||||
RETSTEP(T_UNSET_CAST);
|
||||
}
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
return '(';
|
||||
RETSTEP('(');
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"eval" { SETTOKEN; return T_EVAL;}
|
||||
<ST_IN_SCRIPTING>"include" { SETTOKEN; return T_INCLUDE;}
|
||||
<ST_IN_SCRIPTING>"include_once" { SETTOKEN; return T_INCLUDE_ONCE;}
|
||||
<ST_IN_SCRIPTING>"require" { SETTOKEN; return T_REQUIRE;}
|
||||
<ST_IN_SCRIPTING>"require_once" { SETTOKEN; return T_REQUIRE_ONCE;}
|
||||
<ST_IN_SCRIPTING>"namespace" { SETTOKEN; return T_NAMESPACE;}
|
||||
<ST_IN_SCRIPTING>"use" { SETTOKEN; return T_USE;}
|
||||
<ST_IN_SCRIPTING>"global" { SETTOKEN; return T_GLOBAL;}
|
||||
<ST_IN_SCRIPTING>"isset" { SETTOKEN; return T_ISSET;}
|
||||
<ST_IN_SCRIPTING>"empty" { SETTOKEN; return T_EMPTY;}
|
||||
<ST_IN_SCRIPTING>"__halt_compiler" { SETTOKEN; return T_HALT_COMPILER;}
|
||||
<ST_IN_SCRIPTING>"__compiler_halt_offset__" {
|
||||
SETTOKEN;
|
||||
return T_COMPILER_HALT_OFFSET;
|
||||
}
|
||||
<ST_IN_SCRIPTING>"static" { SETTOKEN; return T_STATIC;}
|
||||
<ST_IN_SCRIPTING>"abstract" { SETTOKEN; return T_ABSTRACT;}
|
||||
<ST_IN_SCRIPTING>"final" { SETTOKEN; return T_FINAL;}
|
||||
<ST_IN_SCRIPTING>"private" { SETTOKEN; return T_PRIVATE;}
|
||||
<ST_IN_SCRIPTING>"protected" { SETTOKEN; return T_PROTECTED;}
|
||||
<ST_IN_SCRIPTING>"public" { SETTOKEN; return T_PUBLIC;}
|
||||
<ST_IN_SCRIPTING>"unset" { SETTOKEN; return T_UNSET;}
|
||||
<ST_IN_SCRIPTING>"=>" { STEPPOS; return T_DOUBLE_ARROW;}
|
||||
<ST_IN_SCRIPTING>"list" { SETTOKEN; return T_LIST;}
|
||||
<ST_IN_SCRIPTING>"array" { SETTOKEN; return T_ARRAY;}
|
||||
<ST_IN_SCRIPTING>"++" { STEPPOS; return T_INC;}
|
||||
<ST_IN_SCRIPTING>"--" { STEPPOS; return T_DEC;}
|
||||
<ST_IN_SCRIPTING>"===" { STEPPOS; return T_IS_IDENTICAL;}
|
||||
<ST_IN_SCRIPTING>"!==" { STEPPOS; return T_IS_NOT_IDENTICAL;}
|
||||
<ST_IN_SCRIPTING>"==" { STEPPOS; return T_IS_EQUAL;}
|
||||
<ST_IN_SCRIPTING>"!="|"<>" { STEPPOS; return T_IS_NOT_EQUAL;}
|
||||
<ST_IN_SCRIPTING>"<=" { STEPPOS; return T_IS_SMALLER_OR_EQUAL;}
|
||||
<ST_IN_SCRIPTING>">=" { STEPPOS; return T_IS_GREATER_OR_EQUAL;}
|
||||
<ST_IN_SCRIPTING>"+=" { STEPPOS; return T_PLUS_EQUAL;}
|
||||
<ST_IN_SCRIPTING>"-=" { STEPPOS; return T_MINUS_EQUAL;}
|
||||
<ST_IN_SCRIPTING>"*=" { STEPPOS; return T_MUL_EQUAL;}
|
||||
<ST_IN_SCRIPTING>"/=" { STEPPOS; return T_DIV_EQUAL;}
|
||||
<ST_IN_SCRIPTING>".=" { STEPPOS; return T_CONCAT_EQUAL;}
|
||||
<ST_IN_SCRIPTING>"%=" { STEPPOS; return T_MOD_EQUAL;}
|
||||
<ST_IN_SCRIPTING>"<<=" { STEPPOS; return T_SL_EQUAL;}
|
||||
<ST_IN_SCRIPTING>">>=" { STEPPOS; return T_SR_EQUAL;}
|
||||
<ST_IN_SCRIPTING>"&=" { STEPPOS; return T_AND_EQUAL;}
|
||||
<ST_IN_SCRIPTING>"|=" { STEPPOS; return T_OR_EQUAL;}
|
||||
<ST_IN_SCRIPTING>"^=" { STEPPOS; return T_XOR_EQUAL;}
|
||||
<ST_IN_SCRIPTING>"||" { STEPPOS; return T_BOOLEAN_OR;}
|
||||
<ST_IN_SCRIPTING>"&&" { STEPPOS; return T_BOOLEAN_AND;}
|
||||
<ST_IN_SCRIPTING>"OR" { SETTOKEN; return T_LOGICAL_OR;}
|
||||
<ST_IN_SCRIPTING>"AND" { SETTOKEN; return T_LOGICAL_AND;}
|
||||
<ST_IN_SCRIPTING>"XOR" { SETTOKEN; return T_LOGICAL_XOR;}
|
||||
<ST_IN_SCRIPTING>"<<" { STEPPOS; return T_SL;}
|
||||
<ST_IN_SCRIPTING>"..." { SETTOKEN; return T_VARARG; }
|
||||
<ST_IN_SCRIPTING>"eval" { RETTOKEN(T_EVAL);}
|
||||
<ST_IN_SCRIPTING>"include" { RETTOKEN(T_INCLUDE);}
|
||||
<ST_IN_SCRIPTING>"include_once" { RETTOKEN(T_INCLUDE_ONCE);}
|
||||
<ST_IN_SCRIPTING>"require" { RETTOKEN(T_REQUIRE);}
|
||||
<ST_IN_SCRIPTING>"require_once" { RETTOKEN(T_REQUIRE_ONCE);}
|
||||
<ST_IN_SCRIPTING>"namespace" { RETTOKEN(T_NAMESPACE);}
|
||||
<ST_IN_SCRIPTING>"use" { RETTOKEN(T_USE);}
|
||||
<ST_IN_SCRIPTING>"global" { RETTOKEN(T_GLOBAL);}
|
||||
<ST_IN_SCRIPTING>"isset" { RETTOKEN(T_ISSET);}
|
||||
<ST_IN_SCRIPTING>"empty" { RETTOKEN(T_EMPTY);}
|
||||
<ST_IN_SCRIPTING>"__halt_compiler" { RETTOKEN(T_HALT_COMPILER);}
|
||||
<ST_IN_SCRIPTING>"__compiler_halt_offset__" { RETTOKEN(T_COMPILER_HALT_OFFSET);}
|
||||
<ST_IN_SCRIPTING>"static" { RETTOKEN(T_STATIC);}
|
||||
<ST_IN_SCRIPTING>"abstract" { RETTOKEN(T_ABSTRACT);}
|
||||
<ST_IN_SCRIPTING>"final" { RETTOKEN(T_FINAL);}
|
||||
<ST_IN_SCRIPTING>"private" { RETTOKEN(T_PRIVATE);}
|
||||
<ST_IN_SCRIPTING>"protected" { RETTOKEN(T_PROTECTED);}
|
||||
<ST_IN_SCRIPTING>"public" { RETTOKEN(T_PUBLIC);}
|
||||
<ST_IN_SCRIPTING>"unset" { RETTOKEN(T_UNSET);}
|
||||
<ST_IN_SCRIPTING>"=>" { RETSTEP(T_DOUBLE_ARROW);}
|
||||
<ST_IN_SCRIPTING>"list" { RETTOKEN(T_LIST);}
|
||||
<ST_IN_SCRIPTING>"array" { RETTOKEN(T_ARRAY);}
|
||||
<ST_IN_SCRIPTING>"++" { RETSTEP(T_INC);}
|
||||
<ST_IN_SCRIPTING>"--" { RETSTEP(T_DEC);}
|
||||
<ST_IN_SCRIPTING>"===" { RETSTEP(T_IS_IDENTICAL);}
|
||||
<ST_IN_SCRIPTING>"!==" { RETSTEP(T_IS_NOT_IDENTICAL);}
|
||||
<ST_IN_SCRIPTING>"==" { RETSTEP(T_IS_EQUAL);}
|
||||
<ST_IN_SCRIPTING>"!="|"<>" { RETSTEP(T_IS_NOT_EQUAL);}
|
||||
<ST_IN_SCRIPTING>"<=" { RETSTEP(T_IS_SMALLER_OR_EQUAL);}
|
||||
<ST_IN_SCRIPTING>">=" { RETSTEP(T_IS_GREATER_OR_EQUAL);}
|
||||
<ST_IN_SCRIPTING>"+=" { RETSTEP(T_PLUS_EQUAL);}
|
||||
<ST_IN_SCRIPTING>"-=" { RETSTEP(T_MINUS_EQUAL);}
|
||||
<ST_IN_SCRIPTING>"*=" { RETSTEP(T_MUL_EQUAL);}
|
||||
<ST_IN_SCRIPTING>"/=" { RETSTEP(T_DIV_EQUAL);}
|
||||
<ST_IN_SCRIPTING>".=" { RETSTEP(T_CONCAT_EQUAL);}
|
||||
<ST_IN_SCRIPTING>"%=" { RETSTEP(T_MOD_EQUAL);}
|
||||
<ST_IN_SCRIPTING>"<<=" { RETSTEP(T_SL_EQUAL);}
|
||||
<ST_IN_SCRIPTING>">>=" { RETSTEP(T_SR_EQUAL);}
|
||||
<ST_IN_SCRIPTING>"&=" { RETSTEP(T_AND_EQUAL);}
|
||||
<ST_IN_SCRIPTING>"|=" { RETSTEP(T_OR_EQUAL);}
|
||||
<ST_IN_SCRIPTING>"^=" { RETSTEP(T_XOR_EQUAL);}
|
||||
<ST_IN_SCRIPTING>"||" { RETSTEP(T_BOOLEAN_OR);}
|
||||
<ST_IN_SCRIPTING>"&&" { RETSTEP(T_BOOLEAN_AND);}
|
||||
<ST_IN_SCRIPTING>"OR" { RETTOKEN(T_LOGICAL_OR);}
|
||||
<ST_IN_SCRIPTING>"AND" { RETTOKEN(T_LOGICAL_AND);}
|
||||
<ST_IN_SCRIPTING>"XOR" { RETTOKEN(T_LOGICAL_XOR);}
|
||||
<ST_IN_SCRIPTING>"<<" { RETSTEP(T_SL);}
|
||||
<ST_IN_SCRIPTING>"..." { RETTOKEN(T_VARARG); }
|
||||
|
||||
<ST_IN_SCRIPTING>"shape" { HH_ONLY_KEYWORD(T_SHAPE); }
|
||||
<ST_IN_SCRIPTING>"type" { HH_ONLY_KEYWORD(T_UNRESOLVED_TYPE); }
|
||||
@@ -413,19 +398,17 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
|
||||
<ST_IN_SCRIPTING>">>" {
|
||||
if (_scanner->getLookaheadLtDepth() < 2) {
|
||||
STEPPOS;
|
||||
return T_SR;
|
||||
RETSTEP(T_SR);
|
||||
}
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
return '>';
|
||||
RETSTEP('>');
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"<"[a-zA-Z_\x7f-\xff] {
|
||||
int ntt = getNextTokenType(_scanner->lastToken());
|
||||
if (ntt & NextTokenType::XhpTag) {
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
STEPPOS(T_XHP_TAG_LT);
|
||||
yy_push_state(ST_XHP_IN_TAG, yyscanner);
|
||||
return T_XHP_TAG_LT;
|
||||
}
|
||||
@@ -437,82 +420,75 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
break;
|
||||
}
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
if (_scanner->hipHopSyntaxEnabled() && (ntt & NextTokenType::TypeListMaybe)) {
|
||||
// Return T_UNRESOLVED_LT; the scanner will inspect subsequent tokens
|
||||
// to resolve this.
|
||||
return T_UNRESOLVED_LT;
|
||||
RETSTEP(T_UNRESOLVED_LT);
|
||||
}
|
||||
return '<';
|
||||
RETSTEP('<');
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"<" {
|
||||
STEPPOS;
|
||||
if (_scanner->hipHopSyntaxEnabled()) {
|
||||
int ntt = getNextTokenType(_scanner->lastToken());
|
||||
if (ntt & NextTokenType::TypeListMaybe) {
|
||||
// Return T_UNRESOLVED_LT; the scanner will inspect subsequent tokens
|
||||
// to resolve this.
|
||||
return T_UNRESOLVED_LT;
|
||||
RETSTEP(T_UNRESOLVED_LT);
|
||||
}
|
||||
}
|
||||
return '<';
|
||||
RETSTEP('<');
|
||||
}
|
||||
|
||||
<ST_LT_CHECK>"<"{XHPLABEL}(">"|"/>"|{WHITESPACE_AND_COMMENTS}(">"|"/>"|[a-zA-Z_\x7f-\xff])) {
|
||||
BEGIN(ST_IN_SCRIPTING);
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
STEPPOS(T_XHP_TAG_LT);
|
||||
yy_push_state(ST_XHP_IN_TAG, yyscanner);
|
||||
return T_XHP_TAG_LT;
|
||||
}
|
||||
|
||||
<ST_LT_CHECK>"<" {
|
||||
BEGIN(ST_IN_SCRIPTING);
|
||||
STEPPOS;
|
||||
return '<';
|
||||
RETSTEP('<');
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>":"{XHPLABEL} {
|
||||
int ntt = getNextTokenType(_scanner->lastToken());
|
||||
if (ntt & NextTokenType::XhpClassName) {
|
||||
yytext++; yyleng--; // skipping the first colon
|
||||
SETTOKEN;
|
||||
return T_XHP_LABEL;
|
||||
RETTOKEN(T_XHP_LABEL);
|
||||
}
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
return ':';
|
||||
RETSTEP(':');
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"%"{XHPLABEL} {
|
||||
int ntt = getNextTokenType(_scanner->lastToken());
|
||||
if (ntt & NextTokenType::XhpCategoryName) {
|
||||
yytext++; yyleng--; // skipping "%"
|
||||
SETTOKEN;
|
||||
return T_XHP_CATEGORY_LABEL;
|
||||
RETTOKEN(T_XHP_CATEGORY_LABEL);
|
||||
}
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
return '%';
|
||||
RETSTEP('%');
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>{TOKENS} {STEPPOS; return yytext[0];}
|
||||
<ST_IN_SCRIPTING>{TOKENS} {RETSTEP(yytext[0]);}
|
||||
|
||||
<ST_IN_SCRIPTING>"{" {
|
||||
STEPPOS;
|
||||
STEPPOS('{');
|
||||
yy_push_state(ST_IN_SCRIPTING, yyscanner);
|
||||
return '{';
|
||||
}
|
||||
|
||||
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
|
||||
STEPPOS;
|
||||
STEPPOS(T_DOLLAR_OPEN_CURLY_BRACES);
|
||||
yy_push_state(ST_LOOKING_FOR_VARNAME, yyscanner);
|
||||
return T_DOLLAR_OPEN_CURLY_BRACES;
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"}" {
|
||||
STEPPOS;
|
||||
STEPPOS('}');
|
||||
// We need to be robust against a '}' in PHP code with
|
||||
// no corresponding '{'
|
||||
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
|
||||
@@ -521,7 +497,7 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
}
|
||||
|
||||
<ST_LOOKING_FOR_VARNAME>{LABEL} {
|
||||
SETTOKEN;
|
||||
SETTOKEN(T_STRING_VARNAME);
|
||||
// Change state to IN_SCRIPTING; current state will be popped
|
||||
// when we encounter '}'
|
||||
BEGIN(ST_IN_SCRIPTING);
|
||||
@@ -536,86 +512,81 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING,ST_XHP_IN_TAG>{LNUM} {
|
||||
SETTOKEN;
|
||||
errno = 0;
|
||||
long ret = strtoll(yytext, NULL, 0);
|
||||
if (errno == ERANGE || ret < 0) {
|
||||
_scanner->error("Dec number is too big: %s", yytext);
|
||||
if (_scanner->isHackMode()) {
|
||||
return T_HACK_ERROR;
|
||||
RETTOKEN(T_HACK_ERROR);
|
||||
}
|
||||
}
|
||||
return T_LNUMBER;
|
||||
RETTOKEN(T_LNUMBER);
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING,ST_XHP_IN_TAG>{HNUM} {
|
||||
SETTOKEN;
|
||||
errno = 0;
|
||||
long ret = strtoull(yytext, NULL, 16);
|
||||
if (errno == ERANGE || ret < 0) {
|
||||
_scanner->error("Hex number is too big: %s", yytext);
|
||||
if (_scanner->isHackMode()) {
|
||||
return T_HACK_ERROR;
|
||||
RETTOKEN(T_HACK_ERROR);
|
||||
}
|
||||
}
|
||||
return T_LNUMBER;
|
||||
RETTOKEN(T_LNUMBER);
|
||||
}
|
||||
|
||||
<ST_VAR_OFFSET>0|([1-9][0-9]*) { /* Offset could be treated as a long */
|
||||
SETTOKEN;
|
||||
errno = 0;
|
||||
long ret = strtoll(yytext, NULL, 0);
|
||||
if (ret == LLONG_MAX && errno == ERANGE) {
|
||||
_scanner->error("Offset number is too big: %s", yytext);
|
||||
if (_scanner->isHackMode()) {
|
||||
return T_HACK_ERROR;
|
||||
RETTOKEN(T_HACK_ERROR);
|
||||
}
|
||||
}
|
||||
return T_NUM_STRING;
|
||||
RETTOKEN(T_NUM_STRING);
|
||||
}
|
||||
|
||||
<ST_VAR_OFFSET>{LNUM}|{HNUM} { /* Offset must be treated as a string */
|
||||
SETTOKEN;
|
||||
return T_NUM_STRING;
|
||||
RETTOKEN(T_NUM_STRING);
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING,ST_XHP_IN_TAG>{DNUM}|{EXPONENT_DNUM} {
|
||||
SETTOKEN;
|
||||
return T_DNUMBER;
|
||||
RETTOKEN(T_DNUMBER);
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"__CLASS__" { SETTOKEN; return T_CLASS_C; }
|
||||
<ST_IN_SCRIPTING>"__TRAIT__" { SETTOKEN; return T_TRAIT_C; }
|
||||
<ST_IN_SCRIPTING>"__FUNCTION__" { SETTOKEN; return T_FUNC_C; }
|
||||
<ST_IN_SCRIPTING>"__METHOD__" { SETTOKEN; return T_METHOD_C;}
|
||||
<ST_IN_SCRIPTING>"__LINE__" { SETTOKEN; return T_LINE; }
|
||||
<ST_IN_SCRIPTING>"__FILE__" { SETTOKEN; return T_FILE; }
|
||||
<ST_IN_SCRIPTING>"__DIR__" { SETTOKEN; return T_DIR; }
|
||||
<ST_IN_SCRIPTING>"__NAMESPACE__" { SETTOKEN; return T_NS_C; }
|
||||
<ST_IN_SCRIPTING>"__CLASS__" { RETTOKEN(T_CLASS_C); }
|
||||
<ST_IN_SCRIPTING>"__TRAIT__" { RETTOKEN(T_TRAIT_C); }
|
||||
<ST_IN_SCRIPTING>"__FUNCTION__" { RETTOKEN(T_FUNC_C); }
|
||||
<ST_IN_SCRIPTING>"__METHOD__" { RETTOKEN(T_METHOD_C);}
|
||||
<ST_IN_SCRIPTING>"__LINE__" { RETTOKEN(T_LINE); }
|
||||
<ST_IN_SCRIPTING>"__FILE__" { RETTOKEN(T_FILE); }
|
||||
<ST_IN_SCRIPTING>"__DIR__" { RETTOKEN(T_DIR); }
|
||||
<ST_IN_SCRIPTING>"__NAMESPACE__" { RETTOKEN(T_NS_C); }
|
||||
|
||||
<INITIAL>"#"[^\n]*"\n" {
|
||||
_scanner->setHashBang(yytext, yyleng);
|
||||
_scanner->setHashBang(yytext, yyleng, T_INLINE_HTML);
|
||||
BEGIN(ST_IN_SCRIPTING);
|
||||
yy_push_state(ST_AFTER_HASHBANG, yyscanner);
|
||||
return T_INLINE_HTML;
|
||||
}
|
||||
|
||||
<INITIAL>(([^<#]|"<"[^?%s<]){1,400})|"<s"|"<" {
|
||||
SETTOKEN;
|
||||
SETTOKEN(T_INLINE_HTML);
|
||||
BEGIN(ST_IN_SCRIPTING);
|
||||
yy_push_state(ST_IN_HTML, yyscanner);
|
||||
return T_INLINE_HTML;
|
||||
}
|
||||
|
||||
<ST_IN_HTML,ST_AFTER_HASHBANG>(([^<]|"<"[^?%s<]){1,400})|"<s"|"<" {
|
||||
SETTOKEN;
|
||||
SETTOKEN(T_INLINE_HTML);
|
||||
BEGIN(ST_IN_HTML);
|
||||
return T_INLINE_HTML;
|
||||
}
|
||||
|
||||
<INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<?"|("<?php"([ \t]|{NEWLINE}))|"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"\'php\'"){WHITESPACE}*">" {
|
||||
SETTOKEN;
|
||||
if (_scanner->shortTags() || yyleng > 2) {
|
||||
SETTOKEN(T_OPEN_TAG);
|
||||
if (YY_START == INITIAL) {
|
||||
BEGIN(ST_IN_SCRIPTING);
|
||||
} else {
|
||||
@@ -623,6 +594,7 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
}
|
||||
return T_OPEN_TAG;
|
||||
} else {
|
||||
SETTOKEN(T_INLINE_HTML);
|
||||
if (YY_START == INITIAL) {
|
||||
BEGIN(ST_IN_SCRIPTING);
|
||||
yy_push_state(ST_IN_HTML, yyscanner);
|
||||
@@ -634,7 +606,6 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
}
|
||||
|
||||
<INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<%="|"<?=" {
|
||||
SETTOKEN;
|
||||
if ((yytext[1]=='%' && _scanner->aspTags()) ||
|
||||
(yytext[1]=='?' && _scanner->shortTags())) {
|
||||
if (YY_START == INITIAL) {
|
||||
@@ -642,7 +613,7 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
} else {
|
||||
yy_pop_state(yyscanner);
|
||||
}
|
||||
return T_ECHO; //return T_OPEN_TAG_WITH_ECHO;
|
||||
RETTOKEN(T_ECHO); //return T_OPEN_TAG_WITH_ECHO;
|
||||
} else {
|
||||
if (YY_START == INITIAL) {
|
||||
BEGIN(ST_IN_SCRIPTING);
|
||||
@@ -650,19 +621,18 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
} else if (YY_START == ST_AFTER_HASHBANG) {
|
||||
BEGIN(ST_IN_HTML);
|
||||
}
|
||||
return T_INLINE_HTML;
|
||||
RETTOKEN(T_INLINE_HTML);
|
||||
}
|
||||
}
|
||||
|
||||
<INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<%" {
|
||||
SETTOKEN;
|
||||
if (_scanner->aspTags()) {
|
||||
if (YY_START == INITIAL) {
|
||||
BEGIN(ST_IN_SCRIPTING);
|
||||
} else {
|
||||
yy_pop_state(yyscanner);
|
||||
}
|
||||
return T_OPEN_TAG;
|
||||
RETTOKEN(T_OPEN_TAG);
|
||||
} else {
|
||||
if (YY_START == INITIAL) {
|
||||
BEGIN(ST_IN_SCRIPTING);
|
||||
@@ -670,7 +640,7 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
} else if (YY_START == ST_AFTER_HASHBANG) {
|
||||
BEGIN(ST_IN_HTML);
|
||||
}
|
||||
return T_INLINE_HTML;
|
||||
RETTOKEN(T_INLINE_HTML);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -683,27 +653,27 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
_scanner->error("Hack mode: content before <?hh");
|
||||
return T_HACK_ERROR;
|
||||
}
|
||||
STEPPOS;
|
||||
STEPPOS(T_OPEN_TAG);
|
||||
_scanner->setHackMode();
|
||||
return T_OPEN_TAG;
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
|
||||
_scanner->setToken(yytext, yyleng, yytext+1, yyleng-1);
|
||||
_scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE);
|
||||
return T_VARIABLE;
|
||||
}
|
||||
|
||||
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
|
||||
yyless(yyleng - 3);
|
||||
yy_push_state(ST_LOOKING_FOR_PROPERTY, yyscanner);
|
||||
_scanner->setToken(yytext, yyleng, yytext+1, yyleng-1);
|
||||
_scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE);
|
||||
return T_VARIABLE;
|
||||
}
|
||||
|
||||
<ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
|
||||
yyless(yyleng - 1);
|
||||
yy_push_state(ST_VAR_OFFSET, yyscanner);
|
||||
_scanner->setToken(yytext, yyleng, yytext+1, yyleng-1);
|
||||
_scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE);
|
||||
return T_VARIABLE;
|
||||
}
|
||||
|
||||
@@ -723,18 +693,15 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
line number */
|
||||
yyless(0);
|
||||
yy_pop_state(yyscanner);
|
||||
STEPPOS;
|
||||
return T_ENCAPSED_AND_WHITESPACE;
|
||||
RETSTEP(T_ENCAPSED_AND_WHITESPACE);
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
|
||||
SETTOKEN;
|
||||
return T_STRING;
|
||||
RETTOKEN(T_STRING);
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING,ST_XHP_IN_TAG>{WHITESPACE} {
|
||||
STEPPOS;
|
||||
return T_WHITESPACE;
|
||||
RETSTEP(T_WHITESPACE);
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING,ST_XHP_IN_TAG>"#"|"//" {
|
||||
@@ -755,14 +722,14 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
yymore();
|
||||
break;
|
||||
default:
|
||||
STEPPOS;
|
||||
STEPPOS(T_COMMENT);
|
||||
yy_pop_state(yyscanner);
|
||||
return T_COMMENT;
|
||||
}
|
||||
}
|
||||
|
||||
<ST_ONE_LINE_COMMENT>{NEWLINE} {
|
||||
STEPPOS;
|
||||
STEPPOS(T_COMMENT);
|
||||
yy_pop_state(yyscanner);
|
||||
return T_COMMENT;
|
||||
}
|
||||
@@ -773,7 +740,7 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
return T_HACK_ERROR;
|
||||
}
|
||||
if (_scanner->aspTags() || yytext[yyleng-2] != '%') {
|
||||
_scanner->setToken(yytext, yyleng-2, yytext, yyleng-2);
|
||||
_scanner->setToken(yytext, yyleng-2, yytext, yyleng-2, T_COMMENT);
|
||||
yyless(yyleng-2);
|
||||
yy_pop_state(yyscanner);
|
||||
return T_COMMENT;
|
||||
@@ -797,13 +764,13 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
}
|
||||
|
||||
<ST_DOC_COMMENT>"*/" {
|
||||
SETTOKEN;
|
||||
SETTOKEN(T_DOC_COMMENT);
|
||||
yy_pop_state(yyscanner);
|
||||
return T_DOC_COMMENT;
|
||||
}
|
||||
|
||||
<ST_COMMENT>"*/" {
|
||||
STEPPOS;
|
||||
STEPPOS(T_COMMENT);
|
||||
yy_pop_state(yyscanner);
|
||||
return T_COMMENT;
|
||||
}
|
||||
@@ -817,7 +784,7 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
}
|
||||
|
||||
<ST_XHP_COMMENT>"-->" {
|
||||
STEPPOS;
|
||||
STEPPOS(T_COMMENT);
|
||||
yy_pop_state(yyscanner);
|
||||
return T_COMMENT;
|
||||
}
|
||||
@@ -831,38 +798,35 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
_scanner->error("Hack mode: ?> not allowed");
|
||||
return T_HACK_ERROR;
|
||||
}
|
||||
STEPPOS;
|
||||
yy_push_state(ST_IN_HTML, yyscanner);
|
||||
if (_scanner->full()) {
|
||||
return T_CLOSE_TAG;
|
||||
RETSTEP(T_CLOSE_TAG);
|
||||
} else {
|
||||
return ';';
|
||||
RETSTEP(';');
|
||||
}
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"</script"{WHITESPACE}*">"{NEWLINE}? {
|
||||
STEPPOS;
|
||||
yy_push_state(ST_IN_HTML, yyscanner);
|
||||
if (_scanner->full()) {
|
||||
return T_CLOSE_TAG;
|
||||
RETSTEP(T_CLOSE_TAG);
|
||||
} else {
|
||||
return ';';
|
||||
RETSTEP(';');
|
||||
}
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>"%>"{NEWLINE}? {
|
||||
if (_scanner->aspTags()) {
|
||||
STEPPOS;
|
||||
yy_push_state(ST_IN_HTML, yyscanner);
|
||||
if (_scanner->full()) {
|
||||
return T_CLOSE_TAG;
|
||||
RETSTEP(T_CLOSE_TAG);
|
||||
} else {
|
||||
return ';';
|
||||
RETSTEP(';');
|
||||
}
|
||||
} else {
|
||||
yyless(1);
|
||||
_scanner->setToken(yytext, 1, yytext, 1);
|
||||
return yytext[0];
|
||||
RETSTEP(yytext[0]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -917,19 +881,17 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
}
|
||||
|
||||
<ST_IN_SCRIPTING>[`] {
|
||||
STEPPOS;
|
||||
STEPPOS('`');
|
||||
BEGIN(ST_BACKQUOTE);
|
||||
return '`';
|
||||
}
|
||||
|
||||
<ST_XHP_IN_TAG>{XHPLABEL} {
|
||||
SETTOKEN;
|
||||
return T_XHP_LABEL;
|
||||
RETTOKEN(T_XHP_LABEL);
|
||||
}
|
||||
|
||||
<ST_XHP_IN_TAG>"=" {
|
||||
STEPPOS;
|
||||
return yytext[0];
|
||||
RETSTEP(yytext[0]);
|
||||
}
|
||||
|
||||
<ST_XHP_IN_TAG>["][^"]*["] {
|
||||
@@ -938,13 +900,13 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
}
|
||||
|
||||
<ST_XHP_IN_TAG>[{] {
|
||||
STEPPOS;
|
||||
STEPPOS('{');
|
||||
yy_push_state(ST_IN_SCRIPTING, yyscanner);
|
||||
return '{';
|
||||
}
|
||||
|
||||
<ST_XHP_IN_TAG>">" {
|
||||
STEPPOS;
|
||||
STEPPOS(T_XHP_TAG_GT);
|
||||
BEGIN(ST_XHP_CHILD);
|
||||
return T_XHP_TAG_GT;
|
||||
}
|
||||
@@ -958,14 +920,14 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
<ST_XHP_IN_TAG>{ANY_CHAR} {
|
||||
// This rule ensures we get a reasonable syntax error message
|
||||
// when unexpected characters occur inside XHP tags
|
||||
STEPPOS;
|
||||
STEPPOS(yytext[0]);
|
||||
_scanner->error("Unexpected character in input: '%c' (ASCII=%d)",
|
||||
yytext[0], yytext[0]);
|
||||
return yytext[0];
|
||||
}
|
||||
|
||||
<ST_XHP_END_SINGLETON_TAG>">" {
|
||||
STEPPOS;
|
||||
STEPPOS(T_XHP_TAG_GT);
|
||||
yy_pop_state(yyscanner);
|
||||
return T_XHP_TAG_GT;
|
||||
}
|
||||
@@ -976,12 +938,11 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
}
|
||||
|
||||
<ST_XHP_CHILD>[^{<]+ {
|
||||
SETTOKEN;
|
||||
return T_XHP_TEXT;
|
||||
RETTOKEN(T_XHP_TEXT);
|
||||
}
|
||||
|
||||
<ST_XHP_CHILD>"{" {
|
||||
STEPPOS;
|
||||
STEPPOS('{');
|
||||
yy_push_state(ST_IN_SCRIPTING, yyscanner);
|
||||
return '{';
|
||||
}
|
||||
@@ -989,28 +950,25 @@ BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
|
||||
<ST_XHP_CHILD>"</" {
|
||||
BEGIN(ST_XHP_END_CLOSE_TAG);
|
||||
yyless(1);
|
||||
STEPPOS;
|
||||
return T_XHP_TAG_LT;
|
||||
RETSTEP(T_XHP_TAG_LT);
|
||||
}
|
||||
|
||||
<ST_XHP_END_CLOSE_TAG>"/" {
|
||||
STEPPOS;
|
||||
return '/';
|
||||
RETSTEP('/');
|
||||
}
|
||||
|
||||
<ST_XHP_END_CLOSE_TAG>{XHPLABEL} {
|
||||
SETTOKEN;
|
||||
return T_XHP_LABEL;
|
||||
RETTOKEN(T_XHP_LABEL);
|
||||
}
|
||||
|
||||
<ST_XHP_END_CLOSE_TAG>">" {
|
||||
STEPPOS;
|
||||
STEPPOS(T_XHP_TAG_GT);
|
||||
yy_pop_state(yyscanner);
|
||||
return T_XHP_TAG_GT;
|
||||
}
|
||||
|
||||
<ST_XHP_CHILD>"<" {
|
||||
STEPPOS;
|
||||
STEPPOS(T_XHP_TAG_LT);
|
||||
yy_push_state(ST_XHP_IN_TAG, yyscanner);
|
||||
return T_XHP_TAG_LT;
|
||||
}
|
||||
@@ -1187,8 +1145,7 @@ doc_scan_done:
|
||||
|
||||
<ST_END_HEREDOC>{LABEL} {
|
||||
BEGIN(ST_IN_SCRIPTING);
|
||||
STEPPOS;
|
||||
return T_END_HEREDOC;
|
||||
RETSTEP(T_END_HEREDOC);
|
||||
}
|
||||
|
||||
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
%{
|
||||
#ifdef TEST_PARSER
|
||||
#ifdef XHPAST2_PARSER
|
||||
#include "hphp/util/parser/xhpast2/parser.h"
|
||||
#elif TEST_PARSER
|
||||
#include "hphp/util/parser/test/parser.h"
|
||||
#else
|
||||
#include "hphp/compiler/parser/parser.h"
|
||||
|
||||
+408
-451
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -85,7 +85,8 @@ void ScannerToken::xhpDecode() {
|
||||
Scanner::Scanner(const char *filename, int type, bool md5 /* = false */)
|
||||
: m_filename(filename), m_stream(nullptr), m_source(nullptr), m_len(0), m_pos(0),
|
||||
m_state(Start), m_type(type), m_yyscanner(nullptr), m_token(nullptr),
|
||||
m_loc(nullptr), m_lastToken(-1), m_isHackMode(0), m_lookaheadLtDepth(0) {
|
||||
m_loc(nullptr), m_lastToken(-1), m_isHackMode(0), m_lookaheadLtDepth(0),
|
||||
m_listener(nullptr) {
|
||||
m_stream = new std::ifstream(filename);
|
||||
m_streamOwner = true;
|
||||
if (m_stream->fail()) {
|
||||
@@ -101,7 +102,8 @@ Scanner::Scanner(std::istream &stream, int type,
|
||||
bool md5 /* = false */)
|
||||
: m_filename(fileName), m_source(nullptr), m_len(0), m_pos(0),
|
||||
m_state(Start), m_type(type), m_yyscanner(nullptr), m_token(nullptr),
|
||||
m_loc(nullptr), m_lastToken(-1), m_isHackMode(0), m_lookaheadLtDepth(0) {
|
||||
m_loc(nullptr), m_lastToken(-1), m_isHackMode(0), m_lookaheadLtDepth(0),
|
||||
m_listener(nullptr) {
|
||||
m_stream = &stream;
|
||||
m_streamOwner = false;
|
||||
if (md5) computeMd5();
|
||||
@@ -113,7 +115,7 @@ Scanner::Scanner(const char *source, int len, int type,
|
||||
: m_filename(fileName), m_stream(nullptr), m_source(source), m_len(len),
|
||||
m_pos(0), m_state(Start), m_type(type), m_yyscanner(nullptr),
|
||||
m_token(nullptr), m_loc(nullptr), m_lastToken(-1), m_isHackMode(0),
|
||||
m_lookaheadLtDepth(0) {
|
||||
m_lookaheadLtDepth(0), m_listener(nullptr) {
|
||||
assert(m_source);
|
||||
m_streamOwner = false;
|
||||
if (md5) {
|
||||
@@ -147,12 +149,12 @@ Scanner::~Scanner() {
|
||||
}
|
||||
}
|
||||
|
||||
void Scanner::setHashBang(const char *rawText, int rawLeng) {
|
||||
void Scanner::setHashBang(const char *rawText, int rawLeng, int type) {
|
||||
if (m_type & ReturnAllTokens) {
|
||||
setToken(rawText, rawLeng);
|
||||
} else {
|
||||
m_token->setText("", 0);
|
||||
incLoc(rawText, rawLeng);
|
||||
incLoc(rawText, rawLeng, type);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -487,9 +489,12 @@ void Scanner::warn(const char* fmt, ...) {
|
||||
m_filename.c_str(), m_loc->line0, m_loc->char0);
|
||||
}
|
||||
|
||||
void Scanner::incLoc(const char *rawText, int rawLeng) {
|
||||
void Scanner::incLoc(const char *rawText, int rawLeng, int type) {
|
||||
assert(rawText);
|
||||
assert(rawLeng > 0);
|
||||
if (m_listener) {
|
||||
m_token->setID(m_listener->publish(rawText, rawLeng, type));
|
||||
}
|
||||
|
||||
m_loc->cursor += rawLeng;
|
||||
|
||||
|
||||
@@ -29,8 +29,8 @@ typedef int TokenID;
|
||||
|
||||
class ScannerToken {
|
||||
public:
|
||||
ScannerToken() : m_num(0), m_check(false) {}
|
||||
void reset() { m_num = 0; m_text.clear();}
|
||||
ScannerToken() : m_num(0), m_check(false), m_id(-1) {}
|
||||
void reset() { m_num = 0; m_text.clear(); m_id = -1; }
|
||||
|
||||
TokenID num() const { return m_num;}
|
||||
void setNum(TokenID num) {
|
||||
@@ -50,6 +50,7 @@ public:
|
||||
void operator=(ScannerToken &other) {
|
||||
m_num = other.m_num;
|
||||
m_text = other.m_text;
|
||||
m_id = other.m_id;
|
||||
}
|
||||
|
||||
const std::string &text() const {
|
||||
@@ -76,6 +77,12 @@ public:
|
||||
void setCheck() {
|
||||
m_check = true;
|
||||
}
|
||||
void setID(int id) {
|
||||
m_id = id;
|
||||
}
|
||||
int ID() {
|
||||
return m_id;
|
||||
}
|
||||
|
||||
void xhpLabel(bool prefix = true);
|
||||
bool htmlTrim(); // true if non-empty after trimming
|
||||
@@ -85,6 +92,7 @@ protected:
|
||||
TokenID m_num; // internal token id
|
||||
std::string m_text;
|
||||
bool m_check;
|
||||
int m_id;
|
||||
};
|
||||
|
||||
struct LookaheadToken {
|
||||
@@ -167,6 +175,11 @@ struct TokenStore {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Observer interface for raw scanner tokens.
//
// Scanner::incLoc() calls publish() with the raw text, its length and the
// token type whenever a listener is registered, and stores the returned value
// as the current token's ID.
struct TokenListener {
  virtual int publish(const char *rawText, int rawLeng, int type) = 0;
  virtual ~TokenListener() = default;
};
|
||||
|
||||
class Scanner {
|
||||
public:
|
||||
enum Type {
|
||||
@@ -182,6 +195,7 @@ public:
|
||||
bool md5 = false);
|
||||
Scanner(const char *source, int len, int type, const char *fileName = "",
|
||||
bool md5 = false);
|
||||
void setListener(TokenListener *listener) { m_listener = listener; }
|
||||
~Scanner();
|
||||
|
||||
const std::string &getMd5() const {
|
||||
@@ -221,26 +235,26 @@ public:
|
||||
bool aspTags() const { return m_type & AllowAspTags;}
|
||||
bool full() const { return m_type & ReturnAllTokens;}
|
||||
int lastToken() const { return m_lastToken;}
|
||||
void setToken(const char *rawText, int rawLeng) {
|
||||
void setToken(const char *rawText, int rawLeng, int type = -1) {
|
||||
m_token->setText(rawText, rawLeng);
|
||||
incLoc(rawText, rawLeng);
|
||||
incLoc(rawText, rawLeng, type);
|
||||
}
|
||||
void stepPos(const char *rawText, int rawLeng) {
|
||||
void stepPos(const char *rawText, int rawLeng, int type = -1) {
|
||||
if (m_type & ReturnAllTokens) {
|
||||
m_token->setText(rawText, rawLeng);
|
||||
}
|
||||
incLoc(rawText, rawLeng);
|
||||
incLoc(rawText, rawLeng, type);
|
||||
}
|
||||
void setToken(const char *rawText, int rawLeng,
|
||||
const char *ytext, int yleng) {
|
||||
const char *ytext, int yleng, int type = -1) {
|
||||
if (m_type & ReturnAllTokens) {
|
||||
m_token->setText(rawText, rawLeng);
|
||||
} else {
|
||||
m_token->setText(ytext, yleng);
|
||||
}
|
||||
incLoc(rawText, rawLeng);
|
||||
incLoc(rawText, rawLeng, type);
|
||||
}
|
||||
void setHashBang(const char *rawText, int rawLeng);
|
||||
void setHashBang(const char *rawText, int rawLeng, int type = -1);
|
||||
// also used for YY_FATAL_ERROR in hphp.x
|
||||
void error(const char* fmt, ...) ATTRIBUTE_PRINTF(2,3);
|
||||
void warn(const char* fmt, ...) ATTRIBUTE_PRINTF(2,3);
|
||||
@@ -335,11 +349,12 @@ private:
|
||||
|
||||
// fields for XHP parsing
|
||||
int m_lastToken;
|
||||
void incLoc(const char *rawText, int rawLeng);
|
||||
void incLoc(const char *rawText, int rawLeng, int type);
|
||||
bool m_isHackMode;
|
||||
|
||||
TokenStore m_lookahead;
|
||||
int m_lookaheadLtDepth;
|
||||
TokenListener *m_listener;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
xhpast2 Parser Design
|
||||
|
||||
This file discusses the design decisions made to produce xhpast-compatible
|
||||
output from the HPHP parser. Most of the features of the design have
|
||||
to do with the impedance mismatch between the HPHP parser and xhpast.
|
||||
|
||||
Specifically:
|
||||
|
||||
1. xhpast outputs a byte-accurate token stream but HPHP does not.
|
||||
|
||||
This is natural since HPHP is more concerned with executing PHP, not linting it.
|
||||
Therefore it is necessary for us to modify the HPHP parser so we can intercept
|
||||
and accumulate tokens as they are seen and associate them with the relevant
|
||||
parse tree node. There is not a 1:1 correspondence between the xhpast and HPHP
|
||||
tokenizers so some massaging is necessary.
|
||||
|
||||
|
||||
2. HPHP nodes are generally at a semantically higher level than xhpast nodes.
|
||||
xhpast nodes do not carry any attributes other than node type, pointers to
|
||||
the range of tokens in the token stream corresponding to that node + a list of
|
||||
children. HPHP parse tree nodes are more condensed than xhpast nodes, often
|
||||
choosing to represent features of a node as attributes instead of children.
|
||||
For example:
|
||||
|
||||
$x = &$a;
|
||||
|
||||
The HPHP parser callback is:
|
||||
|
||||
void onAssign(Token& out, Token& var, Token& expr, bool ref, bool rhsFirst = false)
|
||||
|
||||
Notice that '=' and '&' are not represented as tokens. The '=' is implicit in
|
||||
the function call, and the optional '&' is represented as a bool.
|
||||
|
||||
The xhpast tree structure for the same expression is:
|
||||
|
||||
[n_BINARY_EXPRESSION ...
|
||||
[n_VARIABLE ... // $x
|
||||
[n_OPERATOR ... // =
|
||||
[n_VARIABLE_REFERENCE ... // &
|
||||
[n_VARIABLE ... // $a
|
||||
|
||||
As a result it is necessary to do a small bit of manual parsing to identify the
|
||||
location of the = and & in the token stream and create nodes for them.
|
||||
|
||||
There are also situations where the opposite is true. For example strings with
|
||||
embedded variables like "foo {$x} {$y}" generate additional nodes for $x and $y
|
||||
but xhpast treats the entirety as a single string node. These cases are easier to
|
||||
handle since we can simply prune or combine nodes we don't care about.
|
||||
|
||||
IDEAL DESIGN
|
||||
|
||||
In an ideal world, the HPHP parser would be augmented to provide a superset of the
|
||||
information required for all other parsers that we have (including Hack and pfff),
|
||||
then the other parsers would be trivial (or at least easy to derive from the HPHP
|
||||
parser).
|
||||
|
||||
However, I didn't feel like making big intrusive changes to the HPHP parser. The ideal
|
||||
design might make sense at some future point in time.
|
||||
|
||||
ACTUAL DESIGN
|
||||
|
||||
Given that I wanted to avoid intrusive changes to the HPHP parser, I elected to build
|
||||
a framework that would most flexibly handle the differences listed above, plus any that
|
||||
I had perhaps not discovered yet or that might arise in the future. Thus I elected to implement the
|
||||
transformation as a batch process, that is, first build a clean HPHP AST + token stream,
|
||||
then transform it to an xhpast-compatible AST. Specific changes include:
|
||||
|
||||
1. Adding a TokenListener facility to the parser to eavesdrop on tokens as they fly by. See
|
||||
util/parser/scanner.h. In addition to eavesdropping on tokens we also want the token ids
|
||||
that are returned by the scanner (this was not previously captured by HPHP tokens). This
|
||||
has been accomplished by modifying scanner rules to also pass the token id, such as
|
||||
T_WHITESPACE, whenever we notify the scanner that a token has been detected.
|
||||
|
||||
2. Constructing a new lightweight AST that purely captures the rules of the parser. See
|
||||
util/parser/xhpast2/parser.h. Due to the higher semantic level of the HPHP parser, these
|
||||
AST nodes need to contain arbitrary scalar attributes in addition to a list of children.
|
||||
This has been implemented via the various "ExtraInfo" structs in that file. For example,
|
||||
the extra arguments necessary for the onName parser callback are stored in the OnNameEI
|
||||
struct.
|
||||
|
||||
The high level flow is found in xhpast2.cpp and is pretty simple. The only thing that might
|
||||
be non-obvious at first glance is that when you call parser.parse(), what is actually
|
||||
invoked are the parse rules in hphp.y, which in turn call each callback method as they
|
||||
fire.
|
||||
|
||||
Once the tree is built we transform it to xhpast nodes via outputXHPAST() (with heavy
|
||||
lifting done by outputXHPASTImpl). The heart of outputXHPASTImpl is a giant switch that
|
||||
processes each node type differently. It would have been more object-oriented to make a
|
||||
class for each node and have each node know how to transform itself to xhpast but I was
|
||||
concerned that some of the transformations might require peeking up and down the hierarchy
|
||||
and break this nice abstraction anyway. Also, I didn't want to create an army of classes.
|
||||
Still, I would not be averse to going in this direction if it can be done elegantly.
|
||||
@@ -0,0 +1,160 @@
|
||||
/*
|
||||
* Copyright 2011 Facebook, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <list>
|
||||
#include <string>
|
||||
|
||||
#include "node_names.hpp"
|
||||
|
||||
#define NNEW(t) \
|
||||
(new xhpast::Node(t))
|
||||
|
||||
#define NTYPE(n, type) \
|
||||
((n)->setType(type))
|
||||
|
||||
#define NMORE(n, end) \
|
||||
((n)->setEnd(end))
|
||||
|
||||
#define NSPAN(n, type, end) \
|
||||
(NMORE(NTYPE((n), type), end))
|
||||
|
||||
#define NLMORE(n, begin) \
|
||||
((n)->setBegin(begin))
|
||||
|
||||
#define NEXPAND(l, n, r) \
|
||||
((n)->setBegin(l)->setEnd(r))
|
||||
|
||||
|
||||
namespace xhpast {
|
||||
|
||||
class Token;
|
||||
typedef std::list<Token *> token_list_t;
|
||||
|
||||
// One entry of the concrete token stream: the scanner's token id, the raw
// text of the token, and a caller-supplied number `n`.
class Token {

public:
  unsigned int type;
  std::string value;
  // unsigned int lineno;
  unsigned int n;

  // `value` is copied. A null pointer is treated as the empty string, since
  // constructing a std::string from a null char pointer is undefined.
  Token(unsigned int type, const char *value, unsigned int n) :
    type(type),
    value(value ? value : ""),
    n(n) {
  }
};
|
||||
|
||||
class Node;
typedef std::list<Node *> node_list_t;

// A node of the xhpast-style output tree.
//
// A node carries a numeric type, the inclusive range [l_tok, r_tok] of token
// indices it covers (-1 means "not set yet"), and an ordered list of child
// pointers. Children are stored as raw pointers and are never deleted here.
class Node {
public:
  unsigned int type;

  int l_tok;
  int r_tok;

  node_list_t children;

  Node() : type(0), l_tok(-1), r_tok(-1) {}

  explicit Node(unsigned int type) : type(type), l_tok(-1), r_tok(-1) {}

  // Node covering exactly one token.
  Node(unsigned int type, int end_tok) :
    type(type),
    l_tok(end_tok),
    r_tok(end_tok) {
  }

  Node(unsigned int type, int l_tok, int r_tok) :
    type(type),
    l_tok(l_tok),
    r_tok(r_tok) {
  }

  // Adds `node` as the last child and widens this node's range to the right.
  Node *appendChild(Node *node) {
    children.push_back(node);
    return setEnd(node);
  }

  // Adds `node` as the first child and widens this node's range to the left.
  Node *prependChild(Node *node) {
    children.push_front(node);
    return setBegin(node);
  }

  // Appends every child of `node` (not `node` itself) to this node.
  Node *appendChildren(Node *node) {
    // Walk only the children present on entry. When node == this the list
    // grows while we iterate, and comparing against end() would never stop.
    node_list_t::size_type remaining = node->children.size();
    for (node_list_t::iterator ii = node->children.begin();
         remaining > 0; ++ii, --remaining) {
      children.push_back(*ii);
      setEnd(*ii);
    }
    return this;
  }

  // First child, or nullptr when there are no children.
  Node *firstChild() {
    return children.empty() ? nullptr : children.front();
  }

  Node *setType(unsigned int t) {
    type = t;
    return this;
  }

  // Extends the right edge to cover `n`; also adopts n's left edge if this
  // node has none yet. Terminates the process if `n` is null.
  Node *setEnd(Node *n) {
    if (!n) {
      fprintf(stderr,
        "Trying to setEnd() a null node to one of type %d\n",
        this->type);
      exit(1);
    }

    if (n->r_tok != -1 && (r_tok == -1 || n->r_tok > r_tok)) {
      r_tok = n->r_tok;
    }
    if (l_tok == -1) {
      l_tok = n->l_tok;
    }
    return this;
  }

  // Extends the left edge to cover `n`; also adopts n's right edge if this
  // node has none yet. Terminates the process if `n` is null.
  Node *setBegin(Node *n) {
    if (!n) {
      fprintf(stderr,
        "Trying to setBegin() a null node to one of type %d\n",
        this->type);
      exit(1);
    }

    if (n->l_tok != -1 && (l_tok == -1 || n->l_tok < l_tok)) {
      l_tok = n->l_tok;
    }
    if (r_tok == -1) {
      r_tok = n->r_tok;
    }
    return this;
  }

};
|
||||
}
|
||||
Arquivo executável
+24
@@ -0,0 +1,24 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
import fileinput
|
||||
import sys
|
||||
|
||||
# Current indentation in spaces. Starts at -2 so that the first '[' is
# written at column 0.
indent = -2


def process(c):
    """Write one input character to stdout, breaking lines at brackets.

    Each '[' starts a new line indented two spaces deeper than the enclosing
    bracket; each ']' is written as-is and then steps the indentation back.
    All other characters are passed through unchanged.
    """
    global indent
    if c == '[':
        indent = indent + 2
        sys.stdout.write('\n')
        sys.stdout.write(' ' * indent)
    sys.stdout.write(c)
    if c == ']':
        indent = indent - 2
|
||||
|
||||
# Feed every character of the input through process(). fileinput.input()
# reads the files named on the command line, or stdin when none are given.
for line in fileinput.input():
    for c in line:
        process(c)
|
||||
@@ -0,0 +1,128 @@
|
||||
#ifndef incl_HPHP_UTIL_PARSER_XHPAST2_NODE_NAMES_H_
|
||||
#define incl_HPHP_UTIL_PARSER_XHPAST2_NODE_NAMES_H_
|
||||
|
||||
#define n_PROGRAM 9000
|
||||
#define n_SYMBOL_NAME 9001
|
||||
#define n_HALT_COMPILER 9002
|
||||
#define n_NAMESPACE 9003
|
||||
#define n_STATEMENT 9004
|
||||
#define n_EMPTY 9005
|
||||
#define n_STATEMENT_LIST 9006
|
||||
#define n_OPEN_TAG 9007
|
||||
#define n_CLOSE_TAG 9008
|
||||
#define n_USE_LIST 9009
|
||||
#define n_USE 9010
|
||||
#define n_CONSTANT_DECLARATION_LIST 9011
|
||||
#define n_CONSTANT_DECLARATION 9012
|
||||
#define n_STRING 9013
|
||||
#define n_LABEL 9014
|
||||
#define n_CONDITION_LIST 9015
|
||||
#define n_CONTROL_CONDITION 9016
|
||||
#define n_IF 9017
|
||||
#define n_ELSEIF 9018
|
||||
#define n_ELSE 9019
|
||||
#define n_WHILE 9020
|
||||
#define n_DO_WHILE 9021
|
||||
#define n_FOR 9022
|
||||
#define n_FOR_EXPRESSION 9023
|
||||
#define n_SWITCH 9024
|
||||
#define n_BREAK 9025
|
||||
#define n_CONTINUE 9026
|
||||
#define n_RETURN 9027
|
||||
#define n_GLOBAL_DECLARATION_LIST 9028
|
||||
#define n_GLOBAL_DECLARATION 9029
|
||||
#define n_STATIC_DECLARATION_LIST 9030
|
||||
#define n_STATIC_DECLARATION 9031
|
||||
#define n_ECHO_LIST 9032
|
||||
#define n_ECHO 9033
|
||||
#define n_INLINE_HTML 9034
|
||||
#define n_UNSET_LIST 9035
|
||||
#define n_UNSET 9036
|
||||
#define n_FOREACH 9037
|
||||
#define n_FOREACH_EXPRESSION 9038
|
||||
#define n_THROW 9039
|
||||
#define n_GOTO 9040
|
||||
#define n_TRY 9041
|
||||
#define n_CATCH_LIST 9042
|
||||
#define n_CATCH 9043
|
||||
#define n_DECLARE 9044
|
||||
#define n_DECLARE_DECLARATION_LIST 9045
|
||||
#define n_DECLARE_DECLARATION 9046
|
||||
#define n_VARIABLE 9047
|
||||
#define n_REFERENCE 9048
|
||||
#define n_VARIABLE_REFERENCE 9049
|
||||
#define n_FUNCTION_DECLARATION 9050
|
||||
#define n_CLASS_DECLARATION 9051
|
||||
#define n_CLASS_ATTRIBUTES 9052
|
||||
#define n_EXTENDS 9053
|
||||
#define n_EXTENDS_LIST 9054
|
||||
#define n_IMPLEMENTS_LIST 9055
|
||||
#define n_INTERFACE_DECLARATION 9056
|
||||
#define n_CASE 9057
|
||||
#define n_DEFAULT 9058
|
||||
#define n_DECLARATION_PARAMETER_LIST 9059
|
||||
#define n_DECLARATION_PARAMETER 9060
|
||||
#define n_TYPE_NAME 9061
|
||||
#define n_VARIABLE_VARIABLE 9062
|
||||
#define n_CLASS_MEMBER_DECLARATION_LIST 9063
|
||||
#define n_CLASS_MEMBER_DECLARATION 9064
|
||||
#define n_CLASS_CONSTANT_DECLARATION_LIST 9065
|
||||
#define n_CLASS_CONSTANT_DECLARATION 9066
|
||||
#define n_METHOD_DECLARATION 9067
|
||||
#define n_METHOD_MODIFIER_LIST 9068
|
||||
#define n_FUNCTION_MODIFIER_LIST 9069
|
||||
#define n_CLASS_MEMBER_MODIFIER_LIST 9070
|
||||
#define n_EXPRESSION_LIST 9071
|
||||
#define n_LIST 9072
|
||||
#define n_ASSIGNMENT 9073
|
||||
#define n_NEW 9074
|
||||
#define n_UNARY_PREFIX_EXPRESSION 9075
|
||||
#define n_UNARY_POSTFIX_EXPRESSION 9076
|
||||
#define n_BINARY_EXPRESSION 9077
|
||||
#define n_TERNARY_EXPRESSION 9078
|
||||
#define n_CAST_EXPRESSION 9079
|
||||
#define n_CAST 9080
|
||||
#define n_OPERATOR 9081
|
||||
#define n_ARRAY_LITERAL 9082
|
||||
#define n_EXIT_EXPRESSION 9083
|
||||
#define n_BACKTICKS_EXPRESSION 9084
|
||||
#define n_LEXICAL_VARIABLE_LIST 9085
|
||||
#define n_NUMERIC_SCALAR 9086
|
||||
#define n_STRING_SCALAR 9087
|
||||
#define n_MAGIC_SCALAR 9088
|
||||
#define n_CLASS_STATIC_ACCESS 9089
|
||||
#define n_CLASS_NAME 9090
|
||||
#define n_MAGIC_CLASS_KEYWORD 9091
|
||||
#define n_OBJECT_PROPERTY_ACCESS 9092
|
||||
#define n_ARRAY_VALUE_LIST 9093
|
||||
#define n_ARRAY_VALUE 9094
|
||||
#define n_CALL_PARAMETER_LIST 9095
|
||||
#define n_VARIABLE_EXPRESSION 9096
|
||||
#define n_INCLUDE_FILE 9097
|
||||
#define n_HEREDOC 9098
|
||||
#define n_FUNCTION_CALL 9099
|
||||
#define n_INDEX_ACCESS 9100
|
||||
#define n_ASSIGNMENT_LIST 9101
|
||||
#define n_METHOD_CALL 9102
|
||||
#define n_XHP_TAG 9103
|
||||
#define n_XHP_TAG_OPEN 9104
|
||||
#define n_XHP_TAG_CLOSE 9105
|
||||
#define n_XHP_TEXT 9106
|
||||
#define n_XHP_EXPRESSION 9107
|
||||
#define n_XHP_ATTRIBUTE_LIST 9108
|
||||
#define n_XHP_ATTRIBUTE 9109
|
||||
#define n_XHP_LITERAL 9110
|
||||
#define n_XHP_ATTRIBUTE_LITERAL 9111
|
||||
#define n_XHP_ATTRIBUTE_EXPRESSION 9112
|
||||
#define n_XHP_NODE_LIST 9113
|
||||
#define n_CONCATENATION_LIST 9114
|
||||
#define n_PARENTHETICAL_EXPRESSION 9115
|
||||
#define n_YIELD 9116
|
||||
#define n_YIELD_EXPRESSION 9117
|
||||
#define n_TRAIT_DECLARATION 9118
|
||||
#define n_USE_TRAIT_DECLARATION 9119
|
||||
#define n_USE_TRAIT_LIST 9120
|
||||
#define n_USE_TRAIT_RESOLUTION 9121
|
||||
#define n_USE_TRAIT_RESOLUTION_LIST 9122
|
||||
|
||||
#endif
|
||||
Diferenças do arquivo suprimidas por serem muito extensas
Carregar Diff
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| HipHop for PHP |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 3.01 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available through the world-wide-web at the following url: |
|
||||
| http://www.php.net/license/3_01.txt |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <string.h>
|
||||
|
||||
#include "hphp/util/parser/xhpast2/parser.h"
|
||||
|
||||
namespace HPHP { namespace HPHP_PARSER_NS {
|
||||
|
||||
bool g_verifyMode = false;
|
||||
|
||||
}}
|
||||
|
||||
// Writes `node` and its descendants to stdout as nested JSON arrays.
//
// A node whose left token is unset (-1) is written as "[type]" and its
// children are not emitted. Any other node is written as
// "[type, l_tok, r_tok]" with a trailing ", [child,child,...]" element when
// it has children.
void print_node(xhpast::Node *node) {
  if (node->l_tok == -1) {
    printf("[%d]", node->type);
    return;
  }

  printf("[%d, %d, %d", node->type, node->l_tok, node->r_tok);

  if (!node->children.empty()) {
    printf(", [");
    bool needComma = false;
    for (xhpast::node_list_t::iterator child = node->children.begin();
         child != node->children.end(); ++child) {
      if (needComma) {
        printf(",");
      }
      print_node(*child);
      needComma = true;
    }
    printf("]");
  }

  printf("]");
}
|
||||
|
||||
/*
|
||||
* This program parses a file with the hphp php parser, and dumps
|
||||
* every callback the parser makes to stdout.
|
||||
*
|
||||
* If a parse error occurs, it says why.
|
||||
*/
|
||||
int main(int argc, char** argv) try {
|
||||
if (argc >= 2 && !strcmp(argv[1], "--verify")) {
|
||||
HPHP::XHPAST2::g_verifyMode = true;
|
||||
--argc, ++argv;
|
||||
}
|
||||
|
||||
if (argc != 2) {
|
||||
std::cerr << "usage: " << argv[0] << " [--verify] filename\n";
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
std::ifstream in(argv[1]);
|
||||
if (!in.is_open()) {
|
||||
std::cerr << argv[0] << ": couldn't open file: "
|
||||
<< strerror(errno) << '\n';
|
||||
}
|
||||
|
||||
std::cout << "1..1\n";
|
||||
|
||||
try {
|
||||
using HPHP::Scanner;
|
||||
using HPHP::XHPAST2::Parser;
|
||||
Scanner scan(in, Scanner::AllowShortTags);
|
||||
Parser parser(scan, argv[1]);
|
||||
parser.parse();
|
||||
parser.coalesceTree();
|
||||
std::cout << parser.tree << std::endl;
|
||||
xhpast::Node* root = parser.outputXHPAST();
|
||||
std::vector<xhpast::Token *>* tokens = &(parser.m_listener.tokens);
|
||||
printf("{");
|
||||
printf("\"tree\":");
|
||||
if (root) {
|
||||
// Extend the right token for the root node to the end of the concrete
|
||||
// token stream. This ensure all tokens appear in the tree. If we don't
|
||||
// do this and the file ends in tokens which don't go to the parser (like
|
||||
// comments and whitespace) they won't be represented in the tree.
|
||||
root->r_tok = (tokens->size() - 1);
|
||||
print_node(root);
|
||||
} else {
|
||||
printf("null");
|
||||
}
|
||||
printf(",");
|
||||
printf("\"stream\":");
|
||||
printf("[");
|
||||
|
||||
for (std::vector<xhpast::Token *>::iterator ii = tokens->begin();;) {
|
||||
printf("[%d, %d]", (*ii)->type, (int)(*ii)->value.length());
|
||||
if (++ii != tokens->end()) {
|
||||
printf(",");
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
printf("]");
|
||||
printf("}\n");
|
||||
} catch (const std::exception& e) {
|
||||
if (HPHP::XHPAST2::g_verifyMode) {
|
||||
std::cout << "not ";
|
||||
} else {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
std::cout << "ok 1\n";
|
||||
}
|
||||
|
||||
catch (const std::runtime_error& e) {
|
||||
std::cerr << argv[0] << ": " << e.what() << '\n';
|
||||
return 1;
|
||||
}
|
||||
Referência em uma Nova Issue
Bloquear um usuário