Initial rough draft of xhpast2, a replacement for xhpast

The goal of this diff is to clean up the current work in progress and check it in so that others can contribute if they have time. For ease of debugging, the program currently outputs both the HHVM tree and the xhpast-like JSON. A small helper script, jsonpretty.py, makes the trees more readable, e.g.: _build/dbg/hphp/util/parser/xhpast/xhpast2 <some php file> | hphp/util/parser/xhpast/xhpast2/jsonpretty.py
2013-06-07 18:51:36 -07:00
commit 75397a10c7
@@ -27,12 +27,15 @@
 #define RESET_YYCURSOR yyg->yy_hold_char = *YYCURSOR; *YYCURSOR = '\0';

 // macros for rules
-#define SETTOKEN _scanner->setToken(yytext, yyleng)
-#define STEPPOS  _scanner->stepPos(yytext, yyleng)
+#define RETTOKEN(t) do {const int tok_ = (t); /* evaluate t exactly once */ \
+  _scanner->setToken(yytext, yyleng, tok_); return tok_;} while (0)
+#define RETSTEP(t)  do {const int tok_ = (t); /* evaluate t exactly once */ \
+  _scanner->stepPos(yytext, yyleng, tok_); return tok_;} while (0)
+#define SETTOKEN(t) _scanner->setToken(yytext, yyleng, (t))
+#define STEPPOS(t)  _scanner->stepPos(yytext, yyleng, (t))

-#define HH_ONLY_KEYWORD(tok) do {                             \
-  SETTOKEN;                                                   \
-  return _scanner->hipHopSyntaxEnabled() ? tok : T_STRING;    \
+#define HH_ONLY_KEYWORD(tok) do {                                \
+  RETTOKEN(_scanner->hipHopSyntaxEnabled() ? (tok) : T_STRING); \
 } while (0)

 #define IS_LABEL_START(c) \
@@ -211,65 +214,64 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})

 %%

-<ST_IN_SCRIPTING>"exit"                 { SETTOKEN; return T_EXIT;}
-<ST_IN_SCRIPTING>"die"                  { SETTOKEN; return T_EXIT;}
-<ST_IN_SCRIPTING>"function"             { SETTOKEN; return T_FUNCTION;}
-<ST_IN_SCRIPTING>"const"                { SETTOKEN; return T_CONST;}
-<ST_IN_SCRIPTING>"return"               { SETTOKEN; return T_RETURN;}
-<ST_IN_SCRIPTING>"yield"                { SETTOKEN; return T_YIELD;}
-<ST_IN_SCRIPTING>"try"                  { SETTOKEN; return T_TRY;}
-<ST_IN_SCRIPTING>"catch"                { SETTOKEN; return T_CATCH;}
-<ST_IN_SCRIPTING>"finally"              { SETTOKEN; return T_FINALLY;}
-<ST_IN_SCRIPTING>"throw"                { SETTOKEN; return T_THROW;}
-<ST_IN_SCRIPTING>"if"                   { SETTOKEN; return T_IF;}
-<ST_IN_SCRIPTING>"elseif"               { SETTOKEN; return T_ELSEIF;}
-<ST_IN_SCRIPTING>"endif"                { SETTOKEN; return T_ENDIF;}
-<ST_IN_SCRIPTING>"else"                 { SETTOKEN; return T_ELSE;}
-<ST_IN_SCRIPTING>"while"                { SETTOKEN; return T_WHILE;}
-<ST_IN_SCRIPTING>"endwhile"             { SETTOKEN; return T_ENDWHILE;}
-<ST_IN_SCRIPTING>"do"                   { SETTOKEN; return T_DO;}
-<ST_IN_SCRIPTING>"for"                  { SETTOKEN; return T_FOR;}
-<ST_IN_SCRIPTING>"endfor"               { SETTOKEN; return T_ENDFOR;}
-<ST_IN_SCRIPTING>"foreach"              { SETTOKEN; return T_FOREACH;}
-<ST_IN_SCRIPTING>"endforeach"           { SETTOKEN; return T_ENDFOREACH;}
-<ST_IN_SCRIPTING>"declare"              { SETTOKEN; return T_DECLARE;}
-<ST_IN_SCRIPTING>"enddeclare"           { SETTOKEN; return T_ENDDECLARE;}
-<ST_IN_SCRIPTING>"instanceof"           { SETTOKEN; return T_INSTANCEOF;}
-<ST_IN_SCRIPTING>"as"                   { SETTOKEN; return T_AS;}
-<ST_IN_SCRIPTING>"switch"               { SETTOKEN; return T_SWITCH;}
-<ST_IN_SCRIPTING>"endswitch"            { SETTOKEN; return T_ENDSWITCH;}
-<ST_IN_SCRIPTING>"case"                 { SETTOKEN; return T_CASE;}
-<ST_IN_SCRIPTING>"default"              { SETTOKEN; return T_DEFAULT;}
-<ST_IN_SCRIPTING>"break"                { SETTOKEN; return T_BREAK;}
-<ST_IN_SCRIPTING>"continue"             { SETTOKEN; return T_CONTINUE;}
-<ST_IN_SCRIPTING>"goto"                 { SETTOKEN; return T_GOTO;}
-<ST_IN_SCRIPTING>"echo"                 { SETTOKEN; return T_ECHO;}
-<ST_IN_SCRIPTING>"print"                { SETTOKEN; return T_PRINT;}
-<ST_IN_SCRIPTING>"class"                { SETTOKEN; return T_CLASS;}
-<ST_IN_SCRIPTING>"interface"            { SETTOKEN; return T_INTERFACE;}
-<ST_IN_SCRIPTING>"trait"                { SETTOKEN; return T_TRAIT;}
-<ST_IN_SCRIPTING>"insteadof"            { SETTOKEN; return T_INSTEADOF;}
-<ST_IN_SCRIPTING>"extends"              { SETTOKEN; return T_EXTENDS;}
-<ST_IN_SCRIPTING>"implements"           { SETTOKEN; return T_IMPLEMENTS;}
-<ST_IN_SCRIPTING>"attribute"            { SETTOKEN; return T_XHP_ATTRIBUTE;}
-<ST_IN_SCRIPTING>"category"             { SETTOKEN; return T_XHP_CATEGORY;}
-<ST_IN_SCRIPTING>"children"             { SETTOKEN; return T_XHP_CHILDREN;}
-<ST_IN_SCRIPTING>"required"             { SETTOKEN; return T_XHP_REQUIRED;}
-<ST_IN_SCRIPTING>"enum"                 { SETTOKEN; return T_XHP_ENUM;}
+<ST_IN_SCRIPTING>"exit"                 { RETTOKEN(T_EXIT);}
+<ST_IN_SCRIPTING>"die"                  { RETTOKEN(T_EXIT);}
+<ST_IN_SCRIPTING>"function"             { RETTOKEN(T_FUNCTION);}
+<ST_IN_SCRIPTING>"const"                { RETTOKEN(T_CONST);}
+<ST_IN_SCRIPTING>"return"               { RETTOKEN(T_RETURN); }
+<ST_IN_SCRIPTING>"yield"                { RETTOKEN(T_YIELD);}
+<ST_IN_SCRIPTING>"try"                  { RETTOKEN(T_TRY);}
+<ST_IN_SCRIPTING>"catch"                { RETTOKEN(T_CATCH);}
+<ST_IN_SCRIPTING>"finally"              { RETTOKEN(T_FINALLY);}
+<ST_IN_SCRIPTING>"throw"                { RETTOKEN(T_THROW);}
+<ST_IN_SCRIPTING>"if"                   { RETTOKEN(T_IF);}
+<ST_IN_SCRIPTING>"elseif"               { RETTOKEN(T_ELSEIF);}
+<ST_IN_SCRIPTING>"endif"                { RETTOKEN(T_ENDIF);}
+<ST_IN_SCRIPTING>"else"                 { RETTOKEN(T_ELSE);}
+<ST_IN_SCRIPTING>"while"                { RETTOKEN(T_WHILE);}
+<ST_IN_SCRIPTING>"endwhile"             { RETTOKEN(T_ENDWHILE);}
+<ST_IN_SCRIPTING>"do"                   { RETTOKEN(T_DO);}
+<ST_IN_SCRIPTING>"for"                  { RETTOKEN(T_FOR);}
+<ST_IN_SCRIPTING>"endfor"               { RETTOKEN(T_ENDFOR);}
+<ST_IN_SCRIPTING>"foreach"              { RETTOKEN(T_FOREACH);}
+<ST_IN_SCRIPTING>"endforeach"           { RETTOKEN(T_ENDFOREACH);}
+<ST_IN_SCRIPTING>"declare"              { RETTOKEN(T_DECLARE);}
+<ST_IN_SCRIPTING>"enddeclare"           { RETTOKEN(T_ENDDECLARE);}
+<ST_IN_SCRIPTING>"instanceof"           { RETTOKEN(T_INSTANCEOF);}
+<ST_IN_SCRIPTING>"as"                   { RETTOKEN(T_AS);}
+<ST_IN_SCRIPTING>"switch"               { RETTOKEN(T_SWITCH);}
+<ST_IN_SCRIPTING>"endswitch"            { RETTOKEN(T_ENDSWITCH);}
+<ST_IN_SCRIPTING>"case"                 { RETTOKEN(T_CASE);}
+<ST_IN_SCRIPTING>"default"              { RETTOKEN(T_DEFAULT);}
+<ST_IN_SCRIPTING>"break"                { RETTOKEN(T_BREAK);}
+<ST_IN_SCRIPTING>"continue"             { RETTOKEN(T_CONTINUE);}
+<ST_IN_SCRIPTING>"goto"                 { RETTOKEN(T_GOTO);}
+<ST_IN_SCRIPTING>"echo"                 { RETTOKEN(T_ECHO);}
+<ST_IN_SCRIPTING>"print"                { RETTOKEN(T_PRINT);}
+<ST_IN_SCRIPTING>"class"                { RETTOKEN(T_CLASS);}
+<ST_IN_SCRIPTING>"interface"            { RETTOKEN(T_INTERFACE);}
+<ST_IN_SCRIPTING>"trait"                { RETTOKEN(T_TRAIT);}
+<ST_IN_SCRIPTING>"insteadof"            { RETTOKEN(T_INSTEADOF);}
+<ST_IN_SCRIPTING>"extends"              { RETTOKEN(T_EXTENDS);}
+<ST_IN_SCRIPTING>"implements"           { RETTOKEN(T_IMPLEMENTS);}
+<ST_IN_SCRIPTING>"attribute"            { RETTOKEN(T_XHP_ATTRIBUTE);}
+<ST_IN_SCRIPTING>"category"             { RETTOKEN(T_XHP_CATEGORY);}
+<ST_IN_SCRIPTING>"children"             { RETTOKEN(T_XHP_CHILDREN);}
+<ST_IN_SCRIPTING>"required"             { RETTOKEN(T_XHP_REQUIRED);}
+<ST_IN_SCRIPTING>"enum"                 { RETTOKEN(T_XHP_ENUM);}

 <ST_IN_SCRIPTING>"->" {
-        STEPPOS;
+        STEPPOS(T_OBJECT_OPERATOR);
        yy_push_state(ST_LOOKING_FOR_PROPERTY, yyscanner);
        return T_OBJECT_OPERATOR;
 }

 <ST_LOOKING_FOR_PROPERTY>"->" {
-        STEPPOS;
-        return T_OBJECT_OPERATOR;
+        RETSTEP(T_OBJECT_OPERATOR);
 }

 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
-        SETTOKEN;
+        SETTOKEN(T_STRING);
        yy_pop_state(yyscanner);
        return T_STRING;
 }
@@ -279,133 +281,116 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
        yy_pop_state(yyscanner);
 }

-<ST_IN_SCRIPTING>"::"                { STEPPOS;return T_PAAMAYIM_NEKUDOTAYIM;}
-<ST_IN_SCRIPTING>"\\"                { SETTOKEN;return T_NS_SEPARATOR;}
-<ST_IN_SCRIPTING>"new"               { SETTOKEN;return T_NEW;}
-<ST_IN_SCRIPTING>"clone"             { SETTOKEN;return T_CLONE;}
-<ST_IN_SCRIPTING>"var"               { SETTOKEN;return T_VAR;}
+<ST_IN_SCRIPTING>"::"                { RETSTEP(T_PAAMAYIM_NEKUDOTAYIM);}
+<ST_IN_SCRIPTING>"\\"                { RETTOKEN(T_NS_SEPARATOR);}
+<ST_IN_SCRIPTING>"new"               { RETTOKEN(T_NEW);}
+<ST_IN_SCRIPTING>"clone"             { RETTOKEN(T_CLONE);}
+<ST_IN_SCRIPTING>"var"               { RETTOKEN(T_VAR);}

 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
-    STEPPOS;
-    return T_INT_CAST;
+    RETSTEP(T_INT_CAST);
  }
  yyless(1);
-  STEPPOS;
-  return '(';
+  RETSTEP('(');
 }

 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
-    STEPPOS;
-    return T_DOUBLE_CAST;
+    RETSTEP(T_DOUBLE_CAST);
  }
  yyless(1);
-  STEPPOS;
-  return '(';
+  RETSTEP('(');
 }

 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
-    STEPPOS;
-    return T_STRING_CAST;
+    RETSTEP(T_STRING_CAST);
  }
  yyless(1);
-  STEPPOS;
-  return '(';
+  RETSTEP('(');
 }

 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
-    STEPPOS;
-    return T_ARRAY_CAST;
+    RETSTEP(T_ARRAY_CAST);
  }
  yyless(1);
-  STEPPOS;
-  return '(';
+  RETSTEP('(');
 }

 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
-    STEPPOS;
-    return T_OBJECT_CAST;
+    RETSTEP(T_OBJECT_CAST);
  }
  yyless(1);
-  STEPPOS;
-  return '(';
+  RETSTEP('(');
 }

 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
-    STEPPOS;
-    return T_BOOL_CAST;
+    RETSTEP(T_BOOL_CAST);
  }
  yyless(1);
-  STEPPOS;
-  return '(';
+  RETSTEP('(');
 }

 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
-    STEPPOS;
-    return T_UNSET_CAST;
+    RETSTEP(T_UNSET_CAST);
  }
  yyless(1);
-  STEPPOS;
-  return '(';
+  RETSTEP('(');
 }

-<ST_IN_SCRIPTING>"eval"               { SETTOKEN; return T_EVAL;}
-<ST_IN_SCRIPTING>"include"            { SETTOKEN; return T_INCLUDE;}
-<ST_IN_SCRIPTING>"include_once"       { SETTOKEN; return T_INCLUDE_ONCE;}
-<ST_IN_SCRIPTING>"require"            { SETTOKEN; return T_REQUIRE;}
-<ST_IN_SCRIPTING>"require_once"       { SETTOKEN; return T_REQUIRE_ONCE;}
-<ST_IN_SCRIPTING>"namespace"          { SETTOKEN; return T_NAMESPACE;}
-<ST_IN_SCRIPTING>"use"                { SETTOKEN; return T_USE;}
-<ST_IN_SCRIPTING>"global"             { SETTOKEN; return T_GLOBAL;}
-<ST_IN_SCRIPTING>"isset"              { SETTOKEN; return T_ISSET;}
-<ST_IN_SCRIPTING>"empty"              { SETTOKEN; return T_EMPTY;}
-<ST_IN_SCRIPTING>"__halt_compiler"    { SETTOKEN; return T_HALT_COMPILER;}
-<ST_IN_SCRIPTING>"__compiler_halt_offset__" {
-  SETTOKEN;
-  return T_COMPILER_HALT_OFFSET;
-}
-<ST_IN_SCRIPTING>"static"             { SETTOKEN; return T_STATIC;}
-<ST_IN_SCRIPTING>"abstract"           { SETTOKEN; return T_ABSTRACT;}
-<ST_IN_SCRIPTING>"final"              { SETTOKEN; return T_FINAL;}
-<ST_IN_SCRIPTING>"private"            { SETTOKEN; return T_PRIVATE;}
-<ST_IN_SCRIPTING>"protected"          { SETTOKEN; return T_PROTECTED;}
-<ST_IN_SCRIPTING>"public"             { SETTOKEN; return T_PUBLIC;}
-<ST_IN_SCRIPTING>"unset"              { SETTOKEN; return T_UNSET;}
-<ST_IN_SCRIPTING>"=>"                 { STEPPOS; return T_DOUBLE_ARROW;}
-<ST_IN_SCRIPTING>"list"               { SETTOKEN; return T_LIST;}
-<ST_IN_SCRIPTING>"array"              { SETTOKEN; return T_ARRAY;}
-<ST_IN_SCRIPTING>"++"                 { STEPPOS; return T_INC;}
-<ST_IN_SCRIPTING>"--"                 { STEPPOS; return T_DEC;}
-<ST_IN_SCRIPTING>"==="                { STEPPOS; return T_IS_IDENTICAL;}
-<ST_IN_SCRIPTING>"!=="                { STEPPOS; return T_IS_NOT_IDENTICAL;}
-<ST_IN_SCRIPTING>"=="                 { STEPPOS; return T_IS_EQUAL;}
-<ST_IN_SCRIPTING>"!="|"<>"            { STEPPOS; return T_IS_NOT_EQUAL;}
-<ST_IN_SCRIPTING>"<="                 { STEPPOS; return T_IS_SMALLER_OR_EQUAL;}
-<ST_IN_SCRIPTING>">="                 { STEPPOS; return T_IS_GREATER_OR_EQUAL;}
-<ST_IN_SCRIPTING>"+="                 { STEPPOS; return T_PLUS_EQUAL;}
-<ST_IN_SCRIPTING>"-="                 { STEPPOS; return T_MINUS_EQUAL;}
-<ST_IN_SCRIPTING>"*="                 { STEPPOS; return T_MUL_EQUAL;}
-<ST_IN_SCRIPTING>"/="                 { STEPPOS; return T_DIV_EQUAL;}
-<ST_IN_SCRIPTING>".="                 { STEPPOS; return T_CONCAT_EQUAL;}
-<ST_IN_SCRIPTING>"%="                 { STEPPOS; return T_MOD_EQUAL;}
-<ST_IN_SCRIPTING>"<<="                { STEPPOS; return T_SL_EQUAL;}
-<ST_IN_SCRIPTING>">>="                { STEPPOS; return T_SR_EQUAL;}
-<ST_IN_SCRIPTING>"&="                 { STEPPOS; return T_AND_EQUAL;}
-<ST_IN_SCRIPTING>"|="                 { STEPPOS; return T_OR_EQUAL;}
-<ST_IN_SCRIPTING>"^="                 { STEPPOS; return T_XOR_EQUAL;}
-<ST_IN_SCRIPTING>"||"                 { STEPPOS; return T_BOOLEAN_OR;}
-<ST_IN_SCRIPTING>"&&"                 { STEPPOS; return T_BOOLEAN_AND;}
-<ST_IN_SCRIPTING>"OR"                 { SETTOKEN; return T_LOGICAL_OR;}
-<ST_IN_SCRIPTING>"AND"                { SETTOKEN; return T_LOGICAL_AND;}
-<ST_IN_SCRIPTING>"XOR"                { SETTOKEN; return T_LOGICAL_XOR;}
-<ST_IN_SCRIPTING>"<<"                 { STEPPOS; return T_SL;}
-<ST_IN_SCRIPTING>"..."                { SETTOKEN; return T_VARARG; }
+<ST_IN_SCRIPTING>"eval"               { RETTOKEN(T_EVAL);}
+<ST_IN_SCRIPTING>"include"            { RETTOKEN(T_INCLUDE);}
+<ST_IN_SCRIPTING>"include_once"       { RETTOKEN(T_INCLUDE_ONCE);}
+<ST_IN_SCRIPTING>"require"            { RETTOKEN(T_REQUIRE);}
+<ST_IN_SCRIPTING>"require_once"       { RETTOKEN(T_REQUIRE_ONCE);}
+<ST_IN_SCRIPTING>"namespace"          { RETTOKEN(T_NAMESPACE);}
+<ST_IN_SCRIPTING>"use"                { RETTOKEN(T_USE);}
+<ST_IN_SCRIPTING>"global"             { RETTOKEN(T_GLOBAL);}
+<ST_IN_SCRIPTING>"isset"              { RETTOKEN(T_ISSET);}
+<ST_IN_SCRIPTING>"empty"              { RETTOKEN(T_EMPTY);}
+<ST_IN_SCRIPTING>"__halt_compiler"    { RETTOKEN(T_HALT_COMPILER);}
+<ST_IN_SCRIPTING>"__compiler_halt_offset__" { RETTOKEN(T_COMPILER_HALT_OFFSET);}
+<ST_IN_SCRIPTING>"static"             { RETTOKEN(T_STATIC);}
+<ST_IN_SCRIPTING>"abstract"           { RETTOKEN(T_ABSTRACT);}
+<ST_IN_SCRIPTING>"final"              { RETTOKEN(T_FINAL);}
+<ST_IN_SCRIPTING>"private"            { RETTOKEN(T_PRIVATE);}
+<ST_IN_SCRIPTING>"protected"          { RETTOKEN(T_PROTECTED);}
+<ST_IN_SCRIPTING>"public"             { RETTOKEN(T_PUBLIC);}
+<ST_IN_SCRIPTING>"unset"              { RETTOKEN(T_UNSET);}
+<ST_IN_SCRIPTING>"=>"                 { RETSTEP(T_DOUBLE_ARROW);}
+<ST_IN_SCRIPTING>"list"               { RETTOKEN(T_LIST);}
+<ST_IN_SCRIPTING>"array"              { RETTOKEN(T_ARRAY);}
+<ST_IN_SCRIPTING>"++"                 { RETSTEP(T_INC);}
+<ST_IN_SCRIPTING>"--"                 { RETSTEP(T_DEC);}
+<ST_IN_SCRIPTING>"==="                { RETSTEP(T_IS_IDENTICAL);}
+<ST_IN_SCRIPTING>"!=="                { RETSTEP(T_IS_NOT_IDENTICAL);}
+<ST_IN_SCRIPTING>"=="                 { RETSTEP(T_IS_EQUAL);}
+<ST_IN_SCRIPTING>"!="|"<>"            { RETSTEP(T_IS_NOT_EQUAL);}
+<ST_IN_SCRIPTING>"<="                 { RETSTEP(T_IS_SMALLER_OR_EQUAL);}
+<ST_IN_SCRIPTING>">="                 { RETSTEP(T_IS_GREATER_OR_EQUAL);}
+<ST_IN_SCRIPTING>"+="                 { RETSTEP(T_PLUS_EQUAL);}
+<ST_IN_SCRIPTING>"-="                 { RETSTEP(T_MINUS_EQUAL);}
+<ST_IN_SCRIPTING>"*="                 { RETSTEP(T_MUL_EQUAL);}
+<ST_IN_SCRIPTING>"/="                 { RETSTEP(T_DIV_EQUAL);}
+<ST_IN_SCRIPTING>".="                 { RETSTEP(T_CONCAT_EQUAL);}
+<ST_IN_SCRIPTING>"%="                 { RETSTEP(T_MOD_EQUAL);}
+<ST_IN_SCRIPTING>"<<="                { RETSTEP(T_SL_EQUAL);}
+<ST_IN_SCRIPTING>">>="                { RETSTEP(T_SR_EQUAL);}
+<ST_IN_SCRIPTING>"&="                 { RETSTEP(T_AND_EQUAL);}
+<ST_IN_SCRIPTING>"|="                 { RETSTEP(T_OR_EQUAL);}
+<ST_IN_SCRIPTING>"^="                 { RETSTEP(T_XOR_EQUAL);}
+<ST_IN_SCRIPTING>"||"                 { RETSTEP(T_BOOLEAN_OR);}
+<ST_IN_SCRIPTING>"&&"                 { RETSTEP(T_BOOLEAN_AND);}
+<ST_IN_SCRIPTING>"OR"                 { RETTOKEN(T_LOGICAL_OR);}
+<ST_IN_SCRIPTING>"AND"                { RETTOKEN(T_LOGICAL_AND);}
+<ST_IN_SCRIPTING>"XOR"                { RETTOKEN(T_LOGICAL_XOR);}
+<ST_IN_SCRIPTING>"<<"                 { RETSTEP(T_SL);}
+<ST_IN_SCRIPTING>"..."                { RETTOKEN(T_VARARG); }

 <ST_IN_SCRIPTING>"shape"              { HH_ONLY_KEYWORD(T_SHAPE); }
 <ST_IN_SCRIPTING>"type"               { HH_ONLY_KEYWORD(T_UNRESOLVED_TYPE); }
@@ -413,19 +398,17 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})

 <ST_IN_SCRIPTING>">>" {
  if (_scanner->getLookaheadLtDepth() < 2) {
-    STEPPOS;
-    return T_SR;
+    RETSTEP(T_SR);
  }
  yyless(1);
-  STEPPOS;
-  return '>';
+  RETSTEP('>');
 }

 <ST_IN_SCRIPTING>"<"[a-zA-Z_\x7f-\xff] {
  int ntt = getNextTokenType(_scanner->lastToken());
  if (ntt & NextTokenType::XhpTag) {
    yyless(1);
-    STEPPOS;
+    STEPPOS(T_XHP_TAG_LT);
    yy_push_state(ST_XHP_IN_TAG, yyscanner);
    return T_XHP_TAG_LT;
  }
@@ -437,82 +420,75 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
    break;
  }
  yyless(1);
-  STEPPOS;
  if (_scanner->hipHopSyntaxEnabled() && (ntt & NextTokenType::TypeListMaybe)) {
-    // Return T_UNRESOLVED_LT; the scanner will inspect subseqent tokens
+    // Return T_UNRESOLVED_LT; the scanner will inspect subsequent tokens
    // to resolve this.
-    return T_UNRESOLVED_LT;
+    RETSTEP(T_UNRESOLVED_LT);
  }
-  return '<';
+  RETSTEP('<');
 }

 <ST_IN_SCRIPTING>"<" {
-  STEPPOS;
  if (_scanner->hipHopSyntaxEnabled()) {
    int ntt = getNextTokenType(_scanner->lastToken());
    if (ntt & NextTokenType::TypeListMaybe) {
-      // Return T_UNRESOLVED_LT; the scanner will inspect subseqent tokens
+      // Return T_UNRESOLVED_LT; the scanner will inspect subsequent tokens
      // to resolve this.
-      return T_UNRESOLVED_LT;
+      RETSTEP(T_UNRESOLVED_LT);
    }
  }
-  return '<';
+  RETSTEP('<');
 }

 <ST_LT_CHECK>"<"{XHPLABEL}(">"|"/>"|{WHITESPACE_AND_COMMENTS}(">"|"/>"|[a-zA-Z_\x7f-\xff])) {
  BEGIN(ST_IN_SCRIPTING);
  yyless(1);
-  STEPPOS;
+  STEPPOS(T_XHP_TAG_LT);
  yy_push_state(ST_XHP_IN_TAG, yyscanner);
  return T_XHP_TAG_LT;
 }

 <ST_LT_CHECK>"<" {
  BEGIN(ST_IN_SCRIPTING);
-  STEPPOS;
-  return '<';
+  RETSTEP('<');
 }

 <ST_IN_SCRIPTING>":"{XHPLABEL}  {
  int ntt = getNextTokenType(_scanner->lastToken());
  if (ntt & NextTokenType::XhpClassName) {
    yytext++; yyleng--; // skipping the first colon
-    SETTOKEN;
-    return T_XHP_LABEL;
+    RETTOKEN(T_XHP_LABEL);
  }
  yyless(1);
-  STEPPOS;
-  return ':';
+  RETSTEP(':');
 }

 <ST_IN_SCRIPTING>"%"{XHPLABEL}  {
  int ntt = getNextTokenType(_scanner->lastToken());
  if (ntt & NextTokenType::XhpCategoryName) {
    yytext++; yyleng--; // skipping "%"
-    SETTOKEN;
-    return T_XHP_CATEGORY_LABEL;
+    RETTOKEN(T_XHP_CATEGORY_LABEL);
  }
  yyless(1);
-  STEPPOS;
-  return '%';
+  RETSTEP('%');
 }

-<ST_IN_SCRIPTING>{TOKENS}             {STEPPOS; return yytext[0];}
+<ST_IN_SCRIPTING>{TOKENS}             {RETSTEP(yytext[0]);}

 <ST_IN_SCRIPTING>"{" {
-        STEPPOS;
+        STEPPOS('{');
        yy_push_state(ST_IN_SCRIPTING, yyscanner);
        return '{';
 }

 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
-        STEPPOS;
+        STEPPOS(T_DOLLAR_OPEN_CURLY_BRACES);
        yy_push_state(ST_LOOKING_FOR_VARNAME, yyscanner);
        return T_DOLLAR_OPEN_CURLY_BRACES;
 }

 <ST_IN_SCRIPTING>"}" {
-        STEPPOS;
+        STEPPOS('}');
        // We need to be robust against a '}' in PHP code with
        // no corresponding '{'
        struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
@@ -521,7 +497,7 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
 }

 <ST_LOOKING_FOR_VARNAME>{LABEL} {
-        SETTOKEN;
+        SETTOKEN(T_STRING_VARNAME);
        // Change state to IN_SCRIPTING; current state will be popped
        // when we encounter '}'
        BEGIN(ST_IN_SCRIPTING);
@@ -536,86 +512,81 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
 }

 <ST_IN_SCRIPTING,ST_XHP_IN_TAG>{LNUM} {
-        SETTOKEN;
        errno = 0;
        long ret = strtoll(yytext, NULL, 0);
        if (errno == ERANGE || ret < 0) {
                _scanner->error("Dec number is too big: %s", yytext);
                if (_scanner->isHackMode()) {
-                        return T_HACK_ERROR;
+                        RETTOKEN(T_HACK_ERROR);
                }
        }
-        return T_LNUMBER;
+        RETTOKEN(T_LNUMBER);
 }

 <ST_IN_SCRIPTING,ST_XHP_IN_TAG>{HNUM} {
-        SETTOKEN;
        errno = 0;
        long ret = strtoull(yytext, NULL, 16);
        if (errno == ERANGE || ret < 0) {
                _scanner->error("Hex number is too big: %s", yytext);
                if (_scanner->isHackMode()) {
-                        return T_HACK_ERROR;
+                        RETTOKEN(T_HACK_ERROR);
                }
        }
-        return T_LNUMBER;
+        RETTOKEN(T_LNUMBER);
 }

 <ST_VAR_OFFSET>0|([1-9][0-9]*) { /* Offset could be treated as a long */
-        SETTOKEN;
        errno = 0;
        long ret = strtoll(yytext, NULL, 0);
        if (ret == LLONG_MAX && errno == ERANGE) {
                _scanner->error("Offset number is too big: %s", yytext);
                if (_scanner->isHackMode()) {
-                        return T_HACK_ERROR;
+                        RETTOKEN(T_HACK_ERROR);
                }
        }
-        return T_NUM_STRING;
+        RETTOKEN(T_NUM_STRING);
 }

 <ST_VAR_OFFSET>{LNUM}|{HNUM} { /* Offset must be treated as a string */
-        SETTOKEN;
-        return T_NUM_STRING;
+        RETTOKEN(T_NUM_STRING);
 }

 <ST_IN_SCRIPTING,ST_XHP_IN_TAG>{DNUM}|{EXPONENT_DNUM} {
-        SETTOKEN;
-        return T_DNUMBER;
+        RETTOKEN(T_DNUMBER);
 }

-<ST_IN_SCRIPTING>"__CLASS__"            { SETTOKEN; return T_CLASS_C; }
-<ST_IN_SCRIPTING>"__TRAIT__"            { SETTOKEN; return T_TRAIT_C; }
-<ST_IN_SCRIPTING>"__FUNCTION__"         { SETTOKEN; return T_FUNC_C;  }
-<ST_IN_SCRIPTING>"__METHOD__"           { SETTOKEN; return T_METHOD_C;}
-<ST_IN_SCRIPTING>"__LINE__"             { SETTOKEN; return T_LINE;    }
-<ST_IN_SCRIPTING>"__FILE__"             { SETTOKEN; return T_FILE;    }
-<ST_IN_SCRIPTING>"__DIR__"              { SETTOKEN; return T_DIR;     }
-<ST_IN_SCRIPTING>"__NAMESPACE__"        { SETTOKEN; return T_NS_C;    }
+<ST_IN_SCRIPTING>"__CLASS__"            { RETTOKEN(T_CLASS_C); }
+<ST_IN_SCRIPTING>"__TRAIT__"            { RETTOKEN(T_TRAIT_C); }
+<ST_IN_SCRIPTING>"__FUNCTION__"         { RETTOKEN(T_FUNC_C); }
+<ST_IN_SCRIPTING>"__METHOD__"           { RETTOKEN(T_METHOD_C);}
+<ST_IN_SCRIPTING>"__LINE__"             { RETTOKEN(T_LINE); }
+<ST_IN_SCRIPTING>"__FILE__"             { RETTOKEN(T_FILE); }
+<ST_IN_SCRIPTING>"__DIR__"              { RETTOKEN(T_DIR); }
+<ST_IN_SCRIPTING>"__NAMESPACE__"        { RETTOKEN(T_NS_C); }

 <INITIAL>"#"[^\n]*"\n" {
-        _scanner->setHashBang(yytext, yyleng);
+        _scanner->setHashBang(yytext, yyleng, T_INLINE_HTML);
        BEGIN(ST_IN_SCRIPTING);
        yy_push_state(ST_AFTER_HASHBANG, yyscanner);
        return T_INLINE_HTML;
 }

 <INITIAL>(([^<#]|"<"[^?%s<]){1,400})|"<s"|"<" {
-        SETTOKEN;
+        SETTOKEN(T_INLINE_HTML);
        BEGIN(ST_IN_SCRIPTING);
        yy_push_state(ST_IN_HTML, yyscanner);
        return T_INLINE_HTML;
 }

 <ST_IN_HTML,ST_AFTER_HASHBANG>(([^<]|"<"[^?%s<]){1,400})|"<s"|"<" {
-        SETTOKEN;
+        SETTOKEN(T_INLINE_HTML);
        BEGIN(ST_IN_HTML);
        return T_INLINE_HTML;
 }

 <INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<?"|("<?php"([ \t]|{NEWLINE}))|"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"\'php\'"){WHITESPACE}*">" {
-        SETTOKEN;
        if (_scanner->shortTags() || yyleng > 2) {
+          SETTOKEN(T_OPEN_TAG);
          if (YY_START == INITIAL) {
            BEGIN(ST_IN_SCRIPTING);
          } else {
@@ -623,6 +594,7 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
          }
          return T_OPEN_TAG;
        } else {
+          SETTOKEN(T_INLINE_HTML);
          if (YY_START == INITIAL) {
            BEGIN(ST_IN_SCRIPTING);
            yy_push_state(ST_IN_HTML, yyscanner);
@@ -634,7 +606,6 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
 }

 <INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<%="|"<?=" {
-        SETTOKEN;
        if ((yytext[1]=='%' && _scanner->aspTags()) ||
            (yytext[1]=='?' && _scanner->shortTags())) {
          if (YY_START == INITIAL) {
@@ -642,7 +613,7 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
          } else {
            yy_pop_state(yyscanner);
          }
-          return T_ECHO; //return T_OPEN_TAG_WITH_ECHO;
+          RETTOKEN(T_ECHO); //return T_OPEN_TAG_WITH_ECHO;
        } else {
          if (YY_START == INITIAL) {
            BEGIN(ST_IN_SCRIPTING);
@@ -650,19 +621,18 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
          } else if (YY_START == ST_AFTER_HASHBANG) {
            BEGIN(ST_IN_HTML);
          }
-          return T_INLINE_HTML;
+          RETTOKEN(T_INLINE_HTML);
        }
 }

 <INITIAL,ST_IN_HTML,ST_AFTER_HASHBANG>"<%" {
-        SETTOKEN;
        if (_scanner->aspTags()) {
          if (YY_START == INITIAL) {
            BEGIN(ST_IN_SCRIPTING);
          } else {
            yy_pop_state(yyscanner);
          }
-          return T_OPEN_TAG;
+          RETTOKEN(T_OPEN_TAG);
        } else {
          if (YY_START == INITIAL) {
            BEGIN(ST_IN_SCRIPTING);
@@ -670,7 +640,7 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
          } else if (YY_START == ST_AFTER_HASHBANG) {
            BEGIN(ST_IN_HTML);
          }
-          return T_INLINE_HTML;
+          RETTOKEN(T_INLINE_HTML);
        }
 }

@@ -683,27 +653,27 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
          _scanner->error("Hack mode: content before <?hh");
          return T_HACK_ERROR;
        }
-        STEPPOS;
+        STEPPOS(T_OPEN_TAG);
        _scanner->setHackMode();
        return T_OPEN_TAG;
 }

 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
-        _scanner->setToken(yytext, yyleng, yytext+1, yyleng-1);
+        _scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE);
        return T_VARIABLE;
 }

 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] {
        yyless(yyleng - 3);
        yy_push_state(ST_LOOKING_FOR_PROPERTY, yyscanner);
-        _scanner->setToken(yytext, yyleng, yytext+1, yyleng-1);
+        _scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE);
        return T_VARIABLE;
 }

 <ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE>"$"{LABEL}"[" {
        yyless(yyleng - 1);
        yy_push_state(ST_VAR_OFFSET, yyscanner);
-        _scanner->setToken(yytext, yyleng, yytext+1, yyleng-1);
+        _scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE);
        return T_VARIABLE;
 }

@@ -723,18 +693,15 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
           line number */
        yyless(0);
        yy_pop_state(yyscanner);
-        STEPPOS;
-        return T_ENCAPSED_AND_WHITESPACE;
+        RETSTEP(T_ENCAPSED_AND_WHITESPACE);
 }

 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
-        SETTOKEN;
-        return T_STRING;
+        RETTOKEN(T_STRING);
 }

 <ST_IN_SCRIPTING,ST_XHP_IN_TAG>{WHITESPACE} {
-        STEPPOS;
-        return T_WHITESPACE;
+        RETSTEP(T_WHITESPACE);
 }

 <ST_IN_SCRIPTING,ST_XHP_IN_TAG>"#"|"//" {
@@ -755,14 +722,14 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
                yymore();
                break;
        default:
-                STEPPOS;
+                STEPPOS(T_COMMENT);
                yy_pop_state(yyscanner);
                return T_COMMENT;
        }
 }

 <ST_ONE_LINE_COMMENT>{NEWLINE} {
-        STEPPOS;
+        STEPPOS(T_COMMENT);
        yy_pop_state(yyscanner);
        return T_COMMENT;
 }
@@ -773,7 +740,7 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
          return T_HACK_ERROR;
        }
        if (_scanner->aspTags() || yytext[yyleng-2] != '%') {
-                _scanner->setToken(yytext, yyleng-2, yytext, yyleng-2);
+          _scanner->setToken(yytext, yyleng-2, yytext, yyleng-2, T_COMMENT);
                yyless(yyleng-2);
                yy_pop_state(yyscanner);
                return T_COMMENT;
@@ -797,13 +764,13 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
 }

 <ST_DOC_COMMENT>"*/" {
-        SETTOKEN;
+        SETTOKEN(T_DOC_COMMENT);
        yy_pop_state(yyscanner);
        return T_DOC_COMMENT;
 }

 <ST_COMMENT>"*/" {
-        STEPPOS;
+        STEPPOS(T_COMMENT);
        yy_pop_state(yyscanner);
        return T_COMMENT;
 }
@@ -817,7 +784,7 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
 }

 <ST_XHP_COMMENT>"-->" {
-        STEPPOS;
+        STEPPOS(T_COMMENT);
        yy_pop_state(yyscanner);
        return T_COMMENT;
 }
@@ -831,38 +798,35 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
          _scanner->error("Hack mode: ?> not allowed");
          return T_HACK_ERROR;
        }
-        STEPPOS;
        yy_push_state(ST_IN_HTML, yyscanner);
        if (_scanner->full()) {
-          return T_CLOSE_TAG;
+          RETSTEP(T_CLOSE_TAG);
        } else {
-          return ';';
+          RETSTEP(';');
        }
 }

 <ST_IN_SCRIPTING>"</script"{WHITESPACE}*">"{NEWLINE}? {
-        STEPPOS;
        yy_push_state(ST_IN_HTML, yyscanner);
        if (_scanner->full()) {
-          return T_CLOSE_TAG;
+          RETSTEP(T_CLOSE_TAG);
        } else {
-          return ';';
+          RETSTEP(';');
        }
 }

 <ST_IN_SCRIPTING>"%>"{NEWLINE}? {
        if (_scanner->aspTags()) {
-                STEPPOS;
                yy_push_state(ST_IN_HTML, yyscanner);
                if (_scanner->full()) {
-                  return T_CLOSE_TAG;
+                  RETSTEP(T_CLOSE_TAG);
                } else {
-                  return ';';
+                  RETSTEP(';');
                }
        } else {
                yyless(1);
-                _scanner->setToken(yytext, 1, yytext, 1);
-                return yytext[0];
+                // Only '%' is left after yyless(1); record it exactly once.
+                RETSTEP(yytext[0]);
        }
 }

@@ -917,19 +881,17 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
 }

 <ST_IN_SCRIPTING>[`] {
-        STEPPOS;
+        STEPPOS('`');
        BEGIN(ST_BACKQUOTE);
        return '`';
 }

 <ST_XHP_IN_TAG>{XHPLABEL} {
-  SETTOKEN;
-  return T_XHP_LABEL;
+  RETTOKEN(T_XHP_LABEL);
 }

 <ST_XHP_IN_TAG>"=" {
-  STEPPOS;
-  return yytext[0];
+  RETSTEP(yytext[0]);
 }

 <ST_XHP_IN_TAG>["][^"]*["] {
@@ -938,13 +900,13 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
 }

 <ST_XHP_IN_TAG>[{] {
-  STEPPOS;
+  STEPPOS('{');  // id matches the '{' returned below
  yy_push_state(ST_IN_SCRIPTING, yyscanner);
  return '{';
 }

 <ST_XHP_IN_TAG>">" {
-  STEPPOS;
+  STEPPOS(T_XHP_TAG_GT);  // id matches the token returned below
  BEGIN(ST_XHP_CHILD);
  return T_XHP_TAG_GT;
 }
@@ -958,14 +920,14 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
 <ST_XHP_IN_TAG>{ANY_CHAR} {
  // This rule ensures we get a reasonable syntax error message
  // when unexpected characters occur inside XHP tags
-  STEPPOS;
+  STEPPOS(yytext[0]);  // the unexpected character is its own token id
  _scanner->error("Unexpected character in input: '%c' (ASCII=%d)",
                  yytext[0], yytext[0]);
  return yytext[0];
 }

 <ST_XHP_END_SINGLETON_TAG>">" {
-  STEPPOS;
+  STEPPOS(T_XHP_TAG_GT);  // id matches the token returned below
  yy_pop_state(yyscanner);
  return T_XHP_TAG_GT;
 }
@@ -976,12 +938,11 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
 }

 <ST_XHP_CHILD>[^{<]+ {
-  SETTOKEN;
-  return T_XHP_TEXT;
+  RETTOKEN(T_XHP_TEXT);  // keep the matched run as the token's text
 }

 <ST_XHP_CHILD>"{" {
-  STEPPOS;
+  STEPPOS('{');  // id matches the '{' returned below
  yy_push_state(ST_IN_SCRIPTING, yyscanner);
  return '{';
 }
@@ -989,28 +950,25 @@ BACKQUOTE_CHARS     ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})
 <ST_XHP_CHILD>"</" {
  BEGIN(ST_XHP_END_CLOSE_TAG);
  yyless(1);
-  STEPPOS;
-  return T_XHP_TAG_LT;
+  RETSTEP(T_XHP_TAG_LT);  // only '<' is consumed; yyless(1) put '/' back
 }

 <ST_XHP_END_CLOSE_TAG>"/" {
-  STEPPOS;
-  return '/';
+  RETSTEP('/');  // single-character token: the id is the character
 }

 <ST_XHP_END_CLOSE_TAG>{XHPLABEL} {
-  SETTOKEN;
-  return T_XHP_LABEL;
+  RETTOKEN(T_XHP_LABEL);  // record the label text, then return its id
 }

 <ST_XHP_END_CLOSE_TAG>">" {
-  STEPPOS;
+  STEPPOS(T_XHP_TAG_GT);  // id matches the token returned below
  yy_pop_state(yyscanner);
  return T_XHP_TAG_GT;
 }

 <ST_XHP_CHILD>"<" {
-  STEPPOS;
+  STEPPOS(T_XHP_TAG_LT);  // id matches the token returned below
  yy_push_state(ST_XHP_IN_TAG, yyscanner);
  return T_XHP_TAG_LT;
 }
@@ -1187,8 +1145,7 @@ doc_scan_done:

 <ST_END_HEREDOC>{LABEL} {
        BEGIN(ST_IN_SCRIPTING);
-        STEPPOS;
-        return T_END_HEREDOC;
+        RETSTEP(T_END_HEREDOC);  // step over the closing label, then return
 }

 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
@@ -1,5 +1,7 @@
 %{
-#ifdef TEST_PARSER
+#ifdef XHPAST2_PARSER
+#include "hphp/util/parser/xhpast2/parser.h"
+#elif TEST_PARSER
 #include "hphp/util/parser/test/parser.h"
 #else
 #include "hphp/compiler/parser/parser.h"
@@ -85,7 +85,8 @@ void ScannerToken::xhpDecode() {
 Scanner::Scanner(const char *filename, int type, bool md5 /* = false */)
    : m_filename(filename), m_stream(nullptr), m_source(nullptr), m_len(0), m_pos(0),
      m_state(Start), m_type(type), m_yyscanner(nullptr), m_token(nullptr),
-      m_loc(nullptr), m_lastToken(-1), m_isHackMode(0), m_lookaheadLtDepth(0) {
+      m_loc(nullptr), m_lastToken(-1), m_isHackMode(0), m_lookaheadLtDepth(0),
+      m_listener(nullptr) {
  m_stream = new std::ifstream(filename);
  m_streamOwner = true;
  if (m_stream->fail()) {
@@ -101,7 +102,8 @@ Scanner::Scanner(std::istream &stream, int type,
                 bool md5 /* = false */)
    : m_filename(fileName), m_source(nullptr), m_len(0), m_pos(0),
      m_state(Start), m_type(type), m_yyscanner(nullptr), m_token(nullptr),
-      m_loc(nullptr), m_lastToken(-1), m_isHackMode(0), m_lookaheadLtDepth(0) {
+      m_loc(nullptr), m_lastToken(-1), m_isHackMode(0), m_lookaheadLtDepth(0),
+      m_listener(nullptr) {
  m_stream = &stream;
  m_streamOwner = false;
  if (md5) computeMd5();
@@ -113,7 +115,7 @@ Scanner::Scanner(const char *source, int len, int type,
    : m_filename(fileName), m_stream(nullptr), m_source(source), m_len(len),
      m_pos(0), m_state(Start), m_type(type), m_yyscanner(nullptr),
      m_token(nullptr), m_loc(nullptr), m_lastToken(-1), m_isHackMode(0),
-      m_lookaheadLtDepth(0) {
+      m_lookaheadLtDepth(0), m_listener(nullptr) {
  assert(m_source);
  m_streamOwner = false;
  if (md5) {
@@ -147,12 +149,12 @@ Scanner::~Scanner() {
  }
 }

-void Scanner::setHashBang(const char *rawText, int rawLeng) {
+void Scanner::setHashBang(const char *rawText, int rawLeng, int type) {
  if (m_type & ReturnAllTokens) {
-    setToken(rawText, rawLeng);
+    setToken(rawText, rawLeng, type);  // forward the id, as the else branch does
  } else {
    m_token->setText("", 0);
-    incLoc(rawText, rawLeng);
+    incLoc(rawText, rawLeng, type);
  }
 }

@@ -487,9 +489,12 @@ void Scanner::warn(const char* fmt, ...) {
                  m_filename.c_str(), m_loc->line0, m_loc->char0);
 }

-void Scanner::incLoc(const char *rawText, int rawLeng) {
+void Scanner::incLoc(const char *rawText, int rawLeng, int type) {
  assert(rawText);
  assert(rawLeng > 0);
+  if (m_listener) {
+    m_token->setID(m_listener->publish(rawText, rawLeng, type));
+  }

  m_loc->cursor += rawLeng;

@@ -29,8 +29,8 @@ typedef int TokenID;

 class ScannerToken {
 public:
-  ScannerToken() : m_num(0), m_check(false) {}
-  void reset() { m_num = 0; m_text.clear();}
+  ScannerToken() : m_num(0), m_check(false), m_id(-1) {}  // -1: no id set
+  void reset() { m_num = 0; m_text.clear(); m_id = -1; }  // m_check is kept

  TokenID num() const { return m_num;}
  void setNum(TokenID num) {
@@ -50,6 +50,7 @@ public:
  void operator=(ScannerToken &other) {
    m_num = other.m_num;
    m_text = other.m_text;
+    m_id = other.m_id;  // NOTE(review): m_check is not copied here — confirm
  }

  const std::string &text() const {
@@ -76,6 +77,12 @@ public:
  void setCheck() {
    m_check = true;
  }
+  void setID(int id) {  // e.g. the value a TokenListener's publish() returned
+    m_id = id;
+  }
+  int ID() const {  // const like num()/text(); -1 until setID() is called
+    return m_id;
+  }

  void xhpLabel(bool prefix = true);
  bool htmlTrim(); // true if non-empty after trimming
@@ -85,6 +92,7 @@ protected:
  TokenID m_num; // internal token id
  std::string m_text;
  bool m_check;
+  int m_id;
 };

 struct LookaheadToken {
@@ -167,6 +175,11 @@ struct TokenStore {

 ///////////////////////////////////////////////////////////////////////////////

+struct TokenListener {  // callback interface; Scanner::incLoc calls publish()
+  virtual int publish(const char *rawText, int rawLeng, int type) = 0;
+  virtual ~TokenListener() {}  // publish()'s result is stored as the token ID
+};
+
 class Scanner {
 public:
  enum Type {
@@ -182,6 +195,7 @@ public:
          bool md5 = false);
  Scanner(const char *source, int len, int type, const char *fileName = "",
          bool md5 = false);
+  void setListener(TokenListener *listener) { m_listener = listener; }
  ~Scanner();

  const std::string &getMd5() const {
@@ -221,26 +235,26 @@ public:
  bool aspTags() const { return m_type & AllowAspTags;}
  bool full() const { return m_type & ReturnAllTokens;}
  int lastToken() const { return m_lastToken;}
-  void setToken(const char *rawText, int rawLeng) {
+  void setToken(const char *rawText, int rawLeng, int type = -1) {
    m_token->setText(rawText, rawLeng);
-    incLoc(rawText, rawLeng);
+    incLoc(rawText, rawLeng, type);  // advance location; hand type to listener
  }
-  void stepPos(const char *rawText, int rawLeng) {
+  void stepPos(const char *rawText, int rawLeng, int type = -1) {
    if (m_type & ReturnAllTokens) {
      m_token->setText(rawText, rawLeng);
    }
-    incLoc(rawText, rawLeng);
+    incLoc(rawText, rawLeng, type);  // always steps, even when text is not kept
  }
  void setToken(const char *rawText, int rawLeng,
-                const char *ytext, int yleng) {
+                const char *ytext, int yleng, int type = -1) {
    if (m_type & ReturnAllTokens) {
      m_token->setText(rawText, rawLeng);
    } else {
      m_token->setText(ytext, yleng);
    }
-    incLoc(rawText, rawLeng);
+    incLoc(rawText, rawLeng, type);  // location always follows the raw text
  }
-  void setHashBang(const char *rawText, int rawLeng);
+  void setHashBang(const char *rawText, int rawLeng, int type = -1);
  // also used for YY_FATAL_ERROR in hphp.x
  void error(const char* fmt, ...) ATTRIBUTE_PRINTF(2,3);
  void warn(const char* fmt, ...) ATTRIBUTE_PRINTF(2,3);
@@ -335,11 +349,12 @@ private:

  // fields for XHP parsing
  int m_lastToken;
-  void incLoc(const char *rawText, int rawLeng);
+  void incLoc(const char *rawText, int rawLeng, int type);
  bool m_isHackMode;

  TokenStore m_lookahead;
  int m_lookaheadLtDepth;
+  TokenListener *m_listener;
 };

 ///////////////////////////////////////////////////////////////////////////////
@@ -0,0 +1,92 @@
+xhpast2 Parser Design
+
+This file discusses the design decisions made to produce xhpast-compatible
+output from the HPHP parser.  Most of the features of the design have
+to do with the impedance mismatch between the HPHP parser and xhpast.
+
+Specifically:
+
+1. xhpast outputs a byte-accurate token stream but HPHP does not.
+
+This is natural since HPHP is more concerned with executing PHP than with linting it.
+Therefore it is necessary for us to modify the HPHP parser so we can intercept
+and accumulate tokens as they are seen and associate them with the relevant
+parse tree node. There is not a 1:1 correspondence between the xhpast and HPHP
+tokenizers so some massaging is necessary.
+
+
+2. HPHP nodes are generally at a semantically higher level than xhpast nodes.
+xhpast nodes do not carry any attributes other than node type, pointers to
+the range of tokens in the token stream corresponding to that node, and a list of
+children. HPHP parse tree nodes are more condensed than xhpast nodes, often
+choosing to represent features of a node as attributes instead of children.
+For example:
+
+$x = &$a;
+
+The HPHP parser callback is:
+
+void onAssign(Token& out, Token& var, Token& expr, bool ref, bool rhsFirst = false)
+
+Notice that '=' and '&' are not represented as tokens.  The '=' is implicit in
+the function call, and the optional '&' is represented as a bool.
+
+The xhpast tree structure for the same expression is:
+
+[n_BINARY_EXPRESSION ...
+  [n_VARIABLE ...             // $x
+  [n_OPERATOR ...             // =
+  [n_VARIABLE_REFERENCE ...   // &
+    [n_VARIABLE ...           // $a
+
+As a result it is necessary to do a small bit of manual parsing to identify the
+location of the = and & in the token stream and create nodes for them.
+
+There are also situations where the opposite is true.  For example strings with
+embedded variables like "foo {$x} {$y}" generate additional nodes for $x and $y
+but xhpast treats the entirety as a single string node.  These cases are easier to
+handle since we can simply prune or combine nodes we don't care about.
+
+IDEAL DESIGN
+
+In an ideal world, the HPHP parser would be augmented to provide a superset of the
+information required for all other parsers that we have (including Hack and pfff),
+then the other parsers would be trivial (or at least easy to derive from the HPHP
+parser).
+
+However, I didn't feel like making big intrusive changes to the HPHP parser. The ideal
+design might make sense at some future point in time.
+
+ACTUAL DESIGN
+
+Given that I wanted to avoid intrusive changes to the HPHP parser, I elected to build
+a framework that would most flexibly handle the differences listed above, plus any that
+I had not yet discovered or that might arise in the future. Thus I elected to implement the
+transformation as a batch process, that is, first build a clean HPHP AST + token stream,
+then transform it to an xhpast-compatible AST.  Specific changes include:
+
+1. Adding a TokenListener facility to the parser to eavesdrop on tokens as they fly by. See
+util/parser/scanner.h. In addition to eavesdropping on tokens we also want the token ids
+that are returned by the scanner (these were not previously captured by HPHP tokens). This
+has been accomplished by modifying scanner rules to also pass the token id, such as
+T_WHITESPACE, whenever we notify the scanner that a token has been detected.
+
+2. Constructing a new lightweight AST that purely captures the rules of the parser. See
+util/parser/xhpast2/parser.h. Due to the higher semantic level of the HPHP parser, these
+AST nodes need to contain arbitrary scalar attributes in addition to a list of children.
+This has been implemented via the various "ExtraInfo" structs in that file. For example,
+the extra arguments necessary for the onName parser callback are stored in the OnNameEI
+struct.
+
+The high level flow is found in xhpast2.cpp and is pretty simple. The only thing that might
+be non-obvious at first glance is that when you call parser.parse(), what is actually
+invoked are the parse rules in hphp.y, which in turn call the parser callback methods as
+the rules fire.
+
+Once the tree is built we transform it to xhpast nodes via outputXHPAST() (with heavy
+lifting done by outputXHPASTImpl). The heart of outputXHPASTImpl is a giant switch that
+processes each node type differently. It would have been more object-oriented to make a
+class for each node and have each node know how to transform itself to xhpast but I was
+concerned that some of the transformations might require peeking up and down the hierarchy
+and break this nice abstraction anyway. Also, I didn't want to create an army of classes.
+Still, I would not be averse to going in this direction if it can be done elegantly.
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2011 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <cstdio>
+#include <cstdlib>
+#include <list>
+#include <string>
+
+#include "node_names.hpp"
+
+#define NNEW(t) \
+  (new xhpast::Node(t))
+
+#define NTYPE(n, type) \
+  ((n)->setType(type))
+
+#define NMORE(n, end) \
+  ((n)->setEnd(end))
+
+#define NSPAN(n, type, end) \
+  (NMORE(NTYPE((n), type), end))
+
+#define NLMORE(n, begin) \
+  ((n)->setBegin(begin))
+
+#define NEXPAND(l, n, r) \
+  ((n)->setBegin(l)->setEnd(r))
+
+
+namespace xhpast {
+
+  class Token;
+  typedef std::list<Token *> token_list_t;
+
+  class Token {  // token record: a numeric type, the token text, and a number n
+
+    public:
+      unsigned int type;  // numeric token type
+      std::string value;  // token text, copied from the constructor argument
+//      unsigned int lineno;
+      unsigned int n;  // presumably the token's position in the stream — TODO confirm
+
+      Token(unsigned int type, char *value, unsigned int n) :
+        type(type),
+        value(value),  // NOTE(review): std::string(char*) requires non-null
+        n(n) {
+      }
+  };
+
+  class Node;
+  typedef std::list<Node *> node_list_t;
+
+  class Node {  // tree node: a type, a token range [l_tok, r_tok], and children
+    public:
+      unsigned int type;  // presumably one of the n_* ids in node_names.hpp
+
+      int l_tok;  // leftmost token covered by this node; -1 means unset
+      int r_tok;  // rightmost token covered by this node; -1 means unset
+
+      node_list_t children;  // raw pointers; nothing in this class frees them
+
+
+      Node() : type(0), l_tok(-1), r_tok(-1) {};  // type 0, no token range
+
+      explicit Node(unsigned int type) : type(type), l_tok(-1), r_tok(-1) {};
+
+      Node(unsigned int type, int end_tok) :  // node covering a single token
+        type(type) {
+          this->l_tok = end_tok;
+          this->r_tok = end_tok;
+      }
+
+      Node(unsigned int type, int l_tok, int r_tok) :  // explicit token range
+        type(type),
+        l_tok(l_tok),
+        r_tok(r_tok) {
+
+      }
+
+      Node *appendChild(Node *node) {  // add as last child; returns this
+        this->children.push_back(node);
+        return this->setEnd(node);  // setEnd() exits the process if node is null
+      }
+
+      Node *prependChild(Node *node) {  // add as first child; returns this
+        this->children.push_front(node);
+        return this->setBegin(node);  // setBegin() exits the process if node is null
+      }
+
+      Node *appendChildren(Node *node) {  // append node's children, not node itself
+        for (node_list_t::iterator ii = node->children.begin();
+             ii != node->children.end(); ++ii) {
+          this->children.push_back(*ii);  // node keeps its own list unchanged
+          this->setEnd(*ii);
+        }
+        return this;
+      }
+
+      Node *firstChild() {  // first child, or nullptr when there are none
+        if (this->children.empty()) {
+          return nullptr;
+        }
+        return *(this->children.begin());
+      }
+
+      Node *setType(unsigned int t) {  // returns this so calls can be chained
+        this->type = t;
+        return this;
+      }
+
+      Node *setEnd(Node *n) {  // grow this node's range rightward to cover n
+        if (!n) {
+          fprintf(stderr,
+                  "Trying to setEnd() a null node to one of type %d\n",
+                  this->type);
+          exit(1);  // a null n is treated as fatal
+        }
+
+        if (n->r_tok != -1 && (n->r_tok > this->r_tok || (this->r_tok == -1))) {
+          this->r_tok = n->r_tok;  // r_tok only ever moves to a larger value
+        }
+        if (this->l_tok == -1) {
+          this->l_tok = n->l_tok;  // an unset left edge is seeded from n
+        }
+        return this;
+      }
+
+      Node *setBegin(Node *n) {  // grow this node's range leftward to cover n
+        if (!n) {
+          fprintf(stderr,
+                  "Trying to setBegin() a null node to one of type %d\n",
+                  this->type);
+          exit(1);  // a null n is treated as fatal
+        }
+
+        if (n->l_tok != -1 && (n->l_tok < this->l_tok || (this->l_tok == -1))) {
+          this->l_tok = n->l_tok;  // l_tok only ever moves to a smaller value
+        }
+        if (this->r_tok == -1) {
+          this->r_tok = n->r_tok;  // an unset right edge is seeded from n
+        }
+        return this;
+      }
+
+  };
+}
@@ -0,0 +1,24 @@
+#!/usr/bin/python
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import fileinput
+import sys
+
+indent = -2
+
+def process(c):  # echo one character; each '[' first starts a new indented line
+    global indent  # module-level depth; starts at -2 so the first '[' gets 0
+    if c == '[':
+        indent = indent + 2  # one level deeper per opening bracket
+        sys.stdout.write('\n')
+        sys.stdout.write(' ' * indent)
+    sys.stdout.write(c)
+    if c == ']':
+        indent = indent - 2  # step back out after the ']' has been written
+
+for line in fileinput.input():  # reads files named on argv, or stdin if none
+    for c in line:
+        process(c)  # character by character, newlines included
@@ -0,0 +1,128 @@
+#ifndef incl_HPHP_UTIL_PARSER_XHPAST2_NODE_NAMES_H_
+#define incl_HPHP_UTIL_PARSER_XHPAST2_NODE_NAMES_H_
+
+#define n_PROGRAM 9000
+#define n_SYMBOL_NAME 9001
+#define n_HALT_COMPILER 9002
+#define n_NAMESPACE 9003
+#define n_STATEMENT 9004
+#define n_EMPTY 9005
+#define n_STATEMENT_LIST 9006
+#define n_OPEN_TAG 9007
+#define n_CLOSE_TAG 9008
+#define n_USE_LIST 9009
+#define n_USE 9010
+#define n_CONSTANT_DECLARATION_LIST 9011
+#define n_CONSTANT_DECLARATION 9012
+#define n_STRING 9013
+#define n_LABEL 9014
+#define n_CONDITION_LIST 9015
+#define n_CONTROL_CONDITION 9016
+#define n_IF 9017
+#define n_ELSEIF 9018
+#define n_ELSE 9019
+#define n_WHILE 9020
+#define n_DO_WHILE 9021
+#define n_FOR 9022
+#define n_FOR_EXPRESSION 9023
+#define n_SWITCH 9024
+#define n_BREAK 9025
+#define n_CONTINUE 9026
+#define n_RETURN 9027
+#define n_GLOBAL_DECLARATION_LIST 9028
+#define n_GLOBAL_DECLARATION 9029
+#define n_STATIC_DECLARATION_LIST 9030
+#define n_STATIC_DECLARATION 9031
+#define n_ECHO_LIST 9032
+#define n_ECHO 9033
+#define n_INLINE_HTML 9034
+#define n_UNSET_LIST 9035
+#define n_UNSET 9036
+#define n_FOREACH 9037
+#define n_FOREACH_EXPRESSION 9038
+#define n_THROW 9039
+#define n_GOTO 9040
+#define n_TRY 9041
+#define n_CATCH_LIST 9042
+#define n_CATCH 9043
+#define n_DECLARE 9044
+#define n_DECLARE_DECLARATION_LIST 9045
+#define n_DECLARE_DECLARATION 9046
+#define n_VARIABLE 9047
+#define n_REFERENCE 9048
+#define n_VARIABLE_REFERENCE 9049
+#define n_FUNCTION_DECLARATION 9050
+#define n_CLASS_DECLARATION 9051
+#define n_CLASS_ATTRIBUTES 9052
+#define n_EXTENDS 9053
+#define n_EXTENDS_LIST 9054
+#define n_IMPLEMENTS_LIST 9055
+#define n_INTERFACE_DECLARATION 9056
+#define n_CASE 9057
+#define n_DEFAULT 9058
+#define n_DECLARATION_PARAMETER_LIST 9059
+#define n_DECLARATION_PARAMETER 9060
+#define n_TYPE_NAME 9061
+#define n_VARIABLE_VARIABLE 9062
+#define n_CLASS_MEMBER_DECLARATION_LIST 9063
+#define n_CLASS_MEMBER_DECLARATION 9064
+#define n_CLASS_CONSTANT_DECLARATION_LIST 9065
+#define n_CLASS_CONSTANT_DECLARATION 9066
+#define n_METHOD_DECLARATION 9067
+#define n_METHOD_MODIFIER_LIST 9068
+#define n_FUNCTION_MODIFIER_LIST 9069
+#define n_CLASS_MEMBER_MODIFIER_LIST 9070
+#define n_EXPRESSION_LIST 9071
+#define n_LIST 9072
+#define n_ASSIGNMENT 9073
+#define n_NEW 9074
+#define n_UNARY_PREFIX_EXPRESSION 9075
+#define n_UNARY_POSTFIX_EXPRESSION 9076
+#define n_BINARY_EXPRESSION 9077
+#define n_TERNARY_EXPRESSION 9078
+#define n_CAST_EXPRESSION 9079
+#define n_CAST 9080
+#define n_OPERATOR 9081
+#define n_ARRAY_LITERAL 9082
+#define n_EXIT_EXPRESSION 9083
+#define n_BACKTICKS_EXPRESSION 9084
+#define n_LEXICAL_VARIABLE_LIST 9085
+#define n_NUMERIC_SCALAR 9086
+#define n_STRING_SCALAR 9087
+#define n_MAGIC_SCALAR 9088
+#define n_CLASS_STATIC_ACCESS 9089
+#define n_CLASS_NAME 9090
+#define n_MAGIC_CLASS_KEYWORD 9091
+#define n_OBJECT_PROPERTY_ACCESS 9092
+#define n_ARRAY_VALUE_LIST 9093
+#define n_ARRAY_VALUE 9094
+#define n_CALL_PARAMETER_LIST 9095
+#define n_VARIABLE_EXPRESSION 9096
+#define n_INCLUDE_FILE 9097
+#define n_HEREDOC 9098
+#define n_FUNCTION_CALL 9099
+#define n_INDEX_ACCESS 9100
+#define n_ASSIGNMENT_LIST 9101
+#define n_METHOD_CALL 9102
+#define n_XHP_TAG 9103
+#define n_XHP_TAG_OPEN 9104
+#define n_XHP_TAG_CLOSE 9105
+#define n_XHP_TEXT 9106
+#define n_XHP_EXPRESSION 9107
+#define n_XHP_ATTRIBUTE_LIST 9108
+#define n_XHP_ATTRIBUTE 9109
+#define n_XHP_LITERAL 9110
+#define n_XHP_ATTRIBUTE_LITERAL 9111
+#define n_XHP_ATTRIBUTE_EXPRESSION 9112
+#define n_XHP_NODE_LIST 9113
+#define n_CONCATENATION_LIST 9114
+#define n_PARENTHETICAL_EXPRESSION 9115
+#define n_YIELD 9116
+#define n_YIELD_EXPRESSION 9117
+#define n_TRAIT_DECLARATION 9118
+#define n_USE_TRAIT_DECLARATION 9119
+#define n_USE_TRAIT_LIST 9120
+#define n_USE_TRAIT_RESOLUTION 9121
+#define n_USE_TRAIT_RESOLUTION_LIST 9122
+
+#endif
@@ -0,0 +1,135 @@
+/*
+   +----------------------------------------------------------------------+
+   | HipHop for PHP                                                       |
+   +----------------------------------------------------------------------+
+   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
+   +----------------------------------------------------------------------+
+   | This source file is subject to version 3.01 of the PHP license,      |
+   | that is bundled with this package in the file LICENSE, and is        |
+   | available through the world-wide-web at the following url:           |
+   | http://www.php.net/license/3_01.txt                                  |
+   | If you did not receive a copy of the PHP license and are unable to   |
+   | obtain it through the world-wide-web, please send a note to          |
+   | license@php.net so we can mail you a copy immediately.               |
+   +----------------------------------------------------------------------+
+*/
+
+#include <iostream>
+#include <cstdlib>
+#include <string.h>
+
+#include "hphp/util/parser/xhpast2/parser.h"
+
+namespace HPHP { namespace HPHP_PARSER_NS {
+
+  bool g_verifyMode = false;  // presumably the flag main() sets for --verify — confirm HPHP_PARSER_NS is XHPAST2
+
+}}
+
+void print_node(xhpast::Node *node) {
+  // Writes one node to stdout as a JSON array. A node whose left token
+  // is unset (-1) is written as [type] alone, without its children;
+  // any other node is written as [type, l_tok, r_tok] followed by an
+  // optional array of its children.
+  if (node->l_tok == -1) {
+    printf("[%d]", node->type);
+    return;
+  }
+
+  printf("[%d, %d, %d", node->type, node->l_tok, node->r_tok);
+
+  xhpast::node_list_t &kids = node->children;
+  if (!kids.empty()) {
+    printf(", [");
+    bool first = true;
+    for (xhpast::node_list_t::iterator it = kids.begin();
+         it != kids.end(); ++it) {
+      // The comma separates siblings; none follows the last child.
+      if (!first) {
+        printf(",");
+      }
+      first = false;
+      print_node(*it);
+    }
+    printf("]");
+  }
+
+  printf("]");
+}
+
+/*
+ * Parses a PHP file with the HPHP parser, prints the coalesced parser
+ * tree, then prints xhpast-style JSON with "tree" and "stream" members.
+ *
+ * With --verify, a parse exception is reported as "not ok 1" instead.
+ */
+int main(int argc, char** argv) try {
+  if (argc >= 2 && !strcmp(argv[1], "--verify")) {
+    HPHP::XHPAST2::g_verifyMode = true;
+    --argc, ++argv;
+  }
+
+  if (argc != 2) {
+    std::cerr << "usage: " << argv[0] << " [--verify] filename\n";
+    std::exit(1);
+  }
+
+  std::ifstream in(argv[1]);
+  if (!in.is_open()) {
+    std::cerr << argv[0] << ": couldn't open file: "
+              << strerror(errno) << '\n';
+    return 1;  // nothing to parse; do not hand a failed stream to Scanner
+  }
+
+  std::cout << "1..1\n";
+
+  try {
+    using HPHP::Scanner;
+    using HPHP::XHPAST2::Parser;
+    Scanner scan(in, Scanner::AllowShortTags);
+    Parser parser(scan, argv[1]);
+    parser.parse();
+    parser.coalesceTree();
+    std::cout << parser.tree << std::endl;
+    xhpast::Node* root = parser.outputXHPAST();
+    std::vector<xhpast::Token *>* tokens = &(parser.m_listener.tokens);
+    printf("{");
+    printf("\"tree\":");
+    if (root) {
+      // Extend the right token for the root node to the end of the concrete
+      // token stream. This ensures all tokens appear in the tree. If we don't
+      // do this and the file ends in tokens which don't go to the parser (like
+      // comments and whitespace) they won't be represented in the tree.
+      root->r_tok = (tokens->size() - 1);
+      print_node(root);
+    } else {
+      printf("null");
+    }
+    printf(",");
+    printf("\"stream\":");
+    printf("[");
+    // Bounded loop: an empty token stream must not dereference begin().
+    for (std::vector<xhpast::Token *>::iterator ii = tokens->begin();
+         ii != tokens->end(); ++ii) {
+      if (ii != tokens->begin()) {
+        printf(",");
+      }
+      printf("[%d, %d]", (*ii)->type, (int)(*ii)->value.length());
+    }
+
+    printf("]");
+    printf("}\n");
+  } catch (const std::exception& e) {
+    if (HPHP::XHPAST2::g_verifyMode) {
+      std::cout << "not ";
+    } else {
+      throw;
+    }
+  }
+  std::cout << "ok 1\n";
+}
+
+catch (const std::runtime_error& e) {
+  std::cerr << argv[0] << ": " << e.what() << '\n';
+  return 1;
+}