%{ /* -*- mode: c++ -*- */
#include "hphp/util/parser/scanner.h"

// macros for flex
// Token value type, location type and the per-scanner "extra" object type.
#define YYSTYPE HPHP::ScannerToken
#define YYLTYPE HPHP::Location
#define YY_EXTRA_TYPE HPHP::Scanner*
#define _scanner yyextra
// Input refills are delegated to Scanner::read().
#define YY_INPUT(buf,result,max) _scanner->read(buf,result,max)
// Fatal scanner errors are reported through Scanner::error(). `yyg` is
// declared locally before `_scanner` is used.
// NOTE(review): presumably flex's reentrant `yyextra` accessor needs `yyg`
// in scope -- confirm against the generated scanner.
// The macro now ends at `while (0)`; the original carried a dangling line
// continuation that only worked because the following line was blank.
#define YY_FATAL_ERROR(msg) \
  do { \
    struct yyguts_t *yyg = (struct yyguts_t *)yyscanner; \
    _scanner->error(msg); \
  } while (0)

// Buffer sizes are parenthesised so they expand safely inside any
// surrounding arithmetic expression.
#undef YY_READ_BUF_SIZE
#undef YY_BUF_SIZE
#define YY_READ_BUF_SIZE (1024*128) /* for reading from input */
#define YY_BUF_SIZE (1024*64) /* for pattern matching */

// Manual-scanning helpers.
// DECLARE_YYCURSOR binds `cursor` by reference to yyg->yy_c_buf_p and writes
// yyg->yy_hold_char back to the byte under the cursor.
// DECLARE_YYLIMIT points `limit` yy_n_chars bytes past the start of the
// current buffer.
// RESET_YYCURSOR is the inverse of DECLARE_YYCURSOR: it saves the byte under
// the cursor into yy_hold_char and overwrites it with '\0'.
// All three expect a `yyg` variable to be in scope at the point of use.
#define DECLARE_YYCURSOR \
  char *&cursor = yyg->yy_c_buf_p; *cursor = yyg->yy_hold_char;
#define DECLARE_YYLIMIT \
  char *limit = YY_CURRENT_BUFFER->yy_ch_buf + yyg->yy_n_chars;
#define YYCURSOR cursor
#define YYLIMIT limit
#define RESET_YYCURSOR yyg->yy_hold_char = *YYCURSOR; *YYCURSOR = '\0';

// macros for rules
// RETTOKEN / RETSTEP record the current match on the scanner (setToken /
// stepPos respectively) and return token `t` from the action.
// SETTOKEN / STEPPOS do the recording only, for actions that have more work
// to do before they return.
#define RETTOKEN(t) do {_scanner->setToken(yytext, yyleng, t); return t;} \
  while (0)
#define RETSTEP(t) do {_scanner->stepPos(yytext, yyleng, t); return t;} \
  while (0)
#define SETTOKEN(t) _scanner->setToken(yytext, yyleng, t)
#define STEPPOS(t) _scanner->stepPos(yytext, yyleng, t)

// Returns `tok` when hipHopSyntaxEnabled() is true, T_STRING otherwise.
#define HH_ONLY_KEYWORD(tok) do { \
  RETTOKEN(_scanner->hipHopSyntaxEnabled() ? tok : T_STRING); \
} while (0)

// True when `c` can start a label: [a-zA-Z_] or any byte in 0x7F..0xFF,
// i.e. the first character class of the LABEL pattern.
// The high-byte test is done on the value converted to unsigned char: the
// scanning cursor is a plain `char *`, and where `char` is signed a byte
// such as 0xC3 is negative and would fail a bare `(c) >= 0x7F` comparison.
// `c` is expected to be a single character/byte value.
#define IS_LABEL_START(c) \
  (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || \
   (c) == '_' || static_cast<unsigned char>(c) >= 0x7F)

/**
 * "Next token" types tell us how to treat a token based on the previous
 * token for the purpose of recognizing XHP tags, XHP class names, XHP
 * category names, and type lists.
*   XhpTag:
 *     '<' should be recognized as the start of an XHP tag
 *   XhpTagMaybe:
 *     '<' should be recognized as possibly being the start of an XHP tag;
 *     this will be resolved by inspecting subsequent characters
 *   XhpClassName:
 *     ':' should be recognized as the start of an XHP class name
 *   XhpCategoryName:
 *     '%' should be recognized as the start of an XHP category name
 *   TypeListMaybe:
 *     '<' should be recognized as possibly being the start of a type list;
 *     this will be resolved by inspecting subsequent tokens
 */
namespace NextTokenType {
  // One bit per hint so that several hints can be OR-ed together.
  static const int Normal          = 1 << 0;
  static const int XhpTag          = 1 << 1;
  static const int XhpTagMaybe     = 1 << 2;
  static const int XhpClassName    = 1 << 3;
  static const int XhpCategoryName = 1 << 4;
  static const int TypeListMaybe   = 1 << 5;
}

/**
 * Maps the previously scanned token `t` to the set of NextTokenType flags
 * that tell the rules how to interpret a following '<', ':' or '%'.
 * Tokens that are not listed yield NextTokenType::Normal.
 */
static int getNextTokenType(int t) {
  namespace NTT = NextTokenType;

  int hint = NTT::Normal;
  switch (t) {
    // Only a type list may follow.
    case T_STRING:
    case T_XHP_CHILDREN:
    case T_XHP_REQUIRED:
    case T_XHP_ENUM:
    case T_ARRAY:
      hint = NTT::TypeListMaybe;
      break;

    case T_XHP_ATTRIBUTE:
      hint = NTT::XhpClassName | NTT::TypeListMaybe;
      break;

    case T_XHP_CATEGORY:
      hint = NTT::XhpCategoryName | NTT::TypeListMaybe;
      break;

    // Only an XHP class name may follow.
    case T_EXTENDS:
    case T_CLASS:
    case T_PRIVATE:
    case T_PROTECTED:
    case T_PUBLIC:
    case T_STATIC:
      hint = NTT::XhpClassName;
      break;

    // '<' needs a closer look before it can be called an XHP tag.
    case T_INC:
    case T_DEC:
      hint = NTT::XhpTagMaybe;
      break;

    case '}':
      hint = NTT::XhpTagMaybe | NTT::XhpClassName;
      break;

    // Tag, class name or category name may follow.
    case ',':
    case '(':
    case '|':
      hint = NTT::XhpTag | NTT::XhpClassName | NTT::XhpCategoryName;
      break;

    // Tag or class name may follow: single-character tokens, -1, ...
    case '=':
    case '.':
    case '+':
    case '-':
    case '*':
    case '/':
    case '%':
    case '!':
    case '~':
    case '&':
    case '^':
    case '<':
    case '>':
    case '?':
    case ':':
    case '[':
    case '{':
    case ';':
    case '@':
    case -1:
    // ... logical, shift, boolean and comparison operators, ...
    case T_LOGICAL_OR:
    case T_LOGICAL_XOR:
    case T_LOGICAL_AND:
    case T_SL:
    case T_SR:
    case T_BOOLEAN_OR:
    case T_BOOLEAN_AND:
    case T_IS_EQUAL:
    case T_IS_NOT_EQUAL:
    case T_IS_IDENTICAL:
    case T_IS_NOT_IDENTICAL:
    case T_IS_SMALLER_OR_EQUAL:
    case T_IS_GREATER_OR_EQUAL:
    // ... compound assignments, ...
    case T_PLUS_EQUAL:
    case T_MINUS_EQUAL:
    case T_MUL_EQUAL:
    case T_DIV_EQUAL:
    case T_CONCAT_EQUAL:
    case T_MOD_EQUAL:
    case T_AND_EQUAL:
    case T_OR_EQUAL:
    case T_XOR_EQUAL:
    case T_SL_EQUAL:
    case T_SR_EQUAL:
    // ... keywords and separators, ...
    case T_ECHO:
    case T_PRINT:
    case T_CLONE:
    case T_EXIT:
    case T_RETURN:
    case T_YIELD:
    case T_NEW:
    case T_INSTANCEOF:
    case T_DOUBLE_ARROW:
    case T_NS_SEPARATOR:
    case T_INLINE_HTML:
    // ... and casts.
    case T_INT_CAST:
    case T_DOUBLE_CAST:
    case T_STRING_CAST:
    case T_ARRAY_CAST:
    case T_OBJECT_CAST:
    case T_BOOL_CAST:
    case T_UNSET_CAST:
    case T_UNRESOLVED_LT:
    case T_AS:
      hint = NTT::XhpTag | NTT::XhpClassName;
      break;

    default:
      break;
  }
  return hint;
}
%}

/* Exclusive start conditions. */
%x ST_IN_HTML
%x ST_IN_SCRIPTING
%x ST_AFTER_HASHBANG
%x ST_DOUBLE_QUOTES
%x ST_BACKQUOTE
%x ST_HEREDOC
%x ST_NOWDOC
%x ST_END_HEREDOC
%x ST_LOOKING_FOR_PROPERTY
%x ST_LOOKING_FOR_VARNAME
%x ST_VAR_OFFSET
%x ST_LT_CHECK
%x ST_COMMENT
%x ST_DOC_COMMENT
%x ST_ONE_LINE_COMMENT
%x ST_XHP_IN_TAG
%x ST_XHP_END_SINGLETON_TAG
%x ST_XHP_END_CLOSE_TAG
%x ST_XHP_CHILD
%x ST_XHP_COMMENT
/* The rules use yy_push_state() / yy_pop_state(). */
%option stack

/* Named patterns shared by the rules. */
LNUM [0-9]+
DNUM ([0-9]*[\.][0-9]+)|([0-9]+[\.][0-9]*)
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
HNUM "0x"[0-9a-fA-F]+
LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
TOKENS [;:,.\[\]()|^&+\-*/=%!~$<>?@]
ANY_CHAR (.|[\n])
NEWLINE ("\r"|"\n"|"\r\n")
XHPLABEL {LABEL}([:-]{LABEL})*
COMMENT_REGEX ([\/][\*]([^\*]|(\*[^/]))*[\*][\/]|"//"[^\r\n]*{NEWLINE})
WHITESPACE_AND_COMMENTS ([ \n\r\t]|({COMMENT_REGEX}))+

/*
 * LITERAL_DOLLAR matches unescaped $ that aren't followed by a label character
 * or a { and therefore will be taken literally.
The case of literal $ before
 * a variable or "${" is handled in a rule for each string type
 */
DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\"\\{]|("\\"{ANY_CHAR})))
BACKQUOTE_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$`\\{]|("\\"{ANY_CHAR})))

/*
 * CHARS matches everything up to a variable or "{$"
 * {'s are matched as long as they aren't followed by a $
 * The case of { before "{$" is handled in a rule for each string type
 *
 * For heredocs, matching continues across/after newlines if/when it's known
 * that the next line doesn't contain a possible ending label
 */
DOUBLE_QUOTES_CHARS ("{"*([^$\"\\{]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR})
BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR})

%%

  /*
   * NOTE(review): none of the rules below carries a start-condition prefix,
   * yet the definitions section declares exclusive (%x) states and several
   * patterns ("->", "<", {LABEL}, {ANY_CHAR}) appear more than once with
   * different actions. The prefixes look like they are missing from this
   * copy of the file -- confirm against the upstream source before relying
   * on which rule fires.
   */

  /* Keywords: each literal is returned as its own token ("die" shares
     T_EXIT with "exit"). */
"exit" { RETTOKEN(T_EXIT);}
"die" { RETTOKEN(T_EXIT);}
"function" { RETTOKEN(T_FUNCTION);}
"const" { RETTOKEN(T_CONST);}
"return" { RETTOKEN(T_RETURN); }
"yield" { RETTOKEN(T_YIELD);}
"try" { RETTOKEN(T_TRY);}
"catch" { RETTOKEN(T_CATCH);}
"finally" { RETTOKEN(T_FINALLY);}
"throw" { RETTOKEN(T_THROW);}
"if" { RETTOKEN(T_IF);}
"elseif" { RETTOKEN(T_ELSEIF);}
"endif" { RETTOKEN(T_ENDIF);}
"else" { RETTOKEN(T_ELSE);}
"while" { RETTOKEN(T_WHILE);}
"endwhile" { RETTOKEN(T_ENDWHILE);}
"do" { RETTOKEN(T_DO);}
"for" { RETTOKEN(T_FOR);}
"endfor" { RETTOKEN(T_ENDFOR);}
"foreach" { RETTOKEN(T_FOREACH);}
"endforeach" { RETTOKEN(T_ENDFOREACH);}
"declare" { RETTOKEN(T_DECLARE);}
"enddeclare" { RETTOKEN(T_ENDDECLARE);}
"instanceof" { RETTOKEN(T_INSTANCEOF);}
"as" { RETTOKEN(T_AS);}
"switch" { RETTOKEN(T_SWITCH);}
"endswitch" { RETTOKEN(T_ENDSWITCH);}
"case" { RETTOKEN(T_CASE);}
"default" { RETTOKEN(T_DEFAULT);}
"break" { RETTOKEN(T_BREAK);}
"continue" { RETTOKEN(T_CONTINUE);}
"goto" { RETTOKEN(T_GOTO);}
"echo" { RETTOKEN(T_ECHO);}
"print" { RETTOKEN(T_PRINT);}
"class" { RETTOKEN(T_CLASS);}
"interface" { RETTOKEN(T_INTERFACE);}
"trait" { RETTOKEN(T_TRAIT);}
"insteadof" { RETTOKEN(T_INSTEADOF);}
"extends" { RETTOKEN(T_EXTENDS);}
"implements" { RETTOKEN(T_IMPLEMENTS);}

  /* XHP declaration keywords. */
"attribute" { RETTOKEN(T_XHP_ATTRIBUTE);}
"category" { RETTOKEN(T_XHP_CATEGORY);}
"children" { RETTOKEN(T_XHP_CHILDREN);}
"required" { RETTOKEN(T_XHP_REQUIRED);}
"enum" { RETTOKEN(T_XHP_ENUM);}

  /* "->": this first rule pushes ST_LOOKING_FOR_PROPERTY before returning;
     the second one just returns the operator.
     NOTE(review): the two rules presumably belong to different start
     conditions -- confirm. */
"->" {
  STEPPOS(T_OBJECT_OPERATOR);
  yy_push_state(ST_LOOKING_FOR_PROPERTY, yyscanner);
  return T_OBJECT_OPERATOR;
}

"->" { RETSTEP(T_OBJECT_OPERATOR); }

  /* Both rules below pop the state stack: a label is returned as T_STRING,
     any other character is put back with yyless(0). */
{LABEL} {
  SETTOKEN(T_STRING);
  yy_pop_state(yyscanner);
  return T_STRING;
}

{ANY_CHAR} {
  yyless(0);
  yy_pop_state(yyscanner);
}

"::" { RETSTEP(T_PAAMAYIM_NEKUDOTAYIM);}
"\\" { RETTOKEN(T_NS_SEPARATOR);}
"new" { RETTOKEN(T_NEW);}
"clone" { RETTOKEN(T_CLONE);}
"var" { RETTOKEN(T_VAR);}

  /* Casts: "(" type ")" is returned as a cast token unless the previous
     token was T_FUNCTION; in that case only the "(" is kept (yyless(1))
     and returned as '('. */
"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
    RETSTEP(T_INT_CAST);
  }
  yyless(1);
  RETSTEP('(');
}

"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
    RETSTEP(T_DOUBLE_CAST);
  }
  yyless(1);
  RETSTEP('(');
}

"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
    RETSTEP(T_STRING_CAST);
  }
  yyless(1);
  RETSTEP('(');
}

"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
    RETSTEP(T_ARRAY_CAST);
  }
  yyless(1);
  RETSTEP('(');
}

"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
    RETSTEP(T_OBJECT_CAST);
  }
  yyless(1);
  RETSTEP('(');
}

"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
    RETSTEP(T_BOOL_CAST);
  }
  yyless(1);
  RETSTEP('(');
}

"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
  if (_scanner->lastToken() != T_FUNCTION) {
    RETSTEP(T_UNSET_CAST);
  }
  yyless(1);
  RETSTEP('(');
}

"eval" { RETTOKEN(T_EVAL);}
"include" { RETTOKEN(T_INCLUDE);}
"include_once" { RETTOKEN(T_INCLUDE_ONCE);}
"require" { RETTOKEN(T_REQUIRE);}
"require_once" { RETTOKEN(T_REQUIRE_ONCE);}
"namespace" { RETTOKEN(T_NAMESPACE);}
"use" { RETTOKEN(T_USE);}
"global" { RETTOKEN(T_GLOBAL);}
"isset" { RETTOKEN(T_ISSET);}
"empty" { RETTOKEN(T_EMPTY);}
"__halt_compiler" { RETTOKEN(T_HALT_COMPILER);}
"__compiler_halt_offset__" { RETTOKEN(T_COMPILER_HALT_OFFSET);}
"static" { RETTOKEN(T_STATIC);}
"abstract" { RETTOKEN(T_ABSTRACT);}
"final" { RETTOKEN(T_FINAL);}
"private" { RETTOKEN(T_PRIVATE);}
"protected" { RETTOKEN(T_PROTECTED);}
"public" { RETTOKEN(T_PUBLIC);}
"unset" { RETTOKEN(T_UNSET);}
"=>" { RETSTEP(T_DOUBLE_ARROW);}
"list" { RETTOKEN(T_LIST);}
"array" { RETTOKEN(T_ARRAY);}

  /* Multi-character operators. */
"++" { RETSTEP(T_INC);}
"--" { RETSTEP(T_DEC);}
"===" { RETSTEP(T_IS_IDENTICAL);}
"!==" { RETSTEP(T_IS_NOT_IDENTICAL);}
"==" { RETSTEP(T_IS_EQUAL);}
"!="|"<>" { RETSTEP(T_IS_NOT_EQUAL);}
"<=" { RETSTEP(T_IS_SMALLER_OR_EQUAL);}
">=" { RETSTEP(T_IS_GREATER_OR_EQUAL);}
"+=" { RETSTEP(T_PLUS_EQUAL);}
"-=" { RETSTEP(T_MINUS_EQUAL);}
"*=" { RETSTEP(T_MUL_EQUAL);}
"/=" { RETSTEP(T_DIV_EQUAL);}
".=" { RETSTEP(T_CONCAT_EQUAL);}
"%=" { RETSTEP(T_MOD_EQUAL);}
"<<=" { RETSTEP(T_SL_EQUAL);}
">>=" { RETSTEP(T_SR_EQUAL);}
"&=" { RETSTEP(T_AND_EQUAL);}
"|=" { RETSTEP(T_OR_EQUAL);}
"^=" { RETSTEP(T_XOR_EQUAL);}
"||" { RETSTEP(T_BOOLEAN_OR);}
"&&" { RETSTEP(T_BOOLEAN_AND);}
"OR" { RETTOKEN(T_LOGICAL_OR);}
"AND" { RETTOKEN(T_LOGICAL_AND);}
"XOR" { RETTOKEN(T_LOGICAL_XOR);}
"<<" { RETSTEP(T_SL);}
"..." { RETTOKEN(T_VARARG); }

  /* Returned as keywords only when hipHopSyntaxEnabled(); T_STRING
     otherwise (see HH_ONLY_KEYWORD). */
"shape" { HH_ONLY_KEYWORD(T_SHAPE); }
"type" { HH_ONLY_KEYWORD(T_UNRESOLVED_TYPE); }
"newtype" { HH_ONLY_KEYWORD(T_UNRESOLVED_NEWTYPE); }

">>" {
  // T_SR only while the lookahead '<' depth is below 2; otherwise keep a
  // single '>' and leave the second one for the next match.
  if (_scanner->getLookaheadLtDepth() < 2) {
    RETSTEP(T_SR);
  }
  yyless(1);
  RETSTEP('>');
}

"<"[a-zA-Z_\x7f-\xff] {
  // What '<' means depends on the previous token; see getNextTokenType().
  int ntt = getNextTokenType(_scanner->lastToken());
  if (ntt & NextTokenType::XhpTag) {
    yyless(1);
    STEPPOS(T_XHP_TAG_LT);
    yy_push_state(ST_XHP_IN_TAG, yyscanner);
    return T_XHP_TAG_LT;
  }
  if (ntt & NextTokenType::XhpTagMaybe) {
    // Shift to state ST_LT_CHECK to do a more extensive check to
    // determine if this is the beginning of an XHP tag.
    yyless(0);
    BEGIN(ST_LT_CHECK);
    break;
  }
  yyless(1);
  if (_scanner->hipHopSyntaxEnabled() &&
      (ntt & NextTokenType::TypeListMaybe)) {
    // Return T_UNRESOLVED_LT; the scanner will inspect subsequent tokens
    // to resolve this.
    RETSTEP(T_UNRESOLVED_LT);
  }
  RETSTEP('<');
}

"<" {
  if (_scanner->hipHopSyntaxEnabled()) {
    int ntt = getNextTokenType(_scanner->lastToken());
    if (ntt & NextTokenType::TypeListMaybe) {
      // Return T_UNRESOLVED_LT; the scanner will inspect subsequent tokens
      // to resolve this.
      RETSTEP(T_UNRESOLVED_LT);
    }
  }
  RETSTEP('<');
}

  /* Both rules below switch to ST_IN_SCRIPTING first. The longer pattern
     keeps only the '<' (yyless(1)) and enters ST_XHP_IN_TAG; the bare '<'
     is returned as an ordinary '<'.
     NOTE(review): presumably these are the ST_LT_CHECK rules -- confirm. */
"<"{XHPLABEL}(">"|"/>"|{WHITESPACE_AND_COMMENTS}(">"|"/>"|[a-zA-Z_\x7f-\xff])) {
  BEGIN(ST_IN_SCRIPTING);
  yyless(1);
  STEPPOS(T_XHP_TAG_LT);
  yy_push_state(ST_XHP_IN_TAG, yyscanner);
  return T_XHP_TAG_LT;
}

"<" {
  BEGIN(ST_IN_SCRIPTING);
  RETSTEP('<');
}

":"{XHPLABEL} {
  int ntt = getNextTokenType(_scanner->lastToken());
  if (ntt & NextTokenType::XhpClassName) {
    yytext++; yyleng--; // skipping the first colon
    RETTOKEN(T_XHP_LABEL);
  }
  // Not a class name here: keep only the ':'.
  yyless(1);
  RETSTEP(':');
}

"%"{XHPLABEL} {
  int ntt = getNextTokenType(_scanner->lastToken());
  if (ntt & NextTokenType::XhpCategoryName) {
    yytext++; yyleng--; // skipping "%"
    RETTOKEN(T_XHP_CATEGORY_LABEL);
  }
  // Not a category name here: keep only the '%'.
  yyless(1);
  RETSTEP('%');
}

{TOKENS} {RETSTEP(yytext[0]);}

"{" {
  STEPPOS('{');
  yy_push_state(ST_IN_SCRIPTING, yyscanner);
  return '{';
}

"${" {
  STEPPOS(T_DOLLAR_OPEN_CURLY_BRACES);
  yy_push_state(ST_LOOKING_FOR_VARNAME, yyscanner);
  return T_DOLLAR_OPEN_CURLY_BRACES;
}

"}" {
  STEPPOS('}');
  // We need to be robust against a '}' in PHP code with
  // no corresponding '{'
  struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
  if (yyg->yy_start_stack_ptr) yy_pop_state(yyscanner);
  return '}';
}

{LABEL} {
  SETTOKEN(T_STRING_VARNAME);
  // Change state to IN_SCRIPTING; current state will be popped
  // when we encounter '}'
  BEGIN(ST_IN_SCRIPTING);
  return T_STRING_VARNAME;
}

{ANY_CHAR} {
  yyless(0);
  // Change state to IN_SCRIPTING; current state will be popped
  // when we encounter '}'
  BEGIN(ST_IN_SCRIPTING);
}

{LNUM} {
  errno = 0;
  long
ret = strtoll(yytext, NULL, 0); if (errno == ERANGE || ret < 0) { _scanner->error("Dec number is too big: %s", yytext); if (_scanner->isHackMode()) { RETTOKEN(T_HACK_ERROR); } } RETTOKEN(T_LNUMBER); } {HNUM} { errno = 0; long ret = strtoull(yytext, NULL, 16); if (errno == ERANGE || ret < 0) { _scanner->error("Hex number is too big: %s", yytext); if (_scanner->isHackMode()) { RETTOKEN(T_HACK_ERROR); } } RETTOKEN(T_LNUMBER); } 0|([1-9][0-9]*) { /* Offset could be treated as a long */ errno = 0; long ret = strtoll(yytext, NULL, 0); if (ret == LLONG_MAX && errno == ERANGE) { _scanner->error("Offset number is too big: %s", yytext); if (_scanner->isHackMode()) { RETTOKEN(T_HACK_ERROR); } } RETTOKEN(T_NUM_STRING); } {LNUM}|{HNUM} { /* Offset must be treated as a string */ RETTOKEN(T_NUM_STRING); } {DNUM}|{EXPONENT_DNUM} { RETTOKEN(T_DNUMBER); } "__CLASS__" { RETTOKEN(T_CLASS_C); } "__TRAIT__" { RETTOKEN(T_TRAIT_C); } "__FUNCTION__" { RETTOKEN(T_FUNC_C); } "__METHOD__" { RETTOKEN(T_METHOD_C);} "__LINE__" { RETTOKEN(T_LINE); } "__FILE__" { RETTOKEN(T_FILE); } "__DIR__" { RETTOKEN(T_DIR); } "__NAMESPACE__" { RETTOKEN(T_NS_C); } "#"[^\n]*"\n" { _scanner->setHashBang(yytext, yyleng, T_INLINE_HTML); BEGIN(ST_IN_SCRIPTING); yy_push_state(ST_AFTER_HASHBANG, yyscanner); return T_INLINE_HTML; } (([^<#]|"<"[^?%s<]){1,400})|"(([^<]|"<"[^?%s<]){1,400})|""" { if (_scanner->shortTags() || yyleng > 2) { SETTOKEN(T_OPEN_TAG); if (YY_START == INITIAL) { BEGIN(ST_IN_SCRIPTING); } else { yy_pop_state(yyscanner); } return T_OPEN_TAG; } else { SETTOKEN(T_INLINE_HTML); if (YY_START == INITIAL) { BEGIN(ST_IN_SCRIPTING); yy_push_state(ST_IN_HTML, yyscanner); } else if (YY_START == ST_AFTER_HASHBANG) { BEGIN(ST_IN_HTML); } return T_INLINE_HTML; } } "<%="|"aspTags()) || (yytext[1]=='?' 
&& _scanner->shortTags())) { if (YY_START == INITIAL) { BEGIN(ST_IN_SCRIPTING); } else { yy_pop_state(yyscanner); } RETTOKEN(T_ECHO); //return T_OPEN_TAG_WITH_ECHO; } else { if (YY_START == INITIAL) { BEGIN(ST_IN_SCRIPTING); yy_push_state(ST_IN_HTML, yyscanner); } else if (YY_START == ST_AFTER_HASHBANG) { BEGIN(ST_IN_HTML); } RETTOKEN(T_INLINE_HTML); } } "<%" { if (_scanner->aspTags()) { if (YY_START == INITIAL) { BEGIN(ST_IN_SCRIPTING); } else { yy_pop_state(yyscanner); } RETTOKEN(T_OPEN_TAG); } else { if (YY_START == INITIAL) { BEGIN(ST_IN_SCRIPTING); yy_push_state(ST_IN_HTML, yyscanner); } else if (YY_START == ST_AFTER_HASHBANG) { BEGIN(ST_IN_HTML); } RETTOKEN(T_INLINE_HTML); } } "error("Hack mode: content before setHackMode(); return T_OPEN_TAG; } "$"{LABEL} { _scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE); return T_VARIABLE; } "$"{LABEL}"->"[a-zA-Z_\x7f-\xff] { yyless(yyleng - 3); yy_push_state(ST_LOOKING_FOR_PROPERTY, yyscanner); _scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE); return T_VARIABLE; } "$"{LABEL}"[" { yyless(yyleng - 1); yy_push_state(ST_VAR_OFFSET, yyscanner); _scanner->setToken(yytext, yyleng, yytext+1, yyleng-1, T_VARIABLE); return T_VARIABLE; } "]" { yy_pop_state(yyscanner); return ']'; } {TOKENS}|[{}\"`] { /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */ return yytext[0]; } [ \n\r\t\\\'#] { /* Invalid rule to return a more explicit parse error with proper line number */ yyless(0); yy_pop_state(yyscanner); RETSTEP(T_ENCAPSED_AND_WHITESPACE); } {LABEL} { RETTOKEN(T_STRING); } {WHITESPACE} { RETSTEP(T_WHITESPACE); } "#"|"//" { yy_push_state(ST_ONE_LINE_COMMENT, yyscanner); yymore(); } "?"|"%"|">" { yymore(); } [^\n\r?%>]*{ANY_CHAR} { switch (yytext[yyleng-1]) { case '?': case '%': case '>': yyless(yyleng-1); yymore(); break; default: STEPPOS(T_COMMENT); yy_pop_state(yyscanner); return T_COMMENT; } } {NEWLINE} { STEPPOS(T_COMMENT); yy_pop_state(yyscanner); 
return T_COMMENT; } "?>"|"%>" { if (_scanner->isHackMode()) { _scanner->error("Hack mode: ?> not allowed"); return T_HACK_ERROR; } if (_scanner->aspTags() || yytext[yyleng-2] != '%') { _scanner->setToken(yytext, yyleng-2, yytext, yyleng-2, T_COMMENT); yyless(yyleng-2); yy_pop_state(yyscanner); return T_COMMENT; } else { yymore(); } } "/**"{WHITESPACE} { yy_push_state(ST_DOC_COMMENT, yyscanner); yymore(); } "/*" { yy_push_state(ST_COMMENT, yyscanner); yymore(); } [^*]+ { yymore(); } "*/" { SETTOKEN(T_DOC_COMMENT); yy_pop_state(yyscanner); return T_DOC_COMMENT; } "*/" { STEPPOS(T_COMMENT); yy_pop_state(yyscanner); return T_COMMENT; } "*" { yymore(); } [^-]+ { yymore(); } "-->" { STEPPOS(T_COMMENT); yy_pop_state(yyscanner); return T_COMMENT; } "-" { yymore(); } "?>"{NEWLINE}? { if (_scanner->isHackMode()) { _scanner->error("Hack mode: ?> not allowed"); return T_HACK_ERROR; } yy_push_state(ST_IN_HTML, yyscanner); if (_scanner->full()) { RETSTEP(T_CLOSE_TAG); } else { RETSTEP(';'); } } ""{NEWLINE}? { yy_push_state(ST_IN_HTML, yyscanner); if (_scanner->full()) { RETSTEP(T_CLOSE_TAG); } else { RETSTEP(';'); } } "%>"{NEWLINE}? { if (_scanner->aspTags()) { yy_push_state(ST_IN_HTML, yyscanner); if (_scanner->full()) { RETSTEP(T_CLOSE_TAG); } else { RETSTEP(';'); } } else { yyless(1); _scanner->setToken(yytext, 1, yytext, 1); RETSTEP(yytext[0]); } } (b?[\"]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)[\"]) { int bprefix = (yytext[0] != '"') ? 1 : 0; std::string strval = _scanner->escape(yytext + bprefix + 1, yyleng - bprefix - 2, '"'); _scanner->setToken(yytext, yyleng, strval.c_str(), strval.length()); return T_CONSTANT_ENCAPSED_STRING; } (b?[\']([^\'\\]|("\\"{ANY_CHAR}))*[\']?) { int bprefix = (yytext[0] != '\'') ? 1 : 0; int closed = (yytext[yyleng - 1] == '\''); std::string strval = _scanner->escape(yytext + bprefix + 1, yyleng - bprefix - 2, '\''); _scanner->setToken(yytext, yyleng, strval.c_str(), strval.length()); return closed ? 
T_CONSTANT_ENCAPSED_STRING : T_ENCAPSED_AND_WHITESPACE; } b?[\"] { int bprefix = (yytext[0] != '"') ? 1 : 0; _scanner->setToken(yytext, yyleng, yytext + bprefix, yyleng - bprefix); BEGIN(ST_DOUBLE_QUOTES); return '\"'; } b?"<<<"{TABS_AND_SPACES}({LABEL}|[']{LABEL}[']|["]{LABEL}["]){NEWLINE} { int bprefix = (yytext[0] != '<') ? 1 : 0; int label_len = yyleng-bprefix-3-1-(yytext[yyleng-2]=='\r'?1:0); char *s = yytext+bprefix+3; while ((*s == ' ') || (*s == '\t')) { s++; label_len--; } if (*s == '\'') { s++; label_len -= 2; BEGIN(ST_NOWDOC); } else { if (*s == '"') { s++; label_len -= 2; } BEGIN(ST_HEREDOC); } _scanner->setHeredocLabel(s, label_len); _scanner->setToken(yytext, yyleng, s, label_len); return T_START_HEREDOC; } [`] { STEPPOS('`'); BEGIN(ST_BACKQUOTE); return '`'; } {XHPLABEL} { RETTOKEN(T_XHP_LABEL); } "=" { RETSTEP(yytext[0]); } ["][^"]*["] { _scanner->setToken(yytext, yyleng, yytext+1, yyleng-2); return T_XHP_TEXT; } [{] { STEPPOS('{'); yy_push_state(ST_IN_SCRIPTING, yyscanner); return '{'; } ">" { STEPPOS(T_XHP_TAG_GT); BEGIN(ST_XHP_CHILD); return T_XHP_TAG_GT; } "/>" { BEGIN(ST_XHP_END_SINGLETON_TAG); yyless(1); return '/'; } {ANY_CHAR} { // This rule ensures we get a reasonable syntax error message // when unexpected characters occur inside XHP tags STEPPOS(yytext[0]); _scanner->error("Unexpected character in input: '%c' (ASCII=%d)", yytext[0], yytext[0]); return yytext[0]; } ">" { STEPPOS(T_XHP_TAG_GT); yy_pop_state(yyscanner); return T_XHP_TAG_GT; } "