75397a10c7
The goal of this diff is to clean up the current work-in-progress and check it in so that others may contribute if they have time. The program currently outputs the HHVM tree as well as the xhpast-like json, for ease of debugging. There is a little helper program jsonpretty.py to make the trees more readable, e.g. _build/dbg/hphp/util/parser/xhpast/xhpast2 <some php file> | hphp/util/parser/xhpast/xhpast2/jsonpretty.py
364 linhas
9.2 KiB
C++
364 linhas
9.2 KiB
C++
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
|
|
|
|
#ifndef incl_HPHP_UTIL_PARSER_SCANNER_H_
|
|
#define incl_HPHP_UTIL_PARSER_SCANNER_H_
|
|
|
|
#include <sstream>
|
|
#include "hphp/util/exception.h"
|
|
#include "hphp/util/parser/location.h"
|
|
#include "hphp/util/parser/hphp.tab.hpp"
|
|
|
|
namespace HPHP {
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Integer type used for token ids.
typedef int TokenID;

/**
 * A token as exchanged between the scanner and its callers. It carries an
 * internal token id (m_num), the token text, a one-way "check" flag and an
 * auxiliary integer id (m_id, -1 until setID() is called).
 */
class ScannerToken {
public:
  ScannerToken() : m_num(0), m_check(false), m_id(-1) {}

  /**
   * Clears the token id, the text and the auxiliary id.
   * The check flag is left as it is.
   * NOTE(review): confirm that keeping m_check across reset() is intended.
   */
  void reset() {
    m_num = 0;
    m_text.clear();
    m_id = -1;
  }

  TokenID num() const { return m_num; }
  void setNum(TokenID num) { m_num = num; }

  /** Sets the token id and the text in one call. */
  void set(TokenID num, const char *t) {
    m_num = num;
    m_text = t;
  }
  void set(TokenID num, const std::string &t) {
    m_num = num;
    m_text = t;
  }

  /**
   * Postfix increment (TokenID is an int, so the parameter is the usual
   * dummy argument). Bumps the token id by one and returns nothing.
   */
  void operator++(TokenID) { ++m_num; }

  /**
   * Copies the token id, the text and the auxiliary id from other and
   * returns *this. Accepts a const source so that const tokens can be
   * assigned from.
   * NOTE(review): m_check is not copied, exactly as before; confirm that
   * this omission is intended.
   */
  ScannerToken &operator=(const ScannerToken &other) {
    m_num = other.m_num;
    m_text = other.m_text;
    m_id = other.m_id;
    return *this;
  }

  const std::string &text() const { return m_text; }

  /** True if the token text equals s, ignoring case (strcasecmp). */
  bool same(const char *s) const {
    return strcasecmp(m_text.c_str(), s) == 0;
  }

  /** Replaces the text with the first len characters of t. */
  void setText(const char *t, int len) { m_text = std::string(t, len); }
  void setText(const char *t) { m_text = t; }
  void setText(const std::string &t) { m_text = t; }
  /** Copies only the text of token; its ids and flag are ignored. */
  void setText(const ScannerToken &token) { m_text = token.m_text; }

  bool check() const { return m_check; }
  /** Raises the check flag; nothing in this class lowers it again. */
  void setCheck() { m_check = true; }

  void setID(int id) { m_id = id; }
  int ID() const { return m_id; }

  void xhpLabel(bool prefix = true);
  bool htmlTrim(); // true if non-empty after trimming
  void xhpDecode(); // xhp supports more entities than html

protected:
  TokenID m_num; // internal token id
  std::string m_text;
  bool m_check;
  int m_id;
};
|
|
|
|
struct LookaheadToken {
|
|
ScannerToken token;
|
|
Location loc;
|
|
int t;
|
|
};
|
|
|
|
struct LookaheadSlab {
|
|
static const int SlabSize = 32;
|
|
LookaheadToken m_data[SlabSize];
|
|
int m_beginPos;
|
|
int m_endPos;
|
|
LookaheadSlab* m_next;
|
|
};
|
|
|
|
struct TokenStore {
|
|
LookaheadSlab* m_head;
|
|
LookaheadSlab* m_tail;
|
|
TokenStore() {
|
|
m_head = nullptr;
|
|
m_tail = nullptr;
|
|
}
|
|
~TokenStore() {
|
|
LookaheadSlab* s = m_head;
|
|
LookaheadSlab* next;
|
|
while (s) {
|
|
next = m_head->m_next;
|
|
delete s;
|
|
s = next;
|
|
}
|
|
}
|
|
bool empty() {
|
|
return !m_head || (m_head->m_beginPos == m_head->m_endPos);
|
|
}
|
|
struct iterator {
|
|
LookaheadSlab* m_slab;
|
|
int m_pos;
|
|
const LookaheadToken& operator*() const {
|
|
return m_slab->m_data[m_pos];
|
|
}
|
|
LookaheadToken& operator*() {
|
|
return m_slab->m_data[m_pos];
|
|
}
|
|
const LookaheadToken* operator->() const {
|
|
return m_slab->m_data + m_pos;
|
|
}
|
|
LookaheadToken* operator->() {
|
|
return m_slab->m_data + m_pos;
|
|
}
|
|
void next() {
|
|
if (!m_slab) return;
|
|
++m_pos;
|
|
if (m_pos < m_slab->m_endPos) return;
|
|
m_slab = m_slab->m_next;
|
|
if (!m_slab) return;
|
|
m_pos = m_slab->m_beginPos;
|
|
return;
|
|
}
|
|
iterator& operator++() {
|
|
next();
|
|
return *this;
|
|
}
|
|
iterator operator++(int) {
|
|
iterator it = *this;
|
|
next();
|
|
return it;
|
|
}
|
|
bool operator==(const iterator& it) const {
|
|
if (m_slab != it.m_slab) return false;
|
|
if (!m_slab) return true;
|
|
return (m_pos == it.m_pos);
|
|
}
|
|
};
|
|
iterator begin();
|
|
iterator end();
|
|
void popFront();
|
|
iterator appendNew();
|
|
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
/**
 * Callback interface that can be attached to a Scanner through
 * Scanner::setListener().
 */
struct TokenListener {
  /**
   * Receives the raw text of a token, its length and its type, and
   * returns an int whose meaning is defined by the implementation.
   */
  virtual int publish(const char *rawText, int rawLeng, int type) = 0;

  virtual ~TokenListener() = default;
};
|
|
|
|
class Scanner {
|
|
public:
|
|
enum Type {
|
|
AllowShortTags = 0x01, // allow <?
|
|
AllowAspTags = 0x02, // allow <% %>
|
|
ReturnAllTokens = 0x08, // return comments and whitespaces
|
|
AllowHipHopSyntax = 0x10, // allow hip-hop specific reserved words
|
|
};
|
|
|
|
public:
|
|
Scanner(const char *filename, int type, bool md5 = false);
|
|
Scanner(std::istream &stream, int type, const char *fileName = "",
|
|
bool md5 = false);
|
|
Scanner(const char *source, int len, int type, const char *fileName = "",
|
|
bool md5 = false);
|
|
void setListener(TokenListener *listener) { m_listener = listener; }
|
|
~Scanner();
|
|
|
|
const std::string &getMd5() const {
|
|
return m_md5;
|
|
}
|
|
|
|
int scanToken(ScannerToken &t, Location &l);
|
|
int fetchToken(ScannerToken &t, Location &l);
|
|
void nextLookahead(TokenStore::iterator& pos);
|
|
bool tryParseNSType(TokenStore::iterator& pos);
|
|
bool tryParseTypeList(TokenStore::iterator& pos);
|
|
bool tryParseFuncTypeList(TokenStore::iterator& pos);
|
|
|
|
/**
|
|
* Called by parser or tokenizer.
|
|
*/
|
|
int getNextToken(ScannerToken &t, Location &l);
|
|
const std::string &getError() const { return m_error;}
|
|
Location *getLocation() const { return m_loc;}
|
|
|
|
/**
|
|
* Implemented in hphp.x, as they need to call yy functions.
|
|
*/
|
|
void init();
|
|
void reset();
|
|
int scan();
|
|
|
|
/**
|
|
* Called by lex.yy.cpp for YY_INPUT (see hphp.x)
|
|
*/
|
|
int read(char *text, int &result, int max);
|
|
|
|
/**
|
|
* Called by scanner rules.
|
|
*/
|
|
bool shortTags() const { return m_type & AllowShortTags;}
|
|
bool aspTags() const { return m_type & AllowAspTags;}
|
|
bool full() const { return m_type & ReturnAllTokens;}
|
|
int lastToken() const { return m_lastToken;}
|
|
void setToken(const char *rawText, int rawLeng, int type = -1) {
|
|
m_token->setText(rawText, rawLeng);
|
|
incLoc(rawText, rawLeng, type);
|
|
}
|
|
void stepPos(const char *rawText, int rawLeng, int type = -1) {
|
|
if (m_type & ReturnAllTokens) {
|
|
m_token->setText(rawText, rawLeng);
|
|
}
|
|
incLoc(rawText, rawLeng, type);
|
|
}
|
|
void setToken(const char *rawText, int rawLeng,
|
|
const char *ytext, int yleng, int type = -1) {
|
|
if (m_type & ReturnAllTokens) {
|
|
m_token->setText(rawText, rawLeng);
|
|
} else {
|
|
m_token->setText(ytext, yleng);
|
|
}
|
|
incLoc(rawText, rawLeng, type);
|
|
}
|
|
void setHashBang(const char *rawText, int rawLeng, int type = -1);
|
|
// also used for YY_FATAL_ERROR in hphp.x
|
|
void error(const char* fmt, ...) ATTRIBUTE_PRINTF(2,3);
|
|
void warn(const char* fmt, ...) ATTRIBUTE_PRINTF(2,3);
|
|
std::string escape(const char *str, int len, char quote_type) const;
|
|
|
|
/**
|
|
* Called by scanner rules for doc comments.
|
|
*/
|
|
void setDocComment(const char *ytext, int yleng) {
|
|
m_docComment.assign(ytext, yleng);
|
|
}
|
|
void setDocComment(const std::string& com) {
|
|
m_docComment = com;
|
|
}
|
|
std::string detachDocComment() {
|
|
std::string dc = m_docComment;
|
|
m_docComment.clear();
|
|
return dc;
|
|
}
|
|
|
|
/**
|
|
* Called by scanner rules for HEREDOC/NOWDOC.
|
|
*/
|
|
void setHeredocLabel(const char *label, int len) {
|
|
m_heredocLabel.assign(label, len);
|
|
}
|
|
int getHeredocLabelLen() const {
|
|
return m_heredocLabel.length();
|
|
}
|
|
const char *getHeredocLabel() const {
|
|
return m_heredocLabel.data();
|
|
}
|
|
void resetHeredoc() {
|
|
m_heredocLabel.clear();
|
|
}
|
|
|
|
/**
|
|
* Enables Hack for HipHop mode (types and other goodies).
|
|
*/
|
|
void setHackMode() {
|
|
m_isHackMode = 1;
|
|
}
|
|
|
|
bool isHackMode() const {
|
|
return m_isHackMode;
|
|
}
|
|
|
|
bool hipHopSyntaxEnabled() const {
|
|
return (m_type & AllowHipHopSyntax) || m_isHackMode;
|
|
}
|
|
|
|
int getLookaheadLtDepth() {
|
|
return m_lookaheadLtDepth;
|
|
}
|
|
|
|
private:
|
|
bool tryParseShapeType(TokenStore::iterator& pos);
|
|
bool tryParseShapeMemberList(TokenStore::iterator& pos);
|
|
|
|
bool nextIfToken(TokenStore::iterator& pos, int tok);
|
|
|
|
void computeMd5();
|
|
|
|
std::string m_filename;
|
|
bool m_streamOwner;
|
|
std::istream *m_stream;
|
|
std::stringstream m_sstream; // XHP helper
|
|
const char *m_source;
|
|
int m_len;
|
|
int m_pos;
|
|
std::string m_md5;
|
|
|
|
enum State {
|
|
Start = -1,
|
|
NoLineFeed,
|
|
HadLineFeed,
|
|
};
|
|
State m_state;
|
|
|
|
int m_type;
|
|
void *m_yyscanner;
|
|
|
|
// These fields are used to temporarily hold pointers to token/location
|
|
// storage while the lexer is active to facilitate functions such as
|
|
// setToken() and incLoc()
|
|
ScannerToken *m_token;
|
|
Location *m_loc;
|
|
|
|
std::string m_error;
|
|
std::string m_docComment;
|
|
std::string m_heredocLabel;
|
|
|
|
// fields for XHP parsing
|
|
int m_lastToken;
|
|
void incLoc(const char *rawText, int rawLeng, int type);
|
|
bool m_isHackMode;
|
|
|
|
TokenStore m_lookahead;
|
|
int m_lookaheadLtDepth;
|
|
TokenListener *m_listener;
|
|
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
}
|
|
|
|
#endif // incl_HPHP_UTIL_PARSER_SCANNER_H_
|