From 3abc997a4e38391a4cf6220d7d995d875fd18aef Mon Sep 17 00:00:00 2001 From: Herman Venter Date: Thu, 9 May 2013 17:07:17 -0700 Subject: [PATCH] Rewrite parser for break point specification. The specification string following a break command was parsed by means of ad-hoc string splitting and sub string finding. This scheme cannot deal with namespaces and class names that contain : (such as xhp classes). There is now a proper parser for break point specifications. Also, xhp class names are mangled before putting them in the BreakPointInfo structure, otherwise breakpoints qualified with xhp classes will not be hit. The syntax for namespaces has been changed to fit the language and if present namespaces are used to mangle function and class names, so that they match the names produced by the regular parser. --- hphp/doc/debugger.refs | 146 +++----- hphp/doc/debugger.start | 14 +- hphp/runtime/eval/debugger/break_point.cpp | 342 ++++++++++++------- hphp/runtime/eval/debugger/break_point.h | 1 + hphp/runtime/eval/debugger/cmd/cmd_break.cpp | 4 +- hphp/runtime/eval/debugger/debugger_base.cpp | 4 +- hphp/test/debugger_tests/break_t3.php | 20 ++ hphp/test/debugger_tests/break_t4.php | 13 + hphp/test/test_debugger.cpp | 15 +- 9 files changed, 314 insertions(+), 245 deletions(-) create mode 100644 hphp/test/debugger_tests/break_t3.php create mode 100644 hphp/test/debugger_tests/break_t4.php diff --git a/hphp/doc/debugger.refs b/hphp/doc/debugger.refs index 4948d0c60..7a1c179ae 100644 --- a/hphp/doc/debugger.refs +++ b/hphp/doc/debugger.refs @@ -11,113 +11,51 @@ continuation prompt like ">>>>". --------------------------- Break Command --------------------------- - [b]reak breaks at current line of code - [b]reak {exp} breaks at matching location - [b]reak [s]tart breaks at start of web request - {url} - [b]reak [e]nd breaks at end of web request - {url} - [b]reak [p]sp breaks at end of psp - {url} + [b]reak breaks at current line of code + [b]reak {exp} breaks at matching location - [b]reak [r]egex breaks at matching regex pattern - {above} - [b]reak [o]nce breaks just once then disables it - {above} + [b]reak [o]nce {above} breaks just once then disables it - [b]reak {above} breaks if condition meets - if {php} - [b]reak {above} breaks and evaluates an expression - && {php} + [b]reak {above} if {php} breaks if condition meets + [b]reak {above} && {php} breaks and evaluates an expression - [b]reak [l]ist lists all breakpoints - [b]reak [c]lear clears the n-th breakpoint on list - {index} - [b]reak [c]lear clears all breakpoints - [a]ll - [b]reak [c]lear clears current breakpoint - [b]reak [t]oggle toggles the n-th breakpoint on list - {index} - [b]reak [t]oggle toggles all breakpoints - [a]ll - [b]reak [t]oggle toggles current breakpoint + [b]reak [l]ist lists all breakpoints + [b]reak [c]lear {index} clears the n-th breakpoint on list + [b]reak [c]lear [a]ll clears all breakpoints + [b]reak [c]lear clears current breakpoint + [b]reak [t]oggle {index} toggles the n-th breakpoint on list + [b]reak [t]oggle [a]ll toggles all breakpoints + [b]reak [t]oggle toggles current breakpoint + [b]reak [e]nable {index} enables the n-th breakpoint on list + [b]reak [e]nable [a]ll enables all breakpoints + [b]reak [e]nable enables current breakpoint + [b]reak [d]isable {index} disables the n-th breakpoint on list + [b]reak [d]isable [a]ll disables all breakpoints + [b]reak [d]isable disables current breakpoint -------------------------- Where to break? -------------------------- -There are many ways to specify a source file location to set a -breakpoint, but it's ONE single string without whitespaces. The complete -format, though every field is optional, looks like this, +There are many ways to specify a source file location to set a breakpoint, +but it's ONE single string without whitespaces. The format looks like this, - {file location}:{call}=>{call}()@{url} - {call}=>{call}():{file location}@{url} - - file location: {file}:{line1}-{line2} - function call: {namespace}::{cls}::{func} - url matching: @{url} - -1) Url has to be specified at end. - -2) Function calls can be 1, 2 or more, matching a call chain. If more -than one function are specified, they don't have to be direct callers to -match. It will match any caller on the stack. - -3) Pay attention to those delimiters, and they are required to tell what -a field should be interpreted as, unless it is a number, then it must be -line numbers. Otherwise, use them to indicate what the names are: - - {file}: filename - {line1}-{line2} any line between them (inclusive) - {line} single line, if without dashes around - {line}: needs colon if anything after - {namespace}::{cls}:: a class in specified namespace - {cls}:: a class in any namespace - {func}() function or method - {func}=>{func}() function called by specified function - {cls}::{method}() class method (static or instance) - @{url} breaks only when this URL is visited + file location: {file}:{line} + function call: {func}() + method invoke: {cls}::{method}() For examples, b mypage.php:123 - b 456 b foo() b MyClass::foo() - b mypage.php:foo() - b html/mypage.php:MyClass::foo() - b mypage.php:123@index.php - -4) You may also use regular expressions to match any of these names, -except line numbers. For examples, - - b r Feed.*::on.*() - -This may match FeedStory::onLoad(), FeedFilter::onclick(), etc.. Note -that it uses PCRE format, not shell format. So you will have to use ".*" -instead of just "*" for wildcard match. - - ------------------------ Special Breakpoints ------------------------ - -There are special breakpoints that can only be set by names: - - start - end - psp - -They represent different time points of a web request. "start" is at -beginning of a web request, when no PHP file is invoked yet, but query -strings and server variables are already prepared. "end" is at end of a -web request, but BEFORE post-send processing (psp). "psp" is at END of -psp, not beginning. To set a breakpoint at beginning of psp, use "end", -because end of a request is the same as beginning of psp. -------------- Conditional Breakpoints and Watchpoints -------------- -Every breakpoint can specify a condition, which is an arbitrary PHP -expression that will be evaulated to TRUE or FALSE. When TRUE, it will -break. When FALSE, it will continue without break. "&&" is similar to -"if", except it will always break, regardless what the expression -returns. This is useful to watch variables at breakpoints. For example, +Every breakpoint can specify a condition, which is an arbitrary PHP expression +that will be evaluated to TRUE or FALSE. When TRUE, it will break. When FALSE, +it will continue without break. "&&" is similar to "if", except it will always +break, regardless what the expression returns. This is useful to watch +variables at breakpoints. For example, b mypage.php:123 && print $a @@ -125,24 +63,22 @@ So every time it breaks at mypage.php line 123, it will print out $a. --------------------- Breakpoint States and List --------------------- -Every breakpoint has 3 states: ALWAYS, ONCE, DISABLED. Without keyword -"once", a breakpoint is in ALWAYS state. ONCE breakpoints will turn into -DISABLED after it's hit once. DISABLED breakpoints will not break, but -they are kept in the list, so one can run 'b l' command and 'b t' -command to toggle their states. +Every breakpoint has 3 states: ALWAYS, ONCE, DISABLED. Without keyword "once", +a breakpoint is in ALWAYS state. ONCE breakpoints will turn into DISABLED after +it's hit once. DISABLED breakpoints will not break, but they are kept in the +list, so one can run 'b l' command and 'b t' command to toggle their states. -Use '[b]reak [l]ist' command to view indices of different breakpoints. -Then use those indices to clear or toggle their states. This list of -breakpoints and their states will remain the same when switching to -different machines, sandboxes and threads. +Use '[b]reak [l]ist' command to view indices of different breakpoints. Then use +those indices to clear or toggle their states. This list of breakpoints and +their states will remain the same when switching to different machines, +sandboxes and threads. -------------------------- Hard Breakpoints -------------------------- -From within PHP code, you can place a function call hphpd_break() to -embed a breakpoint. You may also specify a condition as the function's -parameter, so it breaks when the condition is met. Please read about -this function for more details with '[i]nfo hphpd_break'. - +From within PHP code, you can place a function call hphpd_break() to embed a +breakpoint. You may also specify a condition as the function's parameter, so it +breaks when the condition is met. Please read about this function for more +details with '[i]nfo hphpd_break'. -------------------------- Continue Command -------------------------- @@ -168,7 +104,7 @@ to move down several levels a time. [e]xception {cls} breaks if class of exception throws [e]xception breaks if class of exception throws - {ns}::{cls} + {ns}\{cls} [e]xception error breaks on errors, warnings and notices [e]xception breaks only if url also matches {above}@{url} diff --git a/hphp/doc/debugger.start b/hphp/doc/debugger.start index bf8651e09..89fbb1223 100644 --- a/hphp/doc/debugger.start +++ b/hphp/doc/debugger.start @@ -43,12 +43,12 @@ documentation]] for more details. The command to run a script normally looks like this, - ./hphpi -f myscript.php + hhvm myscript.php Simply add "-m debug" to run the script in debugger, - ./hphpi -m debug -f myscript.php - + hhvm -m debug myscript.php + Once started, set breakpoints like this, hphpd> break myscript.php:10 @@ -92,9 +92,9 @@ Finally, quit debugger, 3. Debugging sandbox -Connect to an HPHPi server from command line, +Connect to an HPHP server from command line, - ./hphpi -m debug --debug-host mymachine.com + hhvm -m debug -h mymachine.com Or, connect from within debugger, @@ -126,8 +126,8 @@ Request" thread. Use "thread" command to display this information, What will debugger use when there is no web request thread that's active, but we need to set a breakpoint? We created so-called "dummy sandbox", purely for taking debugger commands when there is no active -web request. When there is no active request, hit Ctrl-C to break -debugger, and use "thread" to display dummy sandbox thread's +web request. When there is no active request, hit Ctrl-C to break into +the debugger, and use "thread" to display dummy sandbox thread's information. Ctrl-C diff --git a/hphp/runtime/eval/debugger/break_point.cpp b/hphp/runtime/eval/debugger/break_point.cpp index ec55dea8f..e2d676a8a 100644 --- a/hphp/runtime/eval/debugger/break_point.cpp +++ b/hphp/runtime/eval/debugger/break_point.cpp @@ -302,7 +302,7 @@ bool BreakPointInfo::valid() { return false; } } else { - if (m_file.empty() || m_line1 == 0) { + if (m_file.empty() || m_line1 == 0 || m_line2 != m_line1) { return false; } } @@ -529,149 +529,241 @@ std::string BreakPointInfo::desc() const { return ret; } -bool BreakPointInfo::parseLines(const std::string &token) { - TRACE(2, "BreakPointInfo::parseLines\n"); - if (token.empty()) return false; - - for (unsigned int i = 0; i < token.size(); i++) { - char ch = token[i]; - if ((ch < '0' || ch > '9') && ch != '-') { - return false; +void mangleXhpName(const std::string &source, std::string &target) { + auto oldLen = source.length(); + size_t newLen = 0; + size_t index = 0; + for (; index < oldLen; index++) { + auto ch = source[index]; + if (ch != ':' && ch != '-') continue; + newLen = 4+index; + break; + } + if (newLen == 0) { + target = source; + return; + } + for (; index < oldLen; index++) { + if (source[index] == ':') newLen += 2; else newLen +=1; + } + target.clear(); + target.reserve(newLen); + target.append("xhp_"); + for (index = 0; index < oldLen; index++) { + auto ch = source[index]; + if (ch == '-') { + target.push_back('_'); + } else if (ch == ':') { + if (index > 0) { + target.push_back('_'); + target.push_back('_'); + } + } else { + target.push_back(ch); } } - - if (m_line1 || m_line2) { - m_valid = false; // lines are specified twice - return true; - } - - vector lines; - Util::split('-', token.c_str(), lines); - if (lines.empty() || lines.size() > 2 || - (lines.size() == 2 && lines[0].empty() && lines[1].empty())) { - m_valid = false; - return true; - } - - if (lines[0].empty()) { - m_line1 = 1; - } else { - m_line1 = atoi(lines[0].c_str()); - if (m_line1 <= 0) { - m_valid = false; - return true; - } - } - if (lines.size() == 1) { - m_line2 = m_line1; - return true; - } - - if (lines[1].empty()) { - m_line2 = -1; - } else { - m_line2 = atoi(lines[1].c_str()); - if (m_line2 <= 0) { - m_valid = false; - return true; - } - if (m_line2 < m_line1) { - int32_t tmp = m_line1; - m_line1 = m_line2; - m_line2 = tmp; - } - } - - return true; } +int32_t scanName(const std::string &str, int32_t offset) { + auto len = str.length(); + assert(0 <= offset && offset < len); + while (offset < len) { + char ch = str[offset]; + if (ch == ':' || ch == '\\' || ch == ',' || ch == '(' || ch == '=' || + ch == '@') { + if (offset+1 >= len) return offset; + char ch1 = str[offset+1]; + if (ch == ':') { + if (ch1 == ':' || ('0' <= ch1 && ch1 <= '9')) return offset; + } else if (ch == '(') { + if (ch1 == ')') return offset; + } else if (ch == '=') { + if (ch1 == '>') return offset; + } else { + assert (ch == '\\' || ch == ',' || ch == '@'); + return offset; + } + } + offset++; + } + return offset; +} + +int32_t scanNumber(const std::string &str, int32_t offset, int32_t& value) { + value = 0; + auto len = str.length(); + assert(0 <= offset && offset < len); + while (offset < len) { + char ch = str[offset]; + if (ch < '0' || ch > '9') return offset; + value = value*10 + (ch - '0'); + offset++; + } + return offset; +} + +int32_t BreakPointInfo::parseFileLocation(const std::string &str, + int32_t offset) { + auto len = str.length(); + assert(0 <= offset && offset < len); + auto offset1 = scanNumber(str, offset, m_line1); + if (offset1 == offset) return offset; //Did not find a number + m_line2 = m_line1; //so that we always have a range + if (offset1 >= len) return len; //Nothing follows the number + auto ch = str[offset1]; + if (ch == '-') { + if (offset1+1 >= len) return offset; //Invalid file location + auto offset2 = scanNumber(str, offset1+1, m_line2); + if (offset1+1 == offset2) return offset; //Invalid file location + return offset2; + } + return offset1; +} + +/* The parser accepts the following syntax, which harks back to pre VM days + (all components are optional, as long as there is at least one component): + + {file location},{call}=>{call}()@{url} + {call}=>{call}(),{file location}@{url} + + file location: {file}:{line1}-{line2} + call: \{namespace}\{cls}::{func} + + Currently semantic checks will disallow expressions that specify + both file locations and calls. +*/ void BreakPointInfo::parseBreakPointReached(const std::string &exp, const std::string &file) { TRACE(2, "BreakPointInfo::parseBreakPointReached\n"); - string input = exp; - // everything after @ is URL - size_t pos = input.find('@'); - if (pos != string::npos) { - m_url = input.substr(pos + 1); - if (pos == 0) return; - input = input.substr(0, pos); + string name; + auto len = exp.length(); + auto offset0 = 0; + // Skip over a leading backslash + if (len > 0 && exp[0] == '\\') offset0++; + auto offset1 = scanName(exp, offset0); + // check that exp starts with a file or method name + if (offset1 == offset0) goto returnInvalid; + name = exp.substr(offset0, offset1-offset0); + + if (offset0 == 0) { + // parse {file location} if appropriate + if (offset1 < len && exp[offset1] == ',') { + m_file = name; + name.clear(); + offset1 += 1; + } else if (offset1 < len-1 && exp[offset1] == ':' && + exp[offset1+1] != ':') { + m_file = name; + name.clear(); + offset1 += 1; + auto offset2 = parseFileLocation(exp, offset1); + // check for {file}:{something that is not a number} + if (offset2 == offset1) goto returnInvalid; + offset1 = offset2; + if (offset1 >= len) return; // file location without anything else + if (exp[offset1] == '@') goto parseUrl; // file location followed by url + // check for {file}{ something other than @ or :} + if (exp[offset1] != ',') goto returnInvalid; + offset1 += 1; + } } - vector tokens; - bool seenClass = false; - bool seenFile = false; - Util::replaceAll(input, "=>", "=>:"); - Util::split(':', input.c_str(), tokens); - for (unsigned int i = 0; i < tokens.size(); i++) { - const std::string &token = tokens[i]; - if (parseLines(token)) continue; - - // if i'm ended with "::" - if (i+1 < tokens.size() && tokens[i+1].empty()) { - if (seenClass) { - m_valid = false; - return; + // parse {func}() or {func}=>{func}() or {func}=>{func}=>{func}() and so on + while (true) { + if (name.empty()) { + if (len > offset1 && exp[offset1] == '\\') offset1++; + auto offset2 = scanName(exp, offset1); + // check for {something other than a name} + if (offset2 == offset1) goto returnInvalid; + name = exp.substr(offset1, offset2-offset1); + } + if (offset1 < len && exp[offset1] == '\\') { + m_namespace = name; + offset1 += 1; + auto offset2 = scanName(exp, offset1); + // check for {namespace}\{something that is not a name} + if (offset2 == offset1) goto returnInvalid; + name = exp.substr(offset1, offset2-offset1); + offset1 = offset2; + } + if (offset1 < len-1 && exp[offset1] == ':' && exp[offset1+1] == ':') { + m_class = name; + offset1 += 2; + auto offset2 = scanName(exp, offset1); + // check for {namespace}\{class}::{something that is not a name} + if (offset2 == offset1) goto returnInvalid; + name = exp.substr(offset1, offset2-offset1); + offset1 = offset2; + } + // Now we have a namespace, class and func name. + // The namespace only or the namespace and class might be empty. + DFunctionInfoPtr pfunc(new DFunctionInfo()); + if (m_class.empty()) { + if (m_namespace.empty()) + pfunc->m_function = name; + else { + // Yes this does seem beyond strange, but that is what the PHP parser + // does when it sees a function declared inside a namespace, so we + // too have to pretend there is no namespace here. At some point + // the parser hack may have to go away. At that stage, this code + // will have to change, as well as other parts of the debugger. + pfunc->m_function = m_namespace + "\\" + name; } - seenClass = true; - if (i+3 < tokens.size() && tokens[i+3].empty()) { - i += 2; - m_namespace = token; - m_class = tokens[i]; - } else { - m_class = token; + } else { + mangleXhpName(m_class, pfunc->m_class); + if (!m_namespace.empty()) { + // Emulate parser hack. See longer comment above. + pfunc->m_class = m_namespace + "\\" + pfunc->m_class; } - - i++; + pfunc->m_function = name; + } + m_funcs.insert(m_funcs.begin(), pfunc); + m_namespace.clear(); + m_class.clear(); + name.clear(); + // If we are now at () we skip over it and terminate the loop + if (offset1 < len && exp[offset1] == '(') { + // check for {func}{(}{not )} + if (offset1+1 >= len || exp[offset1+1] != ')') goto returnInvalid; + offset1 += 2; + break; // parsed the last (perhaps only) call in a function call chain + } + // If we are now at => we need to carry on with the loop + if (offset1 < len-1 && exp[offset1] == '=' && exp[offset1+1] == '>') { + offset1 += 2; continue; } - - string ending; - if (token.size() >= 2) { - ending = token.substr(token.size() - 2); - } - if (token.size() >= 2 && (ending == "=>" || ending == "()")) { - string func = token.substr(0, token.size() - 2); - if (ending == "=>" && - func.size() >= 2 && func.substr(func.size() - 2) == "()") { - func = func.substr(0, func.size() - 2); + goto returnInvalid; // {func calls}{not () or =>} + } + if (m_file.empty()) { + if (offset1 < len && exp[offset1] == ',') { + auto offset2 = scanName(exp, ++offset1); + // check for {func calls}:{not a filename} + if (offset2 == offset1) goto returnInvalid; + m_file = exp.substr(offset1, offset2-offset1); + offset1 = offset2; + if (offset1 < len && exp[offset1] == ':') { + offset2 = parseFileLocation(exp, offset1+1); + // check for {file}:{something that is not a number} + if (offset2 == offset2+1) goto returnInvalid; + offset1 = offset2; } - if (token.empty()) { - m_valid = false; - return; - } - DFunctionInfoPtr pfunc(new DFunctionInfo()); - pfunc->m_namespace = m_namespace; - pfunc->m_class = m_class; - pfunc->m_function = func; - m_funcs.insert(m_funcs.begin(), pfunc); - m_namespace.clear(); - m_class.clear(); - seenClass = false; - } else { - if (!m_file.empty()) { - m_valid = false; - return; - } - if (seenFile) { - m_valid = false; - return; - } - seenFile = true; - m_file = token; } } - - if (!m_namespace.empty() || !m_class.empty()) { - DFunctionInfoPtr func(new DFunctionInfo()); - func->m_namespace = m_namespace; - func->m_class = m_class; - m_funcs.insert(m_funcs.begin(), func); +parseUrl: + if (offset1 < len-2 && exp[offset1] == '@') { + offset1++; + m_url = exp.substr(offset1, len-offset1); + } else { + // check for unparsed characters at end of exp + if (offset1 != len) goto returnInvalid; } + return; - if (m_line1 && m_file.empty()) { - m_file = file; // default to current file - } +returnInvalid: + m_valid = false; + return; } void BreakPointInfo::parseExceptionThrown(const std::string &exp) { diff --git a/hphp/runtime/eval/debugger/break_point.h b/hphp/runtime/eval/debugger/break_point.h index 552d128fc..87442fc76 100644 --- a/hphp/runtime/eval/debugger/break_point.h +++ b/hphp/runtime/eval/debugger/break_point.h @@ -197,6 +197,7 @@ private: void parseExceptionThrown(const std::string &exp); void parseBreakPointReached(const std::string &exp, const std::string &file); + int32_t parseFileLocation(const std::string &str, int32_t offset); bool parseLines(const std::string &token); bool checkException(CVarRef e); diff --git a/hphp/runtime/eval/debugger/cmd/cmd_break.cpp b/hphp/runtime/eval/debugger/cmd/cmd_break.cpp index b4e9ff2a1..f47b37ce1 100644 --- a/hphp/runtime/eval/debugger/cmd/cmd_break.cpp +++ b/hphp/runtime/eval/debugger/cmd/cmd_break.cpp @@ -93,7 +93,7 @@ bool CmdBreak::help(DebuggerClient *client) { client->helpTitle("Where to break?"); client->helpSection( "There are many ways to specify a source file location to set a " - "breakpoint, but it's ONE single string without whitespaces. The" + "breakpoint, but it's ONE single string without whitespaces. The " "format looks like this,\n" "\n" "\tfile location: {file}:{line}\n" @@ -110,7 +110,7 @@ bool CmdBreak::help(DebuggerClient *client) { client->helpTitle("Conditional Breakpoints and Watchpoints"); client->helpSection( "Every breakpoint can specify a condition, which is an arbitrary PHP " - "expression that will be evaulated to TRUE or FALSE. When TRUE, it will " + "expression that will be evaluated to TRUE or FALSE. When TRUE, it will " "break. When FALSE, it will continue without break. \"&&\" is similar to " "\"if\", except it will always break, regardless what the expression " "returns. This is useful to watch variables at breakpoints. For example,\n" diff --git a/hphp/runtime/eval/debugger/debugger_base.cpp b/hphp/runtime/eval/debugger/debugger_base.cpp index 1c73349b4..082e8936f 100644 --- a/hphp/runtime/eval/debugger/debugger_base.cpp +++ b/hphp/runtime/eval/debugger/debugger_base.cpp @@ -135,7 +135,7 @@ std::string DFunctionInfo::site(std::string &preposition) const { preposition = "at "; if (!m_class.empty()) { if (!m_namespace.empty()) { - ret = m_namespace + "::"; + ret = m_namespace + "\\"; } ret += m_class; if (!m_function.empty()) { @@ -164,7 +164,7 @@ std::string DFunctionInfo::desc(const BreakPointInfo *bpi) const { if (!m_class.empty()) { string cls; if (!m_namespace.empty()) { - cls = bpi->regex(m_namespace) + "::"; + cls = bpi->regex(m_namespace) + "\\"; } cls += bpi->regex(m_class); if (!m_function.empty()) { diff --git a/hphp/test/debugger_tests/break_t3.php b/hphp/test/debugger_tests/break_t3.php new file mode 100644 index 000000000..101b94304 --- /dev/null +++ b/hphp/test/debugger_tests/break_t3.php @@ -0,0 +1,20 @@ +