summaryrefslogtreecommitdiff
path: root/deps/v8/test/cctest/disasm-regex-helper.h
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/test/cctest/disasm-regex-helper.h')
-rw-r--r--deps/v8/test/cctest/disasm-regex-helper.h318
1 files changed, 318 insertions, 0 deletions
diff --git a/deps/v8/test/cctest/disasm-regex-helper.h b/deps/v8/test/cctest/disasm-regex-helper.h
new file mode 100644
index 0000000000..c50b27a36b
--- /dev/null
+++ b/deps/v8/test/cctest/disasm-regex-helper.h
@@ -0,0 +1,318 @@
+// Copyright 2019 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_CCTEST_DISASM_REGEX_HELPER_H_
+#define V8_CCTEST_DISASM_REGEX_HELPER_H_
+
+#include <iostream>
+#include <map>
+#include <regex> // NOLINT(build/c++11)
+#include <vector>
+
+#include "src/base/logging.h"
+#include "src/base/macros.h"
+
+namespace v8 {
+namespace internal {
+
+// This class provides methods for regular expression matching with an extra
+// feature of user defined named capture groups which are alive across
+// regex search calls.
+//
+// The main use case for the class is to test multiple-line assembly
+// output with an ability to express dataflow or dependencies by allowing single
+// definition / multiple use symbols. When processing output lines and trying to
+// match them against the set of patterns a user can define a named group - a
+// symbol - and a regex for matching it. If the regex with the definitions is
+// matched then whenever this symbol appears again (no redefinitions though) in
+// the following patterns the parser will replace the symbol reference in the
+// pattern by an actual literal value matched during processing symbol
+// definition. This effectively checks that all of the output lines have
+// the same literal for the described symbol. To track the symbols this class
+// implements a simple single-definition symbol table.
+//
+// Example: Let's consider a case where we want to test that the assembly
+// output consists of two instructions - a load and a store; we also want
+// to check that the loaded value is used as store value for the store,
+// like here:
+//
+// ldr x3, [x4]
+// str x3, [x5]
+//
+// Using special syntax for symbol definitions and uses one could write the
+// following regex making sure that the load register is used by the store:
+//
+// 'ldr <<NamedReg:x[0-9]+>>, \[x[0-9]+\]'
+// 'str <<NamedReg>>, \[x[0-9]+\]'
+//
+// See 'ProcessPattern' for more details.
+class RegexParser {
+ public:
+ RegexParser()
+ // Regex to parse symbol references: definitions or uses.
+ // <<SymbolName[:'def regex']>>
+ : symbol_ref_regex_("<<([a-zA-Z_][a-zA-Z0-9_]*)(?::(.*?))?>>") {}
+
+ // Status codes used for return values and error diagnostics.
+ enum class Status {
+ kSuccess = 0,
+ kNotMatched,
+ kWrongPattern,
+ kDefNotFound,
+ kRedefinition,
+ };
+
+ // This class holds info on a symbol definition.
+ class SymbolInfo {
+ public:
+ explicit SymbolInfo(const std::string& matched_value)
+ : matched_value_(matched_value) {}
+
+ // Returns an actual matched value for the symbol.
+ const std::string& matched_value() const { return matched_value_; }
+
+ private:
+ std::string matched_value_;
+ };
+
+ // This class holds temporary info on a symbol while processing an input line.
+ class SymbolVectorElem {
+ public:
+ SymbolVectorElem(bool is_def, const std::string& symbol_name)
+ : is_def_(is_def), symbol_name_(symbol_name) {}
+
+ bool is_def() const { return is_def_; }
+ const std::string& symbol_name() const { return symbol_name_; }
+
+ private:
+ bool is_def_;
+ std::string symbol_name_;
+ };
+
+ using SymbolMap = std::map<std::string, SymbolInfo>;
+ using MatchVector = std::vector<SymbolVectorElem>;
+
+ // Tries to match (actually search, similar to std::regex_search) the line
+ // against the pattern (possibly containing symbol references) and if
+ // matched commits symbol definitions from the pattern to the symbol table.
+ //
+ // Returns: status of the matching attempt.
+ //
+ // Important: the format of pattern regexes is based on std::ECMAScript syntax
+ // (http://www.cplusplus.com/reference/regex/ECMAScript/) with a few extra
+ // restrictions:
+ // * no backreference (or submatch) groups
+ // - when a group (e.g. "(a|b)+") is needed use a passive group
+ // (e.g. "(?:a|b)+").
+ // * special syntax for symbol definitions: <<Name:regex>>
+ // - 'Name' must be a C-style variable name ([a-zA-Z_][a-zA-Z0-9_]*).
+ // - 'regex' - is a regex for the actual literal expected in the symbol
+ // definition line. It must not contain any symbol references.
+ // * special syntax for symbol uses <<Name>>
+ //
+ // Semantic restrictions on symbol references:
+ // * symbols mustn't be referenced before they are defined.
+ // - a pattern R1 which uses symbol 'A' mustn't be processed if a pattern
+ // R2 with the symbol 'A' definition hasn't been yet matched (R1!=R2).
+ // - A pattern mustn't define a symbol and use it inside the same regex.
+ // * symbols mustn't be redefined.
+ // - if a line has been matched against a pattern R1 with symbol 'A'
+ // then other patterns mustn't define symbol 'A'.
+ // * symbol definitions are only committed and registered if the whole
+ // pattern is successfully matched.
+ //
+ // Notes:
+ // * A pattern may contain uses of the same or different symbols and
+ // definitions of different symbols however if a symbol is defined in the
+ // pattern it can't be used in the same pattern.
+ //
+ // Pattern example: "<<A:[0-9]+>> <<B>>, <<B>> <<C:[a-z]+>>" (assuming 'B' is
+ // defined and matched).
+ Status ProcessPattern(const std::string& line, const std::string& pattern) {
+ // Processed pattern which is going to be used for std::regex_search; symbol
+ // references are replaced according to the reference type - def or use.
+ std::string final_pattern;
+ // A vector of records for symbols references in the pattern. The format is
+ // {is_definition, symbol_name}.
+ MatchVector symbols_refs;
+ Status status =
+ ParseSymbolsInPattern(pattern, &final_pattern, &symbols_refs);
+ if (status != Status::kSuccess) {
+ return status;
+ }
+
+ std::smatch match;
+ if (!std::regex_search(line, match, std::regex(final_pattern))) {
+ return Status::kNotMatched;
+ }
+
+ // This checks that no capturing groups were used in the pattern except
+ // for those added by ParseSymbolsInPattern.
+ if (symbols_refs.size() != (match.size() - 1)) {
+ return Status::kWrongPattern;
+ }
+
+ status = CheckSymbolsMatchedValues(symbols_refs, match);
+ if (status != Status::kSuccess) {
+ return status;
+ }
+
+ CommitSymbolsDefinitions(symbols_refs, match);
+
+ return Status::kSuccess;
+ }
+
+ // Returns whether a symbol with the given name is in the symbol table.
+ bool IsSymbolDefined(const std::string& symbol_name) const {
+ auto symbol_map_iter = map_.find(symbol_name);
+ return symbol_map_iter != std::end(map_);
+ }
+
+ // Returns the matched value for a symbol; the symbol must be defined.
+ std::string GetSymbolMatchedValue(const std::string& symbol_name) const {
+ DCHECK(IsSymbolDefined(symbol_name));
+ return map_.find(symbol_name)->second.matched_value();
+ }
+
+ // Prints the symbol table.
+ void PrintSymbols(std::ostream& os) const {
+ os << "Printing symbol table..." << std::endl;
+ for (const auto& t : map_) {
+ const std::string& sym_name = t.first;
+ const SymbolInfo& sym_info = t.second;
+ os << "<<" << sym_name << ">>: \"" << sym_info.matched_value() << "\""
+ << std::endl;
+ }
+ }
+
+ protected:
+ // Fixed layout for the symbol reference match.
+ enum SymbolMatchIndex {
+ kFullSubmatch = 0,
+ kName = 1,
+ kDefRegex = 2,
+ kSize = kDefRegex + 1,
+ };
+
+ // Processes a symbol reference: a definition appends its regex as a capture
+ // group; a use appends the previously matched value, unescaped, as a capture
+ // group. Also records the reference in the symbol references vector.
+ Status ProcessSymbol(const std::smatch& match, MatchVector* symbols_refs,
+ std::string* new_pattern) const {
+ bool is_def = match[SymbolMatchIndex::kDefRegex].length() != 0;  // "<<Name:>>" (empty regex) counts as a use.
+ const std::string& symbol_name = match[SymbolMatchIndex::kName];
+
+ if (is_def) {
+ // Reject the definition if this pattern already references the symbol.
+ auto symbol_iter =
+ std::find_if(symbols_refs->begin(), symbols_refs->end(),
+ [symbol_name](const SymbolVectorElem& ref) -> bool {
+ return ref.symbol_name() == symbol_name;
+ });
+ if (symbol_iter != std::end(*symbols_refs)) {
+ return Status::kRedefinition;
+ }
+
+ symbols_refs->emplace_back(true, symbol_name);
+ new_pattern->append("(");
+ new_pattern->append(match[SymbolMatchIndex::kDefRegex]);
+ new_pattern->append(")");
+ } else {
+ auto symbol_map_iter = map_.find(symbol_name);
+ if (symbol_map_iter == std::end(map_)) {
+ return Status::kDefNotFound;
+ }
+
+ const SymbolInfo& sym_info = symbol_map_iter->second;
+ new_pattern->append("(");
+ new_pattern->append(sym_info.matched_value());
+ new_pattern->append(")");
+
+ symbols_refs->emplace_back(false, symbol_name);
+ }
+ return Status::kSuccess;
+ }
+
+ // Parses the input pattern regex, processes symbol defs and uses inside
+ // it, fills a raw pattern used for std::regex_search.
+ Status ParseSymbolsInPattern(const std::string& pattern,
+ std::string* raw_pattern,
+ MatchVector* symbols_refs) const {
+ std::string::const_iterator low = pattern.cbegin();
+ std::string::const_iterator high = pattern.cend();
+ std::smatch match;
+
+ while (low != high) {
+ // Search for a symbol reference.
+ if (!std::regex_search(low, high, match, symbol_ref_regex_)) {
+ raw_pattern->append(low, high);
+ break;
+ }
+
+ if (match.size() != SymbolMatchIndex::kSize) {
+ return Status::kWrongPattern;
+ }
+
+ raw_pattern->append(match.prefix());
+
+ Status status = ProcessSymbol(match, symbols_refs, raw_pattern);
+ if (status != Status::kSuccess) {
+ return status;
+ }
+ low = match[SymbolMatchIndex::kFullSubmatch].second;
+ }
+ return Status::kSuccess;
+ }
+
+ // Checks that no definition in the pattern redefines a committed symbol; for
+ // uses, DCHECKs that the matched text equals the committed definition value.
+ Status CheckSymbolsMatchedValues(const MatchVector& symbols_refs,
+ const std::smatch& match) const {
+ // There is a one-to-one correspondence between matched subexpressions and
+ // symbol references in the vector (by construction).
+ for (size_t vec_pos = 0, size = symbols_refs.size(); vec_pos < size;
+ vec_pos++) {
+ auto elem = symbols_refs[vec_pos];
+ auto map_iter = map_.find(elem.symbol_name());
+ if (elem.is_def()) {
+ if (map_iter != std::end(map_)) {
+ return Status::kRedefinition;
+ }
+ } else {
+ DCHECK(map_iter != std::end(map_));
+ // The use was substituted with the definition's matched value literal.
+ DCHECK_EQ(map_iter->second.matched_value().compare(match[vec_pos + 1]),
+ 0);
+ }
+ }
+ return Status::kSuccess;
+ }
+
+ // Commits symbol definitions and their matched values to the symbol table.
+ void CommitSymbolsDefinitions(const MatchVector& groups_vector,
+ const std::smatch& match) {
+ for (size_t vec_pos = 0, size = groups_vector.size(); vec_pos < size;
+ vec_pos++) {
+ size_t match_pos = vec_pos + 1;
+ auto elem = groups_vector[vec_pos];
+ if (elem.is_def()) {
+ auto emplace_res =
+ map_.emplace(elem.symbol_name(), SymbolInfo(match[match_pos]));
+ USE(emplace_res); // Silence warning about unused variable.
+ DCHECK(emplace_res.second == true);
+ }
+ }
+ }
+
+ const std::regex symbol_ref_regex_;
+ SymbolMap map_;
+};
+
+bool CheckDisassemblyRegexPatterns(
+ const char* function_name, const std::vector<std::string>& patterns_array);
+
+} // namespace internal
+} // namespace v8
+
+#endif // V8_CCTEST_DISASM_REGEX_HELPER_H_