#include #include #include "../shared/MiscSupport.h" #include "../shared/SimpleLogFile.h" #include "../shared/ThreadMonitor.h" #include "Parse.h" namespace Parse { ///////////////////////////////////////////////////////////////////// // Exception ///////////////////////////////////////////////////////////////////// const char* Exception::what() const throw() { return _message.c_str(); } std::string Exception::getBeforeError() const { return std::string(_toParse, 0, _location); } std::string Exception::getAfterError() const { return std::string(_toParse, _location); } ///////////////////////////////////////////////////////////////////// // Token ///////////////////////////////////////////////////////////////////// std::string Token::asString(Type type) { switch (type) { case ttPunctuation: return "ttPunctuation"; case ttFilter: return "ttFilter"; case ttAltField: return "ttAltField"; case ttSymbol: return "ttSymbol"; case ttInteger: return "ttInteger"; case ttDecimal: return "ttDecimal"; case ttString: return "ttString"; case ttAlert: return "ttAlert"; case ttEnd: return "ttEnd"; } return "Invalid: " + ntoa(type); } std::string Token::shortDebug() const { return TclList()<>(-n&63)); } void TreeNode::createHashCode() const { // Warning: Asking for the type of an object doesn't work as expected // until the last constructor is called. Same goes for calling a // virtual method. // // Notice the thread safety here. It is possible that two or more threads // will each try to compute this themselves at the same time. It doesn't // matter. They should come up with the same result. The only down side // is that we might do some extra work. But at worst we'd do about the // same work as if we didn't try to cache this value. uint64_t hashCode = typeid(*this).hash_code() ^ getHashContributaion(); for (Tree const &child : getChildren()) { hashCode = rotl64(hashCode, 3); hashCode ^= child->hash_code(); } if (hashCode == 0) // 0 is reserved to say that we've never tried to compute the hash code. // We could have tried to implement this as a boolean and an integer. // But that would have made the threading more difficult. You'd have // to make sure that you read and wrote those in the right order, and // that the compiler doesn't change the order. hashCode = 1; (uint64_t &)_hashCode = hashCode; } int TreeNode::compareChildren(Args const &a, Args const &b) { // It doesn't really matter what order I choose. It only has to be // consistent. Checking the length first should make things faster than // the alternative. if (a.size() < b.size()) return -1; if (b.size() < a.size()) return 1; Args::const_iterator itA = a.begin(); Args::const_iterator itB = b.begin(); while (true) { if (itA == a.end()) return 0; int result = compare(*itA, *itB); if (result) return result; itA++; itB++; } } int TreeNode::compareSameType(TreeNode const &other) const { return compareChildren(getChildren(), other.getChildren()); } int TreeNode::compare(TreeNode const &other) const { if (this == &other) // Same pointer. An object is always equal to itself. That has to be // the case! And presumably we'll see this a lot. We try to reuse // Tree objects as much as we can. In any case, it's cheap to do the // test, and when it helps it helps a lot. return 0; if (typeid(*this) == typeid(other)) return compareSameType(other); else if (typeid(*this).before(typeid(other))) return -1; else return 1; } int TreeNode::compare(Tree a, Tree b) { if (a == b) return 0; if (!a) return -1; if (!b) return 1; return a->compare(*b); } Tree TreeNode::construct(Args const &children) const { // This should probably throw and exception. This will probaly be called // by replace() which will detect the error and report something, but if // we reported something in construct() would could report more details. return NULL; } std::string TreeNode::shortDebug() const { std::string result = typeid(*this).name(); result += '('; bool first = true; Args const &orig = getChildren(); for (Args::const_iterator it = orig.begin(); it != orig.end(); it++) { if (first) first = false; else result += ", "; if (!*it) result += "NULL"; else result += (*it)->shortDebug(); } result += ')'; return result; } void TreeNode::assertDone() const { for (Args::const_iterator it = _children.begin(); it != _children.end(); it++) (*it)->assertDone(); } TreeNode::Args TreeNode::optimizeChildren(OptimizationContext context) const { Args result; result.reserve(_children.size()); for (Args::const_iterator it = _children.begin(); it != _children.end(); it++) result.push_back(optimize(*it, context)); return result; } Tree TreeNode::optimize(OptimizationContext context) const { Args newChildren = optimizeChildren(OptimizationContext::NORMAL); if (compareChildren(_children, newChildren)) return construct(newChildren); else return NULL; } Tree TreeNode::optimize(Tree input, OptimizationContext context) { const Tree result = input->optimize(context); if (result) return result; else return input; } int TreeNode::height() const { int maxChild = 0; for (Args::const_iterator it = getChildren().begin(); it != getChildren().end(); it++) maxChild = std::max(maxChild, (*it)->height()); return maxChild + 1; } ///////////////////////////////////////////////////////////////////// // NullValue ///////////////////////////////////////////////////////////////////// Tree const &NullValue::noToken() { static const Tree value = new NullValue(Token::EMPTY); return value; } ///////////////////////////////////////////////////////////////////// // IntTreeNode ///////////////////////////////////////////////////////////////////// int IntTreeNode::compareSameType(TreeNode const &other) const { const int64_t otherValue = dynamic_cast< IntTreeNode const & >(other)._value; if (_value < otherValue) return -1; else if (_value > otherValue) return 1; else return 0; } std::string IntTreeNode::shortDebug() const { return ntoa(_value); } Tree IntTreeNode::create(Token const &token) { assert(token.getType() == Token::ttInteger); int64_t value; try { nfroma(value, token.getValue()); } catch (InvalidConversionException) { throw token.makeException("Not a valid integer."); } return new IntTreeNode(token, value); } Tree IntTreeNode::create(int64_t value) { return new IntTreeNode(Token::EMPTY, value); } const Tree IntTreeNode::TRUE = IntTreeNode::create(1); const Tree IntTreeNode::FALSE = IntTreeNode::create(0); ///////////////////////////////////////////////////////////////////// // DoubleTreeNode ///////////////////////////////////////////////////////////////////// int DoubleTreeNode::compareSameType(TreeNode const &other) const { const double otherValue = dynamic_cast< DoubleTreeNode const & >(other)._value; if (_value < otherValue) return -1; else if (_value > otherValue) return 1; else return 0; } std::string DoubleTreeNode::shortDebug() const { return ntoa(_value); } Tree DoubleTreeNode::create(Token const &token) { assert(token.getType() == Token::ttDecimal); double value; try { nfroma(value, token.getValue()); } catch (InvalidConversionException) { throw token.makeException("Not a valid number."); } if (std::isfinite(value)) return new DoubleTreeNode(token, value); else { ThreadMonitor::find().increment("DoubleTreeNode Token NULL"); return new NullValue(token); } } Tree DoubleTreeNode::create(double value) { if (std::isfinite(value)) return new DoubleTreeNode(Token::EMPTY, value); else { ThreadMonitor::find().increment("DoubleTreeNode double NULL"); return NullValue::noToken(); } } ///////////////////////////////////////////////////////////////////// // StringTreeNode ///////////////////////////////////////////////////////////////////// int StringTreeNode::compareSameType(TreeNode const &other) const { const std::string &otherValue = dynamic_cast< StringTreeNode const & >(other).getValue(); return getValue().compare(otherValue); } std::string StringTreeNode::shortDebug() const { return getToken().getOriginal(); } std::string const &StringTreeNode::getValue() const { return getToken().getValue(); } ///////////////////////////////////////////////////////////////////// // SimpleFunctionNode ///////////////////////////////////////////////////////////////////// int SimpleFunctionNode::compareSameType(TreeNode const &other) const { int result = getToken().getValue().compare(other.getToken().getValue()); if (result) return result; return TreeNode::compareSameType(other); } Tree SimpleFunctionNode::construct(Args const &children) const { return new SimpleFunctionNode(getToken(), children); } std::string SimpleFunctionNode::shortDebug() const { TclList encoded; for (Args::const_iterator it = getChildren().begin(); it != getChildren().end(); it++) if (!*it) encoded<<"NULL"; else encoded<<(*it)->shortDebug(); return TclList()<<(getToken().getValue())< otherToken.getType()) return 1; else if (token.getType() < otherToken.getType()) return -1; else return token.getValue().compare(otherToken.getValue()); } std::string SimpleItemNode::shortDebug() const { return getToken().getOriginal(); } void SimpleItemNode::assertDone() const { switch (getToken().getType()) { case Token::ttFilter: // This could also mean that a filter is being used where it is not // expected, like in the definitions of a standard filter. But that's // an internal error. The message is right for user errors, and // at least we get an error message in either case. throw getToken().makeException("Unknown filter: " + getToken().getOriginal() + '.'); case Token::ttAltField: throw getToken().makeException("Alt fields are not appropriate here."); case Token::ttAlert: throw getToken().makeException("Alerts are not appropriate here."); default: // We expect this to be a Token::ttSymbol, but we don't enforce that. throw getToken().makeException("Unknown identifier: “" + getToken().getOriginal() + "”."); } } ///////////////////////////////////////////////////////////////////// // Parser ///////////////////////////////////////////////////////////////////// pcre *Parser::tryCompile(std::string const &pattern) { const char *errptr; int erroffset; pcre *result = pcre_compile(pattern.c_str(), PCRE_UTF8, &errptr, &erroffset, NULL); if (!result) throw (Exception("Unable to compile “" + pattern + "”, error “" + errptr + "” @ " + ntoa(erroffset), "[Initialization]", 0)); return result; } void Parser::initParser() { _whiteSpacePattern = tryCompile("^\\s+"); _filterPattern = tryCompile("^\\[([0-9a-zA-Z_]+)\\]"); _altFieldPattern = tryCompile("^\\[\\=([0-9a-zA-Z]+)\\]"); _symbolPattern = tryCompile("^(([a-zA-Z_][0-9a-zA-Z_]*)|(\\$\\$\\$))"); _numberPattern = tryCompile("^[0-9]+(\\.[0-9]+)?"); _alertPattern = tryCompile("^\\{([A-Z][A-Z0-9]*(,[A-Z][A-Z0-9]*)*)\\}"); _quotedStringPattern = tryCompile("^\"([^\"\\\\\\0]|(\\\\[\"\\\\/bfnrt]))*\""); _punctuation2.insert("!="); _punctuation2.insert("<="); _punctuation2.insert(">="); _punctuation2.insert("=="); _punctuation2.insert("||"); _punctuation2.insert("&&"); _punctuation2.insert("??"); } Exception Parser::tokenizerException(std::string const &message) const { throw (Exception(message, _toParse, _position)); } bool Parser::matched(int rc) const { if (rc == PCRE_ERROR_NOMATCH) return false; if (rc > 0) return true; if (rc == PCRE_ERROR_BADUTF8) throw tokenizerException("Not valid UTF8"); throw tokenizerException("Unexpected error in regular expression match: " + ntoa(rc)); return false; // Only to make the compiler happy. } Token Parser::makeTokenAll(Token::Type type) { assert(_ovector[0] == 0); std::string value = std::string(_toParse, _position, _ovector[1]); Token result(type, _position, value, value, _toParse); _position += value.length(); return result; } Token Parser::makeTokenFirst(Token::Type type) { assert(_ovector[0] == 0); std::string orig = std::string(_toParse, _position, _ovector[1]); std::string value = std::string(_toParse, _position + _ovector[2], _ovector[3] - _ovector[2]); Token result(type, _position, orig, value, _toParse); _position += orig.length(); return result; } Token Parser::makeTokenQuotedString() { const std::string orig = std::string(_toParse, _position + _ovector[0], _ovector[1] - _ovector[0]); const std::string initial = std::string(orig, 1, orig.length() - 2); std::string value; value.reserve(initial.size()); for (const char *c_str = initial.c_str(); *c_str; c_str++) { if (*c_str == '\\') { c_str++; switch (*c_str) { case '\"': case '\\': case '/': value += *c_str; break; case 'b': value += '\b'; break; case 'f': value += '\f'; break; case 'n': value += '\n'; break; case 'r': value += '\r'; break; case 't': value += '\t'; break; // We still need \u followed by 4 hex digits. default: // We should never get here. This could be an assertion. throw Exception("Not a valid string.", _toParse, _position); } } else { value += *c_str; } } Token result(Token::ttString, _position, orig, value, _toParse); _position += orig.length(); return result; } int Parser::match(pcre *pattern) { return pcre_exec(pattern, NULL, _toParse.c_str() + _position, _toParse.size() - _position, 0, 0, _ovector, 30); } Parser::TokenList Parser::makeTokenList() { TokenList result; _position = 0; const char *c_str = _toParse.c_str(); while (_position < _toParse.size()) { const std::string prefix2 = std::string(_toParse, _position, 2); if (_punctuation2.count(prefix2)) { result.push_back(Token(Token::ttPunctuation, _position, prefix2, prefix2, _toParse)); _position += 2; continue; } switch(c_str[_position]) { case '!': case '<': case '>': case '+': case '-': case '*': case '/': case '(': case ')': case ',': std::string value = std::string(_toParse, _position, 1); result.push_back(Token(Token::ttPunctuation, _position, value, value, _toParse)); _position++; continue; } int rc; rc = match(_whiteSpacePattern); if (matched(rc)) { _position += _ovector[1] - _ovector[0]; continue; } rc = match(_filterPattern); if (matched(rc)) { result.push_back(makeTokenFirst(Token::ttFilter)); continue; } rc = match(_altFieldPattern); if (matched(rc)) { result.push_back(makeTokenFirst(Token::ttAltField)); continue; } rc = match(_symbolPattern); if (matched(rc)) { result.push_back(makeTokenAll(Token::ttSymbol)); continue; } rc = match(_numberPattern); if (matched(rc)) { Token::Type type; if (rc == 1) type = Token::ttInteger; else type = Token::ttDecimal; result.push_back(makeTokenAll(type)); continue; } rc = match(_alertPattern); if (matched(rc)) { result.push_back(makeTokenFirst(Token::ttAlert)); continue; } rc = match(_quotedStringPattern); if (matched(rc)) { result.push_back(makeTokenQuotedString()); continue; } throw tokenizerException("Syntax Error"); } return result; } pcre *Parser::_whiteSpacePattern; pcre *Parser::_filterPattern; pcre *Parser::_altFieldPattern; pcre *Parser::_symbolPattern; pcre *Parser::_numberPattern; pcre *Parser::_quotedStringPattern; pcre *Parser::_alertPattern; std::set< std::string > Parser::_punctuation2; pthread_once_t Parser::_hasBeenInitialized = PTHREAD_ONCE_INIT; Token const &Parser::peekToken(int skip) const { const int index = _nextToken + skip; if ((index < 0) || (index >= (int)_tokens.size())) return _endToken; else return _tokens[index]; } Token const &Parser::popToken() { _nextToken++; return peekToken(-1); } Exception Parser::currentTokenException(std::string const &message) const { return peekToken().makeException(message); } Tree Parser::parseExpression() { return parseBinaryOps1(); } Tree Parser::parseBinaryOps1() { Tree result = parseBinaryOps2(); Token const &token = peekToken(); if (token.matchesP("??")) { popToken(); Args args; args.push_back(result); args.push_back(parseBinaryOps1()); result = new SimpleFunctionNode(token, args); } return result; } Tree Parser::parseBinaryOps2() { Tree result = parseBinaryOps3(); while (true) { Token const &token = peekToken(); if (token.matchesP("||")) { popToken(); Args args; args.push_back(result); args.push_back(parseBinaryOps3()); result = new SimpleFunctionNode(token, args); } else break; } return result; } Tree Parser::parseBinaryOps3() { Tree result = parseBinaryOps4(); while (true) { Token const &token = peekToken(); if (token.matchesP("&&")) { popToken(); Args args; args.push_back(result); args.push_back(parseBinaryOps4()); result = new SimpleFunctionNode(token, args); } else break; } return result; } Tree Parser::parseBinaryOps4() { Tree result = parseBinaryOps5(); while (true) { Token const &token = peekToken(); if (token.matchesP("<=") || token.matchesP(">=") || token.matchesP("!=") || token.matchesP("==") || token.matchesP("<") || token.matchesP(">")) { popToken(); Args args; args.push_back(result); args.push_back(parseBinaryOps5()); result = new SimpleFunctionNode(token, args); } else break; } return result; } Tree Parser::parseBinaryOps5() { Tree result = parseBinaryOps6(); while (true) { Token const &token = peekToken(); if (token.matchesP("+") || token.matchesP("-")) { popToken(); Args args; args.push_back(result); args.push_back(parseBinaryOps6()); result = new SimpleFunctionNode(token, args); } else break; } return result; } Tree Parser::parseBinaryOps6() { Tree result = parseUnaryOps(); while (true) { Token const &token = peekToken(); if (token.matchesP("*") || token.matchesP("/")) { popToken(); Args args; args.push_back(result); args.push_back(parseUnaryOps()); result = new SimpleFunctionNode(token, args); } else break; } return result; } Tree Parser::parseUnaryOps() { Token const &token = peekToken(); if (token.matchesP("!") || token.matchesP("-")) { popToken(); Args args; args.push_back(parseUnaryOps()); return new SimpleFunctionNode(token, args); } else return parseBase(); } Tree Parser::parseBase() { Token const &token = popToken(); switch (token.getType()) { case Token::ttPunctuation: if (token.getValue() == "(") { Tree result = parseExpression(); Token const &close = popToken(); if (!close.matchesP(")")) throw close.makeException("Expecting “)”."); return result; } break; case Token::ttFilter: case Token::ttAltField: case Token::ttAlert: return new SimpleItemNode(token); break; case Token::ttSymbol: { Token const &open = peekToken(); if (open.matchesP("(")) { // Starting a function call. popToken(); Args args; if (peekToken().matchesP(")")) // Empty argument list. popToken(); else { // At least one argument. args.push_back(parseExpression()); while (true) { Token const &next = popToken(); if (next.matchesP(",")) args.push_back(parseExpression()); else if (next.matchesP(")")) break; else throw next.makeException("Expecting “,” or “)”."); } } return new SimpleFunctionNode(token, args); } else return new SimpleItemNode(token); break; } case Token::ttInteger: return IntTreeNode::create(token); break; case Token::ttDecimal: return DoubleTreeNode::create(token); break; case Token::ttString: return new StringTreeNode(token); break; case Token::ttEnd: throw token.makeException("Unexpected end of expression."); break; } // This probably comes from the punctuation case, above. That's the only // one that might fall through to here. I considered moving this code // up the to there, but it seems good to have an error handler down here // just in case we missed something. throw token.makeException("Syntax error."); } Parser::Parser(std::string const &toParse) : _toParse(toParse), _endToken(Token::ttEnd, toParse.length(), "", "", toParse) { int result = pthread_once(&_hasBeenInitialized, initParser); assert(result == 0); } Tree Parser::parse() { _tokens = makeTokenList(); _nextToken = 0; Tree result = parseExpression(); Token const &next = peekToken(); if (next.getType() != Token::ttEnd) throw currentTokenException("Unexpected code after the expression."); return result; } std::string Parser::getTokenListDebug() { TclList result; TokenList tokenList = makeTokenList(); for (TokenList::const_iterator it = tokenList.begin(); it != tokenList.end(); it++) result<shortDebug(); return result; } ///////////////////////////////////////////////////////////////////// // ReplaceRule ///////////////////////////////////////////////////////////////////// Exception ReplaceRule::makeException(std::string const &message) const { return Exception(message + " Location: " + shortDebug(), "", 0); } static const bool DETIALED_REPLACE_DEBUG = false; void ReplaceRule::replaceSafely(Tree &tree, Status &status) const { Tree orig = tree; replaceImpl(tree, status); if (!tree) throw makeException("Output cannot be NULL."); if (DETIALED_REPLACE_DEBUG && (orig != tree)) sendToLogFile(TclList()<shortDebug()<shortDebug() <::const_iterator it = rules.begin(); it != rules.end(); it++) { (*it)->replaceSafely(tree, status); if (status != sContinue) return; } } std::string RuleList::shortDebug() const { std::string result = "RuleList"; if (!name.empty()) { result += '('; result += name; result += ')'; } return result; } ///////////////////////////////////////////////////////////////////// // SymbolRules ///////////////////////////////////////////////////////////////////// void SymbolRules::replaceImpl(Tree &tree, Status &status) const { if (SimpleItemNode const *node = tree->as< SimpleItemNode >()) { Token const &token = node->getToken(); if (token.getType() == Token::ttSymbol) { if (ReplaceRule::Ref rule = getPropertyDefault(rules, token.getValue())) { rule->replaceSafely(tree, status); } } } } std::string SymbolRules::shortDebug() const { std::string result = "SymbolRules"; if (!name.empty()) { result += '('; result += name; result += ')'; } return result; } ///////////////////////////////////////////////////////////////////// // FilterRules ///////////////////////////////////////////////////////////////////// void FilterRules::replaceImpl(Tree &tree, Status &status) const { if (SimpleItemNode const *node = tree->as< SimpleItemNode >()) { Token const &token = node->getToken(); if (token.getType() == Token::ttFilter) { if (Tree replacement = getPropertyDefault(filters, token.getValue())) { tree = replacement; status = sRetryRecursive; } } } } std::string FilterRules::shortDebug() const { std::string result = "FilterRules"; if (!name.empty()) { result += '('; result += name; result += ')'; } return result; } ///////////////////////////////////////////////////////////////////// // replace() ///////////////////////////////////////////////////////////////////// Tree replace(Tree tree, ReplaceRule const &rule) { ReplaceRule::Status status = ReplaceRule::sRetryRecursive; while (true) { if (status == ReplaceRule::sRetryRecursive) { std::vector< Tree > children = tree->getChildren(); bool anyChanges = false; for (std::vector< Tree >::iterator it = children.begin(); it != children.end(); it++) { Tree newValue = replace(*it, rule); if (newValue != *it) { anyChanges = true; *it = newValue; } } if (anyChanges) tree = tree->construct(children); } status = ReplaceRule::sContinue; Tree previousValue = tree; rule.replaceSafely(tree, status); if (status == ReplaceRule::sStop) return tree; if ((status == ReplaceRule::sContinue) && (tree == previousValue)) return tree; } } }