// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lexer.h"

#include <stdio.h>

#include "eval_env.h"
#include "util.h"

bool Lexer::Error(const string& message, string* err) {
  // Compute line/column.
  int line = 1;
  const char* context = input_.str_;
  for (const char* p = input_.str_; p < last_token_; ++p) {
    if (*p == '\n') {
      ++line;
      context = p + 1;
    }
  }
  int col = last_token_ ? (int)(last_token_ - context) : 0;

  char buf[1024];
  snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
  *err = buf;
  *err += message + "\n";

  // Add some context to the message.
  const int kTruncateColumn = 72;
  if (col > 0 && col < kTruncateColumn) {
    int len;
    bool truncated = true;
    for (len = 0; len < kTruncateColumn; ++len) {
      if (context[len] == 0 || context[len] == '\n') {
        truncated = false;
        break;
      }
    }
    *err += string(context, len);
    if (truncated)
      *err += "...";
    *err += "\n";
    *err += string(col, ' ');
    *err += "^ near here";
  }

  return false;
}

Lexer::Lexer(const char* input) {
  Start("input", input);
}

void Lexer::Start(StringPiece filename, StringPiece input) {
  filename_ = filename;
  input_ = input;
  ofs_ = input_.str_;
  last_token_ = NULL;
}

const char* Lexer::TokenName(Token t) {
  switch (t) {
  case ERROR:    return "lexing error";
  case BUILD:    return "'build'";
  case COLON:    return "':'";
  case DEFAULT:  return "'default'";
  case EQUALS:   return "'='";
  case IDENT:    return "identifier";
  case INCLUDE:  return "'include'";
  case INDENT:   return "indent";
  case NEWLINE:  return "newline";
  case PIPE2:    return "'||'";
  case PIPE:     return "'|'";
  case POOL:     return "'pool'";
  case RULE:     return "'rule'";
  case SUBNINJA: return "'subninja'";
  case TEOF:     return "eof";
  }
  return NULL;  // not reached
}

const char* Lexer::TokenErrorHint(Token expected) {
  switch (expected) {
  case COLON:
    return " ($ also escapes ':')";
  default:
    return "";
  }
}

string Lexer::DescribeLastError() {
  if (last_token_) {
    switch (last_token_[0]) {
    case '\r':
      return "carriage returns are not allowed, use newlines";
    case '\t':
      return "tabs are not allowed, use spaces";
    }
  }
  return "lexing error";
}

void Lexer::UnreadToken() {
  ofs_ = last_token_;
}

Lexer::Token Lexer::ReadToken() {
  const char* p = ofs_;
  const char* q;
  const char* start;
  Lexer::Token token;
  for (;;) {
    start = p;
    /*!re2c
    re2c:define:YYCTYPE = "unsigned char";
    re2c:define:YYCURSOR = p;
    re2c:define:YYMARKER = q;
    re2c:yyfill:enable = 0;

    nul = "\000";
    simple_varname = [a-zA-Z0-9_-]+;
    varname = [a-zA-Z0-9_.-]+;

    [ ]*"#"[^\000\r\n]*"\n" { continue; }
    [ ]*[\n]   { token = NEWLINE;  break; }
    [ ]+       { token = INDENT;   break; }
    "build"    { token = BUILD;    break; }
    "pool"     { token = POOL;     break; }
    "rule"     { token = RULE;     break; }
    "default"  { token = DEFAULT;  break; }
    "="        { token = EQUALS;   break; }
    ":"        { token = COLON;    break; }
    "||"       { token = PIPE2;    break; }
    "|"        { token = PIPE;     break; }
    "include"  { token = INCLUDE;  break; }
    "subninja" { token = SUBNINJA; break; }
    varname    { token = IDENT;    break; }
    nul        { token = TEOF;     break; }
    [^]        { token = ERROR;    break; }
    */
  }

  last_token_ = start;
  ofs_ = p;
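  // Whitespace after most tokens is consumed here so the next ReadToken()
  // call starts on the following token.  NEWLINE and TEOF are excluded:
  // spaces at the start of the next line are significant (they become an
  // INDENT token), and at EOF there is nothing left to skip.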
  if (token != NEWLINE && token != TEOF)
    EatWhitespace();
  return token;
}

bool Lexer::PeekToken(Token token) {
  Token t = ReadToken();
  if (t == token)
    return true;
  UnreadToken();
  return false;
}

void Lexer::EatWhitespace() {
  const char* p = ofs_;
  for (;;) {
    ofs_ = p;
    /*!re2c
    [ ]+    { continue; }
    "$\n"   { continue; }
    nul     { break; }
    [^]     { break; }
    */
  }
}

bool Lexer::ReadIdent(string* out) {
  const char* p = ofs_;
  for (;;) {
    const char* start = p;
    /*!re2c
    varname {
      out->assign(start, p - start);
      break;
    }
    [^] { return false; }
    */
  }
  ofs_ = p;
  EatWhitespace();
  return true;
}

// Read a $-escaped string.  In path mode the string also ends at an
// unescaped space, ':', or '|'; otherwise it ends at the newline.
bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
  const char* p = ofs_;
  const char* q;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    [^$ :\r\n|\000]+ {
      eval->AddText(StringPiece(start, p - start));
      continue;
    }
    [ :|\n] {
      if (path) {
        p = start;
        break;
      } else {
        if (*start == '\n')
          break;
        eval->AddText(StringPiece(start, 1));
        continue;
      }
    }
    "$$" {
      eval->AddText(StringPiece("$", 1));
      continue;
    }
    "$ " {
      eval->AddText(StringPiece(" ", 1));
      continue;
    }
    "$\n"[ ]* {
      continue;
    }
    "${"varname"}" {
      eval->AddSpecial(StringPiece(start + 2, p - start - 3));
      continue;
    }
    "$"simple_varname {
      eval->AddSpecial(StringPiece(start + 1, p - start - 1));
      continue;
    }
    "$:" {
      eval->AddText(StringPiece(":", 1));
      continue;
    }
    "$". {
      last_token_ = start;
      return Error("bad $-escape (literal $ must be written as $$)", err);
    }
    nul {
      last_token_ = start;
      return Error("unexpected EOF", err);
    }
    [^] {
      last_token_ = start;
      return Error(DescribeLastError(), err);
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  if (path)
    EatWhitespace();
  // Non-path strings end in newlines, so there's no whitespace to eat.
  return true;
}
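
// Illustrative usage sketch (not part of this file; the input string is
// made up for the example): drive the lexer over an in-memory manifest
// until EOF or a lexing error.  The buffer must end in a NUL byte, since
// TEOF is matched by the "\000" rule above; string literals satisfy this.
//
//   Lexer lexer("build foo.o: cc foo.c\n");
//   string err;
//   for (;;) {
//     Lexer::Token t = lexer.ReadToken();
//     if (t == Lexer::ERROR) {
//       lexer.Error(lexer.DescribeLastError(), &err);
//       break;  // err now holds "input:<line>: ..." plus context.
//     }
//     if (t == Lexer::TEOF)
//       break;
//     printf("%s\n", lexer.TokenName(t));
//   }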