diff options
Diffstat (limited to 'CIAO/CCF/CCF/CompilerElements/Preprocessor.cpp')
-rw-r--r-- | CIAO/CCF/CCF/CompilerElements/Preprocessor.cpp | 955 |
1 files changed, 955 insertions, 0 deletions
diff --git a/CIAO/CCF/CCF/CompilerElements/Preprocessor.cpp b/CIAO/CCF/CCF/CompilerElements/Preprocessor.cpp new file mode 100644 index 00000000000..e52750ba74e --- /dev/null +++ b/CIAO/CCF/CCF/CompilerElements/Preprocessor.cpp @@ -0,0 +1,955 @@ +// file : CCF/CompilerElements/Preprocessor.cpp +// author : Boris Kolpackov <boris@dre.vanderbilt.edu> +// cvs-id : $Id$ + +#include <deque> +#include <locale> +#include <string> + +#include "CCF/CompilerElements/Preprocessor.hpp" + +#include <iostream> + +using std::cerr; +using std::endl; +using std::string; + +namespace CCF +{ + namespace CompilerElements + { + namespace CPP + { + /* + namespace Phase2 + { + class TokenPrototype + { + public: + struct Type + { + enum Value + { + punctuation, + keyword, + identifier + eos + }; + }; + + TokenPrototype (Type::Value type, string const& lexeme) + : type_ (type), lexeme_ (lexeme) + { + } + + Type::Value + type () const + { + return type_; + } + + string + lexeme () const + { + return lexeme_; + } + + private: + Type::Value type_; + string lexeme_; + }; + + bool + operator== (TokenPrototype const& a, TokenPrototype const& b) + { + return a.type () == b.type () && a.lexeme () == b.lexeme (); + } + + class Token : public TokenPrototype + { + public: + Token (Type::Value type, + string const& lexeme, + unsigned long line) + : TokenPrototype (type, lexeme), line_ (line) + { + } + + unsigned long + line () const + { + return line_; + } + + private: + unsigned long line_; + }; + + + class Scanner : public TokenStream<Token> + { + public: + Scanner (TokenStream<CPP::Token>& is) + : is_ (is) + { + } + + virtual Token + next () + { + try + { + InToken t (get_ns ()); + + switch (t) + { + case '#': + { + return Token (Token::Type::punctuation, "#", t.line ()); + } + case '\n': + { + return Token (Token::Type::punctuation, "\n", t.line ()); + } + } + } + catch (EOS const&) + { + } + } + + private: + typedef + CPP::Token + InToken; + + class EOS {}; + + InToken + get () throw (EOS) + { + if (ibuffer_.empty ()) + { + InToken t (is_.next ()); + if (t == InToken::eos) throw EOS (); + return t; + } + else + { + InToken t (ibuffer_.front ()); + ibuffer_.pop_front (); + + if (t == InToken::eos) throw EOS (); + return t; + } + } + + // Get first non-space token + // + InToken + get_ns () throw (EOS) + { + InToken t (get ()); + + while (is_space (t)) t = get (); + } + + bool + is_space (InToken const& t) + { + return t == ' ' || t == '\t'; + } + + private: + TokenStream<CPP::Token>& is_; + std::deque<InTokent> ibuffer_; + }; + } + */ + + // PreprocessorImpl + // + // + class Preprocessor::PreprocessorImpl : public TokenStream<Token> + { + public: + virtual + ~PreprocessorImpl () + { + } + + PreprocessorImpl (TokenStream<char>& is, Symbols const& symbols) + : loc_ ("C"), + state_ (State::preprocessing), + ln_ (1), + is_ (is), + dsa_ (copy_), + symbols_ (symbols), + balance_ (0), + skip_balance_ (0) + { + } + + virtual Token + next () + { + while (true) + { + if (line_.empty ()) + scan_line (); + + Token t (line_.front ()); + line_.pop_front (); + + if (skip_balance_ != 0 && t != Token::eos) + continue; + + return t; + } + } + + private: + + // Line-scanning + // + // + + class DequeStreamAdapter : public TokenStream<Token> + { + public: + DequeStreamAdapter (std::deque<Token>& d) + : d_ (d) + { + } + + virtual Token + next () + { + Token t (d_.front ()); + d_.pop_front (); + return t; + } + + private: + std::deque<Token>& d_; + }; + + void + scan_line () + { + bool eos (false); + + while (line_.empty () && !eos) + { + for (;;) + { + Token t (next_token ()); + + if (t == Token::eos) + { + eos = true; + + // Add new line if it's missing at the end of line. + // + if (!line_.empty () && line_.back () != '\n') + { + //@@ should issue a warning here + // + line_.push_back (Token ('\n', line_.back ().line ())); + } + } + else + { + line_.push_back (t); + } + + if (eos || t == '\n') break; + } + + /* + cerr << "line: \'"; + + for (std::deque<Token>::iterator + i (line_.begin ()), e (line_.end ()); i != e; ++i) + { + if (*i != Token::eos) cerr << *i; + } + + cerr << '\'' << endl; + */ + + // Check if it is a PP directive + // + + if (!line_.empty ()) + { + copy_ = line_; + ls_buffer_.clear (); + copy_.push_back (Token::eos); // add eos + + try + { + Token t (ls_get_ns ()); + + // cerr << "t = \'" << t << '\'' << endl; + + if (t == '#') + { + scan_directive (); + } + } + catch (EOS const&) + { + // cerr << "EOS" << endl; + + // error condition? + Token t ('\n', line_.front ().line ()); + line_.clear (); + line_.push_back (t); + } + } + } + + if (eos) + { + if (balance_ > 0) + { + cerr << "missing endif directive at the end of file" << endl; + } + + line_.push_back (Token::eos); + } + } + + + void + scan_directive () + { + Token t (ls_get_ns ()); + + string lexeme; + + if (std::isalpha<char> (t, loc_)) + { + for (;std::isalpha<char> (t, loc_); t = ls_get ()) + lexeme += t; + + ls_ret (t); + } + + // cerr << "lexeme " << lexeme << endl; + + if (lexeme == "include") + { + scan_include (); + return; + } + else if (lexeme == "if") + { + ++balance_; + } + else if (lexeme == "ifdef" || lexeme == "ifndef") + { + ++balance_; + + string symbol; + Token t (ls_get_ns ()); + + + if (is_first_id_char (t)) + { + for (;is_id_char (t); t = ls_get ()) + symbol += t; + ls_ret (t); + } + + if (!symbol.empty ()) + { + //cerr << "symbol " << symbol << endl; + + if (skip_balance_ == 0) // Unless we are already skipping. + { + bool defined (symbols_.find (symbol) != symbols_.end ()); + + if ((!defined && lexeme == "ifdef") || + (defined && lexeme == "ifndef")) + skip_balance_ = balance_; + } + } + else + { + cerr << t.line () << ": no symbol specified for " << lexeme + << " directive" << endl; + throw EOS (); //@@ tmp + } + } + else if (lexeme == "elif" || lexeme == "else") + { + // For now we treat elif just like else. + // + if (skip_balance_ == balance_) + skip_balance_ = 0; + else if (skip_balance_ == 0) // Unless we are already skipping. + skip_balance_ = balance_; // Start skipping now. + } + else if (lexeme == "endif") + { + if (skip_balance_ == balance_) + skip_balance_ = 0; + + if (balance_ > 0) + --balance_; + else + { + cerr << t.line () << ": extraneous endif directive" << endl; + throw EOS (); //@@ tmp + } + } + else if (lexeme == "error") + { + if (skip_balance_ == 0) + { + string msg; + Token t (ls_get_ns ()); + + if (t != '\n') + { + for (;t != '\n'; t = ls_get ()) + msg += t; + ls_ret (t); + } + + cerr << t.line () << ": error: " << msg << endl; + throw EOS (); //@@ tmp + } + } + + // By default we replace this directive with a newline. + // + { + Token t ('\n', line_.front ().line ()); + line_.clear (); + line_.push_back (t); + } + } + + void + scan_include () + { + Token t (ls_get_ns ()); + + char finilizer; + + switch (t) + { + case '\"': + { + finilizer = '\"'; + break; + + } + case '<': + { + finilizer = '>'; + break; + } + default: + { + cerr << t.line () << ": invalid include directive" << endl; + throw EOS (); //@@ tmp + } + } + + string path; + + for (t = ls_get (); t != finilizer && t != '\n'; t = ls_get ()) + { + path += t; + } + + if (t != finilizer) + { + cerr << t.line () << ": invalid include directive" << endl; + throw EOS (); // @@ tmp + } + + string repl (finilizer == '>' ? "__binclude \"" : "__qinclude \""); + repl += path + "\";\n"; + + line_.clear (); + + for (string::const_iterator i (repl.begin ()), e (repl.end ()); + i != e; ++i) + { + line_.push_back (Token (*i, t.line ())); + } + } + + // Char-scanning + // + // + + Token + next_token () + { + if (!obuffer_.empty ()) + { + Token t (obuffer_.front ()); + obuffer_.pop_front (); + return t; + } + + try + { + switch (state_) + { + case State::preprocessing: + { + return preprocessing (); + } + case State::string_literal: + { + return string_literal (); + } + case State::char_literal: + { + return char_literal (); + } + } + } + catch (EOS const&) + { + } + + return Token::eos; + } + + Token + preprocessing () + { + unsigned long l (ln_); + char_type c (get ()); + + switch (c) + { + case '\'': + { + state_ = State::char_literal; + break; + } + case '\"': + { + state_ = State::string_literal; + break; + } + case '\\': + { + return escape (); + } + case '\n': + { + ++ln_; + break; + } + case '/': + { + return slash (); + } + } + + return Token (c, l); + } + + + Token + string_literal () + { + unsigned long l (ln_); + char_type c (get ()); + + switch (c) + { + case '\"': + { + state_ = State::preprocessing; + break; + } + case '\\': + { + return escape (); + } + case '\n': + { + ++ln_; + break; + } + } + + return Token (c, l); + } + + + Token + char_literal () + { + unsigned long l (ln_); + char_type c (get ()); + + switch (c) + { + case '\'': + { + state_ = State::preprocessing; + break; + } + case '\\': + { + return escape (); + } + case '\n': + { + ++ln_; + break; + } + } + + return Token (c, l); + } + + + Token + escape () + { + try + { + char_type c (get ()); + + switch (c) + { + case '\n': + { + ++ln_; + return next_token (); + } + case '\'': + { + if (state_ != State::char_literal && + state_ != State::string_literal) + { + state_ = State::char_literal; + } + + put (Token ('\'', ln_)); + return Token ('\\', ln_); + } + case '\"': + { + if (state_ != State::char_literal && + state_ != State::string_literal) + { + state_ = State::string_literal; + } + + put (Token ('\"', ln_)); + return Token ('\\', ln_); + } + default: + { + ret (c); + return Token ('\\', ln_); + } + } + } + catch (EOS const&) + { + ret_eos (); + return Token ('\\', ln_); + } + } + + + Token + slash () + { + unsigned long l (ln_); + + try + { + char_type c (get ()); + + switch (c) + { + case '*': // C comment + { + return c_comment (); + } + case '/': // C++ comment + { + return cxx_comment (); + } + default: + { + ret (c); + break; + } + } + } + catch (EOS const&) + { + ret_eos (); + } + + return Token ('/', l); + } + + + Token + c_comment () + { + // Replace the whole C comment with the single space. + // + unsigned long l (ln_); + + try + { + char_type c (get ()); + + for (bool done (false); !done;) + { + switch (c) + { + case '\n': + { + ++ln_; + break; + } + case '*': + { + c = get (); + if (c == '/') done = true; + continue; + } + } + + c = get (); + } + } + catch (EOS const&) + { + put (Token::eos); + } + + return Token (' ', l); + } + + + Token + cxx_comment () + { + // Replace the whole C++ comment with the single space. + // + unsigned long l (ln_); + + try + { + for (bool done (false); !done;) + { + char_type c (get ()); + + switch (c) + { + case '\n': + { + ++ln_; + done = true; + break; + } + } + } + } + catch (EOS const&) + { + put (Token::eos); + } + + return Token ('\n', l); + } + + // Low-level utility functions. + // + private: + typedef + TokenStream<char>::int_type + int_type; + + typedef + TokenStream<char>::char_type + char_type; + + char_type + to_char_type (int_type i) + { + return TokenStream<char>::to_char_type (i); + } + + bool + eos (int_type i) + { + return TokenStream<char>::eos () == i; + } + + class EOS {}; + + char_type + get () throw (EOS) + { + int_type i; + + if (ibuffer_.empty ()) + { + i = is_.next (); + } + else + { + i = ibuffer_.front (); + ibuffer_.pop_front (); + } + + if (eos (i)) throw EOS (); + + return to_char_type (i); + } + + void + ret (char_type t) + { + ibuffer_.push_front (t); + } + + void + ret_eos () + { + ibuffer_.push_front (TokenStream<char>::eos ()); + } + + void + put (Token const& t) + { + obuffer_.push_back (t); + } + + + // line-scanner utility functions + // + // + void + ls_ret (Token const& t) + { + ls_buffer_.push_front (t); + } + + Token + ls_get () throw (EOS) + { + if (ls_buffer_.empty ()) + { + Token t (dsa_.next ()); + if (t == Token::eos) throw EOS (); + return t; + } + else + { + Token t (ls_buffer_.front ()); + ls_buffer_.pop_front (); + + if (t == Token::eos) throw EOS (); + return t; + } + } + + // Get first non-space token + // + Token + ls_get_ns () throw (EOS) + { + Token t (ls_get ()); + + while (ls_is_space (t)) t = ls_get (); + return t; + } + + bool + ls_is_space (Token const& t) + { + return t == ' ' || t == '\t'; + } + + bool + is_first_id_char (Token const& t) + { + return std::isalpha<char> (t, loc_) || t == '_'; + } + + bool + is_id_char (Token const& t) + { + return std::isalnum<char> (t, loc_) || t == '_'; + } + + private: + struct State + { + enum Value + { + preprocessing, + string_literal, + char_literal + }; + }; + + std::locale loc_; + + State::Value state_; + unsigned long ln_; + + TokenStream<char>& is_; + + std::deque<int_type> ibuffer_; + std::deque<Token> obuffer_; + + std::deque<Token> line_; + + std::deque<Token> copy_; + DequeStreamAdapter dsa_; + std::deque<Token> ls_buffer_; + + Symbols symbols_; + unsigned long balance_; // Current #if*/#endif balance. + unsigned long skip_balance_; // #if*/#endif balance at which we began + // skipping. 0 indicates no skipping. + + }; + + + // Preprocessor + // + // + Preprocessor:: + ~Preprocessor () + { + } + + Preprocessor:: + Preprocessor (TokenStream<char>& is, Symbols const& symbols) + : impl_ (new PreprocessorImpl (is, symbols)) + { + } + + Token Preprocessor:: + next () + { + return impl_->next (); + } + } + } +} |