diff options
Diffstat (limited to 'TAO/CIAO/CCF/CCF/IDL2/LexicalAnalyzer.cpp')
-rw-r--r-- | TAO/CIAO/CCF/CCF/IDL2/LexicalAnalyzer.cpp | 362 |
1 files changed, 362 insertions, 0 deletions
diff --git a/TAO/CIAO/CCF/CCF/IDL2/LexicalAnalyzer.cpp b/TAO/CIAO/CCF/CCF/IDL2/LexicalAnalyzer.cpp new file mode 100644 index 00000000000..96b004ab030 --- /dev/null +++ b/TAO/CIAO/CCF/CCF/IDL2/LexicalAnalyzer.cpp @@ -0,0 +1,362 @@ +// file : CCF/IDL2/LexicalAnalyzer.cpp +// author : Boris Kolpackov <boris@dre.vanderbilt.edu> +// cvs-id : $id$ + +#include "CCF/IDL2/LexicalAnalyzer.hpp" + +#include <iostream> + +namespace CCF +{ + namespace IDL2 + { + LexicalAnalyzer:: + LexicalAnalyzer (TokenStream<char>& is) + : loc_ ("C"), + is_ (is), + after_nl (true), + line_ (0) + { + // Keywords (alphabetic order). + + keyword_table_.insert ("abstract" ); + keyword_table_.insert ("attribute"); + keyword_table_.insert ("factory" ); + keyword_table_.insert ("in" ); + keyword_table_.insert ("include" ); + keyword_table_.insert ("inout" ); + keyword_table_.insert ("interface"); + keyword_table_.insert ("local" ); + keyword_table_.insert ("module" ); + keyword_table_.insert ("out" ); + keyword_table_.insert ("sinclude" ); + keyword_table_.insert ("supports" ); + + punctuation_table_.insert (":"); + punctuation_table_.insert (","); + punctuation_table_.insert ("{"); + punctuation_table_.insert ("}"); + punctuation_table_.insert ("("); + punctuation_table_.insert (")"); + punctuation_table_.insert (";"); + } + + + LexicalAnalyzer::int_type LexicalAnalyzer:: + get () + { + int_type i; + + if (buffer_.empty ()) + { + i = is_.next (); + } + else + { + i = buffer_.front (); + buffer_.pop_front (); + } + + if (after_nl) + { + after_nl = false; + line_++; + } + + if (i == '\n') + { + after_nl = true; + } + + return i; + } + + LexicalAnalyzer::int_type LexicalAnalyzer:: + peek () + { + int_type i; + + if (buffer_.empty ()) + { + i = is_.next (); + buffer_.push_back (i); + } + else + { + i = buffer_.front (); + } + + return i; + } + + LexicalAnalyzer::int_type LexicalAnalyzer:: + peek_more () + { + int_type i; + + if (buffer_.size () < 2) + { + i = is_.next (); + buffer_.push_back (i); + } + else + { + i = buffer_.at (1); + } + + return i; + } + + LexicalAnalyzer::char_type LexicalAnalyzer:: + to_char_type (int_type i) + { + return is_.to_char_type (i); + } + + + TokenPtr LexicalAnalyzer:: + next () + { + while (true) + { + int_type i = get (); + + if (is_.eos(i)) + { + return TokenPtr (new EndOfStream (line_)); + } + + char_type c = to_char_type (i); + + // Handling spaces + if (std::isspace (c, loc_)) continue; + + // Handling C++ comments + if (c == '/' && peek () == '/') + { + cxx_comment (c); + continue; + } + + // Handling C comments + if (c == '/' && peek () == '*') + { + c_comment (c); + continue; + } + + if (std::isalpha (c, loc_) || c == '_' || (c == ':' && peek () == ':')) + { + return identifier (c); + } + + TokenPtr token; + + if (string_literal (c, token)) return token; + + if (punctuation (c, token)) return token; + + + std::cerr << line_ << ": error: unable to derive any token from \'" + << c << "\'" << std::endl; + + //@@ I should return something special here. Perhaps error recovery + // should happen. + return TokenPtr (new EndOfStream (line_)); + } + } + + void LexicalAnalyzer:: + cxx_comment (char_type c) + { + while (c != '\n') + { + int_type i = get (); + + if (is_.eos (i)) + { + std::cerr << "warning: no new line at the end of file" << std::endl; + + //@@ I wonder if it's ok to call get () again after getting eof. + //@@ no, it's not: std::istream throws exception (when enabled) on + // second attempt. + break; + } + c = to_char_type (i); + } + } + + void LexicalAnalyzer:: + c_comment (char_type c) + { + get (); // get '*' + + do + { + int_type i = get (); + + if (is_.eos (i)) + { + std::cerr << "error: end of file before C-style comment finished" + << std::endl; + return; + + //@@ I wonder if it's ok to call get () again after getting eof. + } + c = to_char_type (i); + + //std::cerr << "lexer: c_comment: read character \'" << c << "\'" + // << std::endl; + } + while (c != '*' || peek () != '/'); + + //std::cerr << "lexer: c_comment: finished C-comment \'" << c + // << "\',\'" << to_char_type (peek ()) + // << "\'" << std::endl; + + get (); // get '/' + } + + TokenPtr LexicalAnalyzer:: + identifier (char_type c) + { + std::string lexeme; + + enum + { + SIMPLE, + SCOPED, + OTHER + } type = SIMPLE; + + if (c == ':') + { + get (); + lexeme = "::"; + type = SCOPED; + } + else + { + lexeme += c; + } + + while (true) + { + int_type i = peek (); + + if (is_.eos (i)) + { + std::cerr << "warning: no new line at the end of file" << std::endl; + break; + } + + c = to_char_type (i); + + //std::cerr << "lexer::identifier: peeking on \'" << c + // << "\'; current lexeme \'" << lexeme << "\'" + // << std::endl; + + if (std::isalnum (c, loc_) || c == '_') + { + get (); + lexeme += c; + continue; + } + + if (c == ':' && peek_more () == ':') + { + get (); + get (); + lexeme += "::"; + if (type == SIMPLE) type = OTHER; + continue; + } + + break; + } + + //std::cerr << "lexer: found identifier with lexeme \'" + // << lexeme << "\'" << std::endl; + + if (type == SIMPLE) + { + KeywordTable::const_iterator i = keyword_table_.find (lexeme); + + if (i != keyword_table_.end ()) + { + return TokenPtr (new Keyword (*i, line_)); + } + else + { + return TokenPtr (new SimpleIdentifier (lexeme, line_)); + } + + + // otherwise deafult to Identifier + } + else if (type == SCOPED) + { + return TokenPtr (new ScopedIdentifier (lexeme, line_)); + } + else //type == OTHER + { + return TokenPtr (new Identifier (lexeme, line_)); + } + } + + bool LexicalAnalyzer:: + punctuation (char_type c, TokenPtr& token) + { + PunctuationTable::const_iterator i = punctuation_table_.begin (); + + while (true) + { + for (;i != punctuation_table_.end () && (*i)[0] != c; i++); + + if (i == punctuation_table_.end ()) return false; + + // if it's a two-character punctuation + if (i->size () == 2) + { + if ((*i)[1] != peek ()) + { + // move on to the next candidate + i++; + continue; + } + } + + token = TokenPtr (new Punctuation (*i, line_));; + return true; + } + } + + bool LexicalAnalyzer:: + string_literal (char_type c, TokenPtr& token) + { + if (c != '\"') return false; + + std::string lexeme; + + while (true) + { + int_type i = get (); + + if (is_.eos (i)) + { + std::cerr << "warning: end of file while reading string literal" + << std::endl; + break; + } + + c = to_char_type (i); + + if (c == '\"') break; + else lexeme += c; + } + + token = TokenPtr (new StringLiteral (lexeme, line_)); + return true; + } + } +} |