diff options
Diffstat (limited to 'ACE/ACEXML/common/FileCharStream.cpp')
-rw-r--r-- | ACE/ACEXML/common/FileCharStream.cpp | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/ACE/ACEXML/common/FileCharStream.cpp b/ACE/ACEXML/common/FileCharStream.cpp new file mode 100644 index 00000000000..68178850707 --- /dev/null +++ b/ACE/ACEXML/common/FileCharStream.cpp @@ -0,0 +1,255 @@ +// $Id$ + +#include "ACEXML/common/FileCharStream.h" +#include "ace/ACE.h" +#include "ace/Log_Msg.h" +#include "ace/OS_NS_stdio.h" +#include "ace/OS_NS_sys_stat.h" +#include "ace/Truncate.h" + +#if defined (ACE_USES_WCHAR) +# include "ace/OS_NS_wchar.h" +#endif /* ACE_USES_WCHAR */ + +ACEXML_FileCharStream::ACEXML_FileCharStream (void) + : filename_ (0), encoding_ (0), size_ (0), infile_ (0), peek_ (0) +{ +} + +ACEXML_FileCharStream::~ACEXML_FileCharStream (void) +{ + this->close(); +} + +int +ACEXML_FileCharStream::open (const ACEXML_Char *name) +{ + delete[] this->filename_; + this->filename_ = 0; + + delete[] this->encoding_; + this->encoding_ = 0; + + this->infile_ = ACE_OS::fopen (name, ACE_TEXT ("r")); + if (this->infile_ == 0) + return -1; + + ACE_stat statbuf; + if (ACE_OS::stat (name, &statbuf) < 0) + return -1; + + this->size_ = ACE_Utils::truncate_cast<ACE_OFF_T> (statbuf.st_size); + this->filename_ = ACE::strnew (name); + return this->determine_encoding(); +} + +int +ACEXML_FileCharStream::determine_encoding (void) +{ + if (this->infile_ == 0) + return -1; + + char input[4]; + int retval = 0; + int i = 0; + for (; i < 4 && retval != -1; ++i) + retval = this->getchar_i(input[i]); + if (i < 4) + return -1; + + // Rewind the stream + ACE_OS::rewind (this->infile_); + + const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input); + if (!temp) + return -1; + else + { + delete [] this->encoding_; + this->encoding_ = ACE::strnew (temp); +// ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("File's encoding is %s\n"), +// this->encoding_)); + } + // Move over the byte-order-mark if present. + char ch; + for (int j = 0; j < 3; ++j) + { + if (this->getchar_i (ch) < 0) + return -1; + if (ch == '\xFF' || ch == '\xFE' || ch == '\xEF' || ch == '\xBB' || + ch == '\xBF') + continue; + else + { + ACE_OS::ungetc (ch, this->infile_); + break; + } + } + return 0; +} + +void +ACEXML_FileCharStream::rewind() +{ + if (this->infile_ == 0) + return; + ACE_OS::rewind (this->infile_); + this->determine_encoding(); +} + +int +ACEXML_FileCharStream::available (void) +{ + if (this->infile_ == 0) + return -1; + + long curr; + if ((curr = ACE_OS::ftell (this->infile_)) < 0) + return -1; + return static_cast<int> (this->size_ - curr); +} + +int +ACEXML_FileCharStream::close (void) +{ + if (this->infile_ != 0) + { + ACE_OS::fclose (this->infile_); + this->infile_ = 0; + } + delete[] this->filename_; + this->filename_ = 0; + delete[] this->encoding_; + this->encoding_ = 0; + this->size_ = 0; + this->peek_ = 0; + return 0; +} + + +int +ACEXML_FileCharStream::getchar_i (char& ch) +{ + ch = static_cast<char> (ACE_OS::fgetc (this->infile_)); + return (feof(this->infile_) ? -1 : 0); +} + +int +ACEXML_FileCharStream::read (ACEXML_Char *str, + size_t len) +{ + if (this->infile_ == 0) + return -1; + + return static_cast<int> (ACE_OS::fread (str, sizeof (ACEXML_Char), len, this->infile_)); +} + +int +ACEXML_FileCharStream::get (ACEXML_Char& ch) +{ + if (this->infile_ == 0) + return -1; +#if defined (ACE_USES_WCHAR) + return this->get_i (ch); +#else + ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_); + return (feof(this->infile_) ? -1 : 0); +#endif /* ACE_USES_WCHAR */ +} + +int +ACEXML_FileCharStream::peek (void) +{ + if (this->infile_ == 0) + return -1; +#if defined (ACE_USES_WCHAR) + return this->peek_i(); +#else + + ACEXML_Char ch = static_cast<ACEXML_Char> (ACE_OS::fgetc (this->infile_)); + ACE_OS::ungetc (ch, this->infile_); + return ch; +#endif /* ACE_USES_WCHAR */ +} + +#if defined (ACE_USES_WCHAR) +int +ACEXML_FileCharStream::get_i (ACEXML_Char& ch) +{ + if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0) + { + ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_); + return (feof(this->infile_) ? -1 : 0); + } + // If we have a value in peek_, return it. + if (this->peek_ != 0) + { + ch = this->peek_; + this->peek_ = 0; + return 0; + } + + int BE = (ACE_OS::strcmp (this->encoding_, + ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0; + ACEXML_Char input[2]; + int i = 0; + for (; i < 2 && !feof (this->infile_); ++i) + { + input[i] = ACE_OS::fgetwc (this->infile_); + } + if (i < 2) + { + ch = 0; + return -1; + } + ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]; + return 0; +} + +int +ACEXML_FileCharStream::peek_i (void) +{ + // If we are reading a UTF-8 encoded file, just use the plain unget. + if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0) + { + ACEXML_Char ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_); + ACE_OS::ungetc (ch, this->infile_); + return ch; + } + + // If somebody had already called peek() and not consumed it, return the + // value held in this->peek_. + if (this->peek_ != 0) + return this->peek_; + + // Peek into the stream. This reads two characters off the stream, keeps + // it in peek_. + int BE = (ACE_OS::strcmp (this->encoding_, + ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0; + ACEXML_Char input[2]; + int i = 0; + for (; i < 2 && !feof (this->infile_); ++i) + { + input[i] = ACE_OS::fgetwc (this->infile_); + } + if (i < 2) + { + this->peek_ = 0; + return -1; + } + this->peek_ = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]; + return this->peek_; +} +#endif /* ACE_USES_WCHAR */ + +const ACEXML_Char* +ACEXML_FileCharStream::getEncoding (void) +{ + return this->encoding_; +} + +const ACEXML_Char* +ACEXML_FileCharStream::getSystemId (void) +{ + return this->filename_; +} |