diff options
author | kitty <kitty@ae88bc3d-4319-0410-8dbf-d08b4c9d3795> | 2002-11-15 01:07:33 +0000 |
---|---|---|
committer | kitty <kitty@ae88bc3d-4319-0410-8dbf-d08b4c9d3795> | 2002-11-15 01:07:33 +0000 |
commit | aa7790e112469bd6560c249454c58b283cc05fdc (patch) | |
tree | 909714668679bad7b34f8870593f838f76ae0024 /ACEXML/parser | |
parent | c1ae0478065bfb4136ce1c4ea861428a6b0d1138 (diff) | |
download | ATCD-aa7790e112469bd6560c249454c58b283cc05fdc.tar.gz |
ChangeLogTag: Tue Nov 12 19:48:34 2002 Krishnakumar B <kitty@cs.wustl.edu>
Diffstat (limited to 'ACEXML/parser')
-rw-r--r-- | ACEXML/parser/parser/ParserInternals.cpp | 409 | ||||
-rw-r--r-- | ACEXML/parser/parser/ParserInternals.h | 109 | ||||
2 files changed, 518 insertions, 0 deletions
diff --git a/ACEXML/parser/parser/ParserInternals.cpp b/ACEXML/parser/parser/ParserInternals.cpp
new file mode 100644
index 00000000000..e6a75bd6d3f
--- /dev/null
+++ b/ACEXML/parser/parser/ParserInternals.cpp
@@ -0,0 +1,409 @@
+// $Id$
+
+#include "ACEXML/parser/parser/ParserInternals.h"
+
+
+// Names of the five predefined XML entities.  The order matches predef_val_,
+// so predef_val_[i] is the replacement text for predef_ent_[i].
+const ACEXML_Char* ACEXML_ParserInt::predef_ent_[] = {
+  ACE_TEXT ("amp"),
+  ACE_TEXT ("lt"),
+  ACE_TEXT ("gt"),
+  ACE_TEXT ("apos"),
+  ACE_TEXT ("quot")
+};
+
+// Replacement text for each entry of predef_ent_, in the same order.
+const ACEXML_Char* ACEXML_ParserInt::predef_val_[] = {
+  ACE_TEXT ("&"),
+  ACE_TEXT ("<"),
+  ACE_TEXT (">"),
+  ACE_TEXT ("'"),
+  ACE_TEXT ("\"")
+};
+
+// Optimize away the most common cases: base_char_table_[c] is 1 when code
+// point c (0x00 - 0xFF) is a BaseChar and 0 otherwise.  Any compiler worth its
+// salt should turn the lookup into a single memory access.
+
+const ACEXML_Char ACEXML_ParserInt::base_char_table_[] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
+  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
+  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
+  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
+};
+
+#if defined (ACE_USES_WCHAR)
+
+// Return 1 if c is a BaseChar (XML 1.0 production [85]), 0 otherwise.  Code
+// points below 0x0100 are answered from base_char_table_; the rest are split
+// into three bands so that only one group of range tests runs per call.
+// NOTE(review): c indexes the table unchecked, which assumes ACEXML_Char is
+// never negative here -- confirm for platforms where wchar_t is signed.
+int
+ACEXML_ParserInt::isBasechar_i (const ACEXML_Char c)
+{
+  if (c < 0x0100)
+    return ACEXML_ParserInt::base_char_table_[c];
+  else if (c < 0x0905)
+    return ((c >= 0x0100 && c <= 0x0131) ||
+            (c >= 0x0134 && c <= 0x013E) ||
+            (c >= 0x0141 && c <= 0x0148) ||
+            (c >= 0x014A && c <= 0x017E) ||
+            (c >= 0x0180 && c <= 0x01C3) ||
+            (c >= 0x01CD && c <= 0x01F0) ||
+            (c >= 0x01F4 && c <= 0x01F5) ||
+            (c >= 0x01FA && c <= 0x0217) ||
+            (c >= 0x0250 && c <= 0x02A8) ||
+            (c >= 0x02BB && c <= 0x02C1) ||
+            (c == 0x0386) ||
+            (c >= 0x0388 && c <= 0x038A) ||
+            (c == 0x038C) ||
+            (c >= 0x038E && c <= 0x03A1) ||
+            (c >= 0x03A3 && c <= 0x03CE) ||
+            (c >= 0x03D0 && c <= 0x03D6) ||
+            (c == 0x03DA) ||
+            (c == 0x03DC) ||
+            (c == 0x03DE) ||
+            (c == 0x03E0) ||
+            (c >= 0x03E2 && c <= 0x03F3) ||
+            (c >= 0x0401 && c <= 0x040C) ||
+            (c >= 0x040E && c <= 0x044F) ||
+            (c >= 0x0451 && c <= 0x045C) ||
+            (c >= 0x045E && c <= 0x0481) ||
+            (c >= 0x0490 && c <= 0x04C4) ||
+            (c >= 0x04C7 && c <= 0x04C8) ||
+            (c >= 0x04CB && c <= 0x04CC) ||
+            (c >= 0x04D0 && c <= 0x04EB) ||
+            (c >= 0x04EE && c <= 0x04F5) ||
+            (c >= 0x04F8 && c <= 0x04F9) ||
+            (c >= 0x0531 && c <= 0x0556) ||
+            (c == 0x0559) ||
+            (c >= 0x0561 && c <= 0x0586) ||
+            (c >= 0x05D0 && c <= 0x05EA) ||
+            (c >= 0x05F0 && c <= 0x05F2) ||
+            (c >= 0x0621 && c <= 0x063A) ||
+            (c >= 0x0641 && c <= 0x064A) ||
+            (c >= 0x0671 && c <= 0x06B7) ||
+            (c >= 0x06BA && c <= 0x06BE) ||
+            (c >= 0x06C0 && c <= 0x06CE) ||
+            (c >= 0x06D0 && c <= 0x06D3) ||
+            (c == 0x06D5) ||
+            (c >= 0x06E5 && c <= 0x06E6));
+  else if (c < 0x10A0)
+    return ((c >= 0x0905 && c <= 0x0939) ||
+            (c == 0x093D) ||
+            (c >= 0x0958 && c <= 0x0961) ||
+            (c >= 0x0985 && c <= 0x098C) ||
+            (c >= 0x098F && c <= 0x0990) ||
+            (c >= 0x0993 && c <= 0x09A8) ||
+            (c >= 0x09AA && c <= 0x09B0) ||
+            (c == 0x09B2) ||
+            (c >= 0x09B6 && c <= 0x09B9) ||
+            (c >= 0x09DC && c <= 0x09DD) ||
+            (c >= 0x09DF && c <= 0x09E1) ||
+            (c >= 0x09F0 && c <= 0x09F1) ||
+            (c >= 0x0A05 && c <= 0x0A0A) ||
+            (c >= 0x0A0F && c <= 0x0A10) ||
+            (c >= 0x0A13 && c <= 0x0A28) ||
+            (c >= 0x0A2A && c <= 0x0A30) ||
+            (c >= 0x0A32 && c <= 0x0A33) ||
+            (c >= 0x0A35 && c <= 0x0A36) ||
+            (c >= 0x0A38 && c <= 0x0A39) ||
+            (c >= 0x0A59 && c <= 0x0A5C) ||
+            (c == 0x0A5E) ||
+            (c >= 0x0A72 && c <= 0x0A74) ||
+            (c >= 0x0A85 && c <= 0x0A8B) ||
+            (c == 0x0A8D) ||
+            (c >= 0x0A8F && c <= 0x0A91) ||
+            (c >= 0x0A93 && c <= 0x0AA8) ||
+            (c >= 0x0AAA && c <= 0x0AB0) ||
+            (c >= 0x0AB2 && c <= 0x0AB3) ||
+            (c >= 0x0AB5 && c <= 0x0AB9) ||
+            (c == 0x0ABD || c == 0x0AE0) ||
+            (c >= 0x0B05 && c <= 0x0B0C) ||
+            (c >= 0x0B0F && c <= 0x0B10) ||
+            (c >= 0x0B13 && c <= 0x0B28) ||
+            (c >= 0x0B2A && c <= 0x0B30) ||
+            (c >= 0x0B32 && c <= 0x0B33) ||
+            (c >= 0x0B36 && c <= 0x0B39) ||
+            (c == 0x0B3D) ||
+            (c >= 0x0B5C && c <= 0x0B5D) ||
+            (c >= 0x0B5F && c <= 0x0B61) ||
+            (c >= 0x0B85 && c <= 0x0B8A) ||
+            (c >= 0x0B8E && c <= 0x0B90) ||
+            (c >= 0x0B92 && c <= 0x0B95) ||
+            (c >= 0x0B99 && c <= 0x0B9A) ||
+            (c == 0x0B9C) ||
+            (c >= 0x0B9E && c <= 0x0B9F) ||
+            (c >= 0x0BA3 && c <= 0x0BA4) ||
+            (c >= 0x0BA8 && c <= 0x0BAA) ||
+            (c >= 0x0BAE && c <= 0x0BB5) ||
+            (c >= 0x0BB7 && c <= 0x0BB9) ||
+            (c >= 0x0C05 && c <= 0x0C0C) ||
+            (c >= 0x0C0E && c <= 0x0C10) ||
+            (c >= 0x0C12 && c <= 0x0C28) ||
+            (c >= 0x0C2A && c <= 0x0C33) ||
+            (c >= 0x0C35 && c <= 0x0C39) ||
+            (c >= 0x0C60 && c <= 0x0C61) ||
+            (c >= 0x0C85 && c <= 0x0C8C) ||
+            (c >= 0x0C8E && c <= 0x0C90) ||
+            (c >= 0x0C92 && c <= 0x0CA8) ||
+            (c >= 0x0CAA && c <= 0x0CB3) ||
+            (c >= 0x0CB5 && c <= 0x0CB9) ||
+            (c == 0x0CDE) ||
+            (c >= 0x0CE0 && c <= 0x0CE1) ||
+            (c >= 0x0D05 && c <= 0x0D0C) ||
+            (c >= 0x0D0E && c <= 0x0D10) ||
+            (c >= 0x0D12 && c <= 0x0D28) ||
+            (c >= 0x0D2A && c <= 0x0D39) ||
+            (c >= 0x0D60 && c <= 0x0D61) ||
+            (c >= 0x0E01 && c <= 0x0E2E) ||
+            (c == 0x0E30) ||
+            (c >= 0x0E32 && c <= 0x0E33) ||
+            (c >= 0x0E40 && c <= 0x0E45) ||
+            (c >= 0x0E81 && c <= 0x0E82) ||
+            (c == 0x0E84) ||
+            (c >= 0x0E87 && c <= 0x0E88) ||
+            (c == 0x0E8A || c == 0x0E8D) ||
+            (c >= 0x0E94 && c <= 0x0E97) ||
+            (c >= 0x0E99 && c <= 0x0E9F) ||
+            (c >= 0x0EA1 && c <= 0x0EA3) ||
+            (c == 0x0EA5 || c == 0x0EA7) ||
+            (c >= 0x0EAA && c <= 0x0EAB) ||
+            (c >= 0x0EAD && c <= 0x0EAE) ||
+            (c == 0x0EB0) ||
+            (c >= 0x0EB2 && c <= 0x0EB3) ||
+            (c == 0x0EBD) ||
+            (c >= 0x0EC0 && c <= 0x0EC4) ||
+            (c >= 0x0F40 && c <= 0x0F47) ||
+            (c >= 0x0F49 && c <= 0x0F69));
+  else
+    return ((c >= 0x10A0 && c <= 0x10C5) ||
+            (c >= 0x10D0 && c <= 0x10F6) ||
+            (c == 0x1100) ||
+            (c >= 0x1102 && c <= 0x1103) ||
+            (c >= 0x1105 && c <= 0x1107) ||
+            (c == 0x1109) ||
+            (c >= 0x110B && c <= 0x110C) ||
+            (c >= 0x110E && c <= 0x1112) ||
+            (c == 0x113C || c == 0x113E || c == 0x1140) ||
+            (c == 0x114C || c == 0x114E || c == 0x1150) ||
+            (c >= 0x1154 && c <= 0x1155) ||
+            (c == 0x1159) ||
+            (c >= 0x115F && c <= 0x1161) ||
+            (c == 0x1163) ||
+            (c == 0x1165) ||
+            (c == 0x1167) ||
+            (c == 0x1169) ||
+            (c >= 0x116D && c <= 0x116E) ||
+            (c >= 0x1172 && c <= 0x1173) ||
+            (c == 0x1175) ||
+            (c == 0x119E) ||
+            (c == 0x11A8) ||
+            (c == 0x11AB) ||
+            (c >= 0x11AE && c <= 0x11AF) ||
+            (c >= 0x11B7 && c <= 0x11B8) ||
+            (c == 0x11BA) ||
+            (c >= 0x11BC && c <= 0x11C2) ||
+            (c == 0x11EB) ||
+            (c == 0x11F0) ||
+            (c == 0x11F9) ||
+            (c >= 0x1E00 && c <= 0x1E9B) ||
+            (c >= 0x1EA0 && c <= 0x1EF9) ||
+            (c >= 0x1F00 && c <= 0x1F15) ||
+            (c >= 0x1F18 && c <= 0x1F1D) ||
+            (c >= 0x1F20 && c <= 0x1F45) ||
+            (c >= 0x1F48 && c <= 0x1F4D) ||
+            (c >= 0x1F50 && c <= 0x1F57) ||
+            (c == 0x1F59) ||
+            (c == 0x1F5B) ||
+            (c == 0x1F5D) ||
+            (c >= 0x1F5F && c <= 0x1F7D) ||
+            (c >= 0x1F80 && c <= 0x1FB4) ||
+            (c >= 0x1FB6 && c <= 0x1FBC) ||
+            (c == 0x1FBE) ||
+            (c >= 0x1FC2 && c <= 0x1FC4) ||
+            (c >= 0x1FC6 && c <= 0x1FCC) ||
+            (c >= 0x1FD0 && c <= 0x1FD3) ||
+            (c >= 0x1FD6 && c <= 0x1FDB) ||
+            (c >= 0x1FE0 && c <= 0x1FEC) ||
+            (c >= 0x1FF2 && c <= 0x1FF4) ||
+            (c >= 0x1FF6 && c <= 0x1FFC) ||
+            (c == 0x2126) ||
+            (c >= 0x212A && c <= 0x212B) ||
+            (c == 0x212E) ||
+            (c >= 0x2180 && c <= 0x2182) ||
+            (c >= 0x3041 && c <= 0x3094) ||
+            (c >= 0x30A1 && c <= 0x30FA) ||
+            (c >= 0x3105 && c <= 0x312C) ||
+            (c >= 0xAC00 && c <= 0xD7A3));
+}
+
+// Return 1 if c is an Ideographic (XML 1.0 production [86]), 0 otherwise.
+int
+ACEXML_ParserInt::isIdeographic_i (const ACEXML_Char c)
+{
+  return ((c >= 0x4E00 && c <= 0x9FA5) ||
+          (c == 0x3007) ||
+          (c >= 0x3021 && c <= 0x3029));
+}
+
+// Return 1 if c is a CombiningChar (XML 1.0 production [87]), 0 otherwise.
+// The ranges are split at 0x0901 so only one group of tests runs per call.
+int
+ACEXML_ParserInt::isCombiningchar_i (const ACEXML_Char c)
+{
+  if (c < 0x0901)
+    return ((c >= 0x0300 && c <= 0x0345) ||
+            (c >= 0x0360 && c <= 0x0361) ||
+            (c >= 0x0483 && c <= 0x0486) ||
+            (c >= 0x0591 && c <= 0x05A1) ||
+            (c >= 0x05A3 && c <= 0x05B9) ||
+            (c >= 0x05BB && c <= 0x05BD) ||
+            (c == 0x05BF) ||
+            (c >= 0x05C1 && c <= 0x05C2) ||
+            (c == 0x05C4) ||
+            (c >= 0x064B && c <= 0x0652) ||
+            (c == 0x0670) ||
+            (c >= 0x06D6 && c <= 0x06DC) ||
+            (c >= 0x06DD && c <= 0x06DF) ||
+            (c >= 0x06E0 && c <= 0x06E4) ||
+            (c >= 0x06E7 && c <= 0x06E8) ||
+            (c >= 0x06EA && c <= 0x06ED));
+  else
+    return ((c >= 0x0901 && c <= 0x0903) ||
+            (c == 0x093C) ||
+            (c >= 0x093E && c <= 0x094C) ||
+            (c == 0x094D) ||
+            (c >= 0x0951 && c <= 0x0954) ||
+            (c >= 0x0962 && c <= 0x0963) ||
+            (c >= 0x0981 && c <= 0x0983) ||
+            (c == 0x09BC) ||
+            (c == 0x09BE) ||
+            (c == 0x09BF) ||
+            (c >= 0x09C0 && c <= 0x09C4) ||
+            (c >= 0x09C7 && c <= 0x09C8) ||
+            (c >= 0x09CB && c <= 0x09CD) ||
+            (c == 0x09D7) ||
+            (c >= 0x09E2 && c <= 0x09E3) ||
+            (c == 0x0A02) ||
+            (c == 0x0A3C) ||
+            (c == 0x0A3E) ||
+            (c == 0x0A3F) ||
+            (c >= 0x0A40 && c <= 0x0A42) ||
+            (c >= 0x0A47 && c <= 0x0A48) ||
+            (c >= 0x0A4B && c <= 0x0A4D) ||
+            (c >= 0x0A70 && c <= 0x0A71) ||
+            (c >= 0x0A81 && c <= 0x0A83) ||
+            (c == 0x0ABC) ||
+            (c >= 0x0ABE && c <= 0x0AC5) ||
+            (c >= 0x0AC7 && c <= 0x0AC9) ||
+            (c >= 0x0ACB && c <= 0x0ACD) ||
+            (c >= 0x0B01 && c <= 0x0B03) ||
+            (c == 0x0B3C) ||
+            (c >= 0x0B3E && c <= 0x0B43) ||
+            (c >= 0x0B47 && c <= 0x0B48) ||
+            (c >= 0x0B4B && c <= 0x0B4D) ||
+            (c >= 0x0B56 && c <= 0x0B57) ||
+            (c >= 0x0B82 && c <= 0x0B83) ||
+            (c >= 0x0BBE && c <= 0x0BC2) ||
+            (c >= 0x0BC6 && c <= 0x0BC8) ||
+            (c >= 0x0BCA && c <= 0x0BCD) ||
+            (c == 0x0BD7) ||
+            (c >= 0x0C01 && c <= 0x0C03) ||
+            (c >= 0x0C3E && c <= 0x0C44) ||
+            (c >= 0x0C46 && c <= 0x0C48) ||
+            (c >= 0x0C4A && c <= 0x0C4D) ||
+            (c >= 0x0C55 && c <= 0x0C56) ||
+            (c >= 0x0C82 && c <= 0x0C83) ||
+            (c >= 0x0CBE && c <= 0x0CC4) ||
+            (c >= 0x0CC6 && c <= 0x0CC8) ||
+            (c >= 0x0CCA && c <= 0x0CCD) ||
+            (c >= 0x0CD5 && c <= 0x0CD6) ||
+            (c >= 0x0D02 && c <= 0x0D03) ||
+            (c >= 0x0D3E && c <= 0x0D43) ||
+            (c >= 0x0D46 && c <= 0x0D48) ||
+            (c >= 0x0D4A && c <= 0x0D4D) ||
+            (c == 0x0D57) ||
+            (c == 0x0E31) ||
+            (c >= 0x0E34 && c <= 0x0E3A) ||
+            (c >= 0x0E47 && c <= 0x0E4E) ||
+            (c == 0x0EB1) ||
+            (c >= 0x0EB4 && c <= 0x0EB9) ||
+            (c >= 0x0EBB && c <= 0x0EBC) ||
+            (c >= 0x0EC8 && c <= 0x0ECD) ||
+            (c >= 0x0F18 && c <= 0x0F19) ||
+            (c == 0x0F35) ||
+            (c == 0x0F37) ||
+            (c == 0x0F39) ||
+            (c == 0x0F3E) ||
+            (c == 0x0F3F) ||
+            (c >= 0x0F71 && c <= 0x0F84) ||
+            (c >= 0x0F86 && c <= 0x0F8B) ||
+            (c >= 0x0F90 && c <= 0x0F95) ||
+            (c == 0x0F97) ||
+            (c >= 0x0F99 && c <= 0x0FAD) ||
+            (c >= 0x0FB1 && c <= 0x0FB7) ||
+            (c == 0x0FB9) ||
+            (c >= 0x20D0 && c <= 0x20DC) ||
+            (c == 0x20E1) ||
+            (c >= 0x302A && c <= 0x302F) ||
+            (c == 0x3099) ||
+            (c == 0x309A));
+}
+
+// Return 1 if c is a Digit (XML 1.0 production [88]), 0 otherwise.  Anything
+// below 0x0040 can only match the ASCII digits, so that case returns early.
+int
+ACEXML_ParserInt::isDigit_i (const ACEXML_Char c)
+{
+  if (c < 0x0040)
+    return (c >= 0x0030 && c <= 0x0039);
+  else
+    return ((c >= 0x0660 && c <= 0x0669) ||
+            (c >= 0x06F0 && c <= 0x06F9) ||
+            (c >= 0x0966 && c <= 0x096F) ||
+            (c >= 0x09E6 && c <= 0x09EF) ||
+            (c >= 0x0A66 && c <= 0x0A6F) ||
+            (c >= 0x0AE6 && c <= 0x0AEF) ||
+            (c >= 0x0B66 && c <= 0x0B6F) ||
+            (c >= 0x0BE7 && c <= 0x0BEF) ||
+            (c >= 0x0C66 && c <= 0x0C6F) ||
+            (c >= 0x0CE6 && c <= 0x0CEF) ||
+            (c >= 0x0D66 && c <= 0x0D6F) ||
+            (c >= 0x0E50 && c <= 0x0E59) ||
+            (c >= 0x0ED0 && c <= 0x0ED9) ||
+            (c >= 0x0F20 && c <= 0x0F29));
+}
+
+// Return 1 if c is an Extender (XML 1.0 production [89]), 0 otherwise.
+int
+ACEXML_ParserInt::isExtender_i (const ACEXML_Char c)
+{
+  // The compiler should generate a jump table and index into it directly.
+  switch (c)
+    {
+    case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387: case 0x0640:
+    case 0x0E46: case 0x0EC6: case 0x3005: case 0x3031: case 0x3032:
+    case 0x3033: case 0x3034: case 0x3035: case 0x309D: case 0x309E:
+    case 0x30FC: case 0x30FD: case 0x30FE:
+      return 1;
+    default:
+      return 0;
+    }
+}
+
+#endif /* ACE_USES_WCHAR */
diff --git a/ACEXML/parser/parser/ParserInternals.h b/ACEXML/parser/parser/ParserInternals.h
new file mode 100644
index 00000000000..9afecf93daa
--- /dev/null
+++ b/ACEXML/parser/parser/ParserInternals.h
@@ -0,0 +1,109 @@
+// -*- C++ -*-
+
+//=============================================================================
+/**
+ *  @file    ParserInternals.h
+ *
+ *  $Id$
+ *
+ *  @author Krishnakumar B <kitty@cs.wustl.edu>
+ */
+//=============================================================================
+
+#ifndef _ACEXML_PARSER_INTERNALS_H_
+#define _ACEXML_PARSER_INTERNALS_H_
+
+#include "ace/pre.h"
+#include "ACEXML/parser/parser/Parser_export.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+#include "ACEXML/common/XML_Types.h"
+
+/**
+ * @class ACEXML_ParserInt ParserInternals.h "ACEXML/parser/parser/ParserInternals.h"
+ *
+ * @brief A class to hide some of the internal implementation details of
+ * the parser.
+ *
+ */
+class ACEXML_PARSER_Export ACEXML_ParserInt
+{
+
+public:
+#if defined (ACE_USES_WCHAR)
+
+  /**
+   * Check if a character @a c is a BaseChar. This method checks for the
+   * complete set of characters allowed when WCHAR support is enabled.
+   *
+   * @retval 1 if @a c is a BaseChar, 0 otherwise.
+   */
+  static int isBasechar_i (const ACEXML_Char c);
+
+  /**
+   * Check if a character @a c is an Ideographic. This method checks for the
+   * complete set of characters allowed when WCHAR support is enabled.
+   *
+   * @retval 1 if @a c is an Ideographic, 0 otherwise.
+   */
+  static int isIdeographic_i (const ACEXML_Char c);
+
+  /**
+   * Check if a character @a c is a CombiningChar. This method checks for the
+   * complete set of characters allowed when WCHAR support is enabled.
+   *
+   * @retval 1 if @a c is a CombiningChar, 0 otherwise.
+   */
+  static int isCombiningchar_i (const ACEXML_Char c);
+
+  /**
+   * Check if a character @a c is a Digit. This method checks for the
+   * complete set of characters allowed when WCHAR support is enabled.
+   *
+   * @retval 1 if @a c is a Digit, 0 otherwise.
+   */
+  static int isDigit_i (const ACEXML_Char c);
+
+  /**
+   * Check if a character @a c is an Extender. This method checks for the
+   * complete set of characters allowed when WCHAR support is enabled.
+   *
+   * @retval 1 if @a c is an Extender, 0 otherwise.
+   */
+  static int isExtender_i (const ACEXML_Char c);
+
+#endif /* ACE_USES_WCHAR */
+
+  /// Flags for code points 0x00 - 0xFF: 1 if the code point is a BaseChar.
+  static const ACEXML_Char base_char_table_[256];
+
+  /// NOTE(review): not defined in ParserInternals.cpp; presumably the parser's
+  /// message strings, defined in another translation unit -- confirm.
+  static const ACEXML_Char* msg_[];
+
+  /// Names of the predefined entities (amp, lt, gt, apos, quot).
+  static const ACEXML_Char* predef_ent_[];
+
+  /// Replacement text for predef_ent_, entry for entry.
+  static const ACEXML_Char* predef_val_[];
+
+  // Enum describing the position in a document when a reference occurs.
+  enum ReferenceState {
+    IN_CONTENT,
+    IN_ATT_VALUE,
+    AS_ATT_VALUE,
+    IN_ENTITY_VALUE,
+    IN_INT_DTD,
+    IN_EXT_DTD,
+    IN_NOTATION,
+    INVALID = -1
+  };
+
+};
+
+#include "ace/post.h"
+
+#endif /* _ACEXML_PARSER_INTERNALS_H_ */