summaryrefslogtreecommitdiff
path: root/Expat/encoding.h
diff options
context:
space:
mode:
Diffstat (limited to 'Expat/encoding.h')
-rw-r--r--Expat/encoding.h91
1 files changed, 91 insertions, 0 deletions
diff --git a/Expat/encoding.h b/Expat/encoding.h
new file mode 100644
index 0000000..4e0374b
--- /dev/null
+++ b/Expat/encoding.h
@@ -0,0 +1,91 @@
+/*****************************************************************
+** encoding.h
+**
+** Copyright 1998 Clark Cooper
+** All rights reserved.
+**
+** This program is free software; you can redistribute it and/or
+** modify it under the same terms as Perl itself.
+*/
+
+#ifndef ENCODING_H
+#define ENCODING_H 1
+
+#define ENCMAP_MAGIC 0xfeebface /* expected value of the magic field in Encmap_Header */
+
+typedef struct prefixmap { /* describes how the byte following one multi-byte prefix is mapped */
+ unsigned char min; /* 1st byte value mapped for this prefix */
+ unsigned char len; /* number of byte values mapped; 0 => 256 */
+ unsigned short bmap_start; /* starting index of this prefix's map within the overall map of shorts */
+ unsigned char ispfx[32]; /* bitvector: set when the byte being mapped is itself a prefix */
+ unsigned char ischar[32]; /* bitvector: set when the byte being mapped is a character; if neither bit is set the byte is unmapped */
+} PrefixMap;
+
+typedef struct encinf /* NOTE(review): looks like the in-memory form of a loaded encoding map -- confirm against Expat.xs */
+{
+ unsigned short prefixes_size; /* presumably the number of entries in prefixes (cf. Encmap_Header.pfsize) -- TODO confirm */
+ unsigned short bytemap_size; /* presumably the number of entries in bytemap (cf. Encmap_Header.bmsize) -- TODO confirm */
+ int firstmap[256]; /* presumably the 1st-byte map (cf. Encmap_Header.map) -- TODO confirm */
+ PrefixMap *prefixes; /* vector of PrefixMap entries; ownership not shown in this header */
+ unsigned short *bytemap; /* vector of shorts; presumably the overall character/prefix map -- TODO confirm */
+} Encinfo;
+
+typedef struct encmaphdr /* header of an encoding map file; all shorts and ints are stored in network order */
+{
+ unsigned int magic; /* should match ENCMAP_MAGIC */
+ char name[40]; /* IANA registered name for the encoding */
+ unsigned short pfsize; /* number of PrefixMaps; should be zero for single-byte encodings */
+ unsigned short bmsize; /* number of shorts used for the overall map */
+ int map[256]; /* per 1st byte: the Unicode scalar, -n for an n-byte sequence, or -1 if unmapped */
+} Encmap_Header;
+
+/*================================================================
+** Structure of Encoding map binary encoding
+**
+** Note that all shorts and ints are in network order,
+** so when packing or unpacking with perl, use 'n' and 'N' respectively.
+** In C, use the htonl family of functions.
+**
+** The basic structure is:
+**
+** _______________________
+** |Header (including map expat needs for 1st byte)
+** |PrefixMap * pfsize
+** | This section isn't included for single-byte encodings.
+** | For multiple byte encodings, when a byte represents a prefix
+** | then it indexes into this vector instead of mapping to a
+** | Unicode character. The PrefixMap type is declared above. The
+** | ispfx and ischar fields are bitvectors indicating whether
+** | the byte being mapped is a prefix or character respectively.
+** | If neither is set, then the character is not mapped to Unicode.
+** |
+** | The min field is the 1st byte mapped for this prefix; the
+** | len field is the number of bytes mapped; and bmap_start is
+** | the starting index of the map for this prefix in the overall
+** | map (next section).
+** |unsigned short * bmsize
+** | This section also is omitted for single-byte encodings.
+** | Each short is either a Unicode scalar or an index into the
+** | PrefixMap vector.
+**
+** The header for these files is declared above as the Encmap_Header type.
+** The magic field is a magic number which should match the ENCMAP_MAGIC
+** macro above. The next 40 bytes store the IANA registered name for the
+** encoding. The pfsize field holds the number of PrefixMaps, which should
+** be zero for single byte encodings. The bmsize field holds the number of
+** shorts used for the overall map.
+**
+** The map field contains either the Unicode scalar encoded by the 1st byte,
+** or -n (n >= 2) where n is the number of bytes that such a 1st byte implies
+** (Expat requires that the number of bytes to encode a character is indicated
+** by the 1st byte), or -1 if the byte doesn't map to any Unicode character.
+**
+** If the encoding is a multiple byte encoding, then there will be PrefixMap
+** and character map sections. The 1st PrefixMap (index 0), covers a range
+** of bytes that includes all 1st byte prefixes.
+**
+** Look at convert_to_unicode in Expat.xs to see how this data structure
+** is used.
+*/
+
+#endif /* ndef ENCODING_H */