diff options
author | Adrian Thurston <thurston@colm.net> | 2019-12-11 18:33:35 +0200 |
---|---|---|
committer | Adrian Thurston <thurston@colm.net> | 2019-12-11 18:37:48 +0200 |
commit | 900189a4d23f0180970f22479f44377973a2d330 (patch) | |
tree | a0e5e4158774e49f943000e452898fde9d55c9a4 | |
parent | d71253d7fdb59464e5c094b5b091c87aa2a189d5 (diff) | |
download | colm-900189a4d23f0180970f22479f44377973a2d330.tar.gz |
colm: add -R option, which allows hex ranges to cross 0
This is a quick hack to address issue #81. Since colm uses signed alphabets, it
is not possible to specify ranges that cross zero in hex, which is desirable
for utf8. For example, 0x20 .. 0xa0 will throw an error in signed char mode,
because 0xa0 is taken as a negative key and the lower end of the range then
compares greater than the upper end.
-rw-r--r-- | colm/global.h | 1 | ||||
-rw-r--r-- | colm/keyops.h | 5 | ||||
-rw-r--r-- | colm/main.cc | 6 | ||||
-rw-r--r-- | colm/parsetree.cc | 27 |
4 files changed, 31 insertions, 8 deletions
diff --git a/colm/global.h b/colm/global.h index 5801436d..58b98077 100644 --- a/colm/global.h +++ b/colm/global.h @@ -67,6 +67,7 @@ extern bool gblLibrary; extern long gblActiveRealm; extern char machineMain[]; extern const char *exportHeaderFn; +extern bool rangeCrossesZero; struct colm_location; diff --git a/colm/keyops.h b/colm/keyops.h index 924fa7ab..094b09e2 100644 --- a/colm/keyops.h +++ b/colm/keyops.h @@ -35,6 +35,7 @@ enum MarkType typedef unsigned long long Size; +/* This key struct does not implement unsigned. */ struct Key { private: @@ -57,12 +58,8 @@ public: Key( const Key &key ) : key(key.key) {} Key( long key ) : key(key) {} - /* Returns the value used to represent the key. This value must be - * interpreted based on signedness. */ long getVal() const { return key; }; - /* Returns the key casted to a long long. This form of the key does not - * require and signedness interpretation. */ long long getLongLong() const; bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); } diff --git a/colm/main.cc b/colm/main.cc index def8b150..519a42aa 100644 --- a/colm/main.cc +++ b/colm/main.cc @@ -90,6 +90,7 @@ bool run = false; bool addUniqueEmptyProductions = false; bool gblLibrary = false; long gblActiveRealm = 0; +bool rangeCrossesZero = false; ArgsVector includePaths; ArgsVector libraryPaths; @@ -545,12 +546,15 @@ bool inSourceTree( const char *argv0, char *&location ) void processArgs( int argc, const char **argv ) { - ParamCheck pc( "cD:e:x:I:L:vdliro:S:M:vHh?-:sVa:m:b:E:", argc, argv ); + ParamCheck pc( "RcD:e:x:I:L:vdliro:S:M:vHh?-:sVa:m:b:E:", argc, argv ); while ( pc.check() ) { switch ( pc.state ) { case ParamCheck::match: switch ( pc.parameter ) { + case 'R': + rangeCrossesZero = true; + break; case 'I': includePaths.append( pc.parameterArg ); break; diff --git a/colm/parsetree.cc b/colm/parsetree.cc index 572f0610..cf97fa23 100644 --- a/colm/parsetree.cc +++ b/colm/parsetree.cc @@ -1228,16 +1228,37 @@ FsmGraph *Range::walk( Compiler *pd ) 
delete lowerFsm; delete upperFsm; + bool span0 = false; + if ( rangeCrossesZero && lowKey >= 0 && highKey < 0 ) + span0 = true; + /* Validate the range. */ - if ( lowKey > highKey ) { + if ( !span0 && lowKey > highKey ) { /* Recover by setting upper to lower; */ error(lowerLit->loc) << "lower end of range is greater then upper end" << endl; highKey = lowKey; } /* Return the range now that it is validated. */ - FsmGraph *retFsm = new FsmGraph(); - retFsm->rangeFsm( lowKey, highKey ); + FsmGraph *retFsm = 0; + if ( span0 ) { + FsmGraph *first = new FsmGraph(); + FsmGraph *second = new FsmGraph(); + Key k128 = 127; + Key kn127 = -128; + first->rangeFsm( lowKey, k128 ); + second->rangeFsm( kn127, highKey ); + + first->unionOp( second ); + first->minimizePartition2(); + retFsm = first; + } + else { + /* Usual case. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( lowKey, highKey ); + } + return retFsm; } |