summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@colm.net>2019-12-11 18:33:35 +0200
committerAdrian Thurston <thurston@colm.net>2019-12-11 18:37:48 +0200
commit900189a4d23f0180970f22479f44377973a2d330 (patch)
treea0e5e4158774e49f943000e452898fde9d55c9a4
parentd71253d7fdb59464e5c094b5b091c87aa2a189d5 (diff)
downloadcolm-900189a4d23f0180970f22479f44377973a2d330.tar.gz
colm: add -R option, which allows hex ranges to cross 0
This is a quick hack to address issue #81. Since colm uses signed alphabets, it is not possible to specify hex ranges that cross zero, which is desirable for UTF-8. For example, 0x20 .. 0xa0 is rejected in signed char mode: 0xa0 is read as a negative value, so the lower end of the range compares greater than the upper end and an error is reported.
-rw-r--r--colm/global.h1
-rw-r--r--colm/keyops.h5
-rw-r--r--colm/main.cc6
-rw-r--r--colm/parsetree.cc27
4 files changed, 31 insertions, 8 deletions
diff --git a/colm/global.h b/colm/global.h
index 5801436d..58b98077 100644
--- a/colm/global.h
+++ b/colm/global.h
@@ -67,6 +67,7 @@ extern bool gblLibrary;
extern long gblActiveRealm;
extern char machineMain[];
extern const char *exportHeaderFn;
+extern bool rangeCrossesZero;
struct colm_location;
diff --git a/colm/keyops.h b/colm/keyops.h
index 924fa7ab..094b09e2 100644
--- a/colm/keyops.h
+++ b/colm/keyops.h
@@ -35,6 +35,7 @@ enum MarkType
typedef unsigned long long Size;
+/* This key struct does not implement unsigned. */
struct Key
{
private:
@@ -57,12 +58,8 @@ public:
Key( const Key &key ) : key(key.key) {}
Key( long key ) : key(key) {}
- /* Returns the value used to represent the key. This value must be
- * interpreted based on signedness. */
long getVal() const { return key; };
- /* Returns the key casted to a long long. This form of the key does not
- * require and signedness interpretation. */
long long getLongLong() const;
bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); }
diff --git a/colm/main.cc b/colm/main.cc
index def8b150..519a42aa 100644
--- a/colm/main.cc
+++ b/colm/main.cc
@@ -90,6 +90,7 @@ bool run = false;
bool addUniqueEmptyProductions = false;
bool gblLibrary = false;
long gblActiveRealm = 0;
+bool rangeCrossesZero = false;
ArgsVector includePaths;
ArgsVector libraryPaths;
@@ -545,12 +546,15 @@ bool inSourceTree( const char *argv0, char *&location )
void processArgs( int argc, const char **argv )
{
- ParamCheck pc( "cD:e:x:I:L:vdliro:S:M:vHh?-:sVa:m:b:E:", argc, argv );
+ ParamCheck pc( "RcD:e:x:I:L:vdliro:S:M:vHh?-:sVa:m:b:E:", argc, argv );
while ( pc.check() ) {
switch ( pc.state ) {
case ParamCheck::match:
switch ( pc.parameter ) {
+ case 'R':
+ rangeCrossesZero = true;
+ break;
case 'I':
includePaths.append( pc.parameterArg );
break;
diff --git a/colm/parsetree.cc b/colm/parsetree.cc
index 572f0610..cf97fa23 100644
--- a/colm/parsetree.cc
+++ b/colm/parsetree.cc
@@ -1228,16 +1228,37 @@ FsmGraph *Range::walk( Compiler *pd )
delete lowerFsm;
delete upperFsm;
+ bool span0 = false;
+ if ( rangeCrossesZero && lowKey >= 0 && highKey < 0 )
+ span0 = true;
+
/* Validate the range. */
- if ( lowKey > highKey ) {
+ if ( !span0 && lowKey > highKey ) {
/* Recover by setting upper to lower; */
error(lowerLit->loc) << "lower end of range is greater then upper end" << endl;
highKey = lowKey;
}
/* Return the range now that it is validated. */
- FsmGraph *retFsm = new FsmGraph();
- retFsm->rangeFsm( lowKey, highKey );
+ FsmGraph *retFsm = 0;
+ if ( span0 ) {
+ FsmGraph *first = new FsmGraph();
+ FsmGraph *second = new FsmGraph();
+ Key k128 = 127;
+ Key kn127 = -128;
+ first->rangeFsm( lowKey, k128 );
+ second->rangeFsm( kn127, highKey );
+
+ first->unionOp( second );
+ first->minimizePartition2();
+ retFsm = first;
+ }
+ else {
+ /* Usual case. */
+ retFsm = new FsmGraph();
+ retFsm->rangeFsm( lowKey, highKey );
+ }
+
return retFsm;
}