diff options
author | Adrian Thurston <thurston@colm.net> | 2019-11-14 18:47:17 -0300 |
---|---|---|
committer | Adrian Thurston <thurston@colm.net> | 2019-11-14 18:47:17 -0300 |
commit | c780ba637eda693064e382a888f9ba72afe9360f (patch) | |
tree | 978a0fae2956cb27f5cfb5a7b981090bc2497597 | |
parent | 18219624ffe5f39551d3813c2b8d58c38b77f0b4 (diff) | |
download | colm-c780ba637eda693064e382a888f9ba72afe9360f.tar.gz |
pass the UTF-8 BOM (byte order mark) through to the output
refs #23
-rw-r--r-- | ragel/host-c/rlhc.lm | 3 | ||||
-rw-r--r-- | ragel/host-crack/rlhc.lm | 3 | ||||
-rw-r--r-- | ragel/host-csharp/rlhc.lm | 3 | ||||
-rw-r--r-- | ragel/host-d/rlhc.lm | 3 | ||||
-rw-r--r-- | ragel/host-go/rlhc.lm | 3 | ||||
-rw-r--r-- | ragel/host-java/rlhc.lm | 3 | ||||
-rw-r--r-- | ragel/host-js/rlhc.lm | 3 | ||||
-rw-r--r-- | ragel/host-julia/rlhc.lm | 3 | ||||
-rw-r--r-- | ragel/host-ocaml/rlhc.lm | 3 | ||||
-rw-r--r-- | ragel/host-ruby/rlhc.lm | 3 | ||||
-rw-r--r-- | ragel/host-rust/rlhc.lm | 3 | ||||
-rw-r--r-- | ragel/inputdata.cc | 10 | ||||
-rw-r--r-- | ragel/inputdata.h | 6 | ||||
-rw-r--r-- | ragel/ragel.lm | 8 | ||||
-rw-r--r-- | ragel/ril.lm | 8 | ||||
-rw-r--r-- | ragel/rlreduce.lm | 5 |
16 files changed, 61 insertions, 9 deletions
diff --git a/ragel/host-c/rlhc.lm b/ragel/host-c/rlhc.lm index 4e44a5f5..0a2b6d97 100644 --- a/ragel/host-c/rlhc.lm +++ b/ragel/host-c/rlhc.lm @@ -436,6 +436,9 @@ namespace c_gen { _ = new parser<c_out::c_out>() + if ( Start.opt_bom.bom ) + send Output [Start.opt_bom.bom] + stmt_list( Start._repeat_stmt ) CO: c_out::c_out = _->finish() diff --git a/ragel/host-crack/rlhc.lm b/ragel/host-crack/rlhc.lm index e5d2d576..bebe7cd5 100644 --- a/ragel/host-crack/rlhc.lm +++ b/ragel/host-crack/rlhc.lm @@ -510,6 +510,9 @@ namespace crack_gen { Parser = new parser<crack_out::crack_out>() + if ( Start.opt_bom.bom ) + send Output [Start.opt_bom.bom] + stmt_list( Start._repeat_stmt ) CO: crack_out::crack_out = Parser->finish() diff --git a/ragel/host-csharp/rlhc.lm b/ragel/host-csharp/rlhc.lm index a108a4a8..078157c5 100644 --- a/ragel/host-csharp/rlhc.lm +++ b/ragel/host-csharp/rlhc.lm @@ -454,6 +454,9 @@ namespace csharp_gen { Parser = new parser<csharp_out::csharp_out>() + if ( Start.opt_bom.bom ) + send Output [Start.opt_bom.bom] + stmt_list( Start._repeat_stmt ) CSO: csharp_out::csharp_out = Parser->finish() diff --git a/ragel/host-d/rlhc.lm b/ragel/host-d/rlhc.lm index 14f2f192..2a047e68 100644 --- a/ragel/host-d/rlhc.lm +++ b/ragel/host-d/rlhc.lm @@ -485,6 +485,9 @@ namespace d_gen { Parser = new parser<d_out::d_out>() + if ( Start.opt_bom.bom ) + send Output [Start.opt_bom.bom] + stmt_list( Start._repeat_stmt ) DO: d_out::d_out = Parser->finish() diff --git a/ragel/host-go/rlhc.lm b/ragel/host-go/rlhc.lm index 28eb0e90..ab75477d 100644 --- a/ragel/host-go/rlhc.lm +++ b/ragel/host-go/rlhc.lm @@ -380,6 +380,9 @@ namespace go_gen Input: _input = _->gets() Input->auto_trim(true) + if ( Start.opt_bom.bom ) + send Output [Start.opt_bom.bom] + stmt_list( Start._repeat_stmt ) GO: out_go::out_go = _->finish() diff --git a/ragel/host-java/rlhc.lm b/ragel/host-java/rlhc.lm index 3704d7d5..a458369f 100644 --- a/ragel/host-java/rlhc.lm +++ b/ragel/host-java/rlhc.lm @@ -478,6 +478,9 @@ 
namespace java_gen { Parser = new parser<java_out::java_out>() + if ( Start.opt_bom.bom ) + send Output [Start.opt_bom.bom] + stmt_list( Start._repeat_stmt ) JO: java_out::java_out = Parser->finish() diff --git a/ragel/host-js/rlhc.lm b/ragel/host-js/rlhc.lm index 61e0fbd6..29a61346 100644 --- a/ragel/host-js/rlhc.lm +++ b/ragel/host-js/rlhc.lm @@ -478,6 +478,9 @@ namespace js_gen send Parser "'use strict'; + if ( Start.opt_bom.bom ) + send Output [Start.opt_bom.bom] + stmt_list( Start._repeat_stmt ) CO: js_out::js_out = Parser->finish() diff --git a/ragel/host-julia/rlhc.lm b/ragel/host-julia/rlhc.lm index 085eb793..72108994 100644 --- a/ragel/host-julia/rlhc.lm +++ b/ragel/host-julia/rlhc.lm @@ -535,6 +535,9 @@ namespace julia_gen { Parser = new parser<julia_out::julia_out>() + if ( Start.opt_bom.bom ) + send Output [Start.opt_bom.bom] + stmt_list( Start._repeat_stmt ) CO: julia_out::julia_out = Parser->finish() diff --git a/ragel/host-ocaml/rlhc.lm b/ragel/host-ocaml/rlhc.lm index 362116b3..f68b61be 100644 --- a/ragel/host-ocaml/rlhc.lm +++ b/ragel/host-ocaml/rlhc.lm @@ -582,6 +582,9 @@ namespace ml_gen { Parser = new parser<ocaml_out::ocaml_out>() + if ( Start.opt_bom.bom ) + send Output [Start.opt_bom.bom] + stmt_list( Start._repeat_stmt ) MO: ocaml_out::ocaml_out = Parser->finish() diff --git a/ragel/host-ruby/rlhc.lm b/ragel/host-ruby/rlhc.lm index f2a4800f..87119465 100644 --- a/ragel/host-ruby/rlhc.lm +++ b/ragel/host-ruby/rlhc.lm @@ -502,6 +502,9 @@ void ruby_trans( Output: stream, Start: start ) { Parser = new parser<ruby_out::ruby_out>() + if ( Start.opt_bom.bom ) + send Output [Start.opt_bom.bom] + stmt_list( Start._repeat_stmt ) RO: ruby_out::ruby_out = Parser->finish() diff --git a/ragel/host-rust/rlhc.lm b/ragel/host-rust/rlhc.lm index a9c42c2d..57cd66e1 100644 --- a/ragel/host-rust/rlhc.lm +++ b/ragel/host-rust/rlhc.lm @@ -496,6 +496,9 @@ namespace rust_gen { Parser = new parser<rust_out::rust_out>() + if ( Start.opt_bom.bom ) + send Output 
[Start.opt_bom.bom] + stmt_list( Start._repeat_stmt ) CO: rust_out::rust_out = Parser->finish() diff --git a/ragel/inputdata.cc b/ragel/inputdata.cc index 6b689ae4..66ec4afb 100644 --- a/ragel/inputdata.cc +++ b/ragel/inputdata.cc @@ -219,6 +219,10 @@ void InputData::verifyWritesHaveData() void InputData::writeOutput( InputItem *ii ) { + /* If it is the first input item then check if we need to write the BOM. */ + if ( ii->prev == 0 && utf8BomPresent ) + *outStream << (uchar)0xEF << (uchar)0xBB << (uchar) 0xBF; + switch ( ii->type ) { case InputItem::Write: { CodeGenData *cgd = ii->pd->cgd; @@ -251,12 +255,6 @@ void InputData::writeOutput( InputItem *ii ) } } -void InputData::writeOutput() -{ - for ( InputItemList::Iter ii = inputItems; ii.lte(); ii++ ) - writeOutput( ii ); -} - void InputData::closeOutput() { /* If writing to a file, delete the ostream, causing it to flush. diff --git a/ragel/inputdata.h b/ragel/inputdata.h index 36028778..689f9078 100644 --- a/ragel/inputdata.h +++ b/ragel/inputdata.h @@ -208,7 +208,8 @@ struct InputData histogram(0), input(0), forceVar(false), - noFork(false) + noFork(false), + utf8BomPresent(false) {} ~InputData(); @@ -300,6 +301,9 @@ struct InputData bool forceVar; bool noFork; + /* Did the input file have a byte order mark? 
*/ + bool utf8BomPresent; + void verifyWriteHasData( InputItem *ii ); void verifyWritesHaveData(); diff --git a/ragel/ragel.lm b/ragel/ragel.lm index a628dcea..a98012ce 100644 --- a/ragel/ragel.lm +++ b/ragel/ragel.lm @@ -893,6 +893,12 @@ namespace path end namespace host + token bom / 0xEF 0xBB 0xBF / + + def opt_bom + [bom] :Bom + | [] + def section [`%%{ ragel::opt_machine_name ragel::ign_want ragel::statement* ragel::`}%%] :MultiLine | [`%%{ ragel::opt_machine_name ragel::ign_ignore consume::tok* consume::`}%%] :Consume @@ -900,7 +906,7 @@ namespace host end def start - [SectionList: host::section*] + [host::opt_bom SectionList: host::section*] list<str> makeIncludePathChecks( CurFileName: str, IncFileName: str ) { diff --git a/ragel/ril.lm b/ragel/ril.lm index 5b764b81..cde6ce93 100644 --- a/ragel/ril.lm +++ b/ragel/ril.lm @@ -274,5 +274,11 @@ def stmt | [continue_stmt] | [block] +token bom / 0xEF 0xBB 0xBF / + +def opt_bom + [bom] :Bom +| [] + def start - [stmt*] + [opt_bom stmt*] diff --git a/ragel/rlreduce.lm b/ragel/rlreduce.lm index 88b807f7..fe25cd38 100644 --- a/ragel/rlreduce.lm +++ b/ragel/rlreduce.lm @@ -1,4 +1,9 @@ reduction TopLevel + host::opt_bom :Bom + { + id->utf8BomPresent = true; + } + # def machine_name # [`machine word `;] :MachineName ragel::machine_name :MachineName |