summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMariusz Pluciński <mplucinski@mplucinski.com>2014-06-24 19:21:43 +0200
committerWill Estes <westes575@gmail.com>2014-11-30 19:22:43 -0500
commit227e731b7686d79902c31756e11e7104070f4c2b (patch)
treec417e60a930cb21a3c7dbb2616b80e10ddc7fb89
parent4d6089fc838072d6a057257254107fdf84ff690f (diff)
downloadflex-git-unicode.tar.gz
Make charset support working with C++ scanners too [unicode]
-rw-r--r-- src/FlexLexer.h 17
-rw-r--r-- src/flex.skl 123
-rw-r--r-- src/main.c 5
3 files changed, 118 insertions, 27 deletions
diff --git a/src/FlexLexer.h b/src/FlexLexer.h
index ccda8ae..a9aa4f2 100644
--- a/src/FlexLexer.h
+++ b/src/FlexLexer.h
@@ -96,8 +96,16 @@ public:
int debug() const { return yy_flex_debug; }
void set_debug( int flag ) { yy_flex_debug = flag; }
+#ifdef YY_CHARSET
+ void set_charset(char *charset);
+ char* get_charset();
+#endif
+
protected:
YY_CHAR* yytext;
+#ifdef YY_CHARSET
+ char *yycharset;
+#endif
int yyleng;
int yylineno; // only maintained if you use %option yylineno
int yy_flex_debug; // only has effect with -d or "%option debug"
@@ -158,6 +166,15 @@ protected:
yy_state_type yy_try_NUL_trans( yy_state_type current_state );
int yy_get_next_buffer();
+#ifdef YY_CHARSET
+ size_t yycharset_convert(char* source, size_t source_bytes, YY_CHAR* target,
+ size_t target_length, size_t* converted_bytes);
+ virtual size_t yycharset_handler(char *charset,
+ char *source, size_t source_bytes,
+ YY_CHAR *target, size_t target_length,
+ size_t *converted_bytes);
+#endif
+
FLEX_STD istream* yyin; // input source for default LexerInput
FLEX_STD ostream* yyout; // output sink for default LexerOutput
diff --git a/src/flex.skl b/src/flex.skl
index 5738453..fae28c1 100644
--- a/src/flex.skl
+++ b/src/flex.skl
@@ -925,8 +925,13 @@ m4_ifdef( [[<M4_YY_BISON_LLOC>]],
m4_ifdef( [[M4_YY_CHARSET]], [[
char *yycharset_r; /** current charset name */
+]])
+
+m4_ifdef( [[M4_YY_CXX]],,[[
+m4_ifdef( [[M4_YY_CHARSET]],[[
yycharset_handler_t yycharset_handler_r; /** charset handle function */
]])
+]])
}; /* end struct yyguts_t */
]])
@@ -1052,19 +1057,20 @@ m4_ifdef( [[M4_YY_REENTRANT]],[[
m4_ifdef( [[M4_YY_CHARSET]],[[
m4_ifdef( [[M4_YY_NO_GET_CHARSET]],,[[
char *yyget_charset M4_YY_PARAMS( M4_YY_PROTO_ONLY_ARG );
+%if-c-only
yycharset_handler_t yyget_charset_handler M4_YY_PARAMS( M4_YY_PROTO_ONLY_ARG );
+%endif
]])
]])
]])
m4_ifdef( [[M4_YY_REENTRANT]],[[
-/* YY_REENTRANT */
m4_ifdef( [[M4_YY_CHARSET]], [[
-/* YY_CHARSET */
m4_ifdef( [[M4_YY_NO_SET_CHARSET]],,[[
-/* !YY_NO_SET_CHARSET */
void yyset_charset M4_YY_PARAMS( char *charset M4_YY_PROTO_LAST_ARG );
+%if-c-only
void yyset_charset_handler M4_YY_PARAMS( yycharset_handler_t charset_handler M4_YY_PROTO_LAST_ARG );
+%endif
]])
]])
]])
@@ -1694,10 +1700,18 @@ m4_ifdef( [[M4_YY_CHARSET]],[[
/* yycharset_convert - convert incoming data from arbitrary
* charset into internal representation
*/
+%if-c-only
static size_t yycharset_convert YYFARGS5(
char*, source, size_t, source_bytes,
YY_CHAR*, target, size_t, target_length,
size_t*, converted_bytes) {
+%endif
+%if-c++-only
+size_t yyFlexLexer::yycharset_convert(
+ char* source, size_t source_bytes,
+ YY_CHAR* target, size_t target_length,
+ size_t* converted_bytes) {
+%endif
M4_YY_DECL_GUTS_VAR();
if(strcmp(yycharset, "M4_YY_CHARSET_SOURCE")==0) {
if(target_length < source_bytes)
@@ -1705,15 +1719,28 @@ static size_t yycharset_convert YYFARGS5(
strncpy((char*)target, source, source_bytes);
*converted_bytes = source_bytes;
return source_bytes;
- } else if(yycharset_handler)
+ }
+%if-c-only
+ else if(yycharset_handler)
+%endif
return yycharset_handler(yycharset, source, source_bytes,
target, target_length, converted_bytes M4_YY_CALL_LAST_ARG);
- else {
- char msg[256];
- snprintf(msg, sizeof(msg),
- "Unsupported character encoding: %s", yycharset);
- YY_FATAL_ERROR(msg);
- }
+
+/* Code below just outputs an error message saying that selected encoding
+ * is not supported. In C scanner it is an end part of yycharset_convert,
+ * while in C++ scanner it is a default implementation of yycharset_handler */
+%if-c++-only
+}
+
+size_t yyFlexLexer::yycharset_handler(char *charset,
+ char *source, size_t source_bytes,
+ YY_CHAR *target, size_t target_length,
+ size_t *converted_bytes) {
+%endif
+ char msg[256];
+ snprintf(msg, sizeof(msg),
+ "Unsupported character encoding: %s", yycharset);
+ YY_FATAL_ERROR(msg);
return 0;
}
]])
@@ -2727,33 +2754,55 @@ int yyget_column YYFARGS0(void)
}
]])
]])
+%endif
-m4_ifdef( [[M4_YY_REENTRANT]],[[
-m4_ifdef( [[M4_YY_CHARSET]], [[
-m4_ifdef( [[M4_YY_NO_GET_CHARSET]],,[[
-/** Get the currently set charset name
+m4_ifdef([[M4_YY_CHARSET]],[[
+ m4_ifdef([[M4_YY_REENTRANT]],[[
+
+ m4_ifdef([[M4_YY_NO_GET_CHARSET]],,[[
+ m4_define([[M4_YY_GET_CHARSET]],[[
+ char* yyget_charset YYFARGS0(void)
+ ]])
+ ]])
+
+ m4_ifdef([[M4_YY_NO_GET_CHARSET_HANDLER]],,[[
+ m4_define([[M4_YY_GET_CHARSET_HANDLER]],[[
+ yycharset_handler_t yyget_charset_handler YYFARGS0(void)
+ ]])
+ ]])
+ ]])
+
+ m4_ifdef([[M4_YY_CXX]], [[
+ m4_define( [[M4_YY_GET_CHARSET]], [[
+ char *FlexLexer::get_charset()
+ ]])
+ ]])
+]])
+
+m4_ifdef( [[M4_YY_GET_CHARSET]], [[
+/** Get the current charset name
* M4_YY_DOC_PARAM
+ * @return charset name
*/
-char *yyget_charset YYFARGS0(void)
+M4_YY_GET_CHARSET
{
M4_YY_DECL_GUTS_VAR();
return yycharset;
}
]])
-m4_ifdef( [[M4_YY_NO_GET_CHARSET_HANDLER]],,[[
+m4_ifdef( [[M4_YY_GET_CHARSET_HANDLER]],[[
/** Get the currently set charset handler
* M4_YY_DOC_PARAM
*/
-yycharset_handler_t yyget_charset_handler YYFARGS0(void)
+M4_YY_GET_CHARSET_HANDLER
{
M4_YY_DECL_GUTS_VAR();
return yycharset_handler;
}
]])
-]])
-]])
+%if-c-only
m4_ifdef( [[M4_YY_NO_GET_IN]],,
[[
/** Get the input stream.
@@ -2859,22 +2908,44 @@ void yyset_column YYFARGS1( int , _column_no)
}
]])
]])
+%endif
-m4_ifdef( [[M4_YY_REENTRANT]],[[
-m4_ifdef( [[M4_YY_CHARSET]], [[
-m4_ifdef( [[M4_YY_NO_SET_CHARSET]],,[[
+m4_ifdef([[M4_YY_CHARSET]], [[
+ m4_ifdef([[M4_YY_REENTRANT]],[[
+
+ m4_ifdef([[M4_YY_NO_SET_CHARSET]],,[[
+ m4_define([[M4_YY_SET_CHARSET]], [[
+ void yyset_charset YYFARGS1(char*, charset)
+ ]])
+ ]])
+
+ m4_ifdef([[M4_YY_NO_SET_CHARSET_HANDLER]],,[[
+ m4_define([[M4_YY_SET_CHARSET_HANDLER]],[[
+ void yyset_charset_handler YYFARGS1(yycharset_handler_t, charset_handler)
+ ]])
+ ]])
+ ]])
+
+ m4_ifdef( [[M4_YY_CXX]],[[
+ m4_define( [[M4_YY_SET_CHARSET]], [[
+ void FlexLexer::set_charset(char *charset)
+ ]])
+ ]])
+]])
+
+m4_ifdef( [[M4_YY_SET_CHARSET]],[[
/** Set the current charset name
* @param charset charset name
* M4_YY_DOC_PARAM
*/
-void yyset_charset YYFARGS1( char*, charset)
+M4_YY_SET_CHARSET
{
M4_YY_DECL_GUTS_VAR();
yycharset = strdup(charset);
}
]])
-m4_ifdef( [[M4_YY_NO_SET_CHARSET_HANDLER]],,[[
+m4_ifdef( [[M4_YY_SET_CHARSET_HANDLER]],[[
/** Set the current charset handler
* @param charset_handler handler function
* M4_YY_DOC_PARAM
@@ -2885,10 +2956,8 @@ void yyset_charset_handler YYFARGS1( yycharset_handler_t, charset_handler)
yycharset_handler = charset_handler;
}
]])
-]])
-]])
-
+%if-c-only
m4_ifdef( [[M4_YY_NO_SET_IN]],,
[[
/** Set the input stream. This does not discard the current
diff --git a/src/main.c b/src/main.c
index 4671a08..d042809 100644
--- a/src/main.c
+++ b/src/main.c
@@ -304,6 +304,8 @@ void check_options ()
if (C_plus_plus && bison_bridge_lval)
flexerror (_("bison bridge not supported for the C++ scanner."));
+ if(C_plus_plus)
+ buf_m4_define( &m4defs_buf, "M4_YY_CXX", NULL);
if (useecs) { /* Set up doubly-linked equivalence classes. */
@@ -1663,6 +1665,9 @@ void readin ()
}
OUT_END_CODE ();
+ if(charset_enabled)
+ outn ("#define YY_CHARSET");
+
if (C_plus_plus) {
outn ("#define yytext_ptr yytext");