/*
 * @LANG: c
 */

/*
 * Emulate the basic parser of the awk program. Breaks lines up into
 * words and prints the words.
 */

#include <stdio.h>
#include <string.h>

#define LINEBUF 2048

/* Raw text of the current line; finishLine() adds the terminating NUL. */
static char lineBuf[LINEBUF];
/* Words of the current line, stored back to back, each NUL-terminated. */
static char blineBuf[LINEBUF];
/* Number of bytes currently stored in lineBuf. */
static int lineLen;
/* Number of bytes currently stored in blineBuf. */
static int blineLen;
/* Number of complete words stored in blineBuf for the current line. */
static int words;

void finishLine(void);

/* Scanner state: only the current state of the generated machine. */
struct awkemu
{
	int cs;
};

%%{
	machine awkemu;

	variable cs fsm->cs;

	# Starts a line. Will initialize all the data necessary for capturing
	# the line.
	action startline {
		lineLen = 0;
		blineLen = 0;
		words = 0;
	}

	# Will be executed on every character seen in a word. Captures the word
	# to the broken up line buffer. One byte is always kept free for the
	# terminator that termword appends; characters that do not fit are
	# dropped instead of being written past the end of the buffer.
	action wordchar {
		if ( blineLen < LINEBUF - 1 )
			blineBuf[blineLen++] = fc;
	}

	# Terminate a word. Adds the null after the word and increments the word
	# count for the line. A word is only counted when its terminator fits,
	# so finishLine() never walks beyond the buffer. When a very long line
	# fills the buffer, the last counted word may be truncated or empty.
	action termword {
		if ( blineLen < LINEBUF ) {
			blineBuf[blineLen++] = 0;
			words += 1;
		}
	}

	# Will be executed on every character seen in a line (not including
	# the newline itself). The last byte of the buffer is reserved for the
	# terminator written by finishLine(); characters that do not fit are
	# dropped.
	action linechar {
		if ( lineLen < LINEBUF - 1 )
			lineBuf[lineLen++] = fc;
	}

	# This section of the machine deals with breaking up lines into fields.
	# Lines are separated by the whitespace and put in an array of words.

	# Words in a line.
	word = (extend - [ \t\n])+;

	# The whitespace separating words in a line.
	whitespace = [ \t];

	# The components in a line to break up. Either a word or a single char of
	# whitespace. On the word capture characters.
	blineElements = word $wordchar %termword | whitespace;

	# Apply the star to the break line elements. Just be careful to decrement
	# the leaving priority as we don't want multiple character identifiers to
	# be treated as multiple single char identifiers.
	breakLine = ( blineElements $1 %0 )* . '\n';

	# This machine lets us capture entire lines. We do it separate from the
	# words in a line.
	bufLine = (extend - '\n')* $linechar %{ finishLine(); } . '\n';

	# A line can then consist of the machine that will break up the line into
	# words and a machine that will buffer the entire line.
	line = ( breakLine | bufLine ) > startline;

	# Any number of lines.
	main := line*;
}%%

/*
 * Print the line that was just completed: first the word count and the raw
 * line text, then every captured word on its own output line. Invoked by the
 * bufLine machine when it leaves the line contents on the newline.
 */
void finishLine(void)
{
	int i;
	char *pword = blineBuf;

	/* linechar never stores more than LINEBUF - 1 bytes, so this fits. */
	lineBuf[lineLen] = 0;
	printf("endline(%i): %s\n", words, lineBuf );

	for ( i = 0; i < words; i++ ) {
		printf(" word: %s\n", pword );
		/* Step over the word and its terminating NUL. */
		pword += strlen(pword) + 1;
	}
}

%% write data;

/* Put the machine into its start state. */
void awkemu_init( struct awkemu *fsm )
{
	%% write init;
}

/*
 * Run the machine over _len bytes starting at _data. The state is kept in
 * fsm->cs, so the result is inspected afterwards with awkemu_finish().
 */
void awkemu_execute( struct awkemu *fsm, const char *_data, int _len )
{
	const char *p = _data;
	const char *pe = _data+_len;
	%% write exec;
}

/*
 * Classify the state the machine stopped in.
 * Returns -1 for the error state, 1 for a final (accepting) state and
 * 0 for any other state.
 */
int awkemu_finish( struct awkemu *fsm )
{
	if ( fsm->cs == awkemu_error )
		return -1;
	if ( fsm->cs >= awkemu_first_final )
		return 1;
	return 0;
}

#include <stdio.h>
#define BUFSIZE 2048

struct awkemu fsm;
/* Not referenced by the code below; test() shadows it with its parameter. */
char buf[BUFSIZE];

/* Scan one NUL-terminated input from scratch and report ACCEPT or FAIL. */
void test( const char *buf )
{
	int len = strlen( buf );
	awkemu_init( &fsm );
	awkemu_execute( &fsm, buf, len );
	if ( awkemu_finish( &fsm ) > 0 )
		printf("ACCEPT\n");
	else
		printf("FAIL\n");
}

int main(void)
{
	test( "" );
	test( "one line with no newline" );
	test( "one line\n" );
	return 0;
}

#ifdef _____OUTPUT_____
ACCEPT
FAIL
endline(2): one line
 word: one
 word: line
ACCEPT
#endif