test/awkemu.rl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155

/*
 * @LANG: c
 */

/*
 * Emulate the basic parser of the awk program. Breaks lines up into
 * words and prints the words.
 */

#include <stdio.h>
#include <string.h>

/* Capacity, in bytes, of each of the two line-capture buffers below. */
#define LINEBUF 2048
/* Raw text of the current line, written by the linechar action and
 * NUL-terminated by finishLine(). */
static char lineBuf[LINEBUF];
/* Words of the current line stored back to back, each followed by a NUL
 * (written by the wordchar and termword actions). */
static char blineBuf[LINEBUF];
/* Number of bytes currently stored in lineBuf. */
static int lineLen;
/* Number of bytes currently stored in blineBuf, terminators included. */
static int blineLen;
/* Number of words terminated so far on the current line. */
static int words;

/* Prints the captured line followed by its words. Declared here because the
 * machine specification calls it before the definition appears. */
void finishLine();

/* Per-instance state of the awkemu machine, passed to the awkemu_* functions. */
struct awkemu
{
	/* Current machine state; the specification binds it with
	 * "variable cs fsm->cs". */
	int cs;
};

%%{
	machine awkemu;

	# Keep the current state in the caller-supplied struct.
	variable cs fsm->cs;

	# Starts a line. Resets all the data used for capturing the line.
	action startline {
		lineLen = 0;
		blineLen = 0;
		words = 0;
	}

	# Executed on every character seen in a word. Captures the character to
	# the broken up line buffer. Once blineBuf is full, further characters are
	# dropped instead of being written past the end of the buffer.
	action wordchar {
		if ( blineLen < LINEBUF )
			blineBuf[blineLen++] = fc;
	}

	# Terminates a word. Adds the null after the word and increments the word
	# count for the line. The word is only counted when its terminator fits,
	# which also means every one of its characters was stored, so finishLine
	# never walks past the end of blineBuf.
	action termword {
		if ( blineLen < LINEBUF ) {
			blineBuf[blineLen++] = 0;
			words += 1;
		}
	}

	# Executed on every character seen in a line (not including the newline
	# itself). One byte is kept free for the terminator that finishLine
	# appends, so overlong lines are truncated rather than overflowing lineBuf.
	action linechar {
		if ( lineLen < LINEBUF - 1 )
			lineBuf[lineLen++] = fc;
	}

	# This section of the machine deals with breaking up lines into fields.
	# Lines are separated by whitespace and put in an array of words.

	# Words in a line.
	word = (extend - [ \t\n])+;

	# The whitespace separating words in a line.
	whitespace = [ \t];

	# The components in a line to break up. Either a word or a single char of
	# whitespace. On the word capture characters.
	blineElements = word $wordchar %termword | whitespace;

	# Star the break line elements. Just be careful to decrement the leaving
	# priority as we do not want multiple character identifiers to be treated
	# as multiple single char identifiers.
	breakLine = ( blineElements $1 %0 )* . '\n';

	# This machine lets us capture entire lines. We do it separately from the
	# words in a line. finishLine runs when the line content is left, that is
	# on the terminating newline.
	bufLine = (extend - '\n')* $linechar %{ finishLine(); } . '\n';

	# A line can then consist of the machine that will break up the line into
	# words and a machine that will buffer the entire line.
	line = ( breakLine | bufLine ) > startline;

	# Any number of lines.
	main := line*;
}%%

/*
 * Report the line that was just captured: terminate the raw line buffer,
 * print it together with the word count, then print each captured word.
 * Words sit back to back in blineBuf, each ending in a NUL byte.
 */
void finishLine()
{
	const char *cursor = blineBuf;
	int remaining = words;

	lineBuf[lineLen] = 0;
	printf("endline(%i): %s\n", words, lineBuf );

	/* Step from one NUL-terminated word to the next. */
	while ( remaining-- > 0 ) {
		printf("  word: %s\n", cursor );
		cursor += strlen(cursor) + 1;
	}
}

%% write data;

/*
 * Prepare *fsm for a fresh run. The body is generated by Ragel from the
 * "write init" directive, which initializes the state variable (fsm->cs).
 */
void awkemu_init( struct awkemu *fsm )
{
    %% write init;
}

/*
 * Run the machine over _len bytes starting at _data, continuing from the
 * state stored in fsm->cs. The processing loop is generated by Ragel from
 * the "write exec" directive.
 */
void awkemu_execute( struct awkemu *fsm, const char *_data, int _len )
{
    /* p and pe are the cursor and end-of-data pointers used by the
     * generated code. */
    const char *p = _data;
    const char *pe = _data+_len;
	%% write exec;
}

/*
 * Classify the state the machine stopped in.
 * Returns -1 for the error state, 1 for a final (accepting) state and
 * 0 for any other state.
 */
int awkemu_finish( struct awkemu *fsm )
{
	const int state = fsm->cs;

	if ( state == awkemu_error )
		return -1;

	return ( state >= awkemu_first_final ) ? 1 : 0;
}

#include <stdio.h>
/* Size of the scratch buffer declared below. */
#define BUFSIZE 2048

/* Machine instance shared by every call to test(). */
struct awkemu fsm;
/* NOTE(review): not referenced by the visible code; the parameter of test()
 * shadows this name. */
char buf[BUFSIZE];

/*
 * Feed one NUL-terminated input string through a freshly initialized machine
 * and print ACCEPT when it stops in a final state, FAIL otherwise.
 */
void test( char *buf )
{
	int len = strlen( buf );
	int accepted;

	awkemu_init( &fsm );
	awkemu_execute( &fsm, buf, len );
	accepted = awkemu_finish( &fsm ) > 0;

	if ( !accepted ) {
		printf("FAIL\n");
		return;
	}
	printf("ACCEPT\n");
}

/*
 * Drive the machine over the fixed set of sample inputs, in order: the empty
 * string, a line without a trailing newline, and a complete line.
 */
int main()
{
	static char *inputs[] = {
		"",
		"one line with no newline",
		"one line\n"
	};
	const int count = (int)( sizeof inputs / sizeof inputs[0] );
	int idx;

	for ( idx = 0; idx < count; idx++ )
		test( inputs[idx] );

	return 0;
}

#ifdef _____OUTPUT_____
ACCEPT
FAIL
endline(2): one line
  word: one
  word: line
ACCEPT
#endif