/*
 * path: root/src/lmparse.kh
 * blob: e55f3c252badb57647f7d9c1c9fa4739959ff704
 */
/*
 *  Copyright 2001-2007 Adrian Thurston <thurston@complang.org>
 */

/*  This file is part of Colm.
 *
 *  Colm is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 * 
 *  Colm is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with Colm; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 */

#ifndef RLPARSE_H
#define RLPARSE_H

#include <iostream>
#include "avltree.h"
#include "parsedata.h"

/* Property flag with value 0x1.
 * NOTE(review): presumably a bit in a property mask associated with the
 * KW_ReduceFirst keyword declared in the parser's token list -- the code
 * that sets or tests it is not in this header; confirm at the use site. */
#define PROPERTY_REDUCE_FIRST 0x1

/*
 * Parser object for the grammar declared in the %%{ ... }%% section below.
 *
 * The %% sections are directives for the parser generator that processes
 * this file: the first names the parser and declares its token set, and
 * "write instance_data" asks the generator to emit its per-instance data
 * inside this struct. The remaining members are working state declared by
 * hand; their uses live in the implementation, not in this header.
 */
struct ColmParser
{
	/*
	 * pd:          stored in this->pd.
	 * fileName:    accepted for interface compatibility; not used here.
	 * sectionName: stored in this->sectionName (pointer copy, not a string copy).
	 * sectionLoc:  accepted for interface compatibility; not used here.
	 *
	 * All raw pointer members start out null so that a read before the first
	 * assignment is a well-defined null rather than an indeterminate value.
	 * Initialisers are listed in member declaration order. predType is left
	 * default-initialised because its type is declared elsewhere.
	 */
	ColmParser( Compiler *pd, const char *fileName, const char *sectionName, const InputLoc &sectionLoc )
	:
		pd(pd), sectionName(sectionName),
		patternItemList(0), replItemList(0),
		curDefList(0), curProdElList(0),
		enterRl(false)
	{}

	%%{
	parser ColmParser;

	# Use a class for tokens.
	token uses class Token;

	# Atoms.
	token TK_Word, TK_Literal, TK_SingleLit, TK_DoubleLit, TK_Number, TK_UInt,
		TK_Hex, KW_Nil, KW_True, KW_False;

	# General tokens.
	token TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon,
		TK_DoubleArrow, TK_StarStar, TK_NameSep, TK_DashDash, TK_DoubleEql,
		TK_NotEql, TK_DoubleColon, TK_LessEql, TK_GrtrEql, TK_RightArrow,
		TK_LitPat, TK_AmpAmp, TK_BarBar, TK_SqOpen, TK_SqOpenNeg, TK_SqClose,
		TK_Dash, TK_ReChar, TK_LtLt;

	# Defining things.
	token KW_Rl, KW_Def, KW_Lex, KW_Context, KW_Ignore, KW_Token, KW_Commit, KW_Namespace,
		KW_Literal, KW_ReduceFirst, KW_Map, KW_List, KW_Vector, KW_Parser, KW_Global, KW_Export,
		KW_Iter, KW_Reject, KW_Ptr, KW_Ref, KW_Deref;

	# Language.
	token KW_If, KW_While, KW_Else, KW_Elsif, KW_For, KW_Return, KW_Yield, KW_In,
		KW_Break, KW_PrintXMLAC, KW_PrintXML, KW_Print, KW_PrintStream, KW_Require;

	# Patterns.
	token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken,
		KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni, KW_Ci;

	token KW_Include, KW_Preeof;

	token KW_Left, KW_Right, KW_Nonassoc, KW_Prec;

	}%%

	%% write instance_data;


	/* Defined in the implementation; not visible in this header. */
	void init();
	int parseLangEl( int type, const Token *token );

	/* Takes a token id with its location and text span (start pointer and
	 * length). NOTE(review): presumably the scanner's entry point into the
	 * parser -- confirm against the caller. */
	int token( InputLoc &loc, int tokId, char *tokstart, int toklen );

	/* Helpers for registering grammar entities; bodies are defined elsewhere. */
	void addRegularDef( const InputLoc &loc, Namespace *nspace, 
		const String &name, Join *join );
	TokenRegion *createRegion( String &name );
	void addRegionDef( const InputLoc &loc, Namespace *nspace, 
		const String &name, TokenRegion *join );
	void addProduction( const InputLoc &loc, const String &name, 
		ProdElList *prodElList, bool commit, CodeBlock *redBlock, LangEl *predOf );
	void addArgvList();

	/* Report an error encountered by the parser. */
	ostream &parse_error( int tokId, Token &token );

	/* Compiler handed to the constructor. */
	Compiler *pd;

	/* The name of the root section, this does not change during an include. */
	const char *sectionName;

	NameRef nameRef;
	NameRefList nameRefList;

	LangElVect langElVect;

	/* Null until assigned by the implementation. */
	PatternItemList *patternItemList;
	ReplItemList *replItemList;
	RegionVect regionStack;
	NamespaceVect namespaceStack;
	ContextVect contextStack;

	String curDefineId;
	/* Null until assigned by the implementation. */
	LelDefList *curDefList;
	ProdElList *curProdElList;

	/* Not set by the constructor; its type is declared outside this header. */
	PredType predType;
	ReCaptureVect reCaptureVect;

	/* Starts out false. */
	bool enterRl;
};

/* Parser-generator directive: write the token definitions at this point in
 * the generated header. NOTE(review): presumably these are the identifiers
 * for the tokens declared in the ColmParser section above -- confirm against
 * the generated output. */
%% write token_defs;

#endif