summaryrefslogtreecommitdiff
path: root/lib/lace/lex.lua
blob: d7d18da71cba3e1983e1d3e9975f647d66e00cca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
-- lib/lace/lex.lua
--
-- Lua Access Control Engine -- Ruleset lexer
--
-- Copyright 2012 Daniel Silverstone <dsilvers@digital-scurf.org>
--
-- For Licence terms, see COPYING
--

--- Lace Internals - Ruleset lexer.
--
-- The lexer for Lace is only used internally and is generally not accessed
-- from outside of Lace.  It is exposed only for testing and validation
-- purposes.

-- Module table: the only value exported by this file.
local M = {}

-- Memoisation cache for single-line lexes, keyed on the line text plus
-- its terminator (see lex_one_line below).
local lexer_line_cache = {}

-- Forward declaration: _lex_one_line recurses via lex_one_line for
-- bracketed sub-expressions, so the name must be in scope before the
-- wrapper is defined.
local lex_one_line

--- Lex a single line of a ruleset into tokens.
-- Splits `line` on unquoted spaces/tabs, honouring single and double
-- quotes and backslash escapes.  While quoting, an escaped `n` or `t`
-- expands to newline/tab; any other escaped character is taken
-- literally.  An unquoted `[` seen with an empty accumulator recurses
-- to lex a bracketed sub-expression terminated by `]`, recorded as a
-- token carrying a `sub` field instead of `str`.
-- @tparam string line The text to lex.
-- @tparam string|nil terminator Character that ends lexing early
--         (passed as "]" for bracketed sub-expressions); nil lexes the
--         whole line.
-- @treturn table List of tokens, each { spos = n, epos = n, str = s }
--          or { spos = n, epos = n, sub = tokens }, positions 1-based.
-- @treturn string The unconsumed remainder of the line (text after the
--          terminator, or "" when fully consumed).
-- @treturn table List of warning strings (unterminated quote,
--          trailing escape).
local function _lex_one_line(line, terminator)
   local r = {}               -- accumulated tokens
   local acc = ""             -- characters of the token being built
   local c                    -- current character
   local escaping = false     -- previous character was a backslash
   local quoting = false      -- false, or the active quote character
   local force_empty = false  -- quotes seen, so "" is a valid token
   local spos, cpos = 1, 0    -- token start position / current position
   while #line > 0 do
      -- Consume one character at a time from the front of the line.
      c, line = line:match("^(.)(.*)$")
      cpos = cpos + 1
      if escaping then 
	 if quoting then
	    if c == "n" then
	       acc = acc .. "\n"
	    elseif c == "t" then
	       acc = acc .. "\t"
	    else
	       acc = acc .. c
	    end
	 else
	    -- Outside quotes every escaped character is literal.
	    acc = acc .. c
	 end
	 escaping = false
      else
	 if c == terminator and quoting == false then
	    -- Reached the terminator, break out
	    break
	 elseif c == "'" and quoting == false then
	    -- Start single quotes
	    quoting = c
	    force_empty = true
	 elseif c == '"' and quoting == false then
	    -- Start double quotes
	    quoting = c
	    force_empty = true
	 elseif c == '[' and quoting == false then
	    if acc == "" then
	       -- Something worth lexing
	       local ltab, rest, warns = lex_one_line(line, "]")
	       -- For now, assume the accumulator is good enough
	       cpos = cpos + #line - #rest
	       r[#r+1] = { spos = spos, epos = cpos, sub = ltab }
	       spos = cpos + 1
	       line = rest
	       acc = ""
	    end
	    -- NOTE(review): a '[' following a non-empty accumulator is
	    -- silently discarded, and the sub-lex's `warns` is unused —
	    -- presumably deliberate, but worth confirming.
	 elseif c == "'" and quoting == c then
	    -- End single quotes
	    quoting = false
	 elseif c == '"' and quoting == c then
	    -- End double quotes
	    quoting = false
	 elseif c == "\\" then
	    -- A backslash, entering escaping mode
	    escaping = true
	 elseif quoting then
	    -- Within quotes, so accumulate
	    acc = acc .. c
	 elseif c == " " or c == "\t" then
	    -- A space (or tab) and not quoting, so clear the accumulator
	    if acc ~= "" or force_empty then
	       r[#r+1] = { spos = spos, epos = cpos - 1, str = acc }
	       spos = cpos + 1
	       force_empty = false
	    elseif cpos == spos then
	       -- Increment the start position since we've not found a word yet
	       spos = spos + 1
	    end
	    acc = ""
	 else
	    acc = acc .. c
	 end
      end
   end
   -- Flush any token still in the accumulator (or an empty quoted token).
   if acc ~= "" or force_empty then
      r[#r+1] = { spos = spos, epos = cpos, str = acc }
   end

   -- Report ill-formed input without failing the lex.
   local warnings = {}
   if quoting then
      warnings[#warnings+1] = "Un-terminated quoted string"
   end
   if escaping then
      warnings[#warnings+1] = "Un-used escape at end"
   end

   return r, line, warnings
end

--- Memoising wrapper around _lex_one_line.
-- Results are cached keyed on the line content plus the terminator, so
-- re-lexing an identical line costs only a table lookup.
-- @tparam string line The line to lex.
-- @tparam string|nil terminator Optional early-terminator character.
-- @treturn table,string,table Tokens, unconsumed remainder, warnings.
function lex_one_line(line, terminator)
   local key = line .. "\n" .. tostring(terminator)
   local cached = lexer_line_cache[key]
   if cached == nil then
      cached = { _lex_one_line(line, terminator) }
      lexer_line_cache[key] = cached
   end
   return cached[1], cached[2], cached[3]
end

-- Cache of complete ruleset lexes, keyed by source name then ruleset text.
local cached_full_lexes = {}

--- Lexically analyse a ruleset.
-- Each line is classified as "comment" (leading #, // or --),
-- "whitespace", or "rule"; rule lines are tokenised via lex_one_line.
-- Results are memoised per source name and ruleset content.
-- @tparam string ruleset The ruleset to lex.
-- @tparam string sourcename The name of the source to go into debug info.
-- @treturn table A list of lexed lines, each line being a table of tokens
--                with their associated debug information.
function M.string(ruleset, sourcename)
   if cached_full_lexes[sourcename] and
      cached_full_lexes[sourcename][ruleset] then
      return cached_full_lexes[sourcename][ruleset]
   end
   local lines = {}
   local ret = { source = sourcename, lines = lines }
   local n = 1
   -- Ensure the ruleset ends in a newline so the gmatch below yields
   -- the final line.
   if ruleset:match("[^\n]$") then
      ruleset = ruleset .. "\n"
   end
   for oneline in ruleset:gmatch("([^\n]*)\n") do
      local linetab = { original = oneline }
      if oneline:match("^[ \t]*#") or
	 oneline:match("^[ \t]*//") or
	 oneline:match("^[ \t]*%-%-") then
	 linetab.type = "comment"
      elseif oneline:match("^[ \t]*$") then
	 linetab.type = "whitespace"
      else
	 linetab.type = "rule"
	 -- BUGFIX: rest_of_line was assigned without `local`, leaking
	 -- an accidental global; keep all three results loop-local.
	 local content, rest_of_line, warn = lex_one_line(oneline)
	 -- With no terminator the whole line must be consumed.
	 assert(rest_of_line == "", "Content left after line lexing")
	 linetab.content = content
	 if #warn > 0 then
	    linetab.warnings = warn
	 end
      end
      lines[n] = linetab
      n = n + 1
   end
   cached_full_lexes[sourcename] = cached_full_lexes[sourcename] or {}
   cached_full_lexes[sourcename][ruleset] = ret
   return ret
end

-- Export the module table.
return M