summaryrefslogtreecommitdiff
path: root/test/pat2pcre.lua
blob: 345f89dbe7b27e466f9f156b2344967505ff516a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
-- See Copyright Notice in the file LICENSE

-- Convert Lua regex pattern to its PCRE equivalent.

-- Maps the letter that follows "%" in a Lua pattern to the text that stands
-- for it inside a PCRE bracket expression.  Lower-case letters select a POSIX
-- class, upper-case letters its negation; "z"/"Z" are spelled as byte ranges.
-- Letters that are absent from this table are handled by the callers.
local t_esc = {
  z = "\\x00",
  Z = "\\x01-\\xFF",
}
for letter, posix in pairs {
  a = "alpha", c = "cntrl", l = "lower", p = "punct",
  u = "upper", w = "alnum", x = "xdigit",
} do
  t_esc[letter] = "[:" .. posix .. ":]"
  t_esc[string.upper (letter)] = "[:^" .. posix .. ":]"
end

-- Translate the character that follows "%" when it appears outside a
-- bracket expression.
--   ch: the single character after "%"
-- Returns the PCRE fragment: a bracketed class when `ch` has an entry in
-- t_esc, otherwise `ch` preceded by a backslash (so "%1" becomes the
-- back-reference "\1" and "%." becomes "\.").
-- Raises an assertion error for constructs that cannot be translated.
local function rep_normal (ch)
  assert (ch ~= "b", "\"%b\" subpattern is not supported")
  -- "%f[set]" is Lua's frontier pattern; passing it through would yield "\f",
  -- which PCRE reads as a form feed, so reject it just like "%b".
  assert (ch ~= "f", "\"%f\" subpattern is not supported")
  assert (ch ~= "0", "invalid capture index")
  local v = t_esc[ch]
  return v and ("[" .. v .. "]") or ("\\" .. ch)
end

-- Translate the character that follows "%" when it appears inside a
-- bracket expression.
--   ch: the single character after "%"
-- Returns the t_esc entry for `ch` when there is one (without extra
-- brackets, since the caller is already inside "[...]"); otherwise returns
-- `ch` preceded by a backslash.
local function rep_charclass (ch)
  local posix = t_esc[ch]
  if posix then
    return posix
  end
  return "\\" .. ch
end

-- Convert a Lua pattern into a PCRE pattern string.
--
-- The input is scanned once, character by character, in one of two states:
--   * outside a set ("normal"):
--       "%x"  is translated by rep_normal,
--       "-"   becomes the lazy quantifier "*?",
--       "."   becomes "\C",
--       "^"   anywhere but the first position and "$" anywhere but the last
--             position are escaped, because Lua treats them as ordinary
--             characters there while PCRE would treat them as anchors,
--       "["   opens a set;
--   * inside a set ("charclass"):
--       "%x"  is translated by rep_charclass,
--       "]"   closes the set, except when it directly follows "[" or "[^",
--             where Lua takes it as a literal member (emitted as "\]").
-- In both states a backslash followed by one to three decimal digits is
-- rewritten as a "\xHH" escape; every other character is copied verbatim.
--
--   s: the Lua pattern (string)
-- Returns the PCRE pattern (string).
-- Errors raised by rep_normal (unsupported "%" constructs) propagate.
function pat2pcre (s)
  local ind = 0

  -- Advance to the next character and return it ("" once past the end).
  local function getc ()
    ind = ind + 1
    return string.sub (s, ind, ind)
  end

  -- Look at the character after the current one without consuming it.
  local function peekc ()
    return string.sub (s, ind + 1, ind + 1)
  end

  -- If the current character is a backslash followed by 1-3 digits, consume
  -- the digits and return the equivalent "\xHH" escape; otherwise return nil.
  local function getnum ()
    local num = string.match (s, "^\\(%d%d?%d?)", ind)
    if num then
      ind = ind + #num
      return string.format ("\\x%02X", tonumber (num))
    end
  end

  -- Pieces are collected in a table and joined once at the end, which avoids
  -- re-copying the growing result on every appended character.
  local out, state = {}, "normal"
  local function emit (piece)
    out[#out + 1] = piece
  end

  while ind < #s do
    local ch = getc ()
    if state == "normal" then
      if ch == "%" then
        emit (rep_normal (getc ()))
      elseif ch == "-" then
        emit ("*?")
      elseif ch == "." then
        emit ("\\C")
      elseif ch == "^" and ind > 1 then
        -- only a leading "^" is an anchor in a Lua pattern
        emit ("\\^")
      elseif ch == "$" and ind < #s then
        -- only a trailing "$" is an anchor in a Lua pattern
        emit ("\\$")
      elseif ch == "[" then
        emit (ch)
        state = "charclass"
        if peekc () == "^" then
          emit (getc ())
        end
        -- a "]" in first position belongs to the set instead of closing it
        if peekc () == "]" then
          getc ()
          emit ("\\]")
        end
      else
        emit (getnum () or ch)
      end
    elseif state == "charclass" then
      if ch == "%" then
        emit (rep_charclass (getc ()))
      elseif ch == "]" then
        emit (ch)
        state = "normal"
      else
        emit (getnum () or ch)
      end
    end
  end
  return table.concat (out)
end

-- Hand the converter back to whoever loads this chunk; the function is also
-- reachable through the global name `pat2pcre` assigned above.
return pat2pcre