1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
|
# emacs: -*- mode: python; coding: utf-8; py-indent-offset: 4; indent-tabs-mode: t -*-
# vi: set ft=python sts=4 ts=4 sw=4 noet :
# This file is part of Fail2Ban.
#
# Fail2Ban is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Fail2Ban is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Fail2Ban; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Author: Cyril Jaquier
#
__author__ = "Cyril Jaquier"
__copyright__ = "Copyright (c) 2004 Cyril Jaquier"
__license__ = "GPL"
import re, time
from abc import abstractmethod
from .strptime import reGroupDictStrptime, timeRE, getTimePatternRE
from ..helpers import getLogger
logSys = getLogger(__name__)
# check already grouped contains "(", but ignores char "\(" and conditional "(?(id)...)":
RE_GROUPED = re.compile(r'(?<!(?:\(\?))(?<!\\)\((?!\?)')
RE_GROUP = ( re.compile(r'^((?:\(\?\w+\))?\^?(?:\(\?\w+\))?)(.*?)(\$?)$'), r"\1(\2)\3" )
RE_EXLINE_NO_BOUNDS = re.compile(r'^\{UNB\}')
RE_EXLINE_BOUND_BEG = re.compile(r'^\{\^LN-BEG\}')
RE_EXSANC_BOUND_BEG = re.compile(r'^\((?:\?:)?\^\|\\b\|\\W\)')
RE_EXEANC_BOUND_BEG = re.compile(r'\(\?=\\b\|\\W\|\$\)$')
RE_NO_WRD_BOUND_BEG = re.compile(r'^\(*(?:\(\?\w+\))?(?:\^|\(*\*\*|\((?:\?:)?\^)')
RE_NO_WRD_BOUND_END = re.compile(r'(?<!\\)(?:\$\)?|\\b|\\s|\*\*\)*)$')
RE_DEL_WRD_BOUNDS = ( re.compile(r'^\(*(?:\(\?\w+\))?\(*\*\*|(?<!\\)\*\*\)*$'),
lambda m: m.group().replace('**', '') )
RE_LINE_BOUND_BEG = re.compile(r'^(?:\(\?\w+\))?(?:\^|\((?:\?:)?\^(?!\|))')
RE_LINE_BOUND_END = re.compile(r'(?<![\\\|])(?:\$\)?)$')
RE_ALPHA_PATTERN = re.compile(r'(?<!\%)\%[aAbBpc]')
RE_EPOCH_PATTERN = re.compile(r"(?<!\\)\{L?EPOCH\}", re.IGNORECASE)
class DateTemplate(object):
"""A template which searches for and returns a date from a log line.
This is an not functional abstract class which other templates should
inherit from.
Attributes
----------
name
regex
"""
LINE_BEGIN = 8
LINE_END = 4
WORD_BEGIN = 2
WORD_END = 1
def __init__(self):
self.name = ""
self.weight = 1.0
self.flags = 0
self.hits = 0
self.time = 0
self._regex = ""
self._cRegex = None
def getRegex(self):
return self._regex
def setRegex(self, regex, wordBegin=True, wordEnd=True):
r"""Sets regex to use for searching for date in log line.
Parameters
----------
regex : str
The regex the template will use for searching for a date.
wordBegin : bool
Defines whether the regex should be modified to search at beginning of a
word, by adding special boundary r'(?=^|\b|\W)' to start of regex.
Can be disabled with specifying of ** at front of regex.
Default True.
wordEnd : bool
Defines whether the regex should be modified to search at end of a word,
by adding special boundary r'(?=\b|\W|$)' to end of regex.
Can be disabled with specifying of ** at end of regex.
Default True.
Raises
------
re.error
If regular expression fails to compile
"""
# Warning: don't use lookahead for line-begin boundary,
# (e. g. r"^(?:\W{0,2})?" is much faster as r"(?:^|(?<=^\W)|(?<=^\W{2}))")
# because it may be very slow in negative case (by long log-lines not matching pattern)
regex = regex.strip()
boundBegin = wordBegin and not RE_NO_WRD_BOUND_BEG.search(regex)
boundEnd = wordEnd and not RE_NO_WRD_BOUND_END.search(regex)
# if no group add it now, should always have a group(1):
if not RE_GROUPED.search(regex):
regex = RE_GROUP[0].sub(RE_GROUP[1], regex)
self.flags = 0
# if word or line start boundary:
if boundBegin:
self.flags |= DateTemplate.WORD_BEGIN if wordBegin != 'start' else DateTemplate.LINE_BEGIN
if wordBegin != 'start':
regex = r'(?=^|\b|\W)' + regex
else:
regex = r"^(?:\W{0,2})?" + regex
if not self.name.startswith('{^LN-BEG}'):
self.name = '{^LN-BEG}' + self.name
# if word end boundary:
if boundEnd:
self.flags |= DateTemplate.WORD_END
regex += r'(?=\b|\W|$)'
if not (self.flags & DateTemplate.LINE_BEGIN) and RE_LINE_BOUND_BEG.search(regex):
self.flags |= DateTemplate.LINE_BEGIN
if not (self.flags & DateTemplate.LINE_END) and RE_LINE_BOUND_END.search(regex):
self.flags |= DateTemplate.LINE_END
# remove possible special pattern "**" in front and end of regex:
regex = RE_DEL_WRD_BOUNDS[0].sub(RE_DEL_WRD_BOUNDS[1], regex)
self._regex = regex
logSys.log(4, ' constructed regex %s', regex)
self._cRegex = None
regex = property(getRegex, setRegex, doc=
"""Regex used to search for date.
""")
def _compileRegex(self):
"""Compile regex by first usage.
"""
if not self._cRegex:
try:
# print('*'*10 + (' compile - %-30.30s -- %s' % (getattr(self, 'pattern', self.regex), self.name)))
self._cRegex = re.compile(self.regex)
except Exception as e:
logSys.error('Compile %r failed, expression %r', self.name, self.regex)
raise e
def matchDate(self, line, *args):
"""Check if regex for date matches on a log line.
"""
if not self._cRegex:
self._compileRegex()
logSys.log(4, " search %s", self.regex)
dateMatch = self._cRegex.search(line, *args); # pos, endpos
if dateMatch:
self.hits += 1
# print('*'*10 + ('[%s] - %-30.30s -- %s' % ('*' if dateMatch else ' ', getattr(self, 'pattern', self.regex), self.name)))
return dateMatch
@abstractmethod
def getDate(self, line, dateMatch=None, default_tz=None):
"""Abstract method, which should return the date for a log line
This should return the date for a log line, typically taking the
date from the part of the line which matched the templates regex.
This requires abstraction, therefore just raises exception.
Parameters
----------
line : str
Log line, of which the date should be extracted from.
default_tz: if no explicit time zone is present in the line
passing this will interpret it as in that time zone.
Raises
------
NotImplementedError
Abstract method, therefore always returns this.
"""
raise NotImplementedError("getDate() is abstract")
@staticmethod
def unboundPattern(pattern):
return RE_EXEANC_BOUND_BEG.sub('',
RE_EXSANC_BOUND_BEG.sub('',
RE_EXLINE_BOUND_BEG.sub('', RE_EXLINE_NO_BOUNDS.sub('', pattern))
)
)
class DateEpoch(DateTemplate):
"""A date template which searches for Unix timestamps.
This includes Unix timestamps which appear at start of a line, optionally
within square braces (nsd), or on SELinux audit log lines.
Attributes
----------
name
regex
"""
def __init__(self, lineBeginOnly=False, pattern=None, longFrm=False):
DateTemplate.__init__(self)
self.name = "Epoch" if not pattern else pattern
self._longFrm = longFrm;
self._grpIdx = 1
epochRE = r"\d{10,11}\b(?:\.\d{3,6})?"
if longFrm:
self.name = "LongEpoch" if not pattern else pattern
epochRE = r"\d{10,11}(?:\d{3}(?:\.\d{1,6}|\d{3})?)?"
if pattern:
# pattern should capture/cut out the whole match:
regex = "(" + RE_EPOCH_PATTERN.sub(lambda v: "(%s)" % epochRE, pattern) + ")"
self._grpIdx = 2
self.setRegex(regex)
elif not lineBeginOnly:
regex = r"((?:^|(?P<square>(?<=^\[))|(?P<selinux>(?<=\baudit\()))%s)(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))" % epochRE
self.setRegex(regex, wordBegin=False) ;# already line begin resp. word begin anchored
else:
regex = r"((?P<square>(?<=^\[))?%s)(?(square)(?=\]))" % epochRE
self.setRegex(regex, wordBegin='start', wordEnd=True)
def getDate(self, line, dateMatch=None, default_tz=None):
"""Method to return the date for a log line.
Parameters
----------
line : str
Log line, of which the date should be extracted from.
default_tz: ignored, Unix timestamps are time zone independent
Returns
-------
(float, str)
Tuple containing a Unix timestamp, and the string of the date
which was matched and in turned used to calculated the timestamp.
"""
if not dateMatch:
dateMatch = self.matchDate(line)
if dateMatch:
v = dateMatch.group(self._grpIdx)
# extract part of format which represents seconds since epoch
if self._longFrm and len(v) >= 13:
if len(v) >= 16 and '.' not in v:
v = float(v) / 1000000
else:
v = float(v) / 1000
return (float(v), dateMatch)
class DatePatternRegex(DateTemplate):
"""Date template, with regex/pattern
Parameters
----------
pattern : str
Sets the date templates pattern.
Attributes
----------
name
regex
pattern
"""
_patternRE, _patternName = getTimePatternRE()
_patternRE = re.compile(_patternRE)
def __init__(self, pattern=None, **kwargs):
super(DatePatternRegex, self).__init__()
self._pattern = None
if pattern is not None:
self.setRegex(pattern, **kwargs)
@property
def pattern(self):
"""The pattern used for regex with strptime "%" time fields.
This should be a valid regular expression, of which matching string
will be extracted from the log line. strptime style "%" fields will
be replaced by appropriate regular expressions, or custom regex
groups with names as per the strptime fields can also be used
instead.
"""
return self._pattern
@pattern.setter
def pattern(self, pattern):
self.setRegex(pattern)
def setRegex(self, pattern, wordBegin=True, wordEnd=True):
# original pattern:
self._pattern = pattern
# if unbound signalled - reset boundaries left and right:
if RE_EXLINE_NO_BOUNDS.search(pattern):
pattern = RE_EXLINE_NO_BOUNDS.sub('', pattern)
wordBegin = wordEnd = False
# if explicit given {^LN-BEG} - remove it from pattern and set 'start' in wordBegin:
if wordBegin and RE_EXLINE_BOUND_BEG.search(pattern):
pattern = RE_EXLINE_BOUND_BEG.sub('', pattern)
wordBegin = 'start'
try:
# wrap to regex:
fmt = self._patternRE.sub(r'%(\1)s', pattern)
self.name = fmt % self._patternName
regex = fmt % timeRE
# if expected add (?iu) for "ignore case" and "unicode":
if RE_ALPHA_PATTERN.search(pattern):
regex = r'(?iu)' + regex
super(DatePatternRegex, self).setRegex(regex, wordBegin, wordEnd)
except Exception as e:
raise TypeError("Failed to set datepattern '%s' (may be an invalid format or unescaped percent char): %s" % (pattern, e))
def getDate(self, line, dateMatch=None, default_tz=None):
"""Method to return the date for a log line.
This uses a custom version of strptime, using the named groups
from the instances `pattern` property.
Parameters
----------
line : str
Log line, of which the date should be extracted from.
default_tz: optionally used to correct timezone
Returns
-------
(float, str)
Tuple containing a Unix timestamp, and the string of the date
which was matched and in turned used to calculated the timestamp.
"""
if not dateMatch:
dateMatch = self.matchDate(line)
if dateMatch:
return (reGroupDictStrptime(dateMatch.groupdict(), default_tz=default_tz),
dateMatch)
class DateTai64n(DateTemplate):
"""A date template which matches TAI64N formate timestamps.
Attributes
----------
name
regex
"""
def __init__(self, wordBegin=False):
DateTemplate.__init__(self)
self.name = "TAI64N"
# We already know the format for TAI64N
self.setRegex("@[0-9a-f]{24}", wordBegin=wordBegin)
def getDate(self, line, dateMatch=None, default_tz=None):
"""Method to return the date for a log line.
Parameters
----------
line : str
Log line, of which the date should be extracted from.
default_tz: ignored, since TAI is time zone independent
Returns
-------
(float, str)
Tuple containing a Unix timestamp, and the string of the date
which was matched and in turned used to calculated the timestamp.
"""
if not dateMatch:
dateMatch = self.matchDate(line)
if dateMatch:
# extract part of format which represents seconds since epoch
value = dateMatch.group(1)
seconds_since_epoch = value[2:17]
# convert seconds from HEX into local time stamp
return (int(seconds_since_epoch, 16), dateMatch)
|