summaryrefslogtreecommitdiff
path: root/fail2ban/server/datetemplate.py
blob: 8f1aaeb44be359759f3e7e370d79fde7bb6b0079 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
# emacs: -*- mode: python; coding: utf-8; py-indent-offset: 4; indent-tabs-mode: t -*-
# vi: set ft=python sts=4 ts=4 sw=4 noet :

# This file is part of Fail2Ban.
#
# Fail2Ban is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Fail2Ban is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Fail2Ban; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# Author: Cyril Jaquier
# 

__author__ = "Cyril Jaquier"
__copyright__ = "Copyright (c) 2004 Cyril Jaquier"
__license__ = "GPL"

import re, time
from abc import abstractmethod

from .strptime import reGroupDictStrptime, timeRE, getTimePatternRE
from ..helpers import getLogger

logSys = getLogger(__name__)

# Detects whether a regex already contains a capturing group: an opening "("
# that is not escaped ("\("), not directly followed by "?" (e. g. "(?:", "(?=")
# and not the inner paren of a conditional "(?(id)...)":
RE_GROUPED = re.compile(r'(?<!(?:\(\?))(?<!\\)\((?!\?)')
# (pattern, replacement) pair used to wrap the body of a regex into a group,
# keeping an optional leading inline-flags group "(?flags)", an optional "^"
# and an optional trailing "$" outside of the new group:
RE_GROUP = ( re.compile(r'^((?:\(\?\w+\))?\^?(?:\(\?\w+\))?)(.*?)(\$?)$'), r"\1(\2)\3" )

# leading marker "{UNB}" (checked by DatePatternRegex.setRegex to disable both word boundaries):
RE_EXLINE_NO_BOUNDS = re.compile(r'^\{UNB\}')
# leading marker "{^LN-BEG}" (checked by DatePatternRegex.setRegex to anchor at line begin):
RE_EXLINE_BOUND_BEG = re.compile(r'^\{\^LN-BEG\}')
# literal text "(^|\b|\W)" or "(?:^|\b|\W)" at the very start (stripped by DateTemplate.unboundPattern):
RE_EXSANC_BOUND_BEG = re.compile(r'^\((?:\?:)?\^\|\\b\|\\W\)')
# literal text "(?=\b|\W|$)" at the very end (stripped by DateTemplate.unboundPattern):
RE_EXEANC_BOUND_BEG = re.compile(r'\(\?=\\b\|\\W\|\$\)$')
# regex starts (after optional "(" chars and optional inline-flags group) with
# "^", "**", "(^" or "(?:^" - in this case no word-begin boundary will be added:
RE_NO_WRD_BOUND_BEG = re.compile(r'^\(*(?:\(\?\w+\))?(?:\^|\(*\*\*|\((?:\?:)?\^)')
# regex ends with "$", "$)", "\b", "\s" or "**" (optionally followed by ")" chars),
# not preceded by a backslash - in this case no word-end boundary will be added:
RE_NO_WRD_BOUND_END = re.compile(r'(?<!\\)(?:\$\)?|\\b|\\s|\*\*\)*)$')
# (pattern, replacement-callable) pair removing the special "**" at the start
# resp. at the end of regex (surrounding parens and inline flags are kept):
RE_DEL_WRD_BOUNDS = ( re.compile(r'^\(*(?:\(\?\w+\))?\(*\*\*|(?<!\\)\*\*\)*$'), 
	                    lambda m: m.group().replace('**', '') )

# regex starts (after optional inline-flags group) with "^", or with "(^" / "(?:^"
# not followed by "|" - used to set flag LINE_BEGIN:
RE_LINE_BOUND_BEG = re.compile(r'^(?:\(\?\w+\))?(?:\^|\((?:\?:)?\^(?!\|))')
# regex ends with "$" or "$)" not preceded by "\" or "|" - used to set flag LINE_END:
RE_LINE_BOUND_END = re.compile(r'(?<![\\\|])(?:\$\)?)$')

# one of the "%" fields %a, %A, %b, %B, %p, %c (not preceded by another "%");
# if found, DatePatternRegex.setRegex prepends "(?iu)" to the regex:
RE_ALPHA_PATTERN = re.compile(r'(?<!\%)\%[aAbBpc]')

# placeholder "{EPOCH}" resp. "{LEPOCH}" (case-insensitive, not preceded by a backslash),
# replaced by DateEpoch with the regex of the timestamp:
RE_EPOCH_PATTERN = re.compile(r"(?<!\\)\{L?EPOCH\}", re.IGNORECASE)


class DateTemplate(object):
	"""A template which searches for and returns a date from a log line.

	This is a non-functional base class which other templates should
	inherit from, overriding `getDate`. The class does not use an ABC
	metaclass, so it can still be instantiated directly; calling `getDate`
	on it raises NotImplementedError.

	Attributes
	----------
	name : str
		Name of the template (empty string until set).
	regex : str
		Regex used to search for date, see `setRegex`.
	flags : int
		Bit mask of LINE_BEGIN, LINE_END, WORD_BEGIN and WORD_END,
		recalculated by every `setRegex` call.
	hits : int
		Counter, incremented by each successful `matchDate`.
	weight : float
		Initialised with 1.0, not otherwise used inside this class.
	time : int
		Initialised with 0, not otherwise used inside this class.
	"""

	LINE_BEGIN = 8
	LINE_END =   4
	WORD_BEGIN = 2
	WORD_END =   1

	def __init__(self):
		self.name = ""
		self.weight = 1.0
		self.flags = 0
		self.hits = 0
		self.time = 0
		self._regex = ""
		# compiled regex, created lazily by first usage (see _compileRegex):
		self._cRegex = None

	def getRegex(self):
		"""Returns the regex string as constructed by the last `setRegex`.
		"""
		return self._regex

	def setRegex(self, regex, wordBegin=True, wordEnd=True):
		r"""Sets regex to use for searching for date in log line.

		The regex is only constructed here (stripped, grouped, extended with
		boundaries); it is compiled later by first usage, so an invalid
		expression is reported by `matchDate` resp. `_compileRegex`, not here.

		Parameters
		----------
		regex : str
			The regex the template will use for searching for a date.
		wordBegin : bool or 'start'
			Defines whether the regex should be modified to search at beginning of a
			word, by adding special boundary r'(?=^|\b|\W)' to start of regex.
			If it is the string 'start', the regex will be anchored to the begin
			of line using r'^(?:\W{0,2})?' instead and the name of the template
			gets the prefix '{^LN-BEG}'.
			Can be disabled with specifying of ** at front of regex.
			Default True.
		wordEnd : bool
			Defines whether the regex should be modified to search at end of a word,
			by adding special boundary r'(?=\b|\W|$)' to end of regex.
			Can be disabled with specifying of ** at end of regex.
			Default True.
		"""
		# Warning: don't use lookahead for line-begin boundary, 
		# (e. g. r"^(?:\W{0,2})?" is much faster as r"(?:^|(?<=^\W)|(?<=^\W{2}))")
		# because it may be very slow in negative case (by long log-lines not matching pattern)

		regex = regex.strip()
		# boundaries are wanted only if regex is not already anchored resp. marked with "**":
		boundBegin = wordBegin and not RE_NO_WRD_BOUND_BEG.search(regex)
		boundEnd = wordEnd and not RE_NO_WRD_BOUND_END.search(regex)
		# if no group add it now, should always have a group(1):
		if not RE_GROUPED.search(regex):
			regex = RE_GROUP[0].sub(RE_GROUP[1], regex)
		self.flags = 0
		# if word or line start boundary:
		if boundBegin:
			self.flags |= DateTemplate.WORD_BEGIN if wordBegin != 'start' else DateTemplate.LINE_BEGIN
			if wordBegin != 'start':
				regex = r'(?=^|\b|\W)' + regex
			else:
				regex = r"^(?:\W{0,2})?" + regex
				if not self.name.startswith('{^LN-BEG}'):
					self.name = '{^LN-BEG}' + self.name
		# if word end boundary:
		if boundEnd:
			self.flags |= DateTemplate.WORD_END
			regex += r'(?=\b|\W|$)'
		# regex may be line-anchored by itself (explicit "^" resp. "$"):
		if not (self.flags & DateTemplate.LINE_BEGIN) and RE_LINE_BOUND_BEG.search(regex):
			self.flags |= DateTemplate.LINE_BEGIN
		if not (self.flags & DateTemplate.LINE_END) and RE_LINE_BOUND_END.search(regex):
			self.flags |= DateTemplate.LINE_END
		# remove possible special pattern "**" in front and end of regex:
		regex = RE_DEL_WRD_BOUNDS[0].sub(RE_DEL_WRD_BOUNDS[1], regex)
		self._regex = regex
		logSys.log(4, '  constructed regex %s', regex)
		# invalidate compiled regex, it will be compiled again by next usage:
		self._cRegex = None

	regex = property(getRegex, setRegex, doc=
		"""Regex used to search for date.
		""")

	def _compileRegex(self):
		"""Compile regex by first usage.

		Raises
		------
		Exception
			Whatever `re.compile` raises for the current regex; the failure
			is logged and the original exception is re-raised unchanged.
		"""
		if not self._cRegex:
			try:
				self._cRegex = re.compile(self.regex)
			except Exception:
				logSys.error('Compile %r failed, expression %r', self.name, self.regex)
				# bare raise keeps the original traceback of the compile error:
				raise

	def matchDate(self, line, *args):
		"""Check if regex for date matches on a log line.

		Parameters
		----------
		line : str
			Log line to search in.
		*args
			Optional `pos` and `endpos`, passed through to the `search`
			of the compiled regex.

		Returns
		-------
		The match object of the search, or None if the regex does not match.
		Each successful match increments `hits`.
		"""
		if not self._cRegex:
			self._compileRegex()
		logSys.log(4, "   search %s", self.regex)
		dateMatch = self._cRegex.search(line, *args) # pos, endpos
		if dateMatch:
			self.hits += 1
		return dateMatch

	@abstractmethod
	def getDate(self, line, dateMatch=None, default_tz=None):
		"""Abstract method, which should return the date for a log line

		This should return the date for a log line, typically taking the
		date from the part of the line which matched the templates regex.
		This requires abstraction, therefore just raises exception.

		Parameters
		----------
		line : str
			Log line, of which the date should be extracted from.
		dateMatch : optional
			Result of a previous `matchDate` for this line, if available.
		default_tz : optional
			If no explicit time zone is present in the line, passing this
			will interpret it as in that time zone.

		Raises
		------
		NotImplementedError
			Abstract method, therefore always raises this.
		"""
		raise NotImplementedError("getDate() is abstract")

	@staticmethod
	def unboundPattern(pattern):
		"""Returns pattern without special markers and boundary anchors.

		Removes (in this order) a leading "{UNB}", a leading "{^LN-BEG}",
		a leading "(^|\\b|\\W)" resp. "(?:^|\\b|\\W)" and a trailing
		"(?=\\b|\\W|$)" from the given pattern string.
		"""
		return RE_EXEANC_BOUND_BEG.sub('',
			RE_EXSANC_BOUND_BEG.sub('',
				RE_EXLINE_BOUND_BEG.sub('', RE_EXLINE_NO_BOUNDS.sub('', pattern))
			)
		)


class DateEpoch(DateTemplate):
	"""Date template for Unix timestamps (seconds since epoch).

	Without a custom pattern it finds timestamps at start of a line,
	optionally enclosed in square brackets (nsd), or directly after
	"audit(" as on SELinux audit log lines. With a custom pattern the
	placeholder {EPOCH} resp. {LEPOCH} inside of it is replaced by the
	regex of the timestamp.

	Attributes
	----------
	name
	regex
	"""

	def __init__(self, lineBeginOnly=False, pattern=None, longFrm=False):
		DateTemplate.__init__(self)
		self._longFrm = longFrm
		# index of the group holding the timestamp itself:
		self._grpIdx = 1
		# default name and regex of the timestamp depend on the format:
		if longFrm:
			defaultName = "LongEpoch"
			epochRE = r"\d{10,11}(?:\d{3}(?:\.\d{1,6}|\d{3})?)?"
		else:
			defaultName = "Epoch"
			epochRE = r"\d{10,11}\b(?:\.\d{3,6})?"
		# name must be known before setRegex, which may extend it with a prefix:
		self.name = pattern if pattern else defaultName
		if pattern:
			# whole pattern becomes group 1 (cut out of the line), timestamp is group 2;
			# a callable is used as replacement, so epochRE is inserted verbatim:
			epochGroup = "(%s)" % epochRE
			regex = "(" + RE_EPOCH_PATTERN.sub(lambda m: epochGroup, pattern) + ")"
			self._grpIdx = 2
			self.setRegex(regex)
		elif lineBeginOnly:
			regex = r"((?P<square>(?<=^\[))?%s)(?(square)(?=\]))" % epochRE
			self.setRegex(regex, wordBegin='start', wordEnd=True)
		else:
			# regex is already line begin resp. word begin anchored:
			regex = r"((?:^|(?P<square>(?<=^\[))|(?P<selinux>(?<=\baudit\()))%s)(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))" % epochRE
			self.setRegex(regex, wordBegin=False)

	def getDate(self, line, dateMatch=None, default_tz=None):
		"""Method to return the date for a log line.

		Parameters
		----------
		line : str
			Log line, of which the date should be extracted from.
		dateMatch : optional
			Match found previously for this line; if not given, the line
			is searched using `matchDate`.
		default_tz: ignored, Unix timestamps are time zone independent

		Returns
		-------
		(float, match) or None
			Tuple containing the Unix timestamp (in seconds) and the match
			object the timestamp was extracted from; None if no date found.
		"""
		if not dateMatch:
			dateMatch = self.matchDate(line)
			if not dateMatch:
				return None
		raw = dateMatch.group(self._grpIdx)
		# long format with 13 or more chars is not in seconds, scale it down:
		if self._longFrm and len(raw) >= 13:
			if len(raw) >= 16 and '.' not in raw:
				# 16 or more digits without fraction - divide by 10**6
				divisor = 1000000
			else:
				# otherwise - divide by 10**3
				divisor = 1000
			return (float(raw) / divisor, dateMatch)
		return (float(raw), dateMatch)


class DatePatternRegex(DateTemplate):
	"""Date template built from a pattern with strptime-like "%" fields.

	Parameters
	----------
	pattern : str
		Sets the date templates pattern (optional, can be set later).
	**kwargs
		Passed through to `setRegex` together with the pattern.

	Attributes
	----------
	name
	regex
	pattern
	"""
	
	# regex recognizing the "%" fields and mapping of the fields to their names:
	_patternRE, _patternName = getTimePatternRE()
	_patternRE = re.compile(_patternRE)

	def __init__(self, pattern=None, **kwargs):
		super(DatePatternRegex, self).__init__()
		self._pattern = None
		if pattern is None:
			return
		self.setRegex(pattern, **kwargs)

	@property
	def pattern(self):
		"""The pattern used for regex with strptime "%" time fields.

		This should be a valid regular expression, of which matching string
		will be extracted from the log line. strptime style "%" fields will
		be replaced by appropriate regular expressions, or custom regex
		groups with names as per the strptime fields can also be used
		instead.
		"""
		return self._pattern

	@pattern.setter
	def pattern(self, pattern):
		self.setRegex(pattern)

	def setRegex(self, pattern, wordBegin=True, wordEnd=True):
		"""Sets the pattern and constructs name and regex of template from it.

		Parameters
		----------
		pattern : str
			Pattern with "%" time fields; a leading "{UNB}" disables both
			word boundaries, a leading "{^LN-BEG}" anchors to the line begin.
		wordBegin, wordEnd
			Boundary modes, handed over to `DateTemplate.setRegex`.

		Raises
		------
		TypeError
			If the regex could not be built resp. set from the pattern.
		"""
		# keep the pattern exactly as it was specified:
		self._pattern = pattern
		# explicit "{UNB}" - cut it off and reset boundaries left and right:
		pattern, unbound = RE_EXLINE_NO_BOUNDS.subn('', pattern)
		if unbound:
			wordBegin = wordEnd = False
		# explicit "{^LN-BEG}" - cut it off and switch wordBegin to 'start':
		if wordBegin:
			pattern, lineBegin = RE_EXLINE_BOUND_BEG.subn('', pattern)
			if lineBegin:
				wordBegin = 'start'
		try:
			# convert each "%" field to a "%(field)s" format placeholder:
			fmt = self._patternRE.sub(r'%(\1)s', pattern)
			# fill in names of fields for the name and their regexes for the regex:
			self.name = fmt % self._patternName
			regex = fmt % timeRE
			# fields with alphabetic values need "ignore case" and "unicode":
			if RE_ALPHA_PATTERN.search(pattern):
				regex = r'(?iu)' + regex
			super(DatePatternRegex, self).setRegex(regex, wordBegin, wordEnd)
		except Exception as e:
			raise TypeError("Failed to set datepattern '%s' (may be an invalid format or unescaped percent char): %s" % (pattern, e))

	def getDate(self, line, dateMatch=None, default_tz=None):
		"""Method to return the date for a log line.

		The named groups of the match are handed over to the custom
		strptime implementation `reGroupDictStrptime`.

		Parameters
		----------
		line : str
			Log line, of which the date should be extracted from.
		dateMatch : optional
			Match found previously for this line; if not given, the line
			is searched using `matchDate`.
		default_tz: optionally used to correct timezone

		Returns
		-------
		(timestamp, match) or None
			Tuple containing the result of `reGroupDictStrptime`
			(presumably a Unix timestamp) and the match object it was
			calculated from; None if no date found.
		"""
		if not dateMatch:
			dateMatch = self.matchDate(line)
			if not dateMatch:
				return None
		timestamp = reGroupDictStrptime(dateMatch.groupdict(), default_tz=default_tz)
		return (timestamp, dateMatch)


class DateTai64n(DateTemplate):
	"""Date template for timestamps in TAI64N format ("@" and 24 hex digits).

	Attributes
	----------
	name
	regex
	"""

	def __init__(self, wordBegin=False):
		DateTemplate.__init__(self)
		self.name = "TAI64N"
		# format of TAI64N is fixed, so the regex is too:
		self.setRegex("@[0-9a-f]{24}", wordBegin=wordBegin)

	def getDate(self, line, dateMatch=None, default_tz=None):
		"""Method to return the date for a log line.

		Parameters
		----------
		line : str
			Log line, of which the date should be extracted from.
		dateMatch : optional
			Match found previously for this line; if not given, the line
			is searched using `matchDate`.
		default_tz: ignored, since TAI is time zone independent

		Returns
		-------
		(int, match) or None
			Tuple containing the timestamp in seconds and the match object
			it was extracted from; None if no date found.
		"""
		if not dateMatch:
			dateMatch = self.matchDate(line)
			if not dateMatch:
				return None
		label = dateMatch.group(1)
		# skip "@" and the first hex digit (presumably the 2**62 bias of TAI64),
		# the following 15 hex digits are taken as the seconds:
		hexSeconds = label[2:17]
		return (int(hexSeconds, 16), dateMatch)