blob: 2001714140331ac6ebf33d34aafdffd1ff5bdd67 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
# -*- coding: utf-8 -*-
"""
sphinx.web.antispam
~~~~~~~~~~~~~~~~~~~
Small module that performs anti-spam tests based on the bad content
regex list provided by MoinMoin.
:copyright: 2007 by Armin Ronacher.
:license: Python license.
"""
from __future__ import with_statement
import re
import urllib
import time
from os import path
# URL the bad content rule list is downloaded from (requested with
# ``action=raw``).
DOWNLOAD_URL = 'http://moinmaster.wikiwikiweb.de/BadContent?action=raw'
# Age in seconds (60 * 60 * 24 * 7 = one week) after which the local copy of
# the rule list is considered stale and a new download is attempted.
UPDATE_INTERVAL = 60 * 60 * 24 * 7
class AntiSpam(object):
    """
    Class that reads a bad content database (flat file that is automatically
    updated from the moin moin server) and checks strings against it.

    The database is a text file with one regular expression per line.  Blank
    lines and lines starting with ``#`` are ignored.  The compiled
    expressions are available as ``self.rules``.
    """

    def __init__(self, bad_content_file):
        """
        Load the rule list, refreshing the local copy first if it is stale.

        If `bad_content_file` does not exist, or its modification time is
        more than ``UPDATE_INTERVAL`` seconds in the past, the list is
        downloaded from ``DOWNLOAD_URL`` and written to `bad_content_file`.
        A failed download is ignored and the existing local copy is used
        instead.

        :param bad_content_file: path of the local copy of the rule list.
        :raises IOError: if the download failed and the local copy cannot
                         be opened, or if the local copy cannot be written.
        :raises re.error: if a rule is not a valid regular expression.
        """
        self.bad_content_file = bad_content_file

        if path.exists(bad_content_file):
            last_change = path.getmtime(bad_content_file)
        else:
            # a missing file counts as infinitely old and forces a download
            last_change = 0

        lines = None
        if last_change + UPDATE_INTERVAL < time.time():
            raw_lines = self._download_lines()
            if raw_lines is not None:
                lines = self._clean_lines(raw_lines)
                # the ``with`` block closes (and thereby flushes) the cache
                # file; errors while writing still propagate to the caller
                with open(bad_content_file, 'w') as f:
                    f.write('\n'.join(lines))

        if lines is None:
            # either the local copy is fresh enough or the download failed
            with open(bad_content_file) as f:
                lines = self._clean_lines(f)

        self.rules = [re.compile(rule) for rule in lines]

    @staticmethod
    def _download_lines():
        """
        Fetch ``DOWNLOAD_URL`` and return its content as a list of lines,
        or `None` if the download failed for any reason.
        """
        try:
            f = urllib.urlopen(DOWNLOAD_URL)
            try:
                data = f.read()
            finally:
                f.close()
        except Exception:
            # the update is best effort only; ``Exception`` rather than a
            # bare ``except`` so KeyboardInterrupt/SystemExit get through
            return None
        return data.splitlines()

    @staticmethod
    def _clean_lines(raw_lines):
        """
        Strip every line of `raw_lines` and return the remaining ones that
        are neither empty nor ``#`` comments.
        """
        stripped = (line.strip() for line in raw_lines)
        return [line for line in stripped
                if line and not line.startswith('#')]

    def is_spam(self, fields):
        """
        Check `fields` against the bad content rules.

        :param fields: iterable of strings to test.
        :return: `True` if any rule matches anywhere in any of the fields,
                 `False` otherwise.
        """
        for regex in self.rules:
            for field in fields:
                if regex.search(field) is not None:
                    return True
        return False
|