summary | refs | log | tree | commit | diff
path: root/tools/integrate-redis-stats.py
blob: dcc103d1667126c4cb239e2dc14257928b2bc453 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python
import os
import sys
import datetime
import redis

from itertools import izip, izip_longest

# Work around a current bug in docutils:
# http://permalink.gmane.org/gmane.text.docutils.devel/6324
import docutils.utils

# Compute the directory two levels above this script (the parent of the
# directory containing it) and put it at the front of the module search path.
# NOTE(review): presumably this is what lets the `store` and `config` imports
# below resolve when the script is run from its own directory -- confirm.
root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path = [root] + sys.path

import store
import config

# Fold the previous hour's per-file download counters, kept in Redis, into
# the ``downloads`` column of the ``release_files`` table, then record the
# hour as integrated so that a second run for the same hour is a no-op.

# Number of keys requested from Redis per MGET call.
BATCH_SIZE = 200

# Use a distinct name so the imported ``redis`` module is not shadowed.
redis_client = redis.Redis()

# Build the key pattern matching the previous (UTC) hour's counters.
current = datetime.datetime.utcnow()
lasthour = current - datetime.timedelta(hours=1)
search = "downloads:hour:%s:*:*" % lasthour.strftime("%y-%m-%d-%H")

# Make sure we haven't integrated this hour already
if redis_client.sismember("downloads:integrated", search):
    print("Already Integrated '%s'" % search)
    sys.exit(0)

# Fetch all of the keys
keys = redis_client.keys(search)

if not keys:
    print("No keys match '%s'" % search)
    sys.exit(0)

# Fetch all of the download counts (in batches of BATCH_SIZE), preserving the
# order of ``keys`` so counts and keys can be paired up positionally below.
counts = []
for start in range(0, len(keys), BATCH_SIZE):
    counts.extend(redis_client.mget(*keys[start:start + BATCH_SIZE]))

# Combine the counts with the filenames; the filename is taken from the last
# colon-separated field of each key. MGET yields None for a key that no
# longer exists (e.g. it disappeared between KEYS and MGET); such entries are
# skipped because there is nothing to add for them. The pairs are built
# eagerly so that a malformed count fails here, before the database is
# touched, rather than half-way through executemany().
downloads = [
    (int(count), key.split(":")[-1])
    for key, count in zip(keys, counts)
    if count is not None
]

# Update the database
c = config.Config("/data/pypi/config.ini")
pypi_store = store.Store(c)
cursor = pypi_store.get_cursor()
cursor.executemany(
    "UPDATE release_files SET downloads = downloads + %s WHERE filename = %s",
    downloads,
)
# DB-API cursors have no commit(); the transaction is committed through the
# object that owns the connection.
# NOTE(review): assumes Store exposes commit() for its connection -- confirm.
pypi_store.commit()
cursor.close()

# Add this to our integrated set (only after the commit, so an hour is never
# marked as integrated without its counts having been stored).
redis_client.sadd("downloads:integrated", search)