diff options
author | Donald Stufft <donald@stufft.io> | 2013-05-20 00:24:09 -0400 |
---|---|---|
committer | Donald Stufft <donald@stufft.io> | 2013-05-20 00:24:09 -0400 |
commit | 927075fe7deb731b347c30236d7256d96957ed10 (patch) | |
tree | 2407402f99320d237547e1da45eddd1e63f51f38 /tools | |
parent | b48377a2027564a63040b6523ccab472fa861870 (diff) | |
download | decorator-927075fe7deb731b347c30236d7256d96957ed10.tar.gz |
Add a script to purge the bulk of the duplicate users
This script will look up all the duplicate users when compared
case insensitively and purge any of them who has never submitted
an item, does not have permission on any package, and where
last_login declares they have never logged in.
Diffstat (limited to 'tools')
-rw-r--r-- | tools/duplicate_users.py | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/tools/duplicate_users.py b/tools/duplicate_users.py new file mode 100644 index 0000000..da39864 --- /dev/null +++ b/tools/duplicate_users.py @@ -0,0 +1,63 @@ +#!usr/bin/env python +import os +import sys +import itertools + +# Workaround current bug in docutils: +# http://permalink.gmane.org/gmane.text.docutils.devel/6324 +import docutils.utils + +root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(root) + +import store +import config + + +c = config.Config("config.ini") +store = store.Store(c) +store.open() +cursor = store.get_cursor() + +cursor.execute("SELECT LOWER(name) FROM users GROUP BY LOWER(name) HAVING COUNT(*) > 1") +duplicated = set([x[0] for x in cursor.fetchall()]) + +duplicates = {} +users = {} + +for username in duplicated: + cursor.execute("SELECT name, email, last_login FROM users WHERE LOWER(name)=LOWER(%s)", (username,)) + dups = cursor.fetchall() + + duplicates[username] = [x[0] for x in dups] + + for x in dups: + users[x[0]] = x + +print len(users), "duplicated users found with", len(duplicated), "total ci unique" + +total_users = users.values() +total_names = set(x[0] for x in total_users) + +delete = set(total_names) + +# Exclude any user who has ever submitted a journal from deletion +cursor.execute("SELECT DISTINCT ON (submitted_by) submitted_by FROM journals") +journaled = set(x[0] for x in cursor.fetchall()) +delete -= journaled + +# Exclude any user who is assigned a role on a package +cursor.execute("SELECT DISTINCT ON (user_name) user_name FROM roles") +roles = set(x[0] for x in cursor.fetchall()) +delete -= roles + +# Exclude any user who has logged in +cursor.execute("SELECT DISTINCT ON (name) name FROM users WHERE last_login != NULL") +logged_in = set(x[0] for x in cursor.fetchall()) +delete -= logged_in + +if delete: + cursor.execute("DELETE FROM users WHERE name in %s", (tuple(delete),)) + +store.commit() +store.close() |