"""passlib.pwd -- password generation helpers""" #============================================================================= # imports #============================================================================= from __future__ import absolute_import, division, print_function, unicode_literals # core import codecs from collections import defaultdict try: from collections.abc import MutableMapping except ImportError: # py2 compat from collections import MutableMapping from math import ceil, log as logf import logging; log = logging.getLogger(__name__) import pkg_resources import os # site # pkg from passlib import exc from passlib.utils.compat import PY2, irange, itervalues, int_types from passlib.utils import rng, getrandstr, to_unicode from passlib.utils.decor import memoized_property # local __all__ = [ "genword", "default_charsets", "genphrase", "default_wordsets", ] #============================================================================= # constants #============================================================================= # XXX: rename / publically document this map? entropy_aliases = dict( # barest protection from throttled online attack unsafe=12, # some protection from unthrottled online attack weak=24, # some protection from offline attacks fair=36, # reasonable protection from offline attacks strong=48, # very good protection from offline attacks secure=60, ) #============================================================================= # internal helpers #============================================================================= def _superclasses(obj, cls): """return remaining classes in object's MRO after cls""" mro = type(obj).__mro__ return mro[mro.index(cls)+1:] def _self_info_rate(source): """ returns 'rate of self-information' -- i.e. average (per-symbol) entropy of the sequence **source**, where probability of a given symbol occurring is calculated based on the number of occurrences within the sequence itself. 
if all elements of the source are unique, this should equal ``log(len(source), 2)``. :arg source: iterable containing 0+ symbols (e.g. list of strings or ints, string of characters, etc). :returns: float bits of entropy """ try: size = len(source) except TypeError: # if len() doesn't work, calculate size by summing counts later size = None counts = defaultdict(int) for char in source: counts[char] += 1 if size is None: values = counts.values() size = sum(values) else: values = itervalues(counts) if not size: return 0 # NOTE: the following performs ``- sum(value / size * logf(value / size, 2) for value in values)``, # it just does so with as much pulled out of the sum() loop as possible... return logf(size, 2) - sum(value * logf(value, 2) for value in values) / size # def _total_self_info(source): # """ # return total self-entropy of a sequence # (the average entropy per symbol * size of sequence) # """ # return _self_info_rate(source) * len(source) def _open_asset_path(path, encoding=None): """ :param asset_path: string containing absolute path to file, or package-relative path using format ``"python.module:relative/file/path"``. :returns: filehandle opened in 'rb' mode (unless encoding explicitly specified) """ if encoding: return codecs.getreader(encoding)(_open_asset_path(path)) if os.path.isabs(path): return open(path, "rb") package, sep, subpath = path.partition(":") if not sep: raise ValueError("asset path must be absolute file path " "or use 'pkg.name:sub/path' format: %r" % (path,)) return pkg_resources.resource_stream(package, subpath) #: type aliases _sequence_types = (list, tuple) _set_types = (set, frozenset) #: set of elements that ensure_unique() has validated already. _ensure_unique_cache = set() def _ensure_unique(source, param="source"): """ helper for generators -- Throws ValueError if source elements aren't unique. 
Error message will display (abbreviated) repr of the duplicates in a string/list """ # check cache to speed things up for frozensets / tuples / strings cache = _ensure_unique_cache hashable = True try: if source in cache: return True except TypeError: hashable = False # check if it has dup elements if isinstance(source, _set_types) or len(set(source)) == len(source): if hashable: try: cache.add(source) except TypeError: # XXX: under pypy, "list() in set()" above doesn't throw TypeError, # but trying to add unhashable it to a set *does*. pass return True # build list of duplicate values seen = set() dups = set() for elem in source: (dups if elem in seen else seen).add(elem) dups = sorted(dups) trunc = 8 if len(dups) > trunc: trunc = 5 dup_repr = ", ".join(repr(str(word)) for word in dups[:trunc]) if len(dups) > trunc: dup_repr += ", ... plus %d others" % (len(dups) - trunc) # throw error raise ValueError("`%s` cannot contain duplicate elements: %s" % (param, dup_repr)) #============================================================================= # base generator class #============================================================================= class SequenceGenerator(object): """ Base class used by word & phrase generators. These objects take a series of options, corresponding to those of the :func:`generate` function. They act as callables which can be used to generate a password or a list of 1+ passwords. They also expose some read-only informational attributes. Parameters ---------- :param entropy: Optionally specify the amount of entropy the resulting passwords should contain (as measured with respect to the generator itself). This will be used to auto-calculate the required password size. :param length: Optionally specify the length of password to generate, measured as count of whatever symbols the subclass uses (characters or words). Note if ``entropy`` requires a larger minimum length, that will be used instead. 
:param rng: Optionally provide a custom RNG source to use. Should be an instance of :class:`random.Random`, defaults to :class:`random.SystemRandom`. Attributes ---------- .. autoattribute:: length .. autoattribute:: symbol_count .. autoattribute:: entropy_per_symbol .. autoattribute:: entropy Subclassing ----------- Subclasses must implement the ``.__next__()`` method, and set ``.symbol_count`` before calling base ``__init__`` method. """ #============================================================================= # instance attrs #============================================================================= #: requested size of final password length = None #: requested entropy of final password requested_entropy = "strong" #: random number source to use rng = rng #: number of potential symbols (must be filled in by subclass) symbol_count = None #============================================================================= # init #============================================================================= def __init__(self, entropy=None, length=None, rng=None, **kwds): # make sure subclass set things up correctly assert self.symbol_count is not None, "subclass must set .symbol_count" # init length & requested entropy if entropy is not None or length is None: if entropy is None: entropy = self.requested_entropy entropy = entropy_aliases.get(entropy, entropy) if entropy <= 0: raise ValueError("`entropy` must be positive number") min_length = int(ceil(entropy / self.entropy_per_symbol)) if length is None or length < min_length: length = min_length self.requested_entropy = entropy if length < 1: raise ValueError("`length` must be positive integer") self.length = length # init other common options if rng is not None: self.rng = rng # hand off to parent if kwds and _superclasses(self, SequenceGenerator) == (object,): raise TypeError("Unexpected keyword(s): %s" % ", ".join(kwds.keys())) super(SequenceGenerator, self).__init__(**kwds) 
#============================================================================= # informational helpers #============================================================================= @memoized_property def entropy_per_symbol(self): """ Average entropy per symbol (assuming all symbols have equal probability) """ return logf(self.symbol_count, 2) @memoized_property def entropy(self): """ Effective entropy of generated passwords. This value will always be a multiple of :attr:`entropy_per_symbol`. If entropy is specified in constructor, :attr:`length` will be chosen so so that this value is the smallest multiple >= :attr:`requested_entropy`. """ return self.length * self.entropy_per_symbol #============================================================================= # generation #============================================================================= def __next__(self): """main generation function, should create one password/phrase""" raise NotImplementedError("implement in subclass") def __call__(self, returns=None): """ frontend used by genword() / genphrase() to create passwords """ if returns is None: return next(self) elif isinstance(returns, int_types): return [next(self) for _ in irange(returns)] elif returns is iter: return self else: raise exc.ExpectedTypeError(returns, ", int, or ", "returns") def __iter__(self): return self if PY2: def next(self): return self.__next__() #============================================================================= # eoc #============================================================================= #============================================================================= # default charsets #============================================================================= #: global dict of predefined characters sets default_charsets = dict( # ascii letters, digits, and some punctuation ascii_72='0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!@#$%^&*?/', # ascii letters and digits 
ascii_62='0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', # ascii_50, without visually similar '1IiLl', '0Oo', '5S', '8B' ascii_50='234679abcdefghjkmnpqrstuvwxyzACDEFGHJKMNPQRTUVWXYZ', # lower case hexadecimal hex='0123456789abcdef', ) #============================================================================= # password generator #============================================================================= class WordGenerator(SequenceGenerator): """ Class which generates passwords by randomly choosing from a string of unique characters. Parameters ---------- :param chars: custom character string to draw from. :param charset: predefined charset to draw from. :param \\*\\*kwds: all other keywords passed to the :class:`SequenceGenerator` parent class. Attributes ---------- .. autoattribute:: chars .. autoattribute:: charset .. autoattribute:: default_charsets """ #============================================================================= # instance attrs #============================================================================= #: Predefined character set in use (set to None for instances using custom 'chars') charset = "ascii_62" #: string of chars to draw from -- usually filled in from charset chars = None #============================================================================= # init #============================================================================= def __init__(self, chars=None, charset=None, **kwds): # init chars and charset if chars: if charset: raise TypeError("`chars` and `charset` are mutually exclusive") else: if not charset: charset = self.charset assert charset chars = default_charsets[charset] self.charset = charset chars = to_unicode(chars, param="chars") _ensure_unique(chars, param="chars") self.chars = chars # hand off to parent super(WordGenerator, self).__init__(**kwds) # log.debug("WordGenerator(): entropy/char=%r", self.entropy_per_symbol) 
#============================================================================= # informational helpers #============================================================================= @memoized_property def symbol_count(self): return len(self.chars) #============================================================================= # generation #============================================================================= def __next__(self): # XXX: could do things like optionally ensure certain character groups # (e.g. letters & punctuation) are included return getrandstr(self.rng, self.chars, self.length) #============================================================================= # eoc #============================================================================= def genword(entropy=None, length=None, returns=None, **kwds): """Generate one or more random passwords. This function uses :mod:`random.SystemRandom` to generate one or more passwords using various character sets. The complexity of the password can be specified by size, or by the desired amount of entropy. Usage Example:: >>> # generate a random alphanumeric string with 48 bits of entropy (the default) >>> from passlib import pwd >>> pwd.genword() 'DnBHvDjMK6' >>> # generate a random hexadecimal string with 52 bits of entropy >>> pwd.genword(entropy=52, charset="hex") '310f1a7ac793f' :param entropy: Strength of resulting password, measured in 'guessing entropy' bits. An appropriate **length** value will be calculated based on the requested entropy amount, and the size of the character set. This can be a positive integer, or one of the following preset strings: ``"weak"`` (24), ``"fair"`` (36), ``"strong"`` (48), and ``"secure"`` (56). If neither this or **length** is specified, **entropy** will default to ``"strong"`` (48). :param length: Size of resulting password, measured in characters. If omitted, the size is auto-calculated based on the **entropy** parameter. 
If both **entropy** and **length** are specified, the stronger value will be used. :param returns: Controls what this function returns: * If ``None`` (the default), this function will generate a single password. * If an integer, this function will return a list containing that many passwords. * If the ``iter`` constant, will return an iterator that yields passwords. :param chars: Optionally specify custom string of characters to use when randomly generating a password. This option cannot be combined with **charset**. :param charset: The predefined character set to draw from (if not specified by **chars**). There are currently four presets available: * ``"ascii_62"`` (the default) -- all digits and ascii upper & lowercase letters. Provides ~5.95 entropy per character. * ``"ascii_50"`` -- subset which excludes visually similar characters (``1IiLl0Oo5S8B``). Provides ~5.64 entropy per character. * ``"ascii_72"`` -- all digits and ascii upper & lowercase letters, as well as some punctuation. Provides ~6.17 entropy per character. * ``"hex"`` -- Lower case hexadecimal. Providers 4 bits of entropy per character. :returns: :class:`!unicode` string containing randomly generated password; or list of 1+ passwords if :samp:`returns={int}` is specified. """ gen = WordGenerator(length=length, entropy=entropy, **kwds) return gen(returns) #============================================================================= # default wordsets #============================================================================= def _load_wordset(asset_path): """ load wordset from compressed datafile within package data. file should be utf-8 encoded :param asset_path: string containing absolute path to wordset file, or "python.module:relative/file/path". :returns: tuple of words, as loaded from specified words file. 
""" # open resource file, convert to tuple of words (strip blank lines & ws) with _open_asset_path(asset_path, "utf-8") as fh: gen = (word.strip() for word in fh) words = tuple(word for word in gen if word) # NOTE: works but not used # # detect if file uses " " format, and strip numeric prefix # def extract(row): # idx, word = row.replace("\t", " ").split(" ", 1) # if not idx.isdigit(): # raise ValueError("row is not dice index + word") # return word # try: # extract(words[-1]) # except ValueError: # pass # else: # words = tuple(extract(word) for word in words) log.debug("loaded %d-element wordset from %r", len(words), asset_path) return words class WordsetDict(MutableMapping): """ Special mapping used to store dictionary of wordsets. Different from a regular dict in that some wordsets may be lazy-loaded from an asset path. """ #: dict of key -> asset path paths = None #: dict of key -> value _loaded = None def __init__(self, *args, **kwds): self.paths = {} self._loaded = {} super(WordsetDict, self).__init__(*args, **kwds) def __getitem__(self, key): try: return self._loaded[key] except KeyError: pass path = self.paths[key] value = self._loaded[key] = _load_wordset(path) return value def set_path(self, key, path): """ set asset path to lazy-load wordset from. """ self.paths[key] = path def __setitem__(self, key, value): self._loaded[key] = value def __delitem__(self, key): if key in self: del self._loaded[key] self.paths.pop(key, None) else: del self.paths[key] @property def _keyset(self): keys = set(self._loaded) keys.update(self.paths) return keys def __iter__(self): return iter(self._keyset) def __len__(self): return len(self._keyset) # NOTE: speeds things up, and prevents contains from lazy-loading def __contains__(self, key): return key in self._loaded or key in self.paths #: dict of predefined word sets. #: key is name of wordset, value should be sequence of words. 
default_wordsets = WordsetDict()

# register the wordsets built into passlib
for name in "eff_long eff_short eff_prefixed bip39".split():
    default_wordsets.set_path(name, "passlib:_data/wordsets/%s.txt" % name)

#=============================================================================
# passphrase generator
#=============================================================================
class PhraseGenerator(SequenceGenerator):
    """class which generates passphrases by randomly choosing
    from a list of unique words.

    :param wordset:
        name of predefined wordset (a key of :data:`default_wordsets`) to draw from.
        defaults to ``"eff_long"``.
    :param words:
        custom collection of unique words to draw from, instead of ``wordset``
        (the two options are mutually exclusive).
    :param sep:
        separator inserted between words in output (defaults to ``" "``).
    :param \\*\\*kwds:
        all other keywords passed to the :class:`SequenceGenerator` parent class.

    .. autoattribute:: wordset
    """
    #=============================================================================
    # instance attrs
    #=============================================================================

    #: predefined wordset to use
    wordset = "eff_long"

    #: list of words to draw from
    words = None

    #: separator to use when joining words
    sep = " "

    #=============================================================================
    # init
    #=============================================================================
    def __init__(self, wordset=None, words=None, sep=None, **kwds):

        # load wordset
        if words is not None:
            if wordset is not None:
                raise TypeError("`words` and `wordset` are mutually exclusive")
        else:
            if wordset is None:
                wordset = self.wordset
                assert wordset
            words = default_wordsets[wordset]
        self.wordset = wordset

        # init words
        # (anything that isn't already a list/tuple is frozen into a tuple)
        if not isinstance(words, _sequence_types):
            words = tuple(words)
        _ensure_unique(words, param="words")
        self.words = words

        # init separator
        if sep is None:
            sep = self.sep
        sep = to_unicode(sep, param="sep")
        self.sep = sep

        # hand off to parent
        super(PhraseGenerator, self).__init__(**kwds)
        ##log.debug("PhraseGenerator(): entropy/word=%r entropy/char=%r min_chars=%r",
        ##          self.entropy_per_symbol, self.entropy_per_char, self.min_chars)

    #=============================================================================
    # informational helpers
    #=============================================================================

    @memoized_property
    def symbol_count(self):
        return len(self.words)

    #=============================================================================
    # generation
    #=============================================================================

    def __next__(self):
        words = (self.rng.choice(self.words) for _ in irange(self.length))
        return self.sep.join(words)

    #=============================================================================
    # eoc
    #=============================================================================


def genphrase(entropy=None, length=None, returns=None, **kwds):
    """Generate one or more random password / passphrases.

    This function uses :mod:`random.SystemRandom` to generate
    one or more passwords; it can be configured to generate
    alphanumeric passwords, or full english phrases.
    The complexity of the password can be specified
    by size, or by the desired amount of entropy.

    Usage Example::

        >>> # generate random phrase with 48 bits of entropy
        >>> from passlib import pwd
        >>> pwd.genphrase()
        'gangly robbing salt shove'

        >>> # generate a random phrase with 52 bits of entropy
        >>> # using a particular wordset
        >>> pwd.genphrase(entropy=52, wordset="bip39")
        'wheat dilemma reward rescue diary'

    :param entropy:
        Strength of resulting password, measured in 'guessing entropy' bits.
        An appropriate **length** value will be calculated
        based on the requested entropy amount, and the size of the word set.

        This can be a positive integer, or one of the following preset
        strings: ``"weak"`` (24), ``"fair"`` (36),
        ``"strong"`` (48), and ``"secure"`` (60).

        If neither this or **length** is specified, **entropy** will default
        to ``"strong"`` (48).

    :param length:
        Length of resulting password, measured in words.
        If omitted, the size is auto-calculated based on the **entropy** parameter.

        If both **entropy** and **length** are specified,
        the stronger value will be used.

    :param returns:
        Controls what this function returns:

        * If ``None`` (the default), this function will generate a single password.
        * If an integer, this function will return a list containing that many passwords.
        * If the ``iter`` builtin, will return an iterator that yields passwords.

    :param words:

        Optionally specifies a list/set of words to use when randomly generating a passphrase.
        This option cannot be combined with **wordset**.

    :param wordset:

        The predefined word set to draw from (if not specified by **words**).
        There are currently four presets available:

        ``"eff_long"`` (the default)

            Wordset containing 7776 english words of ~7 letters.
            Constructed by the EFF, it offers ~12.9 bits of entropy per word.

            This wordset (and the other ``"eff_"`` wordsets)
            were created by the EFF
            (https://www.eff.org/deeplinks/2016/07/new-wordlists-random-passphrases)
            to aid in generating passwords. See their announcement page
            for more details about the design & properties of these wordsets.

        ``"eff_short"``

            Wordset containing 1296 english words of ~4.5 letters.
            Constructed by the EFF, it offers ~10.3 bits of entropy per word.

        ``"eff_prefixed"``

            Wordset containing 1296 english words of ~8 letters,
            selected so that they each have a unique 3-character prefix.
            Constructed by the EFF, it offers ~10.3 bits of entropy per word.

        ``"bip39"``

            Wordset of 2048 english words of ~5 letters,
            selected so that they each have a unique 4-character prefix.
            Published as part of Bitcoin's BIP 39
            (https://github.com/bitcoin/bips/blob/master/bip-0039/bip-0039-wordlists.md),
            this wordset has exactly 11 bits of entropy per word.

            This list offers words that are typically shorter than ``"eff_long"``
            (at the cost of slightly less entropy); and much shorter than
            ``"eff_prefixed"`` (at the cost of a longer unique prefix).

    :param sep:
        Optional separator to use when joining words.
        Defaults to ``" "`` (a space), but can be an empty string, a hyphen, etc.

    :returns:
        :class:`!unicode` string containing randomly generated passphrase;
        or list of 1+ passphrases if :samp:`returns={int}` is specified.
    """
    gen = PhraseGenerator(entropy=entropy, length=length, **kwds)
    return gen(returns)

#=============================================================================
# strength measurement
#
# NOTE:
# for a little while, had rough draft of password strength measurement alg here.
# but not sure if there's value in yet another measurement algorithm,
# that's not just duplicating the effort of libraries like zxcvbn.
# may revive it later, but for now, leaving some refs to others out there:
#    * NIST 800-63 has simple alg
#    * zxcvbn (https://tech.dropbox.com/2012/04/zxcvbn-realistic-password-strength-estimation/)
#      might also be good, and has approach similar to composite approach i was already thinking about,
#      but much more well thought out.
#    * passfault (https://github.com/c-a-m/passfault) looks thorough,
#      but may have licensing issues, plus porting to python looks like very big job :(
#    * give a look at running things through zlib - might be able to cheaply
#      catch extra redundancies.
#=============================================================================

#=============================================================================
# eof
#=============================================================================