diff options
Diffstat (limited to 'pyparsing/common.py')
-rw-r--r-- | pyparsing/common.py | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/pyparsing/common.py b/pyparsing/common.py
index c6d91f6..0eb286e 100644
--- a/pyparsing/common.py
+++ b/pyparsing/common.py
@@ -358,6 +358,54 @@ class pyparsing_common:
     downcase_tokens = staticmethod(token_map(lambda t: t.lower()))
     """Parse action to convert tokens to lower case."""
 
+    url = Regex(
+        # pattern adapted from https://mathiasbynens.be/demo/url-regex
+        # and https://gist.github.com/dperini/729294
+        # no ^/$ anchors: Regex matches at the current parse position, where
+        # "^" would only succeed at offset 0; "url" captures the whole match
+        r"(?P<url>" +
+        # protocol identifier (optional), but the "//" is still required
+        r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
+        # user:pass BasicAuth (optional)
+        r"(?:(?P<auth>\S+(?::\S*)?)@)?" +
+        r"(?P<host>" +
+        # exclude IP addresses in private & local networks
+        r"(?!(?:10|127)(?:\.\d{1,3}){3})" +
+        r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" +
+        r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" +
+        # IP address dotted notation octets:
+        # excludes 0.x.x.x and reserved space >= 224.0.0.0
+        # excludes network & broadcast addresses (final octet 0 or 255)
+        r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" +
+        r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" +
+        r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +
+        r"|" +
+        # host & domain names, may end with dot
+        # can be replaced by a shortest alternative
+        # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
+        r"(?:" +
+        r"(?:" +
+        r"[a-z0-9\u00a1-\uffff]" +
+        r"[a-z0-9\u00a1-\uffff_-]{0,62}" +
+        r")?" +
+        r"[a-z0-9\u00a1-\uffff]\." +
+        r")+" +
+        # TLD identifier name, may end with dot
+        r"(?:[a-z\u00a1-\uffff]{2,}\.?)" +
+        r")" +
+        # port number (optional), captured with its leading ":"
+        r"(?P<port>:\d{2,5})?" +
+        # resource path (optional)
+        r"(?P<path>\/[^?# ]*)?" +
+        # query string (optional), captured with its leading "?"
+        r"(?P<query>\?[^#]*)?" +
+        # fragment (optional), captured with its leading "#"
+        r"(?P<fragment>#\S*)?" +
+        r")"
+    ).set_name("url")
+    """URL (http, https or ftp scheme); named groups: url, scheme, auth,
+    host, port, path, query, fragment."""
+
     # pre-PEP8 compatibility names
     convertToInteger = convert_to_integer
     convertToFloat = convert_to_float