author     Pradyun Gedam <pradyunsg@gmail.com>  2023-01-29 01:36:04 +0000
committer  GitHub <noreply@github.com>          2023-01-29 01:36:04 +0000
commit     60a45984404460192067f3990e0258deeeafa636 (patch)
tree       3fbcc3a85d97f63b7831be05342d15964336c31d
parent     fc747ca912c53d9a55b5c4ecdbc58416f1f61459 (diff)
parent     acd7ef1f9aa74efbec786e3929faca1201b4b422 (diff)
download   pip-60a45984404460192067f3990e0258deeeafa636.tar.gz
Merge pull request #11758 from pradyunsg/vendoring-update
-rw-r--r--  news/certifi.vendor.rst  1
-rw-r--r--  news/chardet.vendor.rst  1
-rw-r--r--  news/platformdirs.vendor.rst  2
-rw-r--r--  news/requests.vendor.rst  1
-rw-r--r--  news/urllib3.vendor.rst  1
-rw-r--r--  src/pip/_vendor/certifi/__init__.py  2
-rw-r--r--  src/pip/_vendor/certifi/cacert.pem  181
-rw-r--r--  src/pip/_vendor/chardet.pyi  1
-rw-r--r--  src/pip/_vendor/chardet/__init__.py  36
-rw-r--r--  src/pip/_vendor/chardet/big5prober.py  6
-rw-r--r--  src/pip/_vendor/chardet/chardistribution.py  54
-rw-r--r--  src/pip/_vendor/chardet/charsetgroupprober.py  31
-rw-r--r--  src/pip/_vendor/chardet/charsetprober.py  35
-rw-r--r--  src/pip/_vendor/chardet/cli/chardetect.py  42
-rw-r--r--  src/pip/_vendor/chardet/codingstatemachine.py  16
-rw-r--r--  src/pip/_vendor/chardet/codingstatemachinedict.py  19
-rw-r--r--  src/pip/_vendor/chardet/cp949prober.py  6
-rw-r--r--  src/pip/_vendor/chardet/enums.py  9
-rw-r--r--  src/pip/_vendor/chardet/escprober.py  26
-rw-r--r--  src/pip/_vendor/chardet/escsm.py  9
-rw-r--r--  src/pip/_vendor/chardet/eucjpprober.py  19
-rw-r--r--  src/pip/_vendor/chardet/euckrprober.py  6
-rw-r--r--  src/pip/_vendor/chardet/euctwprober.py  6
-rw-r--r--  src/pip/_vendor/chardet/gb2312prober.py  6
-rw-r--r--  src/pip/_vendor/chardet/hebrewprober.py  56
-rw-r--r--  src/pip/_vendor/chardet/johabprober.py  6
-rw-r--r--  src/pip/_vendor/chardet/jpcntx.py  31
-rw-r--r--  src/pip/_vendor/chardet/latin1prober.py  18
-rw-r--r--  src/pip/_vendor/chardet/macromanprober.py  162
-rw-r--r--  src/pip/_vendor/chardet/mbcharsetprober.py  32
-rw-r--r--  src/pip/_vendor/chardet/mbcsgroupprober.py  3
-rw-r--r--  src/pip/_vendor/chardet/mbcssm.py  23
-rw-r--r--  src/pip/_vendor/chardet/metadata/languages.py  37
-rw-r--r--  src/pip/_vendor/chardet/py.typed  0
-rw-r--r--  src/pip/_vendor/chardet/resultdict.py  16
-rw-r--r--  src/pip/_vendor/chardet/sbcharsetprober.py  52
-rw-r--r--  src/pip/_vendor/chardet/sbcsgroupprober.py  2
-rw-r--r--  src/pip/_vendor/chardet/sjisprober.py  19
-rw-r--r--  src/pip/_vendor/chardet/universaldetector.py  68
-rw-r--r--  src/pip/_vendor/chardet/utf1632prober.py  32
-rw-r--r--  src/pip/_vendor/chardet/utf8prober.py  16
-rw-r--r--  src/pip/_vendor/chardet/version.py  4
-rw-r--r--  src/pip/_vendor/platformdirs/__init__.py  12
-rw-r--r--  src/pip/_vendor/platformdirs/unix.py  4
-rw-r--r--  src/pip/_vendor/platformdirs/version.py  8
-rw-r--r--  src/pip/_vendor/requests/__init__.py  4
-rw-r--r--  src/pip/_vendor/requests/__version__.py  6
-rw-r--r--  src/pip/_vendor/requests/models.py  2
-rw-r--r--  src/pip/_vendor/urllib3/_version.py  2
-rw-r--r--  src/pip/_vendor/urllib3/connectionpool.py  2
-rw-r--r--  src/pip/_vendor/urllib3/contrib/appengine.py  2
-rw-r--r--  src/pip/_vendor/urllib3/contrib/ntlmpool.py  4
-rw-r--r--  src/pip/_vendor/urllib3/contrib/pyopenssl.py  7
-rw-r--r--  src/pip/_vendor/urllib3/response.py  13
-rw-r--r--  src/pip/_vendor/urllib3/util/retry.py  2
-rw-r--r--  src/pip/_vendor/urllib3/util/url.py  2
-rw-r--r--  src/pip/_vendor/vendor.txt  10
-rw-r--r--  tests/functional/test_debug.py  6
58 files changed, 676 insertions, 503 deletions
diff --git a/news/certifi.vendor.rst b/news/certifi.vendor.rst
new file mode 100644
index 000000000..f02ba9f41
--- /dev/null
+++ b/news/certifi.vendor.rst
@@ -0,0 +1 @@
+Upgrade certifi to 2022.12.7
diff --git a/news/chardet.vendor.rst b/news/chardet.vendor.rst
new file mode 100644
index 000000000..5aceb6c5e
--- /dev/null
+++ b/news/chardet.vendor.rst
@@ -0,0 +1 @@
+Upgrade chardet to 5.1.0
diff --git a/news/platformdirs.vendor.rst b/news/platformdirs.vendor.rst
index 04ee05723..5c15bfbd9 100644
--- a/news/platformdirs.vendor.rst
+++ b/news/platformdirs.vendor.rst
@@ -1 +1 @@
-Upgrade platformdirs to 2.5.3
+Upgrade platformdirs to 2.6.2
diff --git a/news/requests.vendor.rst b/news/requests.vendor.rst
new file mode 100644
index 000000000..9f91985c7
--- /dev/null
+++ b/news/requests.vendor.rst
@@ -0,0 +1 @@
+Upgrade requests to 2.28.2
diff --git a/news/urllib3.vendor.rst b/news/urllib3.vendor.rst
new file mode 100644
index 000000000..c9d10554e
--- /dev/null
+++ b/news/urllib3.vendor.rst
@@ -0,0 +1 @@
+Upgrade urllib3 to 1.26.14
diff --git a/src/pip/_vendor/certifi/__init__.py b/src/pip/_vendor/certifi/__init__.py
index af4bcc151..a3546f125 100644
--- a/src/pip/_vendor/certifi/__init__.py
+++ b/src/pip/_vendor/certifi/__init__.py
@@ -1,4 +1,4 @@
from .core import contents, where
__all__ = ["contents", "where"]
-__version__ = "2022.09.24"
+__version__ = "2022.12.07"
diff --git a/src/pip/_vendor/certifi/cacert.pem b/src/pip/_vendor/certifi/cacert.pem
index 400515511..df9e4e3c7 100644
--- a/src/pip/_vendor/certifi/cacert.pem
+++ b/src/pip/_vendor/certifi/cacert.pem
@@ -636,37 +636,6 @@ BA6+C4OmF4O5MBKgxTMVBbkN+8cFduPYSo38NBejxiEovjBFMR7HeL5YYTisO+IB
ZQ==
-----END CERTIFICATE-----
-# Issuer: CN=Network Solutions Certificate Authority O=Network Solutions L.L.C.
-# Subject: CN=Network Solutions Certificate Authority O=Network Solutions L.L.C.
-# Label: "Network Solutions Certificate Authority"
-# Serial: 116697915152937497490437556386812487904
-# MD5 Fingerprint: d3:f3:a6:16:c0:fa:6b:1d:59:b1:2d:96:4d:0e:11:2e
-# SHA1 Fingerprint: 74:f8:a3:c3:ef:e7:b3:90:06:4b:83:90:3c:21:64:60:20:e5:df:ce
-# SHA256 Fingerprint: 15:f0:ba:00:a3:ac:7a:f3:ac:88:4c:07:2b:10:11:a0:77:bd:77:c0:97:f4:01:64:b2:f8:59:8a:bd:83:86:0c
------BEGIN CERTIFICATE-----
-MIID5jCCAs6gAwIBAgIQV8szb8JcFuZHFhfjkDFo4DANBgkqhkiG9w0BAQUFADBi
-MQswCQYDVQQGEwJVUzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMu
-MTAwLgYDVQQDEydOZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3Jp
-dHkwHhcNMDYxMjAxMDAwMDAwWhcNMjkxMjMxMjM1OTU5WjBiMQswCQYDVQQGEwJV
-UzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMuMTAwLgYDVQQDEydO
-ZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwggEiMA0GCSqG
-SIb3DQEBAQUAA4IBDwAwggEKAoIBAQDkvH6SMG3G2I4rC7xGzuAnlt7e+foS0zwz
-c7MEL7xxjOWftiJgPl9dzgn/ggwbmlFQGiaJ3dVhXRncEg8tCqJDXRfQNJIg6nPP
-OCwGJgl6cvf6UDL4wpPTaaIjzkGxzOTVHzbRijr4jGPiFFlp7Q3Tf2vouAPlT2rl
-mGNpSAW+Lv8ztumXWWn4Zxmuk2GWRBXTcrA/vGp97Eh/jcOrqnErU2lBUzS1sLnF
-BgrEsEX1QV1uiUV7PTsmjHTC5dLRfbIR1PtYMiKagMnc/Qzpf14Dl847ABSHJ3A4
-qY5usyd2mFHgBeMhqxrVhSI8KbWaFsWAqPS7azCPL0YCorEMIuDTAgMBAAGjgZcw
-gZQwHQYDVR0OBBYEFCEwyfsA106Y2oeqKtCnLrFAMadMMA4GA1UdDwEB/wQEAwIB
-BjAPBgNVHRMBAf8EBTADAQH/MFIGA1UdHwRLMEkwR6BFoEOGQWh0dHA6Ly9jcmwu
-bmV0c29sc3NsLmNvbS9OZXR3b3JrU29sdXRpb25zQ2VydGlmaWNhdGVBdXRob3Jp
-dHkuY3JsMA0GCSqGSIb3DQEBBQUAA4IBAQC7rkvnt1frf6ott3NHhWrB5KUd5Oc8
-6fRZZXe1eltajSU24HqXLjjAV2CDmAaDn7l2em5Q4LqILPxFzBiwmZVRDuwduIj/
-h1AcgsLj4DKAv6ALR8jDMe+ZZzKATxcheQxpXN5eNK4CtSbqUN9/GGUsyfJj4akH
-/nxxH2szJGoeBfcFaMBqEssuXmHLrijTfsK0ZpEmXzwuJF/LWA/rKOyvEZbz3Htv
-wKeI8lN3s2Berq4o2jUsbzRF0ybh3uxbTydrFny9RAQYgrOJeRcQcT16ohZO9QHN
-pGxlaKFJdlxDydi8NmdspZS11My5vWo1ViHe2MPr+8ukYEywVaCge1ey
------END CERTIFICATE-----
-
# Issuer: CN=COMODO ECC Certification Authority O=COMODO CA Limited
# Subject: CN=COMODO ECC Certification Authority O=COMODO CA Limited
# Label: "COMODO ECC Certification Authority"
@@ -2204,46 +2173,6 @@ KoZIzj0EAwMDaAAwZQIxAOVpEslu28YxuglB4Zf4+/2a4n0Sye18ZNPLBSWLVtmg
xwy8p2Fp8fc74SrL+SvzZpA3
-----END CERTIFICATE-----
-# Issuer: CN=Staat der Nederlanden EV Root CA O=Staat der Nederlanden
-# Subject: CN=Staat der Nederlanden EV Root CA O=Staat der Nederlanden
-# Label: "Staat der Nederlanden EV Root CA"
-# Serial: 10000013
-# MD5 Fingerprint: fc:06:af:7b:e8:1a:f1:9a:b4:e8:d2:70:1f:c0:f5:ba
-# SHA1 Fingerprint: 76:e2:7e:c1:4f:db:82:c1:c0:a6:75:b5:05:be:3d:29:b4:ed:db:bb
-# SHA256 Fingerprint: 4d:24:91:41:4c:fe:95:67:46:ec:4c:ef:a6:cf:6f:72:e2:8a:13:29:43:2f:9d:8a:90:7a:c4:cb:5d:ad:c1:5a
------BEGIN CERTIFICATE-----
-MIIFcDCCA1igAwIBAgIEAJiWjTANBgkqhkiG9w0BAQsFADBYMQswCQYDVQQGEwJO
-TDEeMBwGA1UECgwVU3RhYXQgZGVyIE5lZGVybGFuZGVuMSkwJwYDVQQDDCBTdGFh
-dCBkZXIgTmVkZXJsYW5kZW4gRVYgUm9vdCBDQTAeFw0xMDEyMDgxMTE5MjlaFw0y
-MjEyMDgxMTEwMjhaMFgxCzAJBgNVBAYTAk5MMR4wHAYDVQQKDBVTdGFhdCBkZXIg
-TmVkZXJsYW5kZW4xKTAnBgNVBAMMIFN0YWF0IGRlciBOZWRlcmxhbmRlbiBFViBS
-b290IENBMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA48d+ifkkSzrS
-M4M1LGns3Amk41GoJSt5uAg94JG6hIXGhaTK5skuU6TJJB79VWZxXSzFYGgEt9nC
-UiY4iKTWO0Cmws0/zZiTs1QUWJZV1VD+hq2kY39ch/aO5ieSZxeSAgMs3NZmdO3d
-Z//BYY1jTw+bbRcwJu+r0h8QoPnFfxZpgQNH7R5ojXKhTbImxrpsX23Wr9GxE46p
-rfNeaXUmGD5BKyF/7otdBwadQ8QpCiv8Kj6GyzyDOvnJDdrFmeK8eEEzduG/L13l
-pJhQDBXd4Pqcfzho0LKmeqfRMb1+ilgnQ7O6M5HTp5gVXJrm0w912fxBmJc+qiXb
-j5IusHsMX/FjqTf5m3VpTCgmJdrV8hJwRVXj33NeN/UhbJCONVrJ0yPr08C+eKxC
-KFhmpUZtcALXEPlLVPxdhkqHz3/KRawRWrUgUY0viEeXOcDPusBCAUCZSCELa6fS
-/ZbV0b5GnUngC6agIk440ME8MLxwjyx1zNDFjFE7PZQIZCZhfbnDZY8UnCHQqv0X
-cgOPvZuM5l5Tnrmd74K74bzickFbIZTTRTeU0d8JOV3nI6qaHcptqAqGhYqCvkIH
-1vI4gnPah1vlPNOePqc7nvQDs/nxfRN0Av+7oeX6AHkcpmZBiFxgV6YuCcS6/ZrP
-px9Aw7vMWgpVSzs4dlG4Y4uElBbmVvMCAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB
-/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFP6rAJCYniT8qcwaivsnuL8wbqg7
-MA0GCSqGSIb3DQEBCwUAA4ICAQDPdyxuVr5Os7aEAJSrR8kN0nbHhp8dB9O2tLsI
-eK9p0gtJ3jPFrK3CiAJ9Brc1AsFgyb/E6JTe1NOpEyVa/m6irn0F3H3zbPB+po3u
-2dfOWBfoqSmuc0iH55vKbimhZF8ZE/euBhD/UcabTVUlT5OZEAFTdfETzsemQUHS
-v4ilf0X8rLiltTMMgsT7B/Zq5SWEXwbKwYY5EdtYzXc7LMJMD16a4/CrPmEbUCTC
-wPTxGfARKbalGAKb12NMcIxHowNDXLldRqANb/9Zjr7dn3LDWyvfjFvO5QxGbJKy
-CqNMVEIYFRIYvdr8unRu/8G2oGTYqV9Vrp9canaW2HNnh/tNf1zuacpzEPuKqf2e
-vTY4SUmH9A4U8OmHuD+nT3pajnnUk+S7aFKErGzp85hwVXIy+TSrK0m1zSBi5Dp6
-Z2Orltxtrpfs/J92VoguZs9btsmksNcFuuEnL5O7Jiqik7Ab846+HUCjuTaPPoIa
-Gl6I6lD4WeKDRikL40Rc4ZW2aZCaFG+XroHPaO+Zmr615+F/+PoTRxZMzG0IQOeL
-eG9QgkRQP2YGiqtDhFZKDyAthg710tvSeopLzaXoTvFeJiUBWSOgftL2fiFX1ye8
-FVdMpEbB4IMeDExNH08GGeL5qPQ6gqGyeUN51q1veieQA6TqJIc/2b3Z6fJfUEkc
-7uzXLg==
------END CERTIFICATE-----
-
# Issuer: CN=IdenTrust Commercial Root CA 1 O=IdenTrust
# Subject: CN=IdenTrust Commercial Root CA 1 O=IdenTrust
# Label: "IdenTrust Commercial Root CA 1"
@@ -2851,116 +2780,6 @@ T8p+ck0LcIymSLumoRT2+1hEmRSuqguTaaApJUqlyyvdimYHFngVV3Eb7PVHhPOe
MTd61X8kreS8/f3MboPoDKi3QWwH3b08hpcv0g==
-----END CERTIFICATE-----
-# Issuer: CN=TrustCor RootCert CA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
-# Subject: CN=TrustCor RootCert CA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
-# Label: "TrustCor RootCert CA-1"
-# Serial: 15752444095811006489
-# MD5 Fingerprint: 6e:85:f1:dc:1a:00:d3:22:d5:b2:b2:ac:6b:37:05:45
-# SHA1 Fingerprint: ff:bd:cd:e7:82:c8:43:5e:3c:6f:26:86:5c:ca:a8:3a:45:5b:c3:0a
-# SHA256 Fingerprint: d4:0e:9c:86:cd:8f:e4:68:c1:77:69:59:f4:9e:a7:74:fa:54:86:84:b6:c4:06:f3:90:92:61:f4:dc:e2:57:5c
------BEGIN CERTIFICATE-----
-MIIEMDCCAxigAwIBAgIJANqb7HHzA7AZMA0GCSqGSIb3DQEBCwUAMIGkMQswCQYD
-VQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEk
-MCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U
-cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRydXN0Q29y
-IFJvb3RDZXJ0IENBLTEwHhcNMTYwMjA0MTIzMjE2WhcNMjkxMjMxMTcyMzE2WjCB
-pDELMAkGA1UEBhMCUEExDzANBgNVBAgMBlBhbmFtYTEUMBIGA1UEBwwLUGFuYW1h
-IENpdHkxJDAiBgNVBAoMG1RydXN0Q29yIFN5c3RlbXMgUy4gZGUgUi5MLjEnMCUG
-A1UECwweVHJ1c3RDb3IgQ2VydGlmaWNhdGUgQXV0aG9yaXR5MR8wHQYDVQQDDBZU
-cnVzdENvciBSb290Q2VydCBDQS0xMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB
-CgKCAQEAv463leLCJhJrMxnHQFgKq1mqjQCj/IDHUHuO1CAmujIS2CNUSSUQIpid
-RtLByZ5OGy4sDjjzGiVoHKZaBeYei0i/mJZ0PmnK6bV4pQa81QBeCQryJ3pS/C3V
-seq0iWEk8xoT26nPUu0MJLq5nux+AHT6k61sKZKuUbS701e/s/OojZz0JEsq1pme
-9J7+wH5COucLlVPat2gOkEz7cD+PSiyU8ybdY2mplNgQTsVHCJCZGxdNuWxu72CV
-EY4hgLW9oHPY0LJ3xEXqWib7ZnZ2+AYfYW0PVcWDtxBWcgYHpfOxGgMFZA6dWorW
-hnAbJN7+KIor0Gqw/Hqi3LJ5DotlDwIDAQABo2MwYTAdBgNVHQ4EFgQU7mtJPHo/
-DeOxCbeKyKsZn3MzUOcwHwYDVR0jBBgwFoAU7mtJPHo/DeOxCbeKyKsZn3MzUOcw
-DwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQAD
-ggEBACUY1JGPE+6PHh0RU9otRCkZoB5rMZ5NDp6tPVxBb5UrJKF5mDo4Nvu7Zp5I
-/5CQ7z3UuJu0h3U/IJvOcs+hVcFNZKIZBqEHMwwLKeXx6quj7LUKdJDHfXLy11yf
-ke+Ri7fc7Waiz45mO7yfOgLgJ90WmMCV1Aqk5IGadZQ1nJBfiDcGrVmVCrDRZ9MZ
-yonnMlo2HD6CqFqTvsbQZJG2z9m2GM/bftJlo6bEjhcxwft+dtvTheNYsnd6djts
-L1Ac59v2Z3kf9YKVmgenFK+P3CghZwnS1k1aHBkcjndcw5QkPTJrS37UeJSDvjdN
-zl/HHk484IkzlQsPpTLWPFp5LBk=
------END CERTIFICATE-----
-
-# Issuer: CN=TrustCor RootCert CA-2 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
-# Subject: CN=TrustCor RootCert CA-2 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
-# Label: "TrustCor RootCert CA-2"
-# Serial: 2711694510199101698
-# MD5 Fingerprint: a2:e1:f8:18:0b:ba:45:d5:c7:41:2a:bb:37:52:45:64
-# SHA1 Fingerprint: b8:be:6d:cb:56:f1:55:b9:63:d4:12:ca:4e:06:34:c7:94:b2:1c:c0
-# SHA256 Fingerprint: 07:53:e9:40:37:8c:1b:d5:e3:83:6e:39:5d:ae:a5:cb:83:9e:50:46:f1:bd:0e:ae:19:51:cf:10:fe:c7:c9:65
------BEGIN CERTIFICATE-----
-MIIGLzCCBBegAwIBAgIIJaHfyjPLWQIwDQYJKoZIhvcNAQELBQAwgaQxCzAJBgNV
-BAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQw
-IgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRy
-dXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0eTEfMB0GA1UEAwwWVHJ1c3RDb3Ig
-Um9vdENlcnQgQ0EtMjAeFw0xNjAyMDQxMjMyMjNaFw0zNDEyMzExNzI2MzlaMIGk
-MQswCQYDVQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEg
-Q2l0eTEkMCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYD
-VQQLDB5UcnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRy
-dXN0Q29yIFJvb3RDZXJ0IENBLTIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK
-AoICAQCnIG7CKqJiJJWQdsg4foDSq8GbZQWU9MEKENUCrO2fk8eHyLAnK0IMPQo+
-QVqedd2NyuCb7GgypGmSaIwLgQ5WoD4a3SwlFIIvl9NkRvRUqdw6VC0xK5mC8tkq
-1+9xALgxpL56JAfDQiDyitSSBBtlVkxs1Pu2YVpHI7TYabS3OtB0PAx1oYxOdqHp
-2yqlO/rOsP9+aij9JxzIsekp8VduZLTQwRVtDr4uDkbIXvRR/u8OYzo7cbrPb1nK
-DOObXUm4TOJXsZiKQlecdu/vvdFoqNL0Cbt3Nb4lggjEFixEIFapRBF37120Hape
-az6LMvYHL1cEksr1/p3C6eizjkxLAjHZ5DxIgif3GIJ2SDpxsROhOdUuxTTCHWKF
-3wP+TfSvPd9cW436cOGlfifHhi5qjxLGhF5DUVCcGZt45vz27Ud+ez1m7xMTiF88
-oWP7+ayHNZ/zgp6kPwqcMWmLmaSISo5uZk3vFsQPeSghYA2FFn3XVDjxklb9tTNM
-g9zXEJ9L/cb4Qr26fHMC4P99zVvh1Kxhe1fVSntb1IVYJ12/+CtgrKAmrhQhJ8Z3
-mjOAPF5GP/fDsaOGM8boXg25NSyqRsGFAnWAoOsk+xWq5Gd/bnc/9ASKL3x74xdh
-8N0JqSDIvgmk0H5Ew7IwSjiqqewYmgeCK9u4nBit2uBGF6zPXQIDAQABo2MwYTAd
-BgNVHQ4EFgQU2f4hQG6UnrybPZx9mCAZ5YwwYrIwHwYDVR0jBBgwFoAU2f4hQG6U
-nrybPZx9mCAZ5YwwYrIwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYw
-DQYJKoZIhvcNAQELBQADggIBAJ5Fngw7tu/hOsh80QA9z+LqBrWyOrsGS2h60COX
-dKcs8AjYeVrXWoSK2BKaG9l9XE1wxaX5q+WjiYndAfrs3fnpkpfbsEZC89NiqpX+
-MWcUaViQCqoL7jcjx1BRtPV+nuN79+TMQjItSQzL/0kMmx40/W5ulop5A7Zv2wnL
-/V9lFDfhOPXzYRZY5LVtDQsEGz9QLX+zx3oaFoBg+Iof6Rsqxvm6ARppv9JYx1RX
-CI/hOWB3S6xZhBqI8d3LT3jX5+EzLfzuQfogsL7L9ziUwOHQhQ+77Sxzq+3+knYa
-ZH9bDTMJBzN7Bj8RpFxwPIXAz+OQqIN3+tvmxYxoZxBnpVIt8MSZj3+/0WvitUfW
-2dCFmU2Umw9Lje4AWkcdEQOsQRivh7dvDDqPys/cA8GiCcjl/YBeyGBCARsaU1q7
-N6a3vLqE6R5sGtRk2tRD/pOLS/IseRYQ1JMLiI+h2IYURpFHmygk71dSTlxCnKr3
-Sewn6EAes6aJInKc9Q0ztFijMDvd1GpUk74aTfOTlPf8hAs/hCBcNANExdqtvArB
-As8e5ZTZ845b2EzwnexhF7sUMlQMAimTHpKG9n/v55IFDlndmQguLvqcAFLTxWYp
-5KeXRKQOKIETNcX2b2TmQcTVL8w0RSXPQQCWPUouwpaYT05KnJe32x+SMsj/D1Fu
-1uwJ
------END CERTIFICATE-----
-
-# Issuer: CN=TrustCor ECA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
-# Subject: CN=TrustCor ECA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
-# Label: "TrustCor ECA-1"
-# Serial: 9548242946988625984
-# MD5 Fingerprint: 27:92:23:1d:0a:f5:40:7c:e9:e6:6b:9d:d8:f5:e7:6c
-# SHA1 Fingerprint: 58:d1:df:95:95:67:6b:63:c0:f0:5b:1c:17:4d:8b:84:0b:c8:78:bd
-# SHA256 Fingerprint: 5a:88:5d:b1:9c:01:d9:12:c5:75:93:88:93:8c:af:bb:df:03:1a:b2:d4:8e:91:ee:15:58:9b:42:97:1d:03:9c
------BEGIN CERTIFICATE-----
-MIIEIDCCAwigAwIBAgIJAISCLF8cYtBAMA0GCSqGSIb3DQEBCwUAMIGcMQswCQYD
-VQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEk
-MCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U
-cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxFzAVBgNVBAMMDlRydXN0Q29y
-IEVDQS0xMB4XDTE2MDIwNDEyMzIzM1oXDTI5MTIzMTE3MjgwN1owgZwxCzAJBgNV
-BAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQw
-IgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRy
-dXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0eTEXMBUGA1UEAwwOVHJ1c3RDb3Ig
-RUNBLTEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDPj+ARtZ+odnbb
-3w9U73NjKYKtR8aja+3+XzP4Q1HpGjORMRegdMTUpwHmspI+ap3tDvl0mEDTPwOA
-BoJA6LHip1GnHYMma6ve+heRK9jGrB6xnhkB1Zem6g23xFUfJ3zSCNV2HykVh0A5
-3ThFEXXQmqc04L/NyFIduUd+Dbi7xgz2c1cWWn5DkR9VOsZtRASqnKmcp0yJF4Ou
-owReUoCLHhIlERnXDH19MURB6tuvsBzvgdAsxZohmz3tQjtQJvLsznFhBmIhVE5/
-wZ0+fyCMgMsq2JdiyIMzkX2woloPV+g7zPIlstR8L+xNxqE6FXrntl019fZISjZF
-ZtS6mFjBAgMBAAGjYzBhMB0GA1UdDgQWBBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAf
-BgNVHSMEGDAWgBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAPBgNVHRMBAf8EBTADAQH/
-MA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAQEABT41XBVwm8nHc2Fv
-civUwo/yQ10CzsSUuZQRg2dd4mdsdXa/uwyqNsatR5Nj3B5+1t4u/ukZMjgDfxT2
-AHMsWbEhBuH7rBiVDKP/mZb3Kyeb1STMHd3BOuCYRLDE5D53sXOpZCz2HAF8P11F
-hcCF5yWPldwX8zyfGm6wyuMdKulMY/okYWLW2n62HGz1Ah3UKt1VkOsqEUc8Ll50
-soIipX1TH0XsJ5F95yIW6MBoNtjG8U+ARDL54dHRHareqKucBK+tIA5kmE2la8BI
-WJZpTdwHjFGTot+fDz2LYLSCjaoITmJF4PkL0uDgPFveXHEnJcLmA4GLEFPjx1Wi
-tJ/X5g==
------END CERTIFICATE-----
-
# Issuer: CN=SSL.com Root Certification Authority RSA O=SSL Corporation
# Subject: CN=SSL.com Root Certification Authority RSA O=SSL Corporation
# Label: "SSL.com Root Certification Authority RSA"
diff --git a/src/pip/_vendor/chardet.pyi b/src/pip/_vendor/chardet.pyi
deleted file mode 100644
index 29e87e331..000000000
--- a/src/pip/_vendor/chardet.pyi
+++ /dev/null
@@ -1 +0,0 @@
-from chardet import *
\ No newline at end of file
diff --git a/src/pip/_vendor/chardet/__init__.py b/src/pip/_vendor/chardet/__init__.py
index e91ad6182..fe581623d 100644
--- a/src/pip/_vendor/chardet/__init__.py
+++ b/src/pip/_vendor/chardet/__init__.py
@@ -15,19 +15,29 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from typing import List, Union
+
+from .charsetgroupprober import CharSetGroupProber
+from .charsetprober import CharSetProber
from .enums import InputState
+from .resultdict import ResultDict
from .universaldetector import UniversalDetector
from .version import VERSION, __version__
__all__ = ["UniversalDetector", "detect", "detect_all", "__version__", "VERSION"]
-def detect(byte_str):
+def detect(
+ byte_str: Union[bytes, bytearray], should_rename_legacy: bool = False
+) -> ResultDict:
"""
Detect the encoding of the given byte string.
:param byte_str: The byte sequence to examine.
:type byte_str: ``bytes`` or ``bytearray``
+ :param should_rename_legacy: Should we rename legacy encodings
+ to their more modern equivalents?
+ :type should_rename_legacy: ``bool``
"""
if not isinstance(byte_str, bytearray):
if not isinstance(byte_str, bytes):
@@ -35,12 +45,16 @@ def detect(byte_str):
f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
)
byte_str = bytearray(byte_str)
- detector = UniversalDetector()
+ detector = UniversalDetector(should_rename_legacy=should_rename_legacy)
detector.feed(byte_str)
return detector.close()
-def detect_all(byte_str, ignore_threshold=False):
+def detect_all(
+ byte_str: Union[bytes, bytearray],
+ ignore_threshold: bool = False,
+ should_rename_legacy: bool = False,
+) -> List[ResultDict]:
"""
Detect all the possible encodings of the given byte string.
@@ -50,6 +64,9 @@ def detect_all(byte_str, ignore_threshold=False):
``UniversalDetector.MINIMUM_THRESHOLD``
in results.
:type ignore_threshold: ``bool``
+ :param should_rename_legacy: Should we rename legacy encodings
+ to their more modern equivalents?
+ :type should_rename_legacy: ``bool``
"""
if not isinstance(byte_str, bytearray):
if not isinstance(byte_str, bytes):
@@ -58,15 +75,15 @@ def detect_all(byte_str, ignore_threshold=False):
)
byte_str = bytearray(byte_str)
- detector = UniversalDetector()
+ detector = UniversalDetector(should_rename_legacy=should_rename_legacy)
detector.feed(byte_str)
detector.close()
if detector.input_state == InputState.HIGH_BYTE:
- results = []
- probers = []
+ results: List[ResultDict] = []
+ probers: List[CharSetProber] = []
for prober in detector.charset_probers:
- if hasattr(prober, "probers"):
+ if isinstance(prober, CharSetGroupProber):
probers.extend(p for p in prober.probers)
else:
probers.append(prober)
@@ -80,6 +97,11 @@ def detect_all(byte_str, ignore_threshold=False):
charset_name = detector.ISO_WIN_MAP.get(
lower_charset_name, charset_name
)
+ # Rename legacy encodings with superset encodings if asked
+ if should_rename_legacy:
+ charset_name = detector.LEGACY_MAP.get(
+ charset_name.lower(), charset_name
+ )
results.append(
{
"encoding": charset_name,
diff --git a/src/pip/_vendor/chardet/big5prober.py b/src/pip/_vendor/chardet/big5prober.py
index e4dfa7aa0..ef09c60e3 100644
--- a/src/pip/_vendor/chardet/big5prober.py
+++ b/src/pip/_vendor/chardet/big5prober.py
@@ -32,16 +32,16 @@ from .mbcssm import BIG5_SM_MODEL
class Big5Prober(MultiByteCharSetProber):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
self.distribution_analyzer = Big5DistributionAnalysis()
self.reset()
@property
- def charset_name(self):
+ def charset_name(self) -> str:
return "Big5"
@property
- def language(self):
+ def language(self) -> str:
return "Chinese"
diff --git a/src/pip/_vendor/chardet/chardistribution.py b/src/pip/_vendor/chardet/chardistribution.py
index 27b4a2939..176cb9964 100644
--- a/src/pip/_vendor/chardet/chardistribution.py
+++ b/src/pip/_vendor/chardet/chardistribution.py
@@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from typing import Tuple, Union
+
from .big5freq import (
BIG5_CHAR_TO_FREQ_ORDER,
BIG5_TABLE_SIZE,
@@ -59,22 +61,22 @@ class CharDistributionAnalysis:
SURE_NO = 0.01
MINIMUM_DATA_THRESHOLD = 3
- def __init__(self):
+ def __init__(self) -> None:
# Mapping table to get frequency order from char order (get from
# GetOrder())
- self._char_to_freq_order = tuple()
- self._table_size = None # Size of above table
+ self._char_to_freq_order: Tuple[int, ...] = tuple()
+ self._table_size = 0 # Size of above table
# This is a constant value which varies from language to language,
# used in calculating confidence. See
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
# for further detail.
- self.typical_distribution_ratio = None
- self._done = None
- self._total_chars = None
- self._freq_chars = None
+ self.typical_distribution_ratio = 0.0
+ self._done = False
+ self._total_chars = 0
+ self._freq_chars = 0
self.reset()
- def reset(self):
+ def reset(self) -> None:
"""reset analyser, clear any state"""
# If this flag is set to True, detection is done and conclusion has
# been made
@@ -83,7 +85,7 @@ class CharDistributionAnalysis:
# The number of characters whose frequency order is less than 512
self._freq_chars = 0
- def feed(self, char, char_len):
+ def feed(self, char: Union[bytes, bytearray], char_len: int) -> None:
"""feed a character with known length"""
if char_len == 2:
# we only care about 2-bytes character in our distribution analysis
@@ -97,7 +99,7 @@ class CharDistributionAnalysis:
if 512 > self._char_to_freq_order[order]:
self._freq_chars += 1
- def get_confidence(self):
+ def get_confidence(self) -> float:
"""return confidence based on existing data"""
# if we didn't receive any character in our consideration range,
# return negative answer
@@ -114,12 +116,12 @@ class CharDistributionAnalysis:
# normalize confidence (we don't want to be 100% sure)
return self.SURE_YES
- def got_enough_data(self):
+ def got_enough_data(self) -> bool:
# It is not necessary to receive all data to draw conclusion.
# For charset detection, certain amount of data is enough
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
- def get_order(self, _):
+ def get_order(self, _: Union[bytes, bytearray]) -> int:
# We do not handle characters based on the original encoding string,
# but convert this encoding string to a number, here called order.
# This allows multiple encodings of a language to share one frequency
@@ -128,13 +130,13 @@ class CharDistributionAnalysis:
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
self._table_size = EUCTW_TABLE_SIZE
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
- def get_order(self, byte_str):
+ def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for euc-TW encoding, we are interested
# first byte range: 0xc4 -- 0xfe
# second byte range: 0xa1 -- 0xfe
@@ -146,13 +148,13 @@ class EUCTWDistributionAnalysis(CharDistributionAnalysis):
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
self._table_size = EUCKR_TABLE_SIZE
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
- def get_order(self, byte_str):
+ def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for euc-KR encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
@@ -164,13 +166,13 @@ class EUCKRDistributionAnalysis(CharDistributionAnalysis):
class JOHABDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
self._table_size = EUCKR_TABLE_SIZE
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
- def get_order(self, byte_str):
+ def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
first_char = byte_str[0]
if 0x88 <= first_char < 0xD4:
code = first_char * 256 + byte_str[1]
@@ -179,13 +181,13 @@ class JOHABDistributionAnalysis(CharDistributionAnalysis):
class GB2312DistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
self._table_size = GB2312_TABLE_SIZE
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
- def get_order(self, byte_str):
+ def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for GB2312 encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
@@ -197,13 +199,13 @@ class GB2312DistributionAnalysis(CharDistributionAnalysis):
class Big5DistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
self._table_size = BIG5_TABLE_SIZE
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
- def get_order(self, byte_str):
+ def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for big5 encoding, we are interested
# first byte range: 0xa4 -- 0xfe
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
@@ -217,13 +219,13 @@ class Big5DistributionAnalysis(CharDistributionAnalysis):
class SJISDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
- def get_order(self, byte_str):
+ def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for sjis encoding, we are interested
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
@@ -242,13 +244,13 @@ class SJISDistributionAnalysis(CharDistributionAnalysis):
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
- def get_order(self, byte_str):
+ def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for euc-JP encoding, we are interested
# first byte range: 0xa0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
diff --git a/src/pip/_vendor/chardet/charsetgroupprober.py b/src/pip/_vendor/chardet/charsetgroupprober.py
index 778ff332b..6def56b4a 100644
--- a/src/pip/_vendor/chardet/charsetgroupprober.py
+++ b/src/pip/_vendor/chardet/charsetgroupprober.py
@@ -25,29 +25,30 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from typing import List, Optional, Union
+
from .charsetprober import CharSetProber
-from .enums import ProbingState
+from .enums import LanguageFilter, ProbingState
class CharSetGroupProber(CharSetProber):
- def __init__(self, lang_filter=None):
+ def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
super().__init__(lang_filter=lang_filter)
self._active_num = 0
- self.probers = []
- self._best_guess_prober = None
+ self.probers: List[CharSetProber] = []
+ self._best_guess_prober: Optional[CharSetProber] = None
- def reset(self):
+ def reset(self) -> None:
super().reset()
self._active_num = 0
for prober in self.probers:
- if prober:
- prober.reset()
- prober.active = True
- self._active_num += 1
+ prober.reset()
+ prober.active = True
+ self._active_num += 1
self._best_guess_prober = None
@property
- def charset_name(self):
+ def charset_name(self) -> Optional[str]:
if not self._best_guess_prober:
self.get_confidence()
if not self._best_guess_prober:
@@ -55,17 +56,15 @@ class CharSetGroupProber(CharSetProber):
return self._best_guess_prober.charset_name
@property
- def language(self):
+ def language(self) -> Optional[str]:
if not self._best_guess_prober:
self.get_confidence()
if not self._best_guess_prober:
return None
return self._best_guess_prober.language
- def feed(self, byte_str):
+ def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for prober in self.probers:
- if not prober:
- continue
if not prober.active:
continue
state = prober.feed(byte_str)
@@ -83,7 +82,7 @@ class CharSetGroupProber(CharSetProber):
return self.state
return self.state
- def get_confidence(self):
+ def get_confidence(self) -> float:
state = self.state
if state == ProbingState.FOUND_IT:
return 0.99
@@ -92,8 +91,6 @@ class CharSetGroupProber(CharSetProber):
best_conf = 0.0
self._best_guess_prober = None
for prober in self.probers:
- if not prober:
- continue
if not prober.active:
self.logger.debug("%s not active", prober.charset_name)
continue
diff --git a/src/pip/_vendor/chardet/charsetprober.py b/src/pip/_vendor/chardet/charsetprober.py
index 9f1afd999..a103ca113 100644
--- a/src/pip/_vendor/chardet/charsetprober.py
+++ b/src/pip/_vendor/chardet/charsetprober.py
@@ -28,8 +28,9 @@
import logging
import re
+from typing import Optional, Union
-from .enums import ProbingState
+from .enums import LanguageFilter, ProbingState
INTERNATIONAL_WORDS_PATTERN = re.compile(
b"[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?"
@@ -40,35 +41,40 @@ class CharSetProber:
SHORTCUT_THRESHOLD = 0.95
- def __init__(self, lang_filter=None):
- self._state = None
+ def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
+ self._state = ProbingState.DETECTING
+ self.active = True
self.lang_filter = lang_filter
self.logger = logging.getLogger(__name__)
- def reset(self):
+ def reset(self) -> None:
self._state = ProbingState.DETECTING
@property
- def charset_name(self):
+ def charset_name(self) -> Optional[str]:
return None
- def feed(self, byte_str):
+ @property
+ def language(self) -> Optional[str]:
+ raise NotImplementedError
+
+ def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
raise NotImplementedError
@property
- def state(self):
+ def state(self) -> ProbingState:
return self._state
- def get_confidence(self):
+ def get_confidence(self) -> float:
return 0.0
@staticmethod
- def filter_high_byte_only(buf):
+ def filter_high_byte_only(buf: Union[bytes, bytearray]) -> bytes:
buf = re.sub(b"([\x00-\x7F])+", b" ", buf)
return buf
@staticmethod
- def filter_international_words(buf):
+ def filter_international_words(buf: Union[bytes, bytearray]) -> bytearray:
"""
We define three types of bytes:
alphabet: english alphabets [a-zA-Z]
@@ -102,7 +108,7 @@ class CharSetProber:
return filtered
@staticmethod
- def remove_xml_tags(buf):
+ def remove_xml_tags(buf: Union[bytes, bytearray]) -> bytes:
"""
Returns a copy of ``buf`` that retains only the sequences of English
alphabet and high byte characters that are not between <> characters.
@@ -117,10 +123,13 @@ class CharSetProber:
for curr, buf_char in enumerate(buf):
# Check if we're coming out of or entering an XML tag
- if buf_char == b">":
+
+ # https://github.com/python/typeshed/issues/8182
+ if buf_char == b">": # type: ignore[comparison-overlap]
prev = curr + 1
in_tag = False
- elif buf_char == b"<":
+ # https://github.com/python/typeshed/issues/8182
+ elif buf_char == b"<": # type: ignore[comparison-overlap]
if curr > prev and not in_tag:
# Keep everything after last non-extended-ASCII,
# non-alphabetic character
diff --git a/src/pip/_vendor/chardet/cli/chardetect.py b/src/pip/_vendor/chardet/cli/chardetect.py
index 7926fa37e..43f6e144f 100644
--- a/src/pip/_vendor/chardet/cli/chardetect.py
+++ b/src/pip/_vendor/chardet/cli/chardetect.py
@@ -15,12 +15,18 @@ If no paths are provided, it takes its input from stdin.
import argparse
import sys
+from typing import Iterable, List, Optional
from .. import __version__
from ..universaldetector import UniversalDetector
-def description_of(lines, name="stdin"):
+def description_of(
+ lines: Iterable[bytes],
+ name: str = "stdin",
+ minimal: bool = False,
+ should_rename_legacy: bool = False,
+) -> Optional[str]:
"""
Return a string describing the probable encoding of a file or
list of strings.
@@ -29,8 +35,11 @@ def description_of(lines, name="stdin"):
:type lines: Iterable of bytes
:param name: Name of file or collection of lines
:type name: str
+ :param should_rename_legacy: Should we rename legacy encodings to
+ their more modern equivalents?
+ :type should_rename_legacy: ``bool``
"""
- u = UniversalDetector()
+ u = UniversalDetector(should_rename_legacy=should_rename_legacy)
for line in lines:
line = bytearray(line)
u.feed(line)
@@ -39,12 +48,14 @@ def description_of(lines, name="stdin"):
break
u.close()
result = u.result
+ if minimal:
+ return result["encoding"]
if result["encoding"]:
return f'{name}: {result["encoding"]} with confidence {result["confidence"]}'
return f"{name}: no result"
-def main(argv=None):
+def main(argv: Optional[List[str]] = None) -> None:
"""
Handles command line arguments and gets things started.
@@ -54,18 +65,29 @@ def main(argv=None):
"""
# Get command line arguments
parser = argparse.ArgumentParser(
- description="Takes one or more file paths and reports their detected \
- encodings"
+ description=(
+ "Takes one or more file paths and reports their detected encodings"
+ )
)
parser.add_argument(
"input",
- help="File whose encoding we would like to determine. \
- (default: stdin)",
+ help="File whose encoding we would like to determine. (default: stdin)",
type=argparse.FileType("rb"),
nargs="*",
default=[sys.stdin.buffer],
)
parser.add_argument(
+ "--minimal",
+ help="Print only the encoding to standard output",
+ action="store_true",
+ )
+ parser.add_argument(
+ "-l",
+ "--legacy",
+ help="Rename legacy encodings to more modern ones.",
+ action="store_true",
+ )
+ parser.add_argument(
"--version", action="version", version=f"%(prog)s {__version__}"
)
args = parser.parse_args(argv)
@@ -79,7 +101,11 @@ def main(argv=None):
"--help\n",
file=sys.stderr,
)
- print(description_of(f, f.name))
+ print(
+ description_of(
+ f, f.name, minimal=args.minimal, should_rename_legacy=args.legacy
+ )
+ )
if __name__ == "__main__":
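
The CLI grows two flags in this hunk: `--minimal`, which prints only the encoding, and `-l`/`--legacy`, which enables the renaming behaviour described above. A hedged sketch of driving the entry point in-process; the file name is a placeholder:

    # Sketch: exercising the new chardetect flags via main(argv).
    # "data.txt" is a hypothetical path; any readable file works.
    from pip._vendor.chardet.cli.chardetect import main

    main(["data.txt", "--minimal"])              # prints just the encoding
    main(["data.txt", "--minimal", "--legacy"])  # ...with legacy names renamed
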
diff --git a/src/pip/_vendor/chardet/codingstatemachine.py b/src/pip/_vendor/chardet/codingstatemachine.py
index d3e3e825d..8ed4a8773 100644
--- a/src/pip/_vendor/chardet/codingstatemachine.py
+++ b/src/pip/_vendor/chardet/codingstatemachine.py
@@ -27,6 +27,7 @@
import logging
+from .codingstatemachinedict import CodingStateMachineDict
from .enums import MachineState
@@ -53,18 +54,19 @@ class CodingStateMachine:
encoding from consideration from here on.
"""
- def __init__(self, sm):
+ def __init__(self, sm: CodingStateMachineDict) -> None:
self._model = sm
self._curr_byte_pos = 0
self._curr_char_len = 0
- self._curr_state = None
+ self._curr_state = MachineState.START
+ self.active = True
self.logger = logging.getLogger(__name__)
self.reset()
- def reset(self):
+ def reset(self) -> None:
self._curr_state = MachineState.START
- def next_state(self, c):
+ def next_state(self, c: int) -> int:
# for each byte we get its class
# if it is first byte, we also get byte length
byte_class = self._model["class_table"][c]
@@ -77,12 +79,12 @@ class CodingStateMachine:
self._curr_byte_pos += 1
return self._curr_state
- def get_current_charlen(self):
+ def get_current_charlen(self) -> int:
return self._curr_char_len
- def get_coding_state_machine(self):
+ def get_coding_state_machine(self) -> str:
return self._model["name"]
@property
- def language(self):
+ def language(self) -> str:
return self._model["language"]
diff --git a/src/pip/_vendor/chardet/codingstatemachinedict.py b/src/pip/_vendor/chardet/codingstatemachinedict.py
new file mode 100644
index 000000000..7a3c4c7e3
--- /dev/null
+++ b/src/pip/_vendor/chardet/codingstatemachinedict.py
@@ -0,0 +1,19 @@
+from typing import TYPE_CHECKING, Tuple
+
+if TYPE_CHECKING:
+ # TypedDict was introduced in Python 3.8.
+ #
+ # TODO: Remove the else block and TYPE_CHECKING check when dropping support
+ # for Python 3.7.
+ from typing import TypedDict
+
+ class CodingStateMachineDict(TypedDict, total=False):
+ class_table: Tuple[int, ...]
+ class_factor: int
+ state_table: Tuple[int, ...]
+ char_len_table: Tuple[int, ...]
+ name: str
+ language: str # Optional key
+
+else:
+ CodingStateMachineDict = dict
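
The `TYPE_CHECKING` split in this new module lets type checkers see a `TypedDict` while Python 3.7, which lacks `typing.TypedDict`, gets a plain `dict` at runtime. A sketch of a model literal that satisfies the annotation; the table values are toy placeholders, whereas real models such as `HZ_SM_MODEL` carry full state tables:

    # Sketch: a toy model typed as CodingStateMachineDict.
    from pip._vendor.chardet.codingstatemachinedict import CodingStateMachineDict

    TOY_SM_MODEL: CodingStateMachineDict = {
        "class_table": (0, 1, 1, 0),
        "class_factor": 2,
        "state_table": (0, 1, 1, 0),
        "char_len_table": (0, 1),
        "name": "toy",
        # "language" may be omitted: the TypedDict is declared total=False.
    }
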
diff --git a/src/pip/_vendor/chardet/cp949prober.py b/src/pip/_vendor/chardet/cp949prober.py
index 28a1f3dbb..fa7307ed8 100644
--- a/src/pip/_vendor/chardet/cp949prober.py
+++ b/src/pip/_vendor/chardet/cp949prober.py
@@ -32,7 +32,7 @@ from .mbcssm import CP949_SM_MODEL
class CP949Prober(MultiByteCharSetProber):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
@@ -41,9 +41,9 @@ class CP949Prober(MultiByteCharSetProber):
self.reset()
@property
- def charset_name(self):
+ def charset_name(self) -> str:
return "CP949"
@property
- def language(self):
+ def language(self) -> str:
return "Korean"
diff --git a/src/pip/_vendor/chardet/enums.py b/src/pip/_vendor/chardet/enums.py
index 32a77e76c..5e3e19823 100644
--- a/src/pip/_vendor/chardet/enums.py
+++ b/src/pip/_vendor/chardet/enums.py
@@ -4,6 +4,8 @@ All of the Enums that are used throughout the chardet package.
:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
+from enum import Enum, Flag
+
class InputState:
"""
@@ -15,12 +17,13 @@ class InputState:
HIGH_BYTE = 2
-class LanguageFilter:
+class LanguageFilter(Flag):
"""
This enum represents the different language filters we can apply to a
``UniversalDetector``.
"""
+ NONE = 0x00
CHINESE_SIMPLIFIED = 0x01
CHINESE_TRADITIONAL = 0x02
JAPANESE = 0x04
@@ -31,7 +34,7 @@ class LanguageFilter:
CJK = CHINESE | JAPANESE | KOREAN
-class ProbingState:
+class ProbingState(Enum):
"""
This enum represents the different states a prober can be in.
"""
@@ -62,7 +65,7 @@ class SequenceLikelihood:
POSITIVE = 3
@classmethod
- def get_num_categories(cls):
+ def get_num_categories(cls) -> int:
""":returns: The number of likelihood categories in the enum."""
return 4
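
Turning `LanguageFilter` into an `enum.Flag` keeps the existing bitwise composition (`CJK = CHINESE | JAPANESE | KOREAN`) working while adding an explicit `NONE` default for the probers' constructors. A quick sketch of the semantics:

    # Sketch: Flag-based LanguageFilter composition and membership tests.
    from pip._vendor.chardet.enums import LanguageFilter

    f = LanguageFilter.CHINESE_SIMPLIFIED | LanguageFilter.JAPANESE
    assert f & LanguageFilter.JAPANESE        # flag set: truthy
    assert not (f & LanguageFilter.KOREAN)    # flag unset: falsy (== NONE)
    assert LanguageFilter.NONE == LanguageFilter(0)
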
diff --git a/src/pip/_vendor/chardet/escprober.py b/src/pip/_vendor/chardet/escprober.py
index d9926115d..fd713830d 100644
--- a/src/pip/_vendor/chardet/escprober.py
+++ b/src/pip/_vendor/chardet/escprober.py
@@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from typing import Optional, Union
+
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
from .enums import LanguageFilter, MachineState, ProbingState
@@ -43,7 +45,7 @@ class EscCharSetProber(CharSetProber):
identify these encodings.
"""
- def __init__(self, lang_filter=None):
+ def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
super().__init__(lang_filter=lang_filter)
self.coding_sm = []
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
@@ -53,17 +55,15 @@ class EscCharSetProber(CharSetProber):
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
if self.lang_filter & LanguageFilter.KOREAN:
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
- self.active_sm_count = None
- self._detected_charset = None
- self._detected_language = None
- self._state = None
+ self.active_sm_count = 0
+ self._detected_charset: Optional[str] = None
+ self._detected_language: Optional[str] = None
+ self._state = ProbingState.DETECTING
self.reset()
- def reset(self):
+ def reset(self) -> None:
super().reset()
for coding_sm in self.coding_sm:
- if not coding_sm:
- continue
coding_sm.active = True
coding_sm.reset()
self.active_sm_count = len(self.coding_sm)
@@ -71,20 +71,20 @@ class EscCharSetProber(CharSetProber):
self._detected_language = None
@property
- def charset_name(self):
+ def charset_name(self) -> Optional[str]:
return self._detected_charset
@property
- def language(self):
+ def language(self) -> Optional[str]:
return self._detected_language
- def get_confidence(self):
+ def get_confidence(self) -> float:
return 0.99 if self._detected_charset else 0.00
- def feed(self, byte_str):
+ def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for c in byte_str:
for coding_sm in self.coding_sm:
- if not coding_sm or not coding_sm.active:
+ if not coding_sm.active:
continue
coding_state = coding_sm.next_state(c)
if coding_state == MachineState.ERROR:
diff --git a/src/pip/_vendor/chardet/escsm.py b/src/pip/_vendor/chardet/escsm.py
index 3aa0f4d96..11d4adf77 100644
--- a/src/pip/_vendor/chardet/escsm.py
+++ b/src/pip/_vendor/chardet/escsm.py
@@ -25,6 +25,7 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from .codingstatemachinedict import CodingStateMachineDict
from .enums import MachineState
# fmt: off
@@ -75,7 +76,7 @@ MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
-HZ_SM_MODEL = {
+HZ_SM_MODEL: CodingStateMachineDict = {
"class_table": HZ_CLS,
"class_factor": 6,
"state_table": HZ_ST,
@@ -134,7 +135,7 @@ ISO2022CN_ST = (
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
-ISO2022CN_SM_MODEL = {
+ISO2022CN_SM_MODEL: CodingStateMachineDict = {
"class_table": ISO2022CN_CLS,
"class_factor": 9,
"state_table": ISO2022CN_ST,
@@ -194,7 +195,7 @@ ISO2022JP_ST = (
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-ISO2022JP_SM_MODEL = {
+ISO2022JP_SM_MODEL: CodingStateMachineDict = {
"class_table": ISO2022JP_CLS,
"class_factor": 10,
"state_table": ISO2022JP_ST,
@@ -250,7 +251,7 @@ ISO2022KR_ST = (
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
-ISO2022KR_SM_MODEL = {
+ISO2022KR_SM_MODEL: CodingStateMachineDict = {
"class_table": ISO2022KR_CLS,
"class_factor": 6,
"state_table": ISO2022KR_ST,
diff --git a/src/pip/_vendor/chardet/eucjpprober.py b/src/pip/_vendor/chardet/eucjpprober.py
index abf2e66e2..39487f409 100644
--- a/src/pip/_vendor/chardet/eucjpprober.py
+++ b/src/pip/_vendor/chardet/eucjpprober.py
@@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from typing import Union
+
from .chardistribution import EUCJPDistributionAnalysis
from .codingstatemachine import CodingStateMachine
from .enums import MachineState, ProbingState
@@ -34,26 +36,29 @@ from .mbcssm import EUCJP_SM_MODEL
class EUCJPProber(MultiByteCharSetProber):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
self.distribution_analyzer = EUCJPDistributionAnalysis()
self.context_analyzer = EUCJPContextAnalysis()
self.reset()
- def reset(self):
+ def reset(self) -> None:
super().reset()
self.context_analyzer.reset()
@property
- def charset_name(self):
+ def charset_name(self) -> str:
return "EUC-JP"
@property
- def language(self):
+ def language(self) -> str:
return "Japanese"
- def feed(self, byte_str):
+ def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
+ assert self.coding_sm is not None
+ assert self.distribution_analyzer is not None
+
for i, byte in enumerate(byte_str):
# PY3K: byte_str is a byte array, so byte is an int, not a byte
coding_state = self.coding_sm.next_state(byte)
@@ -89,7 +94,9 @@ class EUCJPProber(MultiByteCharSetProber):
return self.state
- def get_confidence(self):
+ def get_confidence(self) -> float:
+ assert self.distribution_analyzer is not None
+
context_conf = self.context_analyzer.get_confidence()
distrib_conf = self.distribution_analyzer.get_confidence()
return max(context_conf, distrib_conf)
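
The `assert ... is not None` lines added to `feed()` and `get_confidence()` exist for the type checker: the attributes are declared `Optional` on the shared multi-byte base prober, and the asserts narrow them before use. The pattern in isolation, as a standalone sketch with hypothetical names:

    # Sketch: assert-based narrowing of an Optional attribute, mirroring
    # the asserts added to EUCJPProber above. Holder is a made-up class.
    from typing import Optional

    class Holder:
        def __init__(self) -> None:
            self.analyzer: Optional[str] = None  # assigned later, as in the probers

        def use(self) -> str:
            assert self.analyzer is not None  # mypy: Optional[str] -> str
            return self.analyzer.upper()
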
diff --git a/src/pip/_vendor/chardet/euckrprober.py b/src/pip/_vendor/chardet/euckrprober.py
index 154a6d216..1fc5de046 100644
--- a/src/pip/_vendor/chardet/euckrprober.py
+++ b/src/pip/_vendor/chardet/euckrprober.py
@@ -32,16 +32,16 @@ from .mbcssm import EUCKR_SM_MODEL
class EUCKRProber(MultiByteCharSetProber):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
self.distribution_analyzer = EUCKRDistributionAnalysis()
self.reset()
@property
- def charset_name(self):
+ def charset_name(self) -> str:
return "EUC-KR"
@property
- def language(self):
+ def language(self) -> str:
return "Korean"
diff --git a/src/pip/_vendor/chardet/euctwprober.py b/src/pip/_vendor/chardet/euctwprober.py
index ca10a23ca..a37ab1899 100644
--- a/src/pip/_vendor/chardet/euctwprober.py
+++ b/src/pip/_vendor/chardet/euctwprober.py
@@ -32,16 +32,16 @@ from .mbcssm import EUCTW_SM_MODEL
class EUCTWProber(MultiByteCharSetProber):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
self.distribution_analyzer = EUCTWDistributionAnalysis()
self.reset()
@property
- def charset_name(self):
+ def charset_name(self) -> str:
return "EUC-TW"
@property
- def language(self):
+ def language(self) -> str:
return "Taiwan"
diff --git a/src/pip/_vendor/chardet/gb2312prober.py b/src/pip/_vendor/chardet/gb2312prober.py
index 251c04295..d423e7311 100644
--- a/src/pip/_vendor/chardet/gb2312prober.py
+++ b/src/pip/_vendor/chardet/gb2312prober.py
@@ -32,16 +32,16 @@ from .mbcssm import GB2312_SM_MODEL
class GB2312Prober(MultiByteCharSetProber):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
self.distribution_analyzer = GB2312DistributionAnalysis()
self.reset()
@property
- def charset_name(self):
+ def charset_name(self) -> str:
return "GB2312"
@property
- def language(self):
+ def language(self) -> str:
return "Chinese"
diff --git a/src/pip/_vendor/chardet/hebrewprober.py b/src/pip/_vendor/chardet/hebrewprober.py
index 3ca634bf3..785d0057b 100644
--- a/src/pip/_vendor/chardet/hebrewprober.py
+++ b/src/pip/_vendor/chardet/hebrewprober.py
@@ -25,8 +25,11 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from typing import Optional, Union
+
from .charsetprober import CharSetProber
from .enums import ProbingState
+from .sbcharsetprober import SingleByteCharSetProber
# This prober doesn't actually recognize a language or a charset.
# It is a helper prober for the use of the Hebrew model probers
@@ -127,6 +130,7 @@ from .enums import ProbingState
class HebrewProber(CharSetProber):
+ SPACE = 0x20
# windows-1255 / ISO-8859-8 code points of interest
FINAL_KAF = 0xEA
NORMAL_KAF = 0xEB
@@ -152,31 +156,35 @@ class HebrewProber(CharSetProber):
VISUAL_HEBREW_NAME = "ISO-8859-8"
LOGICAL_HEBREW_NAME = "windows-1255"
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
- self._final_char_logical_score = None
- self._final_char_visual_score = None
- self._prev = None
- self._before_prev = None
- self._logical_prober = None
- self._visual_prober = None
+ self._final_char_logical_score = 0
+ self._final_char_visual_score = 0
+ self._prev = self.SPACE
+ self._before_prev = self.SPACE
+ self._logical_prober: Optional[SingleByteCharSetProber] = None
+ self._visual_prober: Optional[SingleByteCharSetProber] = None
self.reset()
- def reset(self):
+ def reset(self) -> None:
self._final_char_logical_score = 0
self._final_char_visual_score = 0
# The two last characters seen in the previous buffer,
# mPrev and mBeforePrev are initialized to space in order to simulate
# a word delimiter at the beginning of the data
- self._prev = " "
- self._before_prev = " "
+ self._prev = self.SPACE
+ self._before_prev = self.SPACE
# These probers are owned by the group prober.
- def set_model_probers(self, logical_prober, visual_prober):
+ def set_model_probers(
+ self,
+ logical_prober: SingleByteCharSetProber,
+ visual_prober: SingleByteCharSetProber,
+ ) -> None:
self._logical_prober = logical_prober
self._visual_prober = visual_prober
- def is_final(self, c):
+ def is_final(self, c: int) -> bool:
return c in [
self.FINAL_KAF,
self.FINAL_MEM,
@@ -185,7 +193,7 @@ class HebrewProber(CharSetProber):
self.FINAL_TSADI,
]
- def is_non_final(self, c):
+ def is_non_final(self, c: int) -> bool:
# The normal Tsadi is not a good Non-Final letter due to words like
# 'lechotet' (to chat) containing an apostrophe after the tsadi. This
# apostrophe is converted to a space in FilterWithoutEnglishLetters
@@ -198,7 +206,7 @@ class HebrewProber(CharSetProber):
# since these words are quite rare.
return c in [self.NORMAL_KAF, self.NORMAL_MEM, self.NORMAL_NUN, self.NORMAL_PE]
- def feed(self, byte_str):
+ def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
# Final letter analysis for logical-visual decision.
# Look for evidence that the received buffer is either logical Hebrew
# or visual Hebrew.
@@ -232,9 +240,9 @@ class HebrewProber(CharSetProber):
byte_str = self.filter_high_byte_only(byte_str)
for cur in byte_str:
- if cur == " ":
+ if cur == self.SPACE:
# We stand on a space - a word just ended
- if self._before_prev != " ":
+ if self._before_prev != self.SPACE:
# next-to-last char was not a space so self._prev is not a
# 1 letter word
if self.is_final(self._prev):
@@ -247,9 +255,9 @@ class HebrewProber(CharSetProber):
else:
# Not standing on a space
if (
- (self._before_prev == " ")
+ (self._before_prev == self.SPACE)
and (self.is_final(self._prev))
- and (cur != " ")
+ and (cur != self.SPACE)
):
# case (3) [-2:space][-1:final letter][cur:not space]
self._final_char_visual_score += 1
@@ -261,7 +269,10 @@ class HebrewProber(CharSetProber):
return ProbingState.DETECTING
@property
- def charset_name(self):
+ def charset_name(self) -> str:
+ assert self._logical_prober is not None
+ assert self._visual_prober is not None
+
# Make the decision: is it Logical or Visual?
# If the final letter score distance is dominant enough, rely on it.
finalsub = self._final_char_logical_score - self._final_char_visual_score
@@ -289,11 +300,14 @@ class HebrewProber(CharSetProber):
return self.LOGICAL_HEBREW_NAME
@property
- def language(self):
+ def language(self) -> str:
return "Hebrew"
@property
- def state(self):
+ def state(self) -> ProbingState:
+ assert self._logical_prober is not None
+ assert self._visual_prober is not None
+
# Remain active as long as any of the model probers are active.
if (self._logical_prober.state == ProbingState.NOT_ME) and (
self._visual_prober.state == ProbingState.NOT_ME
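
The switch from the one-character string " " to the integer constant `SPACE = 0x20` addresses a Python 3 pitfall: iterating over `bytes` yields `int`s, so the old comparisons against a `str` could never be true. A sketch of the underlying behaviour:

    # Sketch: why HebrewProber now compares against SPACE = 0x20.
    buf = b"a b"
    for cur in buf:            # iterating bytes yields ints: 97, 32, 98
        print(cur == " ")      # always False: an int never equals a str
        print(cur == 0x20)     # True exactly at the space byte
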
diff --git a/src/pip/_vendor/chardet/johabprober.py b/src/pip/_vendor/chardet/johabprober.py
index 6f359d193..d7364ba61 100644
--- a/src/pip/_vendor/chardet/johabprober.py
+++ b/src/pip/_vendor/chardet/johabprober.py
@@ -32,16 +32,16 @@ from .mbcssm import JOHAB_SM_MODEL
class JOHABProber(MultiByteCharSetProber):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(JOHAB_SM_MODEL)
self.distribution_analyzer = JOHABDistributionAnalysis()
self.reset()
@property
- def charset_name(self):
+ def charset_name(self) -> str:
return "Johab"
@property
- def language(self):
+ def language(self) -> str:
return "Korean"
diff --git a/src/pip/_vendor/chardet/jpcntx.py b/src/pip/_vendor/chardet/jpcntx.py
index 7a8e5be06..2f53bdda0 100644
--- a/src/pip/_vendor/chardet/jpcntx.py
+++ b/src/pip/_vendor/chardet/jpcntx.py
@@ -25,6 +25,7 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from typing import List, Tuple, Union
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
# fmt: off
@@ -123,15 +124,15 @@ class JapaneseContextAnalysis:
MAX_REL_THRESHOLD = 1000
MINIMUM_DATA_THRESHOLD = 4
- def __init__(self):
- self._total_rel = None
- self._rel_sample = None
- self._need_to_skip_char_num = None
- self._last_char_order = None
- self._done = None
+ def __init__(self) -> None:
+ self._total_rel = 0
+ self._rel_sample: List[int] = []
+ self._need_to_skip_char_num = 0
+ self._last_char_order = -1
+ self._done = False
self.reset()
- def reset(self):
+ def reset(self) -> None:
self._total_rel = 0 # total sequence received
# category counters, each integer counts sequence in its category
self._rel_sample = [0] * self.NUM_OF_CATEGORY
@@ -143,7 +144,7 @@ class JapaneseContextAnalysis:
# been made
self._done = False
- def feed(self, byte_str, num_bytes):
+ def feed(self, byte_str: Union[bytes, bytearray], num_bytes: int) -> None:
if self._done:
return
@@ -172,29 +173,29 @@ class JapaneseContextAnalysis:
] += 1
self._last_char_order = order
- def got_enough_data(self):
+ def got_enough_data(self) -> bool:
return self._total_rel > self.ENOUGH_REL_THRESHOLD
- def get_confidence(self):
+ def get_confidence(self) -> float:
# This is just one way to calculate confidence. It works well for me.
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
return (self._total_rel - self._rel_sample[0]) / self._total_rel
return self.DONT_KNOW
- def get_order(self, _):
+ def get_order(self, _: Union[bytes, bytearray]) -> Tuple[int, int]:
return -1, 1
class SJISContextAnalysis(JapaneseContextAnalysis):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self._charset_name = "SHIFT_JIS"
@property
- def charset_name(self):
+ def charset_name(self) -> str:
return self._charset_name
- def get_order(self, byte_str):
+ def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]:
if not byte_str:
return -1, 1
# find out current char's byte length
@@ -216,7 +217,7 @@ class SJISContextAnalysis(JapaneseContextAnalysis):
class EUCJPContextAnalysis(JapaneseContextAnalysis):
- def get_order(self, byte_str):
+ def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]:
if not byte_str:
return -1, 1
# find out current char's byte length
diff --git a/src/pip/_vendor/chardet/latin1prober.py b/src/pip/_vendor/chardet/latin1prober.py
index 241f14ab9..59a01d91b 100644
--- a/src/pip/_vendor/chardet/latin1prober.py
+++ b/src/pip/_vendor/chardet/latin1prober.py
@@ -26,6 +26,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from typing import List, Union
+
from .charsetprober import CharSetProber
from .enums import ProbingState
@@ -96,26 +98,26 @@ Latin1ClassModel = (
class Latin1Prober(CharSetProber):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
- self._last_char_class = None
- self._freq_counter = None
+ self._last_char_class = OTH
+ self._freq_counter: List[int] = []
self.reset()
- def reset(self):
+ def reset(self) -> None:
self._last_char_class = OTH
self._freq_counter = [0] * FREQ_CAT_NUM
super().reset()
@property
- def charset_name(self):
+ def charset_name(self) -> str:
return "ISO-8859-1"
@property
- def language(self):
+ def language(self) -> str:
return ""
- def feed(self, byte_str):
+ def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
byte_str = self.remove_xml_tags(byte_str)
for c in byte_str:
char_class = Latin1_CharToClass[c]
@@ -128,7 +130,7 @@ class Latin1Prober(CharSetProber):
return self.state
- def get_confidence(self):
+ def get_confidence(self) -> float:
if self.state == ProbingState.NOT_ME:
return 0.01
diff --git a/src/pip/_vendor/chardet/macromanprober.py b/src/pip/_vendor/chardet/macromanprober.py
new file mode 100644
index 000000000..1425d10ec
--- /dev/null
+++ b/src/pip/_vendor/chardet/macromanprober.py
@@ -0,0 +1,162 @@
+######################## BEGIN LICENSE BLOCK ########################
+# This code was modified from latin1prober.py by Rob Speer <rob@lumino.so>.
+# The Original Code is Mozilla Universal charset detector code.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2001
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+# Rob Speer - adapt to MacRoman encoding
+# Mark Pilgrim - port to Python
+# Shy Shalom - original C code
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+# 02110-1301 USA
+######################### END LICENSE BLOCK #########################
+
+from typing import List, Union
+
+from .charsetprober import CharSetProber
+from .enums import ProbingState
+
+FREQ_CAT_NUM = 4
+
+UDF = 0 # undefined
+OTH = 1 # other
+ASC = 2 # ascii capital letter
+ASS = 3 # ascii small letter
+ACV = 4 # accent capital vowel
+ACO = 5 # accent capital other
+ASV = 6 # accent small vowel
+ASO = 7 # accent small other
+ODD = 8 # character that is unlikely to appear
+CLASS_NUM = 9 # total classes
+
+# The change from Latin1 is that we explicitly look for extended characters
+# that are infrequently-occurring symbols, and consider them to always be
+# improbable. This should let MacRoman get out of the way of more likely
+# encodings in most situations.
+
+# fmt: off
+MacRoman_CharToClass = (
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
+ OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
+ ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
+ OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
+ ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
+ ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
+ ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
+ ACV, ACV, ACO, ACV, ACO, ACV, ACV, ASV, # 80 - 87
+ ASV, ASV, ASV, ASV, ASV, ASO, ASV, ASV, # 88 - 8F
+ ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASV, # 90 - 97
+ ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # 98 - 9F
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, ASO, # A0 - A7
+ OTH, OTH, ODD, ODD, OTH, OTH, ACV, ACV, # A8 - AF
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
+ OTH, OTH, OTH, OTH, OTH, OTH, ASV, ASV, # B8 - BF
+ OTH, OTH, ODD, OTH, ODD, OTH, OTH, OTH, # C0 - C7
+ OTH, OTH, OTH, ACV, ACV, ACV, ACV, ASV, # C8 - CF
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, ODD, # D0 - D7
+ ASV, ACV, ODD, OTH, OTH, OTH, OTH, OTH, # D8 - DF
+ OTH, OTH, OTH, OTH, OTH, ACV, ACV, ACV, # E0 - E7
+ ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # E8 - EF
+ ODD, ACV, ACV, ACV, ACV, ASV, ODD, ODD, # F0 - F7
+ ODD, ODD, ODD, ODD, ODD, ODD, ODD, ODD, # F8 - FF
+)
+
+# 0 : illegal
+# 1 : very unlikely
+# 2 : normal
+# 3 : very likely
+MacRomanClassModel = (
+# UDF OTH ASC ASS ACV ACO ASV ASO ODD
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, # UDF
+ 0, 3, 3, 3, 3, 3, 3, 3, 1, # OTH
+ 0, 3, 3, 3, 3, 3, 3, 3, 1, # ASC
+ 0, 3, 3, 3, 1, 1, 3, 3, 1, # ASS
+ 0, 3, 3, 3, 1, 2, 1, 2, 1, # ACV
+ 0, 3, 3, 3, 3, 3, 3, 3, 1, # ACO
+ 0, 3, 1, 3, 1, 1, 1, 3, 1, # ASV
+ 0, 3, 1, 3, 1, 1, 3, 3, 1, # ASO
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, # ODD
+)
+# fmt: on
+
+
+class MacRomanProber(CharSetProber):
+ def __init__(self) -> None:
+ super().__init__()
+ self._last_char_class = OTH
+ self._freq_counter: List[int] = []
+ self.reset()
+
+ def reset(self) -> None:
+ self._last_char_class = OTH
+ self._freq_counter = [0] * FREQ_CAT_NUM
+
+ # express the prior that MacRoman is a somewhat rare encoding;
+ # this can be done by starting out in a slightly improbable state
+ # that must be overcome
+ self._freq_counter[2] = 10
+
+ super().reset()
+
+ @property
+ def charset_name(self) -> str:
+ return "MacRoman"
+
+ @property
+ def language(self) -> str:
+ return ""
+
+ def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
+ byte_str = self.remove_xml_tags(byte_str)
+ for c in byte_str:
+ char_class = MacRoman_CharToClass[c]
+ freq = MacRomanClassModel[(self._last_char_class * CLASS_NUM) + char_class]
+ if freq == 0:
+ self._state = ProbingState.NOT_ME
+ break
+ self._freq_counter[freq] += 1
+ self._last_char_class = char_class
+
+ return self.state
+
+ def get_confidence(self) -> float:
+ if self.state == ProbingState.NOT_ME:
+ return 0.01
+
+ total = sum(self._freq_counter)
+ confidence = (
+ 0.0
+ if total < 0.01
+ else (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total
+ )
+ confidence = max(confidence, 0.0)
+ # lower the confidence of MacRoman so that other more accurate
+ # detectors can take priority.
+ confidence *= 0.73
+ return confidence
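
Two knobs in the new prober keep MacRoman from winning too eagerly: reset() seeds _freq_counter[2] with 10, expressing a prior against this fairly rare encoding, and get_confidence() scales the result by 0.73. A minimal sketch exercising the class, assuming the vendored import path from this diff ("mac_roman" is a standard library codec):

    from pip._vendor.chardet.macromanprober import MacRomanProber

    prober = MacRomanProber()
    data = "déjà vu – café".encode("mac_roman")  # bytes a classic Mac app might emit
    prober.feed(data)
    print(prober.charset_name, prober.get_confidence())

With a standalone chardet >= 5.1.0 the import would be chardet.macromanprober instead.
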
diff --git a/src/pip/_vendor/chardet/mbcharsetprober.py b/src/pip/_vendor/chardet/mbcharsetprober.py
index bf96ad5d4..666307e8f 100644
--- a/src/pip/_vendor/chardet/mbcharsetprober.py
+++ b/src/pip/_vendor/chardet/mbcharsetprober.py
@@ -27,8 +27,12 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from typing import Optional, Union
+
+from .chardistribution import CharDistributionAnalysis
from .charsetprober import CharSetProber
-from .enums import MachineState, ProbingState
+from .codingstatemachine import CodingStateMachine
+from .enums import LanguageFilter, MachineState, ProbingState
class MultiByteCharSetProber(CharSetProber):
@@ -36,29 +40,24 @@ class MultiByteCharSetProber(CharSetProber):
MultiByteCharSetProber
"""
- def __init__(self, lang_filter=None):
+ def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
super().__init__(lang_filter=lang_filter)
- self.distribution_analyzer = None
- self.coding_sm = None
- self._last_char = [0, 0]
+ self.distribution_analyzer: Optional[CharDistributionAnalysis] = None
+ self.coding_sm: Optional[CodingStateMachine] = None
+ self._last_char = bytearray(b"\0\0")
- def reset(self):
+ def reset(self) -> None:
super().reset()
if self.coding_sm:
self.coding_sm.reset()
if self.distribution_analyzer:
self.distribution_analyzer.reset()
- self._last_char = [0, 0]
-
- @property
- def charset_name(self):
- raise NotImplementedError
+ self._last_char = bytearray(b"\0\0")
- @property
- def language(self):
- raise NotImplementedError
+ def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
+ assert self.coding_sm is not None
+ assert self.distribution_analyzer is not None
- def feed(self, byte_str):
for i, byte in enumerate(byte_str):
coding_state = self.coding_sm.next_state(byte)
if coding_state == MachineState.ERROR:
@@ -91,5 +90,6 @@ class MultiByteCharSetProber(CharSetProber):
return self.state
- def get_confidence(self):
+ def get_confidence(self) -> float:
+ assert self.distribution_analyzer is not None
return self.distribution_analyzer.get_confidence()
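
The new asserts in feed() and get_confidence() exist for the type checker: coding_sm and distribution_analyzer are declared Optional because subclasses assign them, and "assert x is not None" narrows the Optional away at the point of use. The pattern in isolation (a generic sketch, not chardet code):

    from typing import Optional

    class Engine:
        def start(self) -> None:
            print("started")

    class Base:
        def __init__(self) -> None:
            # Subclasses are expected to assign a real Engine.
            self.engine: Optional[Engine] = None

    class Concrete(Base):
        def __init__(self) -> None:
            super().__init__()
            self.engine = Engine()

        def run(self) -> None:
            # Narrows Optional[Engine] to Engine for mypy; at runtime it
            # also fails fast if a subclass forgot the assignment.
            assert self.engine is not None
            self.engine.start()

    Concrete().run()
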
diff --git a/src/pip/_vendor/chardet/mbcsgroupprober.py b/src/pip/_vendor/chardet/mbcsgroupprober.py
index 94488360c..6cb9cc7b3 100644
--- a/src/pip/_vendor/chardet/mbcsgroupprober.py
+++ b/src/pip/_vendor/chardet/mbcsgroupprober.py
@@ -30,6 +30,7 @@
from .big5prober import Big5Prober
from .charsetgroupprober import CharSetGroupProber
from .cp949prober import CP949Prober
+from .enums import LanguageFilter
from .eucjpprober import EUCJPProber
from .euckrprober import EUCKRProber
from .euctwprober import EUCTWProber
@@ -40,7 +41,7 @@ from .utf8prober import UTF8Prober
class MBCSGroupProber(CharSetGroupProber):
- def __init__(self, lang_filter=None):
+ def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
super().__init__(lang_filter=lang_filter)
self.probers = [
UTF8Prober(),
diff --git a/src/pip/_vendor/chardet/mbcssm.py b/src/pip/_vendor/chardet/mbcssm.py
index d3b9c4b75..7bbe97e66 100644
--- a/src/pip/_vendor/chardet/mbcssm.py
+++ b/src/pip/_vendor/chardet/mbcssm.py
@@ -25,6 +25,7 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from .codingstatemachinedict import CodingStateMachineDict
from .enums import MachineState
# BIG5
@@ -74,7 +75,7 @@ BIG5_ST = (
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
-BIG5_SM_MODEL = {
+BIG5_SM_MODEL: CodingStateMachineDict = {
"class_table": BIG5_CLS,
"class_factor": 5,
"state_table": BIG5_ST,
@@ -117,7 +118,7 @@ CP949_ST = (
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
-CP949_SM_MODEL = {
+CP949_SM_MODEL: CodingStateMachineDict = {
"class_table": CP949_CLS,
"class_factor": 10,
"state_table": CP949_ST,
@@ -173,7 +174,7 @@ EUCJP_ST = (
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
-EUCJP_SM_MODEL = {
+EUCJP_SM_MODEL: CodingStateMachineDict = {
"class_table": EUCJP_CLS,
"class_factor": 6,
"state_table": EUCJP_ST,
@@ -226,7 +227,7 @@ EUCKR_ST = (
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
-EUCKR_SM_MODEL = {
+EUCKR_SM_MODEL: CodingStateMachineDict = {
"class_table": EUCKR_CLS,
"class_factor": 4,
"state_table": EUCKR_ST,
@@ -283,7 +284,7 @@ JOHAB_ST = (
JOHAB_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 0, 0, 2, 2, 2)
-JOHAB_SM_MODEL = {
+JOHAB_SM_MODEL: CodingStateMachineDict = {
"class_table": JOHAB_CLS,
"class_factor": 10,
"state_table": JOHAB_ST,
@@ -340,7 +341,7 @@ EUCTW_ST = (
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
-EUCTW_SM_MODEL = {
+EUCTW_SM_MODEL: CodingStateMachineDict = {
"class_table": EUCTW_CLS,
"class_factor": 7,
"state_table": EUCTW_ST,
@@ -402,7 +403,7 @@ GB2312_ST = (
# 2 here.
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
-GB2312_SM_MODEL = {
+GB2312_SM_MODEL: CodingStateMachineDict = {
"class_table": GB2312_CLS,
"class_factor": 7,
"state_table": GB2312_ST,
@@ -458,7 +459,7 @@ SJIS_ST = (
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
-SJIS_SM_MODEL = {
+SJIS_SM_MODEL: CodingStateMachineDict = {
"class_table": SJIS_CLS,
"class_factor": 6,
"state_table": SJIS_ST,
@@ -516,7 +517,7 @@ UCS2BE_ST = (
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
-UCS2BE_SM_MODEL = {
+UCS2BE_SM_MODEL: CodingStateMachineDict = {
"class_table": UCS2BE_CLS,
"class_factor": 6,
"state_table": UCS2BE_ST,
@@ -574,7 +575,7 @@ UCS2LE_ST = (
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
-UCS2LE_SM_MODEL = {
+UCS2LE_SM_MODEL: CodingStateMachineDict = {
"class_table": UCS2LE_CLS,
"class_factor": 6,
"state_table": UCS2LE_ST,
@@ -651,7 +652,7 @@ UTF8_ST = (
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
-UTF8_SM_MODEL = {
+UTF8_SM_MODEL: CodingStateMachineDict = {
"class_table": UTF8_CLS,
"class_factor": 16,
"state_table": UTF8_ST,
diff --git a/src/pip/_vendor/chardet/metadata/languages.py b/src/pip/_vendor/chardet/metadata/languages.py
index 1d37884c3..eb40c5f0c 100644
--- a/src/pip/_vendor/chardet/metadata/languages.py
+++ b/src/pip/_vendor/chardet/metadata/languages.py
@@ -6,6 +6,7 @@ This code is based on the language metadata from the uchardet project.
"""
from string import ascii_letters
+from typing import List, Optional
# TODO: Add Ukrainian (KOI8-U)
@@ -33,13 +34,13 @@ class Language:
def __init__(
self,
- name=None,
- iso_code=None,
- use_ascii=True,
- charsets=None,
- alphabet=None,
- wiki_start_pages=None,
- ):
+ name: Optional[str] = None,
+ iso_code: Optional[str] = None,
+ use_ascii: bool = True,
+ charsets: Optional[List[str]] = None,
+ alphabet: Optional[str] = None,
+ wiki_start_pages: Optional[List[str]] = None,
+ ) -> None:
super().__init__()
self.name = name
self.iso_code = iso_code
@@ -55,7 +56,7 @@ class Language:
self.alphabet = "".join(sorted(set(alphabet))) if alphabet else None
self.wiki_start_pages = wiki_start_pages
- def __repr__(self):
+ def __repr__(self) -> str:
param_str = ", ".join(
f"{k}={v!r}" for k, v in self.__dict__.items() if not k.startswith("_")
)
@@ -103,7 +104,7 @@ LANGUAGES = {
name="Danish",
iso_code="da",
use_ascii=True,
- charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="æøåÆØÅ",
wiki_start_pages=["Forside"],
),
@@ -111,8 +112,8 @@ LANGUAGES = {
name="German",
iso_code="de",
use_ascii=True,
- charsets=["ISO-8859-1", "WINDOWS-1252"],
- alphabet="äöüßÄÖÜ",
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
+ alphabet="äöüßẞÄÖÜ",
wiki_start_pages=["Wikipedia:Hauptseite"],
),
"Greek": Language(
@@ -127,7 +128,7 @@ LANGUAGES = {
name="English",
iso_code="en",
use_ascii=True,
- charsets=["ISO-8859-1", "WINDOWS-1252"],
+ charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"],
wiki_start_pages=["Main_Page"],
),
"Esperanto": Language(
@@ -143,7 +144,7 @@ LANGUAGES = {
name="Spanish",
iso_code="es",
use_ascii=True,
- charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="ñáéíóúüÑÁÉÍÓÚÜ",
wiki_start_pages=["Wikipedia:Portada"],
),
@@ -161,7 +162,7 @@ LANGUAGES = {
name="Finnish",
iso_code="fi",
use_ascii=True,
- charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="ÅÄÖŠŽåäöšž",
wiki_start_pages=["Wikipedia:Etusivu"],
),
@@ -169,7 +170,7 @@ LANGUAGES = {
name="French",
iso_code="fr",
use_ascii=True,
- charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ",
wiki_start_pages=["Wikipédia:Accueil_principal", "Bœuf (animal)"],
),
@@ -203,7 +204,7 @@ LANGUAGES = {
name="Italian",
iso_code="it",
use_ascii=True,
- charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="ÀÈÉÌÒÓÙàèéìòóù",
wiki_start_pages=["Pagina_principale"],
),
@@ -237,7 +238,7 @@ LANGUAGES = {
name="Dutch",
iso_code="nl",
use_ascii=True,
- charsets=["ISO-8859-1", "WINDOWS-1252"],
+ charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"],
wiki_start_pages=["Hoofdpagina"],
),
"Polish": Language(
@@ -253,7 +254,7 @@ LANGUAGES = {
name="Portuguese",
iso_code="pt",
use_ascii=True,
- charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
+ charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú",
wiki_start_pages=["Wikipédia:Página_principal"],
),
diff --git a/src/pip/_vendor/chardet/py.typed b/src/pip/_vendor/chardet/py.typed
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/pip/_vendor/chardet/py.typed
diff --git a/src/pip/_vendor/chardet/resultdict.py b/src/pip/_vendor/chardet/resultdict.py
new file mode 100644
index 000000000..7d36e64c4
--- /dev/null
+++ b/src/pip/_vendor/chardet/resultdict.py
@@ -0,0 +1,16 @@
+from typing import TYPE_CHECKING, Optional
+
+if TYPE_CHECKING:
+ # TypedDict was introduced in Python 3.8.
+ #
+ # TODO: Remove the else block and TYPE_CHECKING check when dropping support
+ # for Python 3.7.
+ from typing import TypedDict
+
+ class ResultDict(TypedDict):
+ encoding: Optional[str]
+ confidence: float
+ language: Optional[str]
+
+else:
+ ResultDict = dict
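
The TYPE_CHECKING split gives mypy a TypedDict to check result literals against while keeping Python 3.7 compatibility: at runtime, ResultDict is simply dict on every version. A minimal sketch, assuming the vendored path:

    from pip._vendor.chardet.resultdict import ResultDict

    result: ResultDict = {
        "encoding": "utf-8",
        "confidence": 0.99,
        "language": "",
    }
    # At runtime ResultDict is plain dict; under mypy, a misspelled key
    # such as "confidnce" or a str-valued confidence would be rejected.
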
diff --git a/src/pip/_vendor/chardet/sbcharsetprober.py b/src/pip/_vendor/chardet/sbcharsetprober.py
index 31d70e154..0ffbcdd2c 100644
--- a/src/pip/_vendor/chardet/sbcharsetprober.py
+++ b/src/pip/_vendor/chardet/sbcharsetprober.py
@@ -26,23 +26,20 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-from collections import namedtuple
+from typing import Dict, List, NamedTuple, Optional, Union
from .charsetprober import CharSetProber
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
-SingleByteCharSetModel = namedtuple(
- "SingleByteCharSetModel",
- [
- "charset_name",
- "language",
- "char_to_order_map",
- "language_model",
- "typical_positive_ratio",
- "keep_ascii_letters",
- "alphabet",
- ],
-)
+
+class SingleByteCharSetModel(NamedTuple):
+ charset_name: str
+ language: str
+ char_to_order_map: Dict[int, int]
+ language_model: Dict[int, Dict[int, int]]
+ typical_positive_ratio: float
+ keep_ascii_letters: bool
+ alphabet: str
class SingleByteCharSetProber(CharSetProber):
@@ -51,22 +48,27 @@ class SingleByteCharSetProber(CharSetProber):
POSITIVE_SHORTCUT_THRESHOLD = 0.95
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
- def __init__(self, model, is_reversed=False, name_prober=None):
+ def __init__(
+ self,
+ model: SingleByteCharSetModel,
+ is_reversed: bool = False,
+ name_prober: Optional[CharSetProber] = None,
+ ) -> None:
super().__init__()
self._model = model
# TRUE if we need to reverse every pair in the model lookup
self._reversed = is_reversed
# Optional auxiliary prober for name decision
self._name_prober = name_prober
- self._last_order = None
- self._seq_counters = None
- self._total_seqs = None
- self._total_char = None
- self._control_char = None
- self._freq_char = None
+ self._last_order = 255
+ self._seq_counters: List[int] = []
+ self._total_seqs = 0
+ self._total_char = 0
+ self._control_char = 0
+ self._freq_char = 0
self.reset()
- def reset(self):
+ def reset(self) -> None:
super().reset()
# char order of last character
self._last_order = 255
@@ -78,18 +80,18 @@ class SingleByteCharSetProber(CharSetProber):
self._freq_char = 0
@property
- def charset_name(self):
+ def charset_name(self) -> Optional[str]:
if self._name_prober:
return self._name_prober.charset_name
return self._model.charset_name
@property
- def language(self):
+ def language(self) -> Optional[str]:
if self._name_prober:
return self._name_prober.language
return self._model.language
- def feed(self, byte_str):
+ def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
# TODO: Make filter_international_words keep things in self.alphabet
if not self._model.keep_ascii_letters:
byte_str = self.filter_international_words(byte_str)
@@ -139,7 +141,7 @@ class SingleByteCharSetProber(CharSetProber):
return self.state
- def get_confidence(self):
+ def get_confidence(self) -> float:
r = 0.01
if self._total_seqs > 0:
r = (
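
Switching SingleByteCharSetModel from collections.namedtuple to a typing.NamedTuple subclass keeps the tuple behavior but attaches a type to every field. The difference in miniature (a generic sketch):

    from typing import NamedTuple

    class Point(NamedTuple):
        x: int
        y: int
        label: str = ""  # the class syntax also allows per-field defaults

    p = Point(1, 2)
    print(p.x, p._replace(label="origin"))
    # Unlike collections.namedtuple, mypy sees the field types, so
    # Point("1", 2) is flagged before it ever runs.
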
diff --git a/src/pip/_vendor/chardet/sbcsgroupprober.py b/src/pip/_vendor/chardet/sbcsgroupprober.py
index cad001cb1..890ae8465 100644
--- a/src/pip/_vendor/chardet/sbcsgroupprober.py
+++ b/src/pip/_vendor/chardet/sbcsgroupprober.py
@@ -48,7 +48,7 @@ from .sbcharsetprober import SingleByteCharSetProber
class SBCSGroupProber(CharSetGroupProber):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
hebrew_prober = HebrewProber()
logical_hebrew_prober = SingleByteCharSetProber(
diff --git a/src/pip/_vendor/chardet/sjisprober.py b/src/pip/_vendor/chardet/sjisprober.py
index 3bcbdb71d..91df07796 100644
--- a/src/pip/_vendor/chardet/sjisprober.py
+++ b/src/pip/_vendor/chardet/sjisprober.py
@@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from typing import Union
+
from .chardistribution import SJISDistributionAnalysis
from .codingstatemachine import CodingStateMachine
from .enums import MachineState, ProbingState
@@ -34,26 +36,29 @@ from .mbcssm import SJIS_SM_MODEL
class SJISProber(MultiByteCharSetProber):
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
self.distribution_analyzer = SJISDistributionAnalysis()
self.context_analyzer = SJISContextAnalysis()
self.reset()
- def reset(self):
+ def reset(self) -> None:
super().reset()
self.context_analyzer.reset()
@property
- def charset_name(self):
+ def charset_name(self) -> str:
return self.context_analyzer.charset_name
@property
- def language(self):
+ def language(self) -> str:
return "Japanese"
- def feed(self, byte_str):
+ def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
+ assert self.coding_sm is not None
+ assert self.distribution_analyzer is not None
+
for i, byte in enumerate(byte_str):
coding_state = self.coding_sm.next_state(byte)
if coding_state == MachineState.ERROR:
@@ -92,7 +97,9 @@ class SJISProber(MultiByteCharSetProber):
return self.state
- def get_confidence(self):
+ def get_confidence(self) -> float:
+ assert self.distribution_analyzer is not None
+
context_conf = self.context_analyzer.get_confidence()
distrib_conf = self.distribution_analyzer.get_confidence()
return max(context_conf, distrib_conf)
diff --git a/src/pip/_vendor/chardet/universaldetector.py b/src/pip/_vendor/chardet/universaldetector.py
index 22fcf8290..30c441dc2 100644
--- a/src/pip/_vendor/chardet/universaldetector.py
+++ b/src/pip/_vendor/chardet/universaldetector.py
@@ -39,12 +39,16 @@ class a user of ``chardet`` should use.
import codecs
import logging
import re
+from typing import List, Optional, Union
from .charsetgroupprober import CharSetGroupProber
+from .charsetprober import CharSetProber
from .enums import InputState, LanguageFilter, ProbingState
from .escprober import EscCharSetProber
from .latin1prober import Latin1Prober
+from .macromanprober import MacRomanProber
from .mbcsgroupprober import MBCSGroupProber
+from .resultdict import ResultDict
from .sbcsgroupprober import SBCSGroupProber
from .utf1632prober import UTF1632Prober
@@ -80,34 +84,55 @@ class UniversalDetector:
"iso-8859-9": "Windows-1254",
"iso-8859-13": "Windows-1257",
}
+ # Based on https://encoding.spec.whatwg.org/#names-and-labels
+ # but altered to match Python names for encodings and remove mappings
+ # that break tests.
+ LEGACY_MAP = {
+ "ascii": "Windows-1252",
+ "iso-8859-1": "Windows-1252",
+ "tis-620": "ISO-8859-11",
+ "iso-8859-9": "Windows-1254",
+ "gb2312": "GB18030",
+ "euc-kr": "CP949",
+ "utf-16le": "UTF-16",
+ }
- def __init__(self, lang_filter=LanguageFilter.ALL):
- self._esc_charset_prober = None
- self._utf1632_prober = None
- self._charset_probers = []
- self.result = None
- self.done = None
- self._got_data = None
- self._input_state = None
- self._last_char = None
+ def __init__(
+ self,
+ lang_filter: LanguageFilter = LanguageFilter.ALL,
+ should_rename_legacy: bool = False,
+ ) -> None:
+ self._esc_charset_prober: Optional[EscCharSetProber] = None
+ self._utf1632_prober: Optional[UTF1632Prober] = None
+ self._charset_probers: List[CharSetProber] = []
+ self.result: ResultDict = {
+ "encoding": None,
+ "confidence": 0.0,
+ "language": None,
+ }
+ self.done = False
+ self._got_data = False
+ self._input_state = InputState.PURE_ASCII
+ self._last_char = b""
self.lang_filter = lang_filter
self.logger = logging.getLogger(__name__)
- self._has_win_bytes = None
+ self._has_win_bytes = False
+ self.should_rename_legacy = should_rename_legacy
self.reset()
@property
- def input_state(self):
+ def input_state(self) -> int:
return self._input_state
@property
- def has_win_bytes(self):
+ def has_win_bytes(self) -> bool:
return self._has_win_bytes
@property
- def charset_probers(self):
+ def charset_probers(self) -> List[CharSetProber]:
return self._charset_probers
- def reset(self):
+ def reset(self) -> None:
"""
Reset the UniversalDetector and all of its probers back to their
initial states. This is called by ``__init__``, so you only need to
@@ -126,7 +151,7 @@ class UniversalDetector:
for prober in self._charset_probers:
prober.reset()
- def feed(self, byte_str):
+ def feed(self, byte_str: Union[bytes, bytearray]) -> None:
"""
Takes a chunk of a document and feeds it through all of the relevant
charset probers.
@@ -166,6 +191,7 @@ class UniversalDetector:
elif byte_str.startswith(b"\xFE\xFF\x00\x00"):
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
self.result = {
+ # TODO: This encoding is not supported by Python. Should remove?
"encoding": "X-ISO-10646-UCS-4-3412",
"confidence": 1.0,
"language": "",
@@ -173,6 +199,7 @@ class UniversalDetector:
elif byte_str.startswith(b"\x00\x00\xFF\xFE"):
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
self.result = {
+ # TODO: This encoding is not supported by Python. Should remove?
"encoding": "X-ISO-10646-UCS-4-2143",
"confidence": 1.0,
"language": "",
@@ -242,6 +269,7 @@ class UniversalDetector:
if self.lang_filter & LanguageFilter.NON_CJK:
self._charset_probers.append(SBCSGroupProber())
self._charset_probers.append(Latin1Prober())
+ self._charset_probers.append(MacRomanProber())
for prober in self._charset_probers:
if prober.feed(byte_str) == ProbingState.FOUND_IT:
self.result = {
@@ -254,7 +282,7 @@ class UniversalDetector:
if self.WIN_BYTE_DETECTOR.search(byte_str):
self._has_win_bytes = True
- def close(self):
+ def close(self) -> ResultDict:
"""
Stop analyzing the current document and come up with a final
prediction.
@@ -288,7 +316,8 @@ class UniversalDetector:
max_prober = prober
if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
charset_name = max_prober.charset_name
- lower_charset_name = max_prober.charset_name.lower()
+ assert charset_name is not None
+ lower_charset_name = charset_name.lower()
confidence = max_prober.get_confidence()
# Use Windows encoding name instead of ISO-8859 if we saw any
# extra Windows-specific bytes
@@ -297,6 +326,11 @@ class UniversalDetector:
charset_name = self.ISO_WIN_MAP.get(
lower_charset_name, charset_name
)
+ # Rename legacy encodings with superset encodings if asked
+ if self.should_rename_legacy:
+ charset_name = self.LEGACY_MAP.get(
+ (charset_name or "").lower(), charset_name
+ )
self.result = {
"encoding": charset_name,
"confidence": confidence,
diff --git a/src/pip/_vendor/chardet/utf1632prober.py b/src/pip/_vendor/chardet/utf1632prober.py
index 9fd1580b8..6bdec63d6 100644
--- a/src/pip/_vendor/chardet/utf1632prober.py
+++ b/src/pip/_vendor/chardet/utf1632prober.py
@@ -18,6 +18,8 @@
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from typing import List, Union
+
from .charsetprober import CharSetProber
from .enums import ProbingState
@@ -36,7 +38,7 @@ class UTF1632Prober(CharSetProber):
# a fixed constant ratio of expected zeros or non-zeros in modulo-position.
EXPECTED_RATIO = 0.94
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.position = 0
self.zeros_at_mod = [0] * 4
@@ -51,7 +53,7 @@ class UTF1632Prober(CharSetProber):
self.first_half_surrogate_pair_detected_16le = False
self.reset()
- def reset(self):
+ def reset(self) -> None:
super().reset()
self.position = 0
self.zeros_at_mod = [0] * 4
@@ -66,7 +68,7 @@ class UTF1632Prober(CharSetProber):
self.quad = [0, 0, 0, 0]
@property
- def charset_name(self):
+ def charset_name(self) -> str:
if self.is_likely_utf32be():
return "utf-32be"
if self.is_likely_utf32le():
@@ -79,16 +81,16 @@ class UTF1632Prober(CharSetProber):
return "utf-16"
@property
- def language(self):
+ def language(self) -> str:
return ""
- def approx_32bit_chars(self):
+ def approx_32bit_chars(self) -> float:
return max(1.0, self.position / 4.0)
- def approx_16bit_chars(self):
+ def approx_16bit_chars(self) -> float:
return max(1.0, self.position / 2.0)
- def is_likely_utf32be(self):
+ def is_likely_utf32be(self) -> bool:
approx_chars = self.approx_32bit_chars()
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
self.zeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO
@@ -98,7 +100,7 @@ class UTF1632Prober(CharSetProber):
and not self.invalid_utf32be
)
- def is_likely_utf32le(self):
+ def is_likely_utf32le(self) -> bool:
approx_chars = self.approx_32bit_chars()
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
self.nonzeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO
@@ -108,7 +110,7 @@ class UTF1632Prober(CharSetProber):
and not self.invalid_utf32le
)
- def is_likely_utf16be(self):
+ def is_likely_utf16be(self) -> bool:
approx_chars = self.approx_16bit_chars()
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
(self.nonzeros_at_mod[1] + self.nonzeros_at_mod[3]) / approx_chars
@@ -118,7 +120,7 @@ class UTF1632Prober(CharSetProber):
and not self.invalid_utf16be
)
- def is_likely_utf16le(self):
+ def is_likely_utf16le(self) -> bool:
approx_chars = self.approx_16bit_chars()
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
(self.nonzeros_at_mod[0] + self.nonzeros_at_mod[2]) / approx_chars
@@ -128,7 +130,7 @@ class UTF1632Prober(CharSetProber):
and not self.invalid_utf16le
)
- def validate_utf32_characters(self, quad):
+ def validate_utf32_characters(self, quad: List[int]) -> None:
"""
Validate if the quad of bytes is valid UTF-32.
@@ -150,7 +152,7 @@ class UTF1632Prober(CharSetProber):
):
self.invalid_utf32le = True
- def validate_utf16_characters(self, pair):
+ def validate_utf16_characters(self, pair: List[int]) -> None:
"""
Validate if the pair of bytes is valid UTF-16.
@@ -182,7 +184,7 @@ class UTF1632Prober(CharSetProber):
else:
self.invalid_utf16le = True
- def feed(self, byte_str):
+ def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for c in byte_str:
mod4 = self.position % 4
self.quad[mod4] = c
@@ -198,7 +200,7 @@ class UTF1632Prober(CharSetProber):
return self.state
@property
- def state(self):
+ def state(self) -> ProbingState:
if self._state in {ProbingState.NOT_ME, ProbingState.FOUND_IT}:
# terminal, decided states
return self._state
@@ -210,7 +212,7 @@ class UTF1632Prober(CharSetProber):
self._state = ProbingState.NOT_ME
return self._state
- def get_confidence(self):
+ def get_confidence(self) -> float:
return (
0.85
if (
diff --git a/src/pip/_vendor/chardet/utf8prober.py b/src/pip/_vendor/chardet/utf8prober.py
index 3aae09e86..d96354d97 100644
--- a/src/pip/_vendor/chardet/utf8prober.py
+++ b/src/pip/_vendor/chardet/utf8prober.py
@@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
+from typing import Union
+
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
from .enums import MachineState, ProbingState
@@ -34,26 +36,26 @@ from .mbcssm import UTF8_SM_MODEL
class UTF8Prober(CharSetProber):
ONE_CHAR_PROB = 0.5
- def __init__(self):
+ def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
- self._num_mb_chars = None
+ self._num_mb_chars = 0
self.reset()
- def reset(self):
+ def reset(self) -> None:
super().reset()
self.coding_sm.reset()
self._num_mb_chars = 0
@property
- def charset_name(self):
+ def charset_name(self) -> str:
return "utf-8"
@property
- def language(self):
+ def language(self) -> str:
return ""
- def feed(self, byte_str):
+ def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for c in byte_str:
coding_state = self.coding_sm.next_state(c)
if coding_state == MachineState.ERROR:
@@ -72,7 +74,7 @@ class UTF8Prober(CharSetProber):
return self.state
- def get_confidence(self):
+ def get_confidence(self) -> float:
unlike = 0.99
if self._num_mb_chars < 6:
unlike *= self.ONE_CHAR_PROB**self._num_mb_chars
diff --git a/src/pip/_vendor/chardet/version.py b/src/pip/_vendor/chardet/version.py
index a08a06b9a..c5e9d85cd 100644
--- a/src/pip/_vendor/chardet/version.py
+++ b/src/pip/_vendor/chardet/version.py
@@ -1,9 +1,9 @@
"""
This module exists only to simplify retrieving the version number of chardet
-from within setup.py and from chardet subpackages.
+from within setuptools and from chardet subpackages.
:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
-__version__ = "5.0.0"
+__version__ = "5.1.0"
VERSION = __version__.split(".")
diff --git a/src/pip/_vendor/platformdirs/__init__.py b/src/pip/_vendor/platformdirs/__init__.py
index 9d513dcf1..82d907163 100644
--- a/src/pip/_vendor/platformdirs/__init__.py
+++ b/src/pip/_vendor/platformdirs/__init__.py
@@ -7,13 +7,15 @@ from __future__ import annotations
import os
import sys
from pathlib import Path
-from typing import TYPE_CHECKING
-if TYPE_CHECKING:
- from pip._vendor.typing_extensions import Literal # pragma: no cover
+if sys.version_info >= (3, 8): # pragma: no cover (py38+)
+ from typing import Literal
+else: # pragma: no cover (py38+)
+ from pip._vendor.typing_extensions import Literal
from .api import PlatformDirsABC
-from .version import __version__, __version_info__
+from .version import __version__
+from .version import __version_tuple__ as __version_info__
def _set_platform_dir_class() -> type[PlatformDirsABC]:
@@ -26,7 +28,7 @@ def _set_platform_dir_class() -> type[PlatformDirsABC]:
if os.getenv("ANDROID_DATA") == "/data" and os.getenv("ANDROID_ROOT") == "/system":
- if os.getenv("SHELL") is not None:
+ if os.getenv("SHELL") or os.getenv("PREFIX"):
return Result
from pip._vendor.platformdirs.android import _android_folder
diff --git a/src/pip/_vendor/platformdirs/unix.py b/src/pip/_vendor/platformdirs/unix.py
index 2fbd4d4f3..9aca5a030 100644
--- a/src/pip/_vendor/platformdirs/unix.py
+++ b/src/pip/_vendor/platformdirs/unix.py
@@ -107,9 +107,9 @@ class Unix(PlatformDirsABC):
@property
def user_log_dir(self) -> str:
"""
- :return: log directory tied to the user, same as `user_data_dir` if not opinionated else ``log`` in it
+ :return: log directory tied to the user, same as `user_state_dir` if not opinionated else ``log`` in it
"""
- path = self.user_cache_dir
+ path = self.user_state_dir
if self.opinion:
path = os.path.join(path, "log")
return path
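
This is a behavioral fix in platformdirs 2.6.2: logs now hang off the state directory rather than the cache directory, matching the corrected docstring. On a default Linux setup that moves the result from ~/.cache/<app>/log to ~/.local/state/<app>/log. A quick check ("myapp" is a placeholder application name):

    from pip._vendor.platformdirs import PlatformDirs

    dirs = PlatformDirs("myapp")
    # With platformdirs 2.6.2 on Linux under default XDG settings this
    # prints ~/.local/state/myapp/log instead of ~/.cache/myapp/log.
    print(dirs.user_log_dir)
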
diff --git a/src/pip/_vendor/platformdirs/version.py b/src/pip/_vendor/platformdirs/version.py
index 6361dbf9c..9f6eb98e8 100644
--- a/src/pip/_vendor/platformdirs/version.py
+++ b/src/pip/_vendor/platformdirs/version.py
@@ -1,4 +1,4 @@
-"""Version information"""
-
-__version__ = "2.5.3"
-__version_info__ = (2, 5, 3)
+# file generated by setuptools_scm
+# don't change, don't track in version control
+__version__ = version = '2.6.2'
+__version_tuple__ = version_tuple = (2, 6, 2)
diff --git a/src/pip/_vendor/requests/__init__.py b/src/pip/_vendor/requests/__init__.py
index 9e97059d1..a47762480 100644
--- a/src/pip/_vendor/requests/__init__.py
+++ b/src/pip/_vendor/requests/__init__.py
@@ -77,8 +77,8 @@ def check_compatibility(urllib3_version, chardet_version, charset_normalizer_ver
elif charset_normalizer_version:
major, minor, patch = charset_normalizer_version.split(".")[:3]
major, minor, patch = int(major), int(minor), int(patch)
- # charset_normalizer >= 2.0.0 < 3.0.0
- assert (2, 0, 0) <= (major, minor, patch) < (3, 0, 0)
+ # charset_normalizer >= 2.0.0 < 4.0.0
+ assert (2, 0, 0) <= (major, minor, patch) < (4, 0, 0)
else:
raise Exception("You need either charset_normalizer or chardet installed")
diff --git a/src/pip/_vendor/requests/__version__.py b/src/pip/_vendor/requests/__version__.py
index e725ada65..69be3dec7 100644
--- a/src/pip/_vendor/requests/__version__.py
+++ b/src/pip/_vendor/requests/__version__.py
@@ -5,10 +5,10 @@
__title__ = "requests"
__description__ = "Python HTTP for Humans."
__url__ = "https://requests.readthedocs.io"
-__version__ = "2.28.1"
-__build__ = 0x022801
+__version__ = "2.28.2"
+__build__ = 0x022802
__author__ = "Kenneth Reitz"
__author_email__ = "me@kennethreitz.org"
__license__ = "Apache 2.0"
-__copyright__ = "Copyright 2022 Kenneth Reitz"
+__copyright__ = "Copyright Kenneth Reitz"
__cake__ = "\u2728 \U0001f370 \u2728"
diff --git a/src/pip/_vendor/requests/models.py b/src/pip/_vendor/requests/models.py
index b45e81032..76e6f199c 100644
--- a/src/pip/_vendor/requests/models.py
+++ b/src/pip/_vendor/requests/models.py
@@ -438,7 +438,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
if not scheme:
raise MissingSchema(
f"Invalid URL {url!r}: No scheme supplied. "
- f"Perhaps you meant http://{url}?"
+ f"Perhaps you meant https://{url}?"
)
if not host:
diff --git a/src/pip/_vendor/urllib3/_version.py b/src/pip/_vendor/urllib3/_version.py
index 6fbc84b30..7c031661b 100644
--- a/src/pip/_vendor/urllib3/_version.py
+++ b/src/pip/_vendor/urllib3/_version.py
@@ -1,2 +1,2 @@
# This file is protected via CODEOWNERS
-__version__ = "1.26.12"
+__version__ = "1.26.14"
diff --git a/src/pip/_vendor/urllib3/connectionpool.py b/src/pip/_vendor/urllib3/connectionpool.py
index 96339e90a..708739279 100644
--- a/src/pip/_vendor/urllib3/connectionpool.py
+++ b/src/pip/_vendor/urllib3/connectionpool.py
@@ -862,7 +862,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
)
# Check if we should retry the HTTP response.
- has_retry_after = bool(response.getheader("Retry-After"))
+ has_retry_after = bool(response.headers.get("Retry-After"))
if retries.is_retry(method, response.status, has_retry_after):
try:
retries = retries.increment(method, url, response=response, _pool=self)
diff --git a/src/pip/_vendor/urllib3/contrib/appengine.py b/src/pip/_vendor/urllib3/contrib/appengine.py
index 668538695..1717ee22c 100644
--- a/src/pip/_vendor/urllib3/contrib/appengine.py
+++ b/src/pip/_vendor/urllib3/contrib/appengine.py
@@ -224,7 +224,7 @@ class AppEngineManager(RequestMethods):
)
# Check if we should retry the HTTP response.
- has_retry_after = bool(http_response.getheader("Retry-After"))
+ has_retry_after = bool(http_response.headers.get("Retry-After"))
if retries.is_retry(method, http_response.status, has_retry_after):
retries = retries.increment(method, url, response=http_response, _pool=self)
log.debug("Retry: %s", url)
diff --git a/src/pip/_vendor/urllib3/contrib/ntlmpool.py b/src/pip/_vendor/urllib3/contrib/ntlmpool.py
index 41a8fd174..471665754 100644
--- a/src/pip/_vendor/urllib3/contrib/ntlmpool.py
+++ b/src/pip/_vendor/urllib3/contrib/ntlmpool.py
@@ -69,7 +69,7 @@ class NTLMConnectionPool(HTTPSConnectionPool):
log.debug("Request headers: %s", headers)
conn.request("GET", self.authurl, None, headers)
res = conn.getresponse()
- reshdr = dict(res.getheaders())
+ reshdr = dict(res.headers)
log.debug("Response status: %s %s", res.status, res.reason)
log.debug("Response headers: %s", reshdr)
log.debug("Response data: %s [...]", res.read(100))
@@ -101,7 +101,7 @@ class NTLMConnectionPool(HTTPSConnectionPool):
conn.request("GET", self.authurl, None, headers)
res = conn.getresponse()
log.debug("Response status: %s %s", res.status, res.reason)
- log.debug("Response headers: %s", dict(res.getheaders()))
+ log.debug("Response headers: %s", dict(res.headers))
log.debug("Response data: %s [...]", res.read()[:100])
if res.status != 200:
if res.status == 401:
diff --git a/src/pip/_vendor/urllib3/contrib/pyopenssl.py b/src/pip/_vendor/urllib3/contrib/pyopenssl.py
index 528764a03..19e4aa97c 100644
--- a/src/pip/_vendor/urllib3/contrib/pyopenssl.py
+++ b/src/pip/_vendor/urllib3/contrib/pyopenssl.py
@@ -47,10 +47,10 @@ compression in Python 2 (see `CRIME attack`_).
"""
from __future__ import absolute_import
+import OpenSSL.crypto
import OpenSSL.SSL
from cryptography import x509
from cryptography.hazmat.backends.openssl import backend as openssl_backend
-from cryptography.hazmat.backends.openssl.x509 import _Certificate
try:
from cryptography.x509 import UnsupportedExtension
@@ -228,9 +228,8 @@ def get_subj_alt_name(peer_cert):
if hasattr(peer_cert, "to_cryptography"):
cert = peer_cert.to_cryptography()
else:
- # This is technically using private APIs, but should work across all
- # relevant versions before PyOpenSSL got a proper API for this.
- cert = _Certificate(openssl_backend, peer_cert._x509)
+ der = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, peer_cert)
+ cert = x509.load_der_x509_certificate(der, openssl_backend)
# We want to find the SAN extension. Ask Cryptography to locate it (it's
# faster than looping in Python)
diff --git a/src/pip/_vendor/urllib3/response.py b/src/pip/_vendor/urllib3/response.py
index 4969b70e3..8909f8454 100644
--- a/src/pip/_vendor/urllib3/response.py
+++ b/src/pip/_vendor/urllib3/response.py
@@ -3,6 +3,7 @@ from __future__ import absolute_import
import io
import logging
import sys
+import warnings
import zlib
from contextlib import contextmanager
from socket import error as SocketError
@@ -657,9 +658,21 @@ class HTTPResponse(io.IOBase):
# Backwards-compatibility methods for http.client.HTTPResponse
def getheaders(self):
+ warnings.warn(
+ "HTTPResponse.getheaders() is deprecated and will be removed "
+ "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
+ category=DeprecationWarning,
+ stacklevel=2,
+ )
return self.headers
def getheader(self, name, default=None):
+ warnings.warn(
+ "HTTPResponse.getheader() is deprecated and will be removed "
+ "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
+ category=DeprecationWarning,
+ stacklevel=2,
+ )
return self.headers.get(name, default)
# Backwards compatibility for http.cookiejar
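
Migrating off the deprecated accessors is mechanical, and HTTPResponse.headers is an HTTPHeaderDict, so lookups stay case-insensitive. A small demonstration, assuming the vendored path:

    import warnings

    from pip._vendor.urllib3.response import HTTPResponse

    resp = HTTPResponse(body=b"ok", headers={"Retry-After": "5"}, status=200)
    print(resp.headers.get("retry-after"))   # "5": HTTPHeaderDict ignores case

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        resp.getheader("Retry-After")        # still works, but now warns
    print(caught[0].category.__name__)       # DeprecationWarning
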
diff --git a/src/pip/_vendor/urllib3/util/retry.py b/src/pip/_vendor/urllib3/util/retry.py
index 3398323fd..2490d5e5b 100644
--- a/src/pip/_vendor/urllib3/util/retry.py
+++ b/src/pip/_vendor/urllib3/util/retry.py
@@ -394,7 +394,7 @@ class Retry(object):
def get_retry_after(self, response):
"""Get the value of Retry-After in seconds."""
- retry_after = response.getheader("Retry-After")
+ retry_after = response.headers.get("Retry-After")
if retry_after is None:
return None
diff --git a/src/pip/_vendor/urllib3/util/url.py b/src/pip/_vendor/urllib3/util/url.py
index 86bd8b48a..d6d0bbcea 100644
--- a/src/pip/_vendor/urllib3/util/url.py
+++ b/src/pip/_vendor/urllib3/util/url.py
@@ -63,7 +63,7 @@ IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")
-_HOST_PORT_PAT = ("^(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
+_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % (
REG_NAME_PAT,
IPV4_PAT,
IPV6_ADDRZ_PAT,
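
The old pattern capped the port at five characters outright, so a port written with enough leading zeros (e.g. ":000080") failed to parse at all; the new pattern lazily consumes leading zeros before capturing the digits, after which the usual <= 65535 range check applies. The port fragment in isolation:

    import re

    # Just the port part of _HOST_PORT_PAT, old vs. new.
    OLD_PORT = re.compile(r"^(?::([0-9]{0,5}))?$")
    NEW_PORT = re.compile(r"^(?::0*?(|0|[1-9][0-9]{0,4}))?$")

    print(OLD_PORT.match(":000080"))            # None: six digits never matched
    print(NEW_PORT.match(":000080").group(1))   # '80': leading zeros consumed
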
diff --git a/src/pip/_vendor/vendor.txt b/src/pip/_vendor/vendor.txt
index 26afe72d1..67452d89f 100644
--- a/src/pip/_vendor/vendor.txt
+++ b/src/pip/_vendor/vendor.txt
@@ -4,14 +4,14 @@ distlib==0.3.6
distro==1.8.0
msgpack==1.0.4
packaging==21.3
-platformdirs==2.5.3
+platformdirs==2.6.2
pyparsing==3.0.9
pyproject-hooks==1.0.0
-requests==2.28.1
- certifi==2022.09.24
- chardet==5.0.0
+requests==2.28.2
+ certifi==2022.12.7
+ chardet==5.1.0
idna==3.4
- urllib3==1.26.12
+ urllib3==1.26.14
rich==12.6.0
pygments==2.13.0
typing_extensions==4.4.0
diff --git a/tests/functional/test_debug.py b/tests/functional/test_debug.py
index 41374f8cb..77cd732f9 100644
--- a/tests/functional/test_debug.py
+++ b/tests/functional/test_debug.py
@@ -1,6 +1,8 @@
+import re
from typing import List
import pytest
+from pip._vendor.packaging.version import Version
from pip._internal.commands.debug import create_vendor_txt_map
from pip._internal.utils import compatibility_tags
@@ -45,7 +47,9 @@ def test_debug__library_versions(script: PipTestEnvironment) -> None:
vendored_versions = create_vendor_txt_map()
for name, value in vendored_versions.items():
- assert f"{name}=={value}" in result.stdout
+ match = re.search(rf"{name}==(\S+)", result.stdout)
+ assert match is not None, f"Could not find {name} in output"
+ assert Version(match.group(1)) == Version(value)
@pytest.mark.parametrize(
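
Comparing parsed Version objects instead of raw strings makes the test robust to PEP 440 normalization, which matters for datestamped releases such as certifi's, where the pinned spelling and the spelling a package reports can differ only in leading zeros:

    from pip._vendor.packaging.version import Version

    print("2022.12.07" == "2022.12.7")                    # False: raw strings differ
    print(Version("2022.12.07") == Version("2022.12.7"))  # True: PEP 440 normalizes
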