From cb5095852dc8676461f95d28bf7a593d2fe5b03d Mon Sep 17 00:00:00 2001 From: James Barnett Date: Tue, 3 Apr 2018 18:36:57 -0700 Subject: [PATCH 1/8] Expand validators.url to allow FQDNs that have IDNA A-labels --- validators/url.py | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/validators/url.py b/validators/url.py index fdccf0c0..2e7740d3 100644 --- a/validators/url.py +++ b/validators/url.py @@ -4,11 +4,10 @@ ip_middle_octet = u"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5]))" ip_last_octet = u"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +protocol_identifier = u"(?:(?:https?|ftp)://)" regex = re.compile( - u"^" - # protocol identifier - u"(?:(?:https?|ftp)://)" + u"^" + protocol_identifier + u"" # user:pass authentication u"(?:[-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:]+" u"(?::[-a-z0-9._~%!$&'()*+,;=:]*)?@)?" @@ -53,8 +52,23 @@ re.UNICODE | re.IGNORECASE ) -pattern = re.compile(regex) +regex_idna_converter = re.compile( + u"^" + # protocol group + u"(?P" + protocol_identifier + u")" + # fqdn group: intentionally loose, only meant to isolate any + # potential fqdn so that idna decoding can be attempted. + u"(?P[^/:]+)" + # port number group + u"(?P:\d{2,5})?" + # resource/query/fragment group + u"(?P/.*)?" 
+ u"$", + re.UNICODE | re.IGNORECASE +) +pattern = re.compile(regex) +pattern_idna_converter = re.compile(regex_idna_converter) @validator def url(value, public=False): @@ -109,6 +123,25 @@ def url(value, public=False): :param public: (default=False) Set True to only allow a public IP address """ result = pattern.match(value) + + #if initial match failed, attempt an idna conversion + if not result: + try: + #use regex to separate the potential idna fqdn + idna_result = pattern_idna_converter.match(value) + idna_dict = idna_result.groupdict() + #reassemble the URL after decoding the fqdn as idna + idna_value = u"{protocol}{fqdn}{port}{resource}".format( + protocol=idna_dict['protocol'], + fqdn=idna_dict['fqdn'].decode('idna'), + port=idna_dict['port'] or "", + resource=idna_dict['resource'] or "" + ) + result = pattern.match(idna_value) + #if pattern doesn't match or host can't decode as idna then pass + except (AttributeError,UnicodeError): + pass + if not public: return result From ef78dcbb712410a0c4d214988983b761e3eddf5e Mon Sep 17 00:00:00 2001 From: James Barnett Date: Wed, 4 Apr 2018 17:43:26 -0700 Subject: [PATCH 2/8] Tests for IDNA URLs --- tests/test_url.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/test_url.py b/tests/test_url.py index 1187f274..4b0bdc0b 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -52,6 +52,30 @@ u'http://127.0.10.150', u'http://localhost', u'http://localhost:8000', + u'http://xn--mgbh0fb.xn--kgbechtv', + u'http://xn--mgbh0fb.xn--kgbechtv/', + u'http://xn--mgbh0fb.xn--kgbechtv:8080', + u'http://xn--mgbh0fb.xn--kgbechtv:8080/', + u'http://xn--mgbh0fb.xn--kgbechtv/foobar', + u'http://xn--mgbh0fb.xn--kgbechtv:8080/foobar', + u'http://xn--mgbh0fb.xn--kgbechtv/foo/?bar=baz&inga=42&quux', + u'http://xn--mgbh0fb.xn--kgbechtv:8080/foo/?bar=baz&inga=42&quux', + u'http://xn--mgbh0fb.xn--kgbechtv/foo_bar', + u'http://xn--mgbh0fb.xn--kgbechtv/123', + 
u'http://xn--mgbh0fb.xn--kgbechtv:8080/123', + u'http://xn--mgbh0fb.xn--kgbechtv/?foo=bar%20has-url-encoded%20stuff', + u'http://xn--mgbh0fb.xn--kgbechtv:8080/?foo=bar%20has-url-encoded%20stuff', + u'http://xn--mgbh0fb.xn--kgbechtv/foobar_(wikipedia)#cite-1', + u'http://xn--mgbh0fb.xn--kgbechtv:8080/foobar_(wikipedia)#cite-1', + u'http://xn--mgbh0fb.xn--kgbechtv/unicode_(✪)_in_parens', + u'http://xn--mgbh0fb.xn--kgbechtv:8080/unicode_(✪)_in_parens', + u'http://xn--mgbh0fb.xn--kgbechtv/(something)?after=parens', + u'ftp://xn--p1b6ci4b4b3a.xn--11b5bs3a9aj6g/foo', + u'https://userid:password@xn--fsqu00a.xn--0zwm56d', + u'https://userid:password@xn--fsqu00a.xn--0zwm56d:8080', + u'https://userid:password@xn--fsqu00a.xn--0zwm56d:8080/foobar' + u'https://用户名密码:密码@xn--fsqu00a.xn--0zwm56d' + u'https://%E7%94%A8%E6%88%B7%E5%90%8D%E5%AF%86%E7%A0%81:%E5%AF%86%E7%A0%81@xn--fsqu00a.xn--0zwm56d' ]) def test_returns_true_on_valid_url(address): assert url(address) @@ -118,6 +142,27 @@ def test_returns_true_on_valid_public_url(address, public): 'http://.www.foo.bar./', 'http://127.12.0.260', 'http://example.com/">user@example.com', + 'xn--mgbh0fb.xn--kgbechtv', + 'http://xn--mgbh0fb', + 'http://xn--mgbh0fb.xn---kgbechtv', + 'http://xn---mgbh0fb.xn--kgbechtv', + 'http://xn--mgbh0fb.xnk--gbechtv', + 'http://xnm--gbh0fb.xn--kgbechtv', + 'http:// xn--mgbh0fb.xn--kgbechtv', + ':// xn--mgbh0fb.xn--kgbechtv', + 'http://-xn--mgbh0fb.xn--kgbechtv', + 'http://xn--mgbh0fb-.xn--kgbechtv', + 'http://xn--mgbh0fb.-xn--kgbechtv', + 'http://xn--mgbh0fb.xn--kgbechtv-', + 'http://x-n--mgbh0fb.xn--kgbechtv', + 'http://xn--mgbh0fb.x-n--kgbechtv', + 'http://xn--mgbh0fb.xn--kgbechtv./', + 'http://xn--mgbh0fb..xn--kgbechtv', + 'http:///xn--mgbh0fb.xn--kgbechtv', + 'ttp://xn--mgbh0fb.xn--kgbechtv', + 'http://xn--mgbh0fb.xn--kgbechtv/">user@example.com', + u'http://xn--mgbh0fb.إختبار', + u'http://مثال.xn--kgbechtv', ]) def test_returns_failed_validation_on_invalid_url(address): assert 
isinstance(url(address), ValidationFailure) From 7a35664edae500bb3feac3b0efc18f4065f0fae9 Mon Sep 17 00:00:00 2001 From: James Barnett Date: Wed, 4 Apr 2018 17:44:15 -0700 Subject: [PATCH 3/8] Enable user:pass authentication on IDNA URLs --- validators/url.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/validators/url.py b/validators/url.py index 2e7740d3..b8d31b80 100644 --- a/validators/url.py +++ b/validators/url.py @@ -56,6 +56,9 @@ u"^" # protocol group u"(?P" + protocol_identifier + u")" + # user:pass group + u"(?P(?:[-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:]+" + u"(?::[-a-z0-9._~%!$&'()*+,;=:]*)?@)?)" # fqdn group: intentionally loose, only meant to isolate any # potential fqdn so that idna decoding can be attempted. u"(?P[^/:]+)" @@ -133,6 +136,7 @@ def url(value, public=False): #reassemble the URL after decoding the fqdn as idna idna_value = u"{protocol}{fqdn}{port}{resource}".format( protocol=idna_dict['protocol'], + userpass = idna_dict['userpass'] or "", fqdn=idna_dict['fqdn'].decode('idna'), port=idna_dict['port'] or "", resource=idna_dict['resource'] or "" From ca71d823cf6486589e273c357635b95d479aa72c Mon Sep 17 00:00:00 2001 From: James Barnett Date: Wed, 4 Apr 2018 18:11:09 -0700 Subject: [PATCH 4/8] Flake8 fixes --- validators/url.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/validators/url.py b/validators/url.py index b8d31b80..142bf3e8 100644 --- a/validators/url.py +++ b/validators/url.py @@ -60,7 +60,7 @@ u"(?P(?:[-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:]+" u"(?::[-a-z0-9._~%!$&'()*+,;=:]*)?@)?)" # fqdn group: intentionally loose, only meant to isolate any - # potential fqdn so that idna decoding can be attempted. + # potential fqdn so that idna decoding can be attempted. u"(?P[^/:]+)" # port number group u"(?P:\d{2,5})?" 
@@ -73,6 +73,7 @@ pattern = re.compile(regex) pattern_idna_converter = re.compile(regex_idna_converter) + @validator def url(value, public=False): """ @@ -127,23 +128,23 @@ def url(value, public=False): """ result = pattern.match(value) - #if initial match failed, attempt an idna conversion + # if initial match failed, attempt an idna conversion if not result: try: - #use regex to separate the potential idna fqdn + # use regex to separate the potential idna fqdn idna_result = pattern_idna_converter.match(value) idna_dict = idna_result.groupdict() - #reassemble the URL after decoding the fqdn as idna + # reassemble the URL after decoding the fqdn as idna idna_value = u"{protocol}{fqdn}{port}{resource}".format( protocol=idna_dict['protocol'], - userpass = idna_dict['userpass'] or "", + userpass=idna_dict['userpass'] or "", fqdn=idna_dict['fqdn'].decode('idna'), port=idna_dict['port'] or "", resource=idna_dict['resource'] or "" ) result = pattern.match(idna_value) - #if pattern doesn't match or host can't decode as idna then pass - except (AttributeError,UnicodeError): + # if pattern doesn't match or host can't decode as idna then pass + except (AttributeError, UnicodeError): pass if not public: From 639ddb69cdfaaf435e73d0f179cc29736fec491d Mon Sep 17 00:00:00 2001 From: James Barnett Date: Wed, 4 Apr 2018 18:16:45 -0700 Subject: [PATCH 5/8] Shorten test string for Flake8 --- tests/test_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_url.py b/tests/test_url.py index 4b0bdc0b..093ac67c 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -75,7 +75,7 @@ u'https://userid:password@xn--fsqu00a.xn--0zwm56d:8080', u'https://userid:password@xn--fsqu00a.xn--0zwm56d:8080/foobar' u'https://用户名密码:密码@xn--fsqu00a.xn--0zwm56d' - u'https://%E7%94%A8%E6%88%B7%E5%90%8D%E5%AF%86%E7%A0%81:%E5%AF%86%E7%A0%81@xn--fsqu00a.xn--0zwm56d' + u'https://%E7%94%A8:%E5%AF%86@xn--fsqu00a.xn--0zwm56d' ]) def test_returns_true_on_valid_url(address): assert 
url(address) From 72bd5dd5b5be58b76eb8ca56933ea120e2c825e5 Mon Sep 17 00:00:00 2001 From: James Barnett Date: Tue, 15 May 2018 16:56:18 -0700 Subject: [PATCH 6/8] Fix error where total domain length couldn't exceed 63 chars -- that RFC1035 limit only applies to label length. --- validators/domain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validators/domain.py b/validators/domain.py index 7f7f2672..da8a250c 100644 --- a/validators/domain.py +++ b/validators/domain.py @@ -5,7 +5,7 @@ pattern = re.compile( r'^(([a-zA-Z]{1})|([a-zA-Z]{1}[a-zA-Z]{1})|' # domain pt.1 r'([a-zA-Z]{1}[0-9]{1})|([0-9]{1}[a-zA-Z]{1})|' # domain pt.2 - r'([a-zA-Z0-9][-_.a-zA-Z0-9]{0,61}[a-zA-Z0-9]))\.' # domain pt.3 + r'([a-zA-Z0-9][-_.a-zA-Z0-9]{0,61}[a-zA-Z0-9])){1,999}\.' # domain pt.3 r'([a-zA-Z]{2,13}|(xn--[a-zA-Z0-9]{2,30}))$' # TLD ) From e817f4cba8c68b5c3a447ab3a714dae3f3c60cd9 Mon Sep 17 00:00:00 2001 From: James Barnett Date: Tue, 15 May 2018 17:12:03 -0700 Subject: [PATCH 7/8] Change version name to indicate fork --- validators/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validators/__init__.py b/validators/__init__.py index 8fe8fc7c..6016ac39 100644 --- a/validators/__init__.py +++ b/validators/__init__.py @@ -14,4 +14,4 @@ from .utils import ValidationFailure, validator # noqa from .uuid import uuid # noqa -__version__ = '0.12.1' +__version__ = '0.12.3-nullripper' From 598a4af51fdc59a56063e86ea974f746e7113e2e Mon Sep 17 00:00:00 2001 From: James Barnett Date: Wed, 8 Mar 2023 14:03:08 -0800 Subject: [PATCH 8/8] Update version number for PEP 440 --- validators/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validators/__init__.py b/validators/__init__.py index 6016ac39..2f32d48a 100644 --- a/validators/__init__.py +++ b/validators/__init__.py @@ -14,4 +14,4 @@ from .utils import ValidationFailure, validator # noqa from .uuid import uuid # noqa -__version__ = '0.12.3-nullripper' +__version__ = 
'0.12.1337' \ No newline at end of file