Skip to content

Commit 95e70ec

Browse files
committed
maint: improve domain, email & hostname
- in `domain`: decode the IDNA-encoded value as `utf-8` instead of `ascii`; call `re.match` directly instead of `re.compile(...).match`
- in `email`: split the `ip_address` parameter into `ipv4_address` and `ipv6_address`
- in `hostname`: split the `skip_ip_addr` parameter into `skip_ipv4_addr` and `skip_ipv6_addr`
1 parent 050a2c2 commit 95e70ec

File tree

3 files changed

+280
-271
lines changed

3 files changed

+280
-271
lines changed

validators/domain.py

Lines changed: 61 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,61 @@
1-
"""Domain."""
2-
# -*- coding: utf-8 -*-
3-
4-
# standard
5-
import re
6-
7-
# local
8-
from .utils import validator
9-
10-
11-
@validator
def domain(value: str, /, *, rfc_1034: bool = False, rfc_2782: bool = False):
    """Return whether or not given value is a valid domain.

    The value is first IDNA-encoded, so internationalized names are checked
    in their ASCII-compatible form. Any whitespace in `value` is rejected
    outright.

    Examples:
        >>> domain('example.com')
        # Output: True
        >>> domain('example.com/')
        # Output: ValidationFailure(func=domain, ...)
        >>> # Supports IDN domains as well::
        >>> domain('xn----gtbspbbmkef.xn--p1ai')
        # Output: True

    Args:
        value:
            Domain string to validate.
        rfc_1034:
            Allow (but do not require) a single trailing dot in domain name.
            Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
        rfc_2782:
            Domain name is of type service record; the first character of
            each label may then be an underscore.
            Ref: [RFC 2782](https://www.rfc-editor.org/rfc/rfc2782).

    Returns:
        (Literal[True]):
            If `value` is a valid domain name.
        (ValidationFailure):
            If `value` is an invalid domain name.

    Note:
        - *In version 0.10.0*:
            - Added support for internationalized domain name (IDN) validation.

    > *New in version 0.9.0*.
    """
    # Both fragments are computed outside the f-string: expressions inside an
    # f-string may not contain a backslash before Python 3.12.
    leading_extra = "_" if rfc_2782 else ""
    # The previous pattern used an unescaped, mandatory `.` here, which
    # accepted *any* trailing character (e.g. "example.com/") and rejected
    # two-letter TLDs without a trailing dot. An optional literal dot is what
    # "allow trailing dot" means.
    tld_end = r"\.?$" if rfc_1034 else r"$"

    try:
        return not re.search(r"\s", value) and re.compile(
            # First character of the domain
            rf"^(?:[a-zA-Z0-9{leading_extra}]"
            # Sub domain + hostname
            + r"(?:[a-zA-Z0-9-_]{0,61}[A-Za-z0-9])?\.)"
            # First 61 characters of the gTLD
            + r"+[A-Za-z0-9][A-Za-z0-9-_]{0,61}"
            # Last character of the gTLD
            + r"[A-Za-z]"
            # Optional root dot (RFC 1034) and end of input
            + tld_end
        ).match(value.encode("idna").decode("ascii"))
    except UnicodeError:
        # Raised by the IDNA codec for labels it cannot encode.
        return False
1+
"""Domain."""
2+
# -*- coding: utf-8 -*-
3+
4+
# standard
5+
import re
6+
7+
# local
8+
from .utils import validator
9+
10+
11+
@validator
def domain(value: str, /, *, rfc_1034: bool = False, rfc_2782: bool = False):
    """Return whether or not given value is a valid domain.

    The value is first IDNA-encoded, so internationalized names are checked
    in their ASCII-compatible form. Matching is case-insensitive, and any
    whitespace in `value` is rejected outright.

    Examples:
        >>> domain('example.com')
        # Output: True
        >>> domain('example.com/')
        # Output: ValidationFailure(func=domain, ...)
        >>> # Supports IDN domains as well::
        >>> domain('xn----gtbspbbmkef.xn--p1ai')
        # Output: True

    Args:
        value:
            Domain string to validate.
        rfc_1034:
            Allow (but do not require) a single trailing dot in domain name.
            Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
        rfc_2782:
            Domain name is of type service record; the first character of
            each label may then be an underscore.
            Ref: [RFC 2782](https://www.rfc-editor.org/rfc/rfc2782).

    Returns:
        (Literal[True]):
            If `value` is a valid domain name.
        (ValidationFailure):
            If `value` is an invalid domain name.

    Note:
        - *In version 0.10.0*:
            - Added support for internationalized domain name (IDN) validation.

    > *New in version 0.9.0*.
    """
    # Both fragments are computed outside the f-string: expressions inside an
    # f-string may not contain a backslash before Python 3.12.
    leading_extra = "_" if rfc_2782 else ""
    # The previous pattern used an unescaped, mandatory `.` here, which
    # accepted *any* trailing character (e.g. "example.com/") and rejected
    # two-letter TLDs without a trailing dot. An optional literal dot is what
    # "allow trailing dot" means.
    tld_end = r"\.?$" if rfc_1034 else r"$"

    try:
        return not re.search(r"\s", value) and re.match(
            # First character of the domain
            rf"^(?:[a-zA-Z0-9{leading_extra}]"
            # Sub domain + hostname
            + r"(?:[a-zA-Z0-9-_]{0,61}[A-Za-z0-9])?\.)"
            # First 61 characters of the gTLD
            + r"+[A-Za-z0-9][A-Za-z0-9-_]{0,61}"
            # Last character of the gTLD
            + r"[A-Za-z]"
            # Optional root dot (RFC 1034) and end of input
            + tld_end,
            value.encode("idna").decode("utf-8"),
            re.IGNORECASE,
        )
    except UnicodeError:
        # Raised by the IDNA codec for labels it cannot encode.
        return False

validators/email.py

Lines changed: 100 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,96 +1,100 @@
1-
"""eMail."""
2-
# -*- coding: utf-8 -*-
3-
4-
# standard
5-
import re
6-
7-
# local
8-
from .hostname import hostname
9-
from .utils import validator
10-
11-
12-
@validator
def email(
    value: str,
    /,
    *,
    simple_host: bool = False,
    ip_address: bool = False,
    rfc_1034: bool = False,
    rfc_2782: bool = False,
):
    """Validate an email address.

    This was inspired from [Django's email validator][1].
    Also ref: [RFC 1034][2], [RFC 5321][3] and [RFC 5322][4].

    [1]: https://github.com/django/django/blob/main/django/core/validators.py#L174
    [2]: https://www.rfc-editor.org/rfc/rfc1034
    [3]: https://www.rfc-editor.org/rfc/rfc5321
    [4]: https://www.rfc-editor.org/rfc/rfc5322

    Examples:
        >>> email('someone@example.com')
        # Output: True
        >>> email('bogus@@')
        # Output: ValidationFailure(func=email, args={'value': 'bogus@@'})

    Args:
        value:
            eMail string to validate.
        simple_host:
            When the domain part is a simple hostname.
        ip_address:
            When the domain part is an IP address. The address must then be
            enclosed in exactly one pair of square brackets.
        rfc_1034:
            Allow trailing dot in domain name.
            Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
        rfc_2782:
            Domain name is of type service record.
            Ref: [RFC 2782](https://www.rfc-editor.org/rfc/rfc2782).

    Returns:
        (Literal[True]):
            If `value` is a valid eMail.
        (ValidationFailure):
            If `value` is an invalid eMail.

    > *New in version 0.1.0*.
    """
    if not value or value.count("@") != 1:
        return False

    username_part, domain_part = value.rsplit("@", 1)

    if len(username_part) > 64 or len(domain_part) > 253:
        # ref: RFC 1034 and 5321
        return False

    if ip_address:
        if not (domain_part.startswith("[") and domain_part.endswith("]")):
            return False
        # ref: RFC 5321
        # Remove exactly one bracket from each end. `lstrip`/`rstrip` would
        # strip any run of brackets and wrongly accept "[[[1.2.3.4]]]".
        domain_part = domain_part[1:-1]

    # Check the local part first so the host is only validated when needed.
    if not re.match(
        # dot-atom
        r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*$"
        # quoted-string
        + r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"$)',
        username_part,
        re.IGNORECASE,
    ):
        return False

    return bool(
        hostname(
            domain_part,
            skip_ip_addr=not ip_address,
            may_have_port=False,
            maybe_simple=simple_host,
            rfc_1034=rfc_1034,
            rfc_2782=rfc_2782,
        )
    )
1+
"""eMail."""
2+
# -*- coding: utf-8 -*-
3+
4+
# standard
5+
import re
6+
7+
# local
8+
from .hostname import hostname
9+
from .utils import validator
10+
11+
12+
@validator
def email(
    value: str,
    /,
    *,
    ipv6_address: bool = False,
    ipv4_address: bool = False,
    simple_host: bool = False,
    rfc_1034: bool = False,
    rfc_2782: bool = False,
):
    """Validate an email address.

    This was inspired from [Django's email validator][1].
    Also ref: [RFC 1034][2], [RFC 5321][3] and [RFC 5322][4].

    [1]: https://github.com/django/django/blob/main/django/core/validators.py#L174
    [2]: https://www.rfc-editor.org/rfc/rfc1034
    [3]: https://www.rfc-editor.org/rfc/rfc5321
    [4]: https://www.rfc-editor.org/rfc/rfc5322

    Examples:
        >>> email('someone@example.com')
        # Output: True
        >>> email('bogus@@')
        # Output: ValidationFailure(func=email, args={'value': 'bogus@@'})

    Args:
        value:
            eMail string to validate.
        ipv6_address:
            When the domain part is an IPv6 address.
        ipv4_address:
            When the domain part is an IPv4 address.
        simple_host:
            When the domain part is a simple hostname.
        rfc_1034:
            Allow trailing dot in domain name.
            Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
        rfc_2782:
            Domain name is of type service record.
            Ref: [RFC 2782](https://www.rfc-editor.org/rfc/rfc2782).

    Note:
        When `ipv6_address` or `ipv4_address` is set, the domain part must
        be enclosed in exactly one pair of square brackets.

    Returns:
        (Literal[True]):
            If `value` is a valid eMail.
        (ValidationFailure):
            If `value` is an invalid eMail.

    > *New in version 0.1.0*.
    """
    if not value or value.count("@") != 1:
        return False

    username_part, domain_part = value.rsplit("@", 1)

    if len(username_part) > 64 or len(domain_part) > 253:
        # ref: RFC 1034 and 5321
        return False

    if ipv6_address or ipv4_address:
        if not (domain_part.startswith("[") and domain_part.endswith("]")):
            return False
        # ref: RFC 5321
        # Remove exactly one bracket from each end. `lstrip`/`rstrip` would
        # strip any run of brackets and wrongly accept "[[[1.2.3.4]]]".
        domain_part = domain_part[1:-1]

    # Check the local part first so the host is only validated when needed.
    if not re.match(
        # dot-atom
        r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*$"
        # quoted-string
        + r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"$)',
        username_part,
        re.IGNORECASE,
    ):
        return False

    return bool(
        hostname(
            domain_part,
            skip_ipv6_addr=not ipv6_address,
            skip_ipv4_addr=not ipv4_address,
            may_have_port=False,
            maybe_simple=simple_host,
            rfc_1034=rfc_1034,
            rfc_2782=rfc_2782,
        )
    )

0 commit comments

Comments
 (0)