From c0b11c1309a486640493f90253f2d020a834443a Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Fri, 24 Jan 2025 08:44:22 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Speed=20up=20function=20`a?= =?UTF-8?q?scii=5Fdomain=5Fregex`=20by=2060%=20###=20Changes=20and=20Impro?= =?UTF-8?q?vements.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pydantic/v1/networks.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/pydantic/v1/networks.py b/pydantic/v1/networks.py index ba07b74867b..87afad3bcda 100644 --- a/pydantic/v1/networks.py +++ b/pydantic/v1/networks.py @@ -1,4 +1,5 @@ import re +from functools import lru_cache from ipaddress import ( IPv4Address, IPv4Interface, @@ -121,8 +122,7 @@ def url_regex() -> Pattern[str]: def multi_host_url_regex() -> Pattern[str]: - """ - Compiled multi host url regex. + """Compiled multi host url regex. Additionally to `url_regex` it allows to match multiple hosts. E.g. host1.db.net,host2.db.net @@ -138,15 +138,11 @@ def multi_host_url_regex() -> Pattern[str]: return _multi_host_url_regex_cache +@lru_cache(maxsize=None) def ascii_domain_regex() -> Pattern[str]: - global _ascii_domain_regex_cache - if _ascii_domain_regex_cache is None: - ascii_chunk = r'[_0-9a-z](?:[-_0-9a-z]{0,61}[_0-9a-z])?' - ascii_domain_ending = r'(?P<tld>\.[a-z]{2,63})?\.?' - _ascii_domain_regex_cache = re.compile( - fr'(?:{ascii_chunk}\.)*?{ascii_chunk}{ascii_domain_ending}', re.IGNORECASE - ) - return _ascii_domain_regex_cache + ascii_chunk = r'[_0-9a-z](?:[-_0-9a-z]{0,61}[_0-9a-z])?' + ascii_domain_ending = r'(?P<tld>\.[a-z]{2,63})?\.?'
+ return re.compile(rf'(?:{ascii_chunk}\.)*?{ascii_chunk}{ascii_domain_ending}', re.IGNORECASE) def int_domain_regex() -> Pattern[str]: @@ -154,7 +150,7 @@ def int_domain_regex() -> Pattern[str]: if _int_domain_regex_cache is None: int_chunk = r'[_0-9a-\U00040000](?:[-_0-9a-\U00040000]{0,61}[_0-9a-\U00040000])?' int_domain_ending = r'(?P<tld>(\.[^\W\d_]{2,63})|(\.(?:xn--)[_0-9a-z-]{2,63}))?\.?' - _int_domain_regex_cache = re.compile(fr'(?:{int_chunk}\.)*?{int_chunk}{int_domain_ending}', re.IGNORECASE) + _int_domain_regex_cache = re.compile(rf'(?:{int_chunk}\.)*?{int_chunk}{int_domain_ending}', re.IGNORECASE) return _int_domain_regex_cache @@ -287,8 +283,7 @@ def validate(cls, value: Any, field: 'ModelField', config: 'BaseConfig') -> 'Any @classmethod def _build_url(cls, m: Match[str], url: str, parts: 'Parts') -> 'AnyUrl': - """ - Validate hosts and build the AnyUrl object. Split from `validate` so this method + """Validate hosts and build the AnyUrl object. Split from `validate` so this method can be altered in `MultiHostDsn`. """ host, tld, host_type, rebuild = cls.validate_host(parts) @@ -318,8 +313,7 @@ def _validate_port(port: Optional[str]) -> None: @classmethod def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts': - """ - A method used to validate parts of a URL. + """A method used to validate parts of a URL.
Could be overridden to set default values for parts if missing """ scheme = parts['scheme'] @@ -437,7 +431,7 @@ def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts': @classmethod def _build_url(cls, m: Match[str], url: str, parts: 'Parts') -> 'MultiHostDsn': - hosts_parts: List['HostParts'] = [] + hosts_parts: List[HostParts] = [] host_re = host_regex() for host in m.groupdict()['hosts'].split(','): d: Parts = host_re.match(host).groupdict() # type: ignore @@ -709,8 +703,8 @@ def validate(cls, value: NetworkType) -> Union[IPv4Network, IPv6Network]: def validate_email(value: Union[str]) -> Tuple[str, str]: - """ - Email address validation using https://pypi.org/project/email-validator/ + """Email address validation using https://pypi.org/project/email-validator/ + Notes: * raw ip address (literal) domain parts are not allowed. * "John Doe <local_part@domain.com>" style "pretty" email addresses are processed