2727``hn.C`` will be a reference to the module config, possibly yielding
2828unexpected results. See `Customizing the Parser <customize.html>`_.
2929"""
30+ import re
3031import sys
31- from collections .abc import Set
32+ from collections .abc import Iterable , Iterator , Mapping , Set
33+ from typing import Any , TypeVar
34+
35+ from typing_extensions import Self
3236
3337from nameparser .util import lc
3438from nameparser .config .prefixes import PREFIXES
3842from nameparser .config .suffixes import SUFFIX_NOT_ACRONYMS
3943from nameparser .config .titles import TITLES
4044from nameparser .config .titles import FIRST_NAME_TITLES
41- from nameparser .config .regexes import REGEXES
45+ from nameparser .config .regexes import EMPTY_REGEX , REGEXES
4246
4347DEFAULT_ENCODING = 'UTF-8'
4448
@@ -55,25 +59,25 @@ class SetManager(Set):
5559
5660 '''
5761
def __init__(self, elements: Iterable[str]) -> None:
    """Build the manager from ``elements``, storing them as a new ``set``."""
    unique_items = set(elements)
    self.elements = unique_items
6064
def __call__(self) -> Set[str]:
    """Calling the manager returns the underlying set object itself (not a copy)."""
    underlying = self.elements
    return underlying
6367
def __repr__(self) -> str:
    """Return ``SetManager(<set repr>)``; this text is used for the docs."""
    return f"SetManager({self.elements})"
6670
def __iter__(self) -> Iterator[str]:
    """Iterate over the stored strings by delegating to the underlying set."""
    members = self.elements
    return iter(members)
6973
def __contains__(self, value: object) -> bool:
    """Report whether ``value`` is a member of the underlying set."""
    members = self.elements
    return value in members
7276
def __len__(self) -> int:
    """Return how many strings the underlying set currently holds."""
    members = self.elements
    return len(members)
7579
76- def add_with_encoding (self , s , encoding = None ):
80+ def add_with_encoding (self , s : str , encoding : str | None = None ) -> None :
7781 """
7882 Add the lower case and no-period version of the string to the set. Pass an
7983 explicit `encoding` parameter to specify the encoding of binary strings that
@@ -87,44 +91,58 @@ def add_with_encoding(self, s, encoding=None):
8791 s = s .decode (encoding )
8892 self .elements .add (lc (s ))
8993
90- def add (self , * strings ) :
94+ def add (self , * strings : str ) -> Self :
9195 """
9296 Add the lower case and no-period version of the string arguments to the set.
9397 Can pass a list of strings. Returns ``self`` for chaining.
9498 """
95- [self .add_with_encoding (s ) for s in strings ]
99+ for s in strings :
100+ self .add_with_encoding (s )
101+
96102 return self
97103
def remove(self, *strings: str) -> Self:
    """
    Remove the ``lc()``-normalised form of each string argument from the set.

    Strings whose normalised form is not in the set are ignored rather than
    raising. Returns ``self`` so calls can be chained.
    """
    for text in strings:
        # discard() is a no-op for absent members, matching the
        # "remove only if present" behaviour.
        self.elements.discard(lc(text))

    return self
105114
106115
107- class TupleManager (dict ):
116+ T = TypeVar ('T' )
117+
118+
119+ class TupleManager (dict [str , T ]):
108120 '''
109121 A dictionary with dot.notation access. Subclass of ``dict``. Makes the tuple constants
110122 more friendly.
111123 '''
112124
113- def __getattr__ (self , attr ) :
125+ def __getattr__ (self , attr : str ) -> T | None :
114126 return self .get (attr )
127+
115128 __setattr__ = dict .__setitem__
116129 __delattr__ = dict .__delitem__
117130
118- def __getstate__ (self ):
131+ def __getstate__ (self ) -> Mapping [ str , T ] :
119132 return dict (self )
120133
121- def __setstate__ (self , state ) :
122- self .__init__ (state )
134+ def __setstate__ (self , state : Mapping [ str , T ]) -> None :
135+ self .update (state )
123136
124- def __reduce__ (self ):
137+ def __reduce__ (self ) -> tuple [ type , tuple [()], Mapping [ str , T ]] :
125138 return (TupleManager , (), self .__getstate__ ())
126139
127140
class RegexTupleManager(TupleManager[re.Pattern[str]]):
    """
    A ``TupleManager`` of compiled regular expressions.

    Reading an attribute that has no matching key returns ``EMPTY_REGEX``
    instead of ``None``.
    """

    def __getattr__(self, attr: str) -> re.Pattern[str]:
        """Return the pattern stored under ``attr``, or ``EMPTY_REGEX`` if absent."""
        try:
            return self[attr]
        except KeyError:
            return EMPTY_REGEX
144+
145+
128146class Constants :
129147 """
130148 An instance of this class holds all of the configuration constants for the parser.
@@ -149,6 +167,17 @@ class Constants:
149167 :py:attr:`regexes` wrapped with :py:class:`TupleManager`.
150168 """
151169
170+ prefixes : SetManager
171+ suffix_acronyms : SetManager
172+ suffix_not_acronyms : SetManager
173+ titles : SetManager
174+ first_name_titles : SetManager
175+ conjunctions : SetManager
176+ capitalization_exceptions : TupleManager [str ]
177+ regexes : RegexTupleManager
178+
179+ _pst : Set [str ] | None
180+
152181 string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
153182 """
154183 The default string format used for all new `HumanName` instances.
@@ -168,17 +197,17 @@ class Constants:
168197 empty_attribute_default = ''
169198 """
170199 Default return value for empty attributes.
171-
200+
172201 .. doctest::
173-
202+
174203 >>> from nameparser.config import CONSTANTS
175204 >>> CONSTANTS.empty_attribute_default = None
176205 >>> name = HumanName("John Doe")
177206 >>> name.title
178207 None
179208 >>> name.first
180209 'John'
181-
210+
182211 """
183212
184213 capitalize_name = False
@@ -213,38 +242,38 @@ class Constants:
213242 """
214243
def __init__(
        self,
        prefixes: Iterable[str] = PREFIXES,
        suffix_acronyms: Iterable[str] = SUFFIX_ACRONYMS,
        suffix_not_acronyms: Iterable[str] = SUFFIX_NOT_ACRONYMS,
        titles: Iterable[str] = TITLES,
        first_name_titles: Iterable[str] = FIRST_NAME_TITLES,
        conjunctions: Iterable[str] = CONJUNCTIONS,
        capitalization_exceptions: TupleManager[str] | Iterable[tuple[str, str]] = CAPITALIZATION_EXCEPTIONS,
        regexes: RegexTupleManager | TupleManager[re.Pattern[str]] | Iterable[tuple[str, re.Pattern[str]]] = REGEXES,
) -> None:
    """
    Wrap each configuration collection in its manager class.

    The six string collections each become a ``SetManager``,
    ``capitalization_exceptions`` becomes a ``TupleManager`` and ``regexes``
    a ``RegexTupleManager``. Every argument defaults to the matching
    module-level constant. The cache behind ``suffixes_prefixes_titles``
    starts out empty.
    """
    # Word sets.
    self.prefixes = SetManager(prefixes)
    self.suffix_acronyms = SetManager(suffix_acronyms)
    self.suffix_not_acronyms = SetManager(suffix_not_acronyms)
    self.titles = SetManager(titles)
    self.first_name_titles = SetManager(first_name_titles)
    self.conjunctions = SetManager(conjunctions)

    # Key/value tables with dot-notation access.
    self.capitalization_exceptions = TupleManager(capitalization_exceptions)
    self.regexes = RegexTupleManager(regexes)

    # Lazily filled by the suffixes_prefixes_titles property.
    self._pst = None
234263
@property
def suffixes_prefixes_titles(self) -> Set[str]:
    """
    Union of ``prefixes``, ``suffix_acronyms``, ``suffix_not_acronyms`` and ``titles``.

    The union is computed on first access and stored in ``self._pst``. Later
    accesses return the stored value for as long as it is truthy, so an
    empty result is recomputed on every access.
    """
    cached = self._pst
    if cached:
        return cached
    self._pst = self.prefixes | self.suffix_acronyms | self.suffix_not_acronyms | self.titles
    return self._pst
240269
def __repr__(self) -> str:
    """Return a fixed placeholder string; no instance state is included."""
    label = "<Constants() instance>"
    return label
243272
def __setstate__(self, state: Mapping[str, Any]) -> None:
    """
    Restore an instance from the mapping produced by ``__getstate__``.

    Entries named after a constructor argument are passed to ``__init__`` by
    keyword, so each collection is rebuilt from its own pickled value.
    Passing ``state`` positionally would instead treat the mapping's keys as
    the ``prefixes`` collection and drop every other customised value.
    Remaining entries are restored as plain instance attributes.
    """
    init_keys = (
        'prefixes',
        'suffix_acronyms',
        'suffix_not_acronyms',
        'titles',
        'first_name_titles',
        'conjunctions',
        'capitalization_exceptions',
        'regexes',
    )
    init_kwargs = {key: state[key] for key in init_keys if key in state}
    Constants.__init__(self, **init_kwargs)

    for key, value in state.items():
        if key in init_kwargs:
            continue
        # Properties (e.g. suffixes_prefixes_titles) are derived and have no
        # setter; they are recomputed on demand instead of being restored.
        if isinstance(getattr(type(self), key, None), property):
            continue
        setattr(self, key, value)
246275
def __getstate__(self) -> Mapping[str, Any]:
    """
    Return the picklable state: every attribute listed by ``dir(self)`` whose
    name does not start with an underscore, mapped to its current value.
    """
    return {
        name: getattr(self, name)
        for name in dir(self)
        if not name.startswith('_')
    }
250279
0 commit comments