Skip to content

Commit 579a61c

Browse files
committed
Add type hints
This also required some refactoring: * I removed some code that seemed dead. * I created a subclass of TupleManager for regexes with a fallback. When accessing regexes previously, it was possible to get None. * I had to move all setters next to their getters due to a bug in mypy. See python/mypy#1465
1 parent ba42c4c commit 579a61c

6 files changed

Lines changed: 564 additions & 504 deletions

File tree

nameparser/config/__init__.py

Lines changed: 66 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,12 @@
2727
``hn.C`` will be a reference to the module config, possibly yielding
2828
unexpected results. See `Customizing the Parser <customize.html>`_.
2929
"""
30+
import re
3031
import sys
31-
from collections.abc import Set
32+
from collections.abc import Iterable, Iterator, Mapping, Set
33+
from typing import Any, TypeVar
34+
35+
from typing_extensions import Self
3236

3337
from nameparser.util import lc
3438
from nameparser.config.prefixes import PREFIXES
@@ -38,7 +42,7 @@
3842
from nameparser.config.suffixes import SUFFIX_NOT_ACRONYMS
3943
from nameparser.config.titles import TITLES
4044
from nameparser.config.titles import FIRST_NAME_TITLES
41-
from nameparser.config.regexes import REGEXES
45+
from nameparser.config.regexes import EMPTY_REGEX, REGEXES
4246

4347
DEFAULT_ENCODING = 'UTF-8'
4448

@@ -55,25 +59,25 @@ class SetManager(Set):
5559
5660
'''
5761

58-
def __init__(self, elements):
62+
def __init__(self, elements: Iterable[str]) -> None:
5963
self.elements = set(elements)
6064

61-
def __call__(self):
65+
def __call__(self) -> Set[str]:
6266
return self.elements
6367

64-
def __repr__(self):
68+
def __repr__(self) -> str:
6569
return "SetManager({})".format(self.elements) # used for docs
6670

67-
def __iter__(self):
71+
def __iter__(self) -> Iterator[str]:
6872
return iter(self.elements)
6973

70-
def __contains__(self, value):
74+
def __contains__(self, value: object) -> bool:
7175
return value in self.elements
7276

73-
def __len__(self):
77+
def __len__(self) -> int:
7478
return len(self.elements)
7579

76-
def add_with_encoding(self, s, encoding=None):
80+
def add_with_encoding(self, s: str, encoding: str | None = None) -> None:
7781
"""
7882
Add the lower case and no-period version of the string to the set. Pass an
7983
explicit `encoding` parameter to specify the encoding of binary strings that
@@ -87,44 +91,58 @@ def add_with_encoding(self, s, encoding=None):
8791
s = s.decode(encoding)
8892
self.elements.add(lc(s))
8993

90-
def add(self, *strings):
94+
def add(self, *strings: str) -> Self:
9195
"""
9296
Add the lower case and no-period version of the string arguments to the set.
9397
Can pass a list of strings. Returns ``self`` for chaining.
9498
"""
95-
[self.add_with_encoding(s) for s in strings]
99+
for s in strings:
100+
self.add_with_encoding(s)
101+
96102
return self
97103

98-
def remove(self, *strings):
104+
def remove(self, *strings: str) -> Self:
99105
"""
100106
Remove the lower case and no-period version of the string arguments from the set.
101107
Returns ``self`` for chaining.
102108
"""
103-
[self.elements.remove(lc(s)) for s in strings if lc(s) in self.elements]
109+
for s in strings:
110+
if (lower := lc(s)) in self.elements:
111+
self.elements.remove(lower)
112+
104113
return self
105114

106115

107-
class TupleManager(dict):
116+
T = TypeVar('T')
117+
118+
119+
class TupleManager(dict[str, T]):
108120
'''
109121
A dictionary with dot.notation access. Subclass of ``dict``. Makes the tuple constants
110122
more friendly.
111123
'''
112124

113-
def __getattr__(self, attr):
125+
def __getattr__(self, attr: str) -> T | None:
114126
return self.get(attr)
127+
115128
__setattr__ = dict.__setitem__
116129
__delattr__ = dict.__delitem__
117130

118-
def __getstate__(self):
131+
def __getstate__(self) -> Mapping[str, T]:
119132
return dict(self)
120133

121-
def __setstate__(self, state):
122-
self.__init__(state)
134+
def __setstate__(self, state: Mapping[str, T]) -> None:
135+
self.update(state)
123136

124-
def __reduce__(self):
137+
def __reduce__(self) -> tuple[type, tuple[()], Mapping[str, T]]:
125138
return (TupleManager, (), self.__getstate__())
126139

127140

141+
class RegexTupleManager(TupleManager[re.Pattern[str]]):
142+
def __getattr__(self, attr: str) -> re.Pattern[str]:
143+
return self.get(attr, EMPTY_REGEX)
144+
145+
128146
class Constants:
129147
"""
130148
An instance of this class holds all of the configuration constants for the parser.
@@ -149,6 +167,17 @@ class Constants:
149167
:py:attr:`regexes` wrapped with :py:class:`TupleManager`.
150168
"""
151169

170+
prefixes: SetManager
171+
suffix_acronyms: SetManager
172+
suffix_not_acronyms: SetManager
173+
titles: SetManager
174+
first_name_titles: SetManager
175+
conjunctions: SetManager
176+
capitalization_exceptions: TupleManager[str]
177+
regexes: RegexTupleManager
178+
179+
_pst: Set[str] | None
180+
152181
string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
153182
"""
154183
The default string format used for all new `HumanName` instances.
@@ -168,17 +197,17 @@ class Constants:
168197
empty_attribute_default = ''
169198
"""
170199
Default return value for empty attributes.
171-
200+
172201
.. doctest::
173-
202+
174203
>>> from nameparser.config import CONSTANTS
175204
>>> CONSTANTS.empty_attribute_default = None
176205
>>> name = HumanName("John Doe")
177206
>>> name.title
178207
None
180209
>>> name.first
180209
'John'
181-
210+
182211
"""
183212

184213
capitalize_name = False
@@ -213,38 +242,38 @@ class Constants:
213242
"""
214243

215244
def __init__(self,
216-
prefixes=PREFIXES,
217-
suffix_acronyms=SUFFIX_ACRONYMS,
218-
suffix_not_acronyms=SUFFIX_NOT_ACRONYMS,
219-
titles=TITLES,
220-
first_name_titles=FIRST_NAME_TITLES,
221-
conjunctions=CONJUNCTIONS,
222-
capitalization_exceptions=CAPITALIZATION_EXCEPTIONS,
223-
regexes=REGEXES
224-
):
245+
prefixes: Iterable[str] = PREFIXES,
246+
suffix_acronyms: Iterable[str] = SUFFIX_ACRONYMS,
247+
suffix_not_acronyms: Iterable[str] = SUFFIX_NOT_ACRONYMS,
248+
titles: Iterable[str] = TITLES,
249+
first_name_titles: Iterable[str] = FIRST_NAME_TITLES,
250+
conjunctions: Iterable[str] = CONJUNCTIONS,
251+
capitalization_exceptions: TupleManager[str] | Iterable[tuple[str, str]] = CAPITALIZATION_EXCEPTIONS,
252+
regexes: RegexTupleManager | TupleManager[re.Pattern[str]] | Iterable[tuple[str, re.Pattern[str]]] = REGEXES
253+
) -> None:
225254
self.prefixes = SetManager(prefixes)
226255
self.suffix_acronyms = SetManager(suffix_acronyms)
227256
self.suffix_not_acronyms = SetManager(suffix_not_acronyms)
228257
self.titles = SetManager(titles)
229258
self.first_name_titles = SetManager(first_name_titles)
230259
self.conjunctions = SetManager(conjunctions)
231260
self.capitalization_exceptions = TupleManager(capitalization_exceptions)
232-
self.regexes = TupleManager(regexes)
261+
self.regexes = RegexTupleManager(regexes)
233262
self._pst = None
234263

235264
@property
236-
def suffixes_prefixes_titles(self):
265+
def suffixes_prefixes_titles(self) -> Set[str]:
237266
if not self._pst:
238267
self._pst = self.prefixes | self.suffix_acronyms | self.suffix_not_acronyms | self.titles
239268
return self._pst
240269

241-
def __repr__(self):
270+
def __repr__(self) -> str:
242271
return "<Constants() instance>"
243272

244-
def __setstate__(self, state):
245-
self.__init__(state)
273+
def __setstate__(self, state: Mapping[str, Any]) -> None:
274+
Constants.__init__(self, state)
246275

247-
def __getstate__(self):
276+
def __getstate__(self) -> Mapping[str, Any]:
248277
attrs = [x for x in dir(self) if not x.startswith('_')]
249278
return dict([(a, getattr(self, a)) for a in attrs])
250279

nameparser/config/regexes.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
'[\u2600-\u26FF\u2700-\u27BF])+',
1717
re.UNICODE)
1818

19+
EMPTY_REGEX = re.compile('')
20+
1921
REGEXES = set([
2022
("spaces", re.compile(r"\s+", re.U)),
2123
("word", re.compile(r"(\w|\.)+", re.U)),

0 commit comments

Comments
 (0)