Skip to content

Commit dc283f3

Browse files
committed
Fix case handling for various capitalization issues
* Fix capitalization of multi-word fixes, camelCase, proper nouns, and abbreviations * Do not lower-case the suggested words from the dictionary during build_dict() * The capitalization decision is made in fix_case()
1 parent c84db35 commit dc283f3

File tree

3 files changed

+550
-7
lines changed

3 files changed

+550
-7
lines changed

codespell_lib/_spellchecker.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,9 @@ def build_dict(
5555
translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars]
5656
for line in f:
5757
[key, data] = line.split("->")
58-
# TODO: For now, convert both to lower.
59-
# Someday we can maybe add support for fixing caps.
58+
# Only convert key to lower case.
59+
# Do not modify data to lower case. Leave it as per dictionary.
6060
key = key.lower()
61-
data = data.lower()
6261
if key not in ignore_words:
6362
add_misspelling(key, data, misspellings)
6463
# generate alternative misspellings/fixes

codespell_lib/_text_util.py

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,46 @@
1717
"""
1818

1919

20+
def is_camel_case_word(input_word: str) -> bool:
    """Return True if *input_word* is a single mixed-case token.

    A token qualifies when it contains no underscore, hyphen or space and
    is neither entirely lower case nor entirely upper case.  Note that a
    plainly capitalised word such as "Skype" also satisfies this test.
    """
    # Any separator means this is not one bare token.
    if any(separator in input_word for separator in ("_", "-", " ")):
        return False
    # Mixed case: differs from both its all-lower and all-upper forms.
    return input_word not in (input_word.lower(), input_word.upper())
28+
29+
30+
def is_camel_case_string(input_string: str) -> bool:
    """Return True if any comma-separated piece of *input_string* is mixed case.

    The string is split on "," and each piece is handed to
    ``is_camel_case_word`` exactly as it is (surrounding whitespace is not
    stripped).
    """
    for candidate in input_string.split(","):
        if is_camel_case_word(candidate):
            return True
    return False
32+
33+
2034
def _title_keep_contractions(text: str) -> str:
    """Title-case *text* like ``str.title()`` without breaking contractions.

    ``str.title()`` treats an apostrophe as a word boundary and turns
    "don't" into "Don'T".  Here an apostrophe that sits between two letters
    is kept inside the word, so the result is "Don't".
    """
    titled = []
    in_word = False
    for index, char in enumerate(text):
        if (
            in_word
            and char in "'\u2019"
            and text[index + 1 : index + 2].isalpha()
        ):
            # Apostrophe inside a word: keep it without ending the word.
            titled.append(char)
            continue
        titled.append(char.lower() if in_word else char.title())
        # Only cased characters continue a word, as in str.title().
        in_word = char.isupper() or char.islower() or char.istitle()
    return "".join(titled)


def fix_case(word: str, fixword: str) -> str:
    """Return *fixword* with its case adapted to the misspelled *word*.

    Args:
        word: The misspelled word as it was found.
        fixword: The suggested fix(es) as written in the dictionary; several
            suggestions are separated by commas.

    Returns:
        The suggestion(s) in upper case, title case, or unchanged, depending
        on the case of *word* and *fixword* (see the inline comments).
    """
    if fixword == fixword.upper():
        # Abbreviation, acronym: fixword is in all upper case.
        # Use fixword as per dictionary.
        # Eg. asscii->ASCII
        return fixword
    if word == word.capitalize() and fixword == fixword.lower():
        # word is capitalized and fixword(s) are in lower case.
        # Capitalize/Title fixword(s).
        # Eg. Weather, Whether,
        return _title_keep_contractions(fixword)
    if word == word.capitalize() and not is_camel_case_string(fixword):
        # word is capitalized and fixword(s) are mixed with no camelCase.
        # Capitalize/Title fixword(s).
        # Eg. skipt->skip, Skype, skipped,
        return _title_keep_contractions(fixword)
    if word == word.upper():
        # word is in all upper case, change fixword to upper.
        # Eg. MONDAY
        return fixword.upper()
    # word is in lower, capitalize, CamelCase or whatever.
    # This also covers the custom-dictionary case where word and fixword
    # differ only in case (Eg. mysql->MySQL).
    # Use fixword as per dictionary.
    return fixword

0 commit comments

Comments
 (0)