diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py
index 7b511e6d3e..60c09dbe04 100644
--- a/codespell_lib/_spellchecker.py
+++ b/codespell_lib/_spellchecker.py
@@ -55,10 +55,9 @@ def build_dict(
         translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars]
         for line in f:
             [key, data] = line.split("->")
-            # TODO: For now, convert both to lower.
-            # Someday we can maybe add support for fixing caps.
+            # Only convert key to lower case.
+            # Do not modify data to lower case. Leave it as per dictionary.
             key = key.lower()
-            data = data.lower()
             if key not in ignore_words:
                 add_misspelling(key, data, misspellings)
             # generate alternative misspellings/fixes
diff --git a/codespell_lib/_text_util.py b/codespell_lib/_text_util.py
index 18a2ec89b4..c97b102a6a 100644
--- a/codespell_lib/_text_util.py
+++ b/codespell_lib/_text_util.py
@@ -17,11 +17,71 @@
 """
 
 
+def is_camel_case_word(input_word: str) -> bool:
+    """Return True if ``input_word`` is one camelCase or PascalCase token.
+
+    A token qualifies when it contains no ``_``, ``-`` or space, is not
+    entirely upper case, and has an upper-case letter after its first
+    character (eBay, MySQL, LessTif).  A plainly capitalized word such as
+    Skype does not qualify.
+    """
+    return (
+        not any(separator in input_word for separator in "_- ")
+        and input_word != input_word.upper()
+        and any(char.isupper() for char in input_word[1:])
+    )
+
+
+def is_camel_case_string(input_string: str) -> bool:
+    """Return True if any comma-separated candidate is camelCase.
+
+    Candidates are stripped before testing, so the blank that follows each
+    comma (as in "skip, eBay") cannot hide a camelCase candidate.
+    """
+    candidates = input_string.split(",")
+    return any(is_camel_case_word(word.strip()) for word in candidates)
+
+
+def _capitalize_candidates(fixword: str) -> str:
+    """Upper-case the first letter of each comma-separated candidate.
+
+    Unlike str.title(), every other character is left untouched, so
+    "don't" becomes "Don't" (not "Don'T") and "USA" stays "USA".
+    """
+    capitalized = []
+    for candidate in fixword.split(","):
+        text = candidate.lstrip()
+        padding = candidate[: len(candidate) - len(text)]
+        capitalized.append(padding + text[:1].upper() + text[1:])
+    return ",".join(capitalized)
+
+
 def fix_case(word: str, fixword: str) -> str:
-    if word == word.capitalize():
-        return ", ".join(w.strip().capitalize() for w in fixword.split(","))
+    """Return ``fixword`` with its case adapted to the misspelled ``word``.
+
+    Args:
+        word: The misspelled word exactly as it appears in the checked text.
+        fixword: The suggested fix, or several comma-separated fixes, with
+            the case used in the dictionary.
+
+    Returns:
+        ``fixword`` unchanged, with each candidate capitalized, or entirely
+        in upper case, depending on the case of both arguments.
+    """
+    if fixword == fixword.upper():
+        # Abbreviation or acronym: fixword is in all upper case.
+        # Use fixword as per dictionary.
+        # Eg. asscii->ASCII
+        return fixword
+    if word == word.capitalize() and not is_camel_case_string(fixword):
+        # word is capitalized and no fixword is camelCase.
+        # Capitalize every fixword, leaving the other letters untouched.
+        # Eg. Skipt -> Skip, Skype, Skipped
+        return _capitalize_candidates(fixword)
     if word == word.upper():
+        # word is in all upper case, change fixword to upper.
+        # Eg. MONDAY
         return fixword.upper()
-    # they are both lower case
-    # or we don't have any idea
+    # word is lower case or mixed case, or a fixword is camelCase
+    # (Eg. mysql->MySQL, Ebya->eBay): use fixword as per dictionary.
     return fixword
diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py
index 5120e1e8a1..637b1f79c2 100644
--- a/codespell_lib/tests/test_basic.py
+++ b/codespell_lib/tests/test_basic.py
@@ -845,6 +845,515 @@ def test_case_handling_in_fixes(
     _helper_test_case_handling_in_fixes(tmp_path, capsys, reason=True)
 
 
+def _helper_test_case_handling_in_fix_case(
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+    dict_entry: str,
+    bad_input: str,
+    expected_output: str,
+    reason: bool,
+) -> None:
+    dictionary_name = tmp_path / "dictionary.txt"
+    if reason:
+        dictionary_name.write_text(dict_entry + " reason\n")
+    else:
+        dictionary_name.write_text(dict_entry + "\n")
+
+    # write the text that contains the misspelling under test
+    fname = tmp_path / "bad.txt"
+    fname.write_text(bad_input + "\n")
+    result = cs.main("-D", dictionary_name, fname, std=True)
+    assert isinstance(result, tuple)
+    code, stdout, _ = result
+    assert code == 1
+    # the case-adjusted fixes must appear in the output
+    assert expected_output in stdout
+    # the reason, if any, must not be modified
+    if reason:
+        assert "reason" in stdout
+
+
+def test_case_handling_in_fix_case(
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """Test various case handling in fix_case() function."""
+    # Test typical: Both misspelled and multiple suggested words are coded
+    # as lower case in dictionary.
+ # Verifying: Capitalize is consistent for all suggested words + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "adoptor->adopter, adaptor,", + "early adoptor", + "adopter, adaptor", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "adoptor->adopter, adaptor,", + "Early Adoptor", + "Adopter, Adaptor", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "adoptor->adopter, adaptor,", + "EARLY ADOPTOR", + "ADOPTER, ADAPTOR", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "adoptor->adopter, adaptor,", + "EaRlY AdOpToR", + "adopter, adaptor", + reason=False, + ) + # Verifying: Capitalize is consistent for all suggested words + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "adoptor->adopter, adaptor,", + "early adoptor", + "adopter, adaptor", + reason=True, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "adoptor->adopter, adaptor,", + "Early Adoptor", + "Adopter, Adaptor", + reason=True, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "adoptor->adopter, adaptor,", + "EARLY ADOPTOR", + "ADOPTER, ADAPTOR", + reason=True, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "adoptor->adopter, adaptor,", + "EaRlY AdOpToR", + "adopter, adaptor", + reason=True, + ) + # Test abbreviation, acronym, initialism: Suggested word coded as + # upper case in dictionary. 
+ _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "asscii->ASCII", + "asscii", + "ASCII", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "asscii->ASCII", + "Asscii", + "ASCII", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "asscii->ASCII", + "AssCii", + "ASCII", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "asscii->ASCII", + "ASSCII", + "ASCII", + reason=False, + ) + # Test proper nouns: Misspelled coded as lower case in dictionary. + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "austrailia->Australia", + "austrailia", + "Australia", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "austrailia->Australia", + "Austrailia", + "Australia", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "austrailia->Australia", + "AustRailia", + "Australia", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "austrailia->Australia", + "AUSTRAILIA", + "AUSTRALIA", + reason=False, + ) + # Test proper nouns, brand names: Misspelled coded as capitalize + # in dictionary. + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "Micosoft->Microsoft", + "micosoft", + "Microsoft", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "Micosoft->Microsoft", + "Micosoft", + "Microsoft", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "Micosoft->Microsoft", + "MicoSoft", + "Microsoft", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "Micosoft->Microsoft", + "MICOSOFT", + "MICROSOFT", + reason=False, + ) + # Test typical single: Both misspelled and suggested word both coded + # as lower case in dictionary. 
+ _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "pinapple->pineapple", + "pinapple", + "pineapple", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "pinapple->pineapple", + "Pinapple", + "Pineapple", + reason=False, + ) + # Test typical multiple: Both misspelled and multiple suggested words + # both coded as lower case in dictionary. + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "uspported->supported, unsupported,", + "uspported", + "supported, unsupported", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "uspported->supported, unsupported,", + "Uspported", + "Supported, Unsupported", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "uspported->supported, unsupported,", + "USPPORTED", + "SUPPORTED, UNSUPPORTED", + reason=False, + ) + # Test typical multiple & mix: Misspelled coded in lower. Multiple + # suggested words coded as lower & capitalize case in dictionary. + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "skipt->skip, Skype, skipped,", + "skipt", + "skip, Skype, skipped", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "skipt->skip, Skype, skipped,", + "Skipt", + "Skip, Skype, Skipped", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "skipt->skip, Skype, skipped,", + "SKIPT", + "SKIP, SKYPE, SKIPPED", + reason=False, + ) + # Test CamelCase basic: Suggested word coded as CamelCase in dictionary. 
+ _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "lesstiff->LessTif", + "lesstiff", + "LessTif", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "lesstiff->LessTif", + "lessTiff", + "LessTif", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "lesstiff->LessTif", + "Lesstiff", + "LessTif", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "lesstiff->LessTif", + "LessTiff", + "LessTif", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "lesstiff->LessTif", + "LESSTIFF", + "LESSTIF", + reason=False, + ) + # Test CamelCase brand names: Suggested word coded as CamelCase + # in dictionary. + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mangodb->MongoDB", + "mangodb", + "MongoDB", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mangodb->MongoDB", + "mangoDb", + "MongoDB", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mangodb->MongoDB", + "mangoDB", + "MongoDB", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mangodb->MongoDB", + "Mangodb", + "MongoDB", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mangodb->MongoDB", + "MangoDb", + "MongoDB", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mangodb->MongoDB", + "MangoDB", + "MongoDB", + reason=False, + ) + # Test CamelCase brand names: Suggested word coded as CamelCase + # in dictionary. 
+ _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "ebya->eBay", + "ebya", + "eBay", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "ebya->eBay", + "eBya", + "eBay", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "ebya->eBay", + "Ebya", + "eBay", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "ebya->eBay", + "EBya", + "eBay", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "ebya->eBay", + "EBYA", + "EBAY", + reason=False, + ) + # Special Test CamelCase, brand names: Misspelled is correct spelling + # but incorrect case. Suggested word is coded as CamelCase in + # dictionary. For custom dictionary only. + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mariadb->MariaDB", + "mariadb", + "MariaDB", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mariadb->MariaDB", + "mariaDb", + "MariaDB", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mariadb->MariaDB", + "mariaDB", + "MariaDB", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mariadb->MariaDB", + "Mariadb", + "MariaDB", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mariadb->MariaDB", + "MariaDb", + "MariaDB", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mariadb->MariaDB", + "MariaDB", + "MariaDB", + reason=False, + ) + # Special Test CamelCase, brand names: Misspelled is correct spelling + # but incorrect case. Multiple suggested words are coded as CamelCase + # and lower case in dictionary. For custom dictionary only. 
+ _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mysql->MySQL, mysql,", + "mysql", + "MySQL, mysql", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mysql->MySQL, mysql,", + "mySql", + "MySQL, mysql", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mysql->MySQL, mysql,", + "mySQL", + "MySQL, mysql", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mysql->MySQL, mysql,", + "Mysql", + "MySQL, mysql", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mysql->MySQL, mysql,", + "MySql", + "MySQL, mysql", + reason=False, + ) + _helper_test_case_handling_in_fix_case( + tmp_path, + capsys, + "mysql->MySQL, mysql,", + "MySQL", + "MySQL, mysql", + reason=False, + ) + + def test_context( tmp_path: Path, capsys: pytest.CaptureFixture[str],