From 11d012a2aaf45c09bfff248a5fdb170af72f1709 Mon Sep 17 00:00:00 2001 From: Serhan Date: Wed, 11 Feb 2026 12:20:17 -0500 Subject: [PATCH 1/2] feat: warn on misspelled language suffix in prompt filenames (#451) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds fuzzy matching (Levenshtein distance ≤ 2, token length ≥ 4) to detect misspelled language suffixes in prompt filenames and warn the user before falling back to default_language. Refactors _is_known_language to share the language set via _get_known_languages. Co-Authored-By: Claude Opus 4.6 --- pdd/construct_paths.py | 92 ++++++++++--- tests/test_construct_paths.py | 247 ++++++++++++++++++++++++++++++++-- 2 files changed, 309 insertions(+), 30 deletions(-) diff --git a/pdd/construct_paths.py b/pdd/construct_paths.py index 1230e7af..df6951c7 100644 --- a/pdd/construct_paths.py +++ b/pdd/construct_paths.py @@ -533,56 +533,93 @@ def _candidate_prompt_path(input_files: Dict[str, Path]) -> Path | None: # New helper function to check if a language is known -def _is_known_language(language_name: str) -> bool: - """Return True if the language is recognized. +def _get_known_languages() -> set: + """Return the set of known language names (lowercase). - Prefer CSV in PDD_PATH if available; otherwise fall back to a built-in set - so basename/language inference does not fail when PDD_PATH is unset. + Prefer CSV in PDD_PATH if available; otherwise fall back to a built-in set. """ - language_name_lower = (language_name or "").lower() - if not language_name_lower: - return False - builtin_languages = { 'python', 'javascript', 'typescript', 'typescriptreact', 'javascriptreact', 'java', 'cpp', 'c', 'go', 'ruby', 'rust', 'kotlin', 'swift', 'csharp', 'php', 'scala', 'r', 'lua', 'perl', 'bash', 'shell', 'powershell', 'sql', 'prompt', 'html', 'css', 'makefile', - # Additional languages from language_format.csv 'haskell', 'dart', 'elixir', 'clojure', 'julia', 'erlang', 'fortran', 'nim', 'ocaml', 'groovy', 'coffeescript', 'fish', 'zsh', - 'prisma', 'lean', 'agda', - # Frontend / templating + 'prisma', 'lean', 'agda', 'lisp', 'scheme', 'ada', 'svelte', 'vue', 'scss', 'sass', 'less', 'jinja', 'handlebars', 'pug', 'ejs', 'twig', - # Modern / systems languages 'zig', 'mojo', 'solidity', - # Config / query / infra 'graphql', 'protobuf', 'terraform', 'hcl', 'nix', 'glsl', 'wgsl', 'starlark', 'dockerfile', - # Common data and config formats for architecture prompts and configs 'json', 'jsonl', 'yaml', 'yml', 'toml', 'ini' } pdd_path_str = os.getenv('PDD_PATH') if not pdd_path_str: - return language_name_lower in builtin_languages + return builtin_languages csv_file_path = Path(pdd_path_str) / 'data' / 'language_format.csv' if not csv_file_path.is_file(): - return language_name_lower in builtin_languages + return builtin_languages try: with open(csv_file_path, mode='r', encoding='utf-8', newline='') as csvfile: reader = csv.DictReader(csvfile) + csv_languages = set() for row in reader: - if row.get('language', '').lower() == language_name_lower: - return True + lang = row.get('language', '').strip().lower() + if lang: + csv_languages.add(lang) + return (csv_languages | builtin_languages) if csv_languages else builtin_languages except csv.Error as e: console.print(f"[error]CSV Error reading {csv_file_path}: {e}", style="error") - return language_name_lower in builtin_languages + return builtin_languages + - return language_name_lower in builtin_languages +def _is_known_language(language_name: str) -> bool: + """Return True if the language is recognized.""" + language_name_lower = (language_name or "").lower() + if not language_name_lower: + return False + return language_name_lower in _get_known_languages() + + +def _levenshtein_distance(s1: str, s2: str) -> int: + """Compute the Levenshtein edit distance between two strings.""" + if len(s1) < len(s2): + return _levenshtein_distance(s2, s1) + if not s2: + return len(s1) + prev_row = list(range(len(s2) + 1)) + for i, c1 in enumerate(s1): + curr_row = [i + 1] + for j, c2 in enumerate(s2): + cost = 0 if c1 == c2 else 1 + curr_row.append(min(curr_row[j] + 1, prev_row[j + 1] + 1, prev_row[j] + cost)) + prev_row = curr_row + return prev_row[-1] + + +def _closest_known_language(token: str, max_distance: int = 2) -> str | None: + """Return the closest known language if within max_distance, else None. + + Only considers tokens with length >= 4 to avoid false positives on short + language names like 'r', 'd', 'go'. + """ + if len(token) < 4: + return None + token_lower = token.lower() + known = _get_known_languages() + best_lang = None + best_dist = max_distance + 1 + for lang in known: + if len(lang) < 4: + continue + dist = _levenshtein_distance(token_lower, lang) + if dist < best_dist: + best_dist = dist + best_lang = lang + return best_lang if best_dist <= max_distance else None def _strip_language_suffix(path_like: os.PathLike[str]) -> str: @@ -753,12 +790,25 @@ def _determine_language( # Check if the token is a known language using the new helper if _is_known_language(token): return token.lower() + # Warn if token looks like a misspelled language + close_match = _closest_known_language(token) + if close_match: + click.echo( + f"Warning: '{token}' in prompt filename is not a recognized language. " + f"Did you mean '{close_match}'?", + err=True, + ) # 4 - Special handling for detect command - default to prompt for LLM prompts if command == "detect" and "change_file" in input_file_paths: return "prompt" - # 5 - If no language determined, raise error + # 5 - Fallback to default_language from .pddrc + default_lang = command_options.get("default_language") + if default_lang: + return default_lang.lower() + + # 6 - If no language determined, raise error raise ValueError("Could not determine language from input files or options.") diff --git a/tests/test_construct_paths.py b/tests/test_construct_paths.py index 68079c1c..a442563b 100644 --- a/tests/test_construct_paths.py +++ b/tests/test_construct_paths.py @@ -284,7 +284,8 @@ def dynamic_get_extension(lang_candidate): with pytest.raises(ValueError) as excinfo: with patch('pdd.construct_paths.get_extension', side_effect=dynamic_get_extension), \ patch('pdd.construct_paths.get_language', return_value=None), \ - patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths_dict_str): + patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths_dict_str), \ + patch('pdd.construct_paths._find_pddrc_file', return_value=None): construct_paths( input_file_paths, force, quiet, command, command_options ) @@ -386,7 +387,8 @@ def mock_get_extension_func_case3(lang): def dynamic_get_ext_case4(lang): return "" # Always return "" with patch('pdd.construct_paths.get_extension', side_effect=dynamic_get_ext_case4), \ patch('pdd.construct_paths.get_language', return_value=None), \ - patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths_dict_str): + patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths_dict_str), \ + patch('pdd.construct_paths._find_pddrc_file', return_value=None): with pytest.raises(ValueError) as excinfo: construct_paths(input_file_paths_4, True, True, 'generate', command_options_4) assert "Could not determine language" in str(excinfo.value) @@ -695,7 +697,8 @@ def test_construct_paths_unsupported_extension_error(tmpdir): def dynamic_get_ext_unsupported(lang): return "" # Always return "" with patch('pdd.construct_paths.get_extension', side_effect=dynamic_get_ext_unsupported), \ patch('pdd.construct_paths.get_language', return_value=None), \ - patch('pdd.construct_paths.generate_output_paths'): # Mock to prevent its errors + patch('pdd.construct_paths.generate_output_paths'), \ + patch('pdd.construct_paths._find_pddrc_file', return_value=None): with pytest.raises(ValueError) as excinfo: construct_paths( input_file_paths, force, quiet, command, command_options @@ -1752,14 +1755,15 @@ def test_construct_paths_change_command_language_detection(tmpdir): # Test with a different command without language indicators with patch('pdd.construct_paths.get_extension', side_effect=lambda lang: '.py' if lang == 'python' else ''), \ patch('pdd.construct_paths.get_language', side_effect=lambda ext: 'python' if ext == '.py' else ''), \ - patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths_dict_str): - + patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths_dict_str), \ + patch('pdd.construct_paths._find_pddrc_file', return_value=None): + # The "generate" command should raise ValueError with no language indicators with pytest.raises(ValueError) as excinfo: _, input_strings, output_file_paths, language = construct_paths( input_file_paths_no_lang, force, quiet, "generate", command_options ) - + # The error should be about not being able to determine language assert "Could not determine language" in str(excinfo.value) @@ -1821,17 +1825,79 @@ def test_construct_paths_detect_command_language_detection(tmpdir): # Test with a different command without language indicators with patch('pdd.construct_paths.get_extension', side_effect=lambda lang: '.prompt' if lang == 'prompt' else '.py' if lang == 'python' else ''), \ patch('pdd.construct_paths.get_language', side_effect=lambda ext: 'python' if ext == '.py' else ''), \ - patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths_dict_str): - + patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths_dict_str), \ + patch('pdd.construct_paths._find_pddrc_file', return_value=None): + # The "generate" command should raise ValueError with no language indicators with pytest.raises(ValueError) as excinfo: _, input_strings, output_file_paths, language = construct_paths( input_file_paths_no_lang, force, quiet, "generate", command_options ) - + # The error should be about not being able to determine language assert "Could not determine language" in str(excinfo.value) + +def test_construct_paths_default_language_fallback(tmpdir): + """ + Test that _determine_language falls back to default_language from .pddrc + when no other language indicator is available (Issue #451). + """ + tmp_path = Path(str(tmpdir)) + prompt_file = tmp_path / 'test.prompt' + prompt_file.write_text('write a hello function') + + mock_output_paths = {'output': str(tmp_path / 'output.py')} + + # Case 1: default_language in command_options should be used as fallback + input_file_paths = {'prompt_file': str(prompt_file)} + command_options = {'default_language': 'python'} + with patch('pdd.construct_paths.get_extension', return_value='.py'), \ + patch('pdd.construct_paths.get_language', return_value=None), \ + patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths), \ + patch('pdd.construct_paths._find_pddrc_file', return_value=None): + _, _, _, language = construct_paths( + input_file_paths, True, True, 'generate', command_options + ) + assert language == 'python' + + # Case 2: explicit --language flag overrides default_language + command_options_2 = {'language': 'typescript', 'default_language': 'python'} + with patch('pdd.construct_paths.get_extension', return_value='.ts'), \ + patch('pdd.construct_paths.get_language', return_value=None), \ + patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths), \ + patch('pdd.construct_paths._find_pddrc_file', return_value=None): + _, _, _, language = construct_paths( + input_file_paths, True, True, 'generate', command_options_2 + ) + assert language == 'typescript' + + # Case 3: prompt filename suffix overrides default_language + prompt_file_js = tmp_path / 'test_javascript.prompt' + prompt_file_js.write_text('write a hello function') + input_file_paths_3 = {'prompt_file': str(prompt_file_js)} + command_options_3 = {'default_language': 'python'} + with patch('pdd.construct_paths.get_extension', side_effect=lambda l: '.js' if l == 'javascript' else '.py' if l == 'python' else ''), \ + patch('pdd.construct_paths.get_language', return_value=None), \ + patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths), \ + patch('pdd.construct_paths._find_pddrc_file', return_value=None): + _, _, _, language = construct_paths( + input_file_paths_3, True, True, 'generate', command_options_3 + ) + assert language == 'javascript' + + # Case 4: default_language is case-insensitive + command_options_4 = {'default_language': 'Python'} + with patch('pdd.construct_paths.get_extension', return_value='.py'), \ + patch('pdd.construct_paths.get_language', return_value=None), \ + patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths), \ + patch('pdd.construct_paths._find_pddrc_file', return_value=None): + _, _, _, language = construct_paths( + input_file_paths, True, True, 'generate', command_options_4 + ) + assert language == 'python' + + def test_construct_paths_bug_command_language_detection(tmpdir): """ Test that construct_paths correctly handles None language values for the bug command. @@ -3035,3 +3101,166 @@ def test_construct_paths_sync_mode_respects_env_prompts_dir(tmp_path, monkeypatc assert resolved_config["prompts_dir"] == "/custom/sync/prompts", \ f"Expected prompts_dir='/custom/sync/prompts' from PDD_PROMPTS_DIR in sync mode, got '{resolved_config['prompts_dir']}'" + +# --- Tests for _get_known_languages, _levenshtein_distance, _closest_known_language --- + +from pdd.construct_paths import _get_known_languages, _levenshtein_distance, _closest_known_language + + +class TestGetKnownLanguages: + """Tests for _get_known_languages helper.""" + + def test_returns_set(self): + result = _get_known_languages() + assert isinstance(result, set) + + def test_contains_common_languages(self): + known = _get_known_languages() + for lang in ['python', 'javascript', 'typescript', 'rust', 'go', 'java']: + assert lang in known + + def test_all_lowercase(self): + known = _get_known_languages() + for lang in known: + assert lang == lang.lower() + + +class TestLevenshteinDistance: + """Tests for _levenshtein_distance.""" + + def test_identical_strings(self): + assert _levenshtein_distance("python", "python") == 0 + + def test_single_deletion(self): + assert _levenshtein_distance("python", "pythn") == 1 + + def test_single_insertion(self): + assert _levenshtein_distance("pythn", "python") == 1 + + def test_single_substitution(self): + assert _levenshtein_distance("python", "pxthon") == 1 + + def test_empty_strings(self): + assert _levenshtein_distance("", "") == 0 + assert _levenshtein_distance("abc", "") == 3 + assert _levenshtein_distance("", "abc") == 3 + + def test_completely_different(self): + assert _levenshtein_distance("abc", "xyz") == 3 + + def test_typscript_typo(self): + assert _levenshtein_distance("typscript", "typescript") == 1 + + def test_javasript_typo(self): + assert _levenshtein_distance("javasript", "javascript") == 1 + + +class TestClosestKnownLanguage: + """Tests for _closest_known_language fuzzy matching.""" + + def test_typscript_matches_typescript(self): + assert _closest_known_language("typscript") == "typescript" + + def test_pythn_matches_python(self): + assert _closest_known_language("pythn") == "python" + + def test_javasript_matches_javascript(self): + assert _closest_known_language("javasript") == "javascript" + + def test_typescrit_matches_typescript(self): + assert _closest_known_language("typescrit") == "typescript" + + def test_exact_match_returns_none(self): + # Exact matches are handled by _is_known_language, not this function + # But if called, distance is 0 which is <= 2, so it returns the match + assert _closest_known_language("typescript") == "typescript" + + def test_gibberish_returns_none(self): + assert _closest_known_language("foobar") is None + assert _closest_known_language("xyzzy") is None + + def test_short_tokens_return_none(self): + assert _closest_known_language("go") is None + assert _closest_known_language("r") is None + assert _closest_known_language("d") is None + assert _closest_known_language("abc") is None + + def test_too_distant_returns_none(self): + assert _closest_known_language("abcdefgh") is None + + def test_case_insensitive(self): + assert _closest_known_language("Typscript") == "typescript" + assert _closest_known_language("PYTHN") == "python" + + +class TestMisspelledLanguageWarning: + """Tests for the warning when a prompt filename has a misspelled language suffix.""" + + def test_misspelled_suffix_warns_and_falls_back(self, tmpdir, capsys): + """A misspelled language suffix should warn and fall back to default_language.""" + tmp_path = Path(str(tmpdir)) + prompt_file = tmp_path / 'new_typscript.prompt' + prompt_file.write_text('write a hello function') + + input_file_paths = {'prompt_file': str(prompt_file)} + command_options = {'default_language': 'python'} + mock_output_paths = {'output': str(tmp_path / 'output.py')} + + with patch('pdd.construct_paths.get_extension', return_value='.py'), \ + patch('pdd.construct_paths.get_language', return_value=None), \ + patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths), \ + patch('pdd.construct_paths._find_pddrc_file', return_value=None): + _, _, _, language = construct_paths( + input_file_paths, True, True, 'generate', command_options + ) + # Falls back to default_language + assert language == 'python' + + captured = capsys.readouterr() + assert "typscript" in captured.err + assert "typescript" in captured.err + + def test_correct_suffix_no_warning(self, tmpdir, capsys): + """A correctly spelled language suffix should not produce a warning.""" + tmp_path = Path(str(tmpdir)) + prompt_file = tmp_path / 'new_typescript.prompt' + prompt_file.write_text('write a hello function') + + input_file_paths = {'prompt_file': str(prompt_file)} + command_options = {} + mock_output_paths = {'output': str(tmp_path / 'output.ts')} + + with patch('pdd.construct_paths.get_extension', side_effect=lambda l: '.ts' if l == 'typescript' else ''), \ + patch('pdd.construct_paths.get_language', return_value=None), \ + patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths), \ + patch('pdd.construct_paths._find_pddrc_file', return_value=None): + _, _, _, language = construct_paths( + input_file_paths, True, True, 'generate', command_options + ) + assert language == 'typescript' + + captured = capsys.readouterr() + assert "Did you mean" not in captured.err + + def test_no_warning_for_non_language_suffix(self, tmpdir, capsys): + """A suffix that's not close to any language should not warn.""" + tmp_path = Path(str(tmpdir)) + prompt_file = tmp_path / 'new_widgets.prompt' + prompt_file.write_text('write widgets') + + input_file_paths = {'prompt_file': str(prompt_file)} + command_options = {'default_language': 'python'} + mock_output_paths = {'output': str(tmp_path / 'output.py')} + + with patch('pdd.construct_paths.get_extension', return_value='.py'), \ + patch('pdd.construct_paths.get_language', return_value=None), \ + patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths), \ + patch('pdd.construct_paths._find_pddrc_file', return_value=None): + _, _, _, language = construct_paths( + input_file_paths, True, True, 'generate', command_options + ) + assert language == 'python' + + captured = capsys.readouterr() + assert "Did you mean" not in captured.err + From 553866c64224cd609d7688de9f341667835f3038 Mon Sep 17 00:00:00 2001 From: Serhan Date: Wed, 11 Feb 2026 18:43:09 -0500 Subject: [PATCH 2/2] refactor: remove fuzzy language matching to handle in separate PR Move Levenshtein distance and closest language matching to a future feature PR. Keep _get_known_languages refactor and default_language fallback. Also fix language grouping per Copilot review. Co-Authored-By: Claude Opus 4.6 --- pdd/construct_paths.py | 48 +----------- tests/test_construct_paths.py | 142 +--------------------------------- 2 files changed, 4 insertions(+), 186 deletions(-) diff --git a/pdd/construct_paths.py b/pdd/construct_paths.py index df6951c7..95548ae1 100644 --- a/pdd/construct_paths.py +++ b/pdd/construct_paths.py @@ -545,7 +545,8 @@ def _get_known_languages() -> set: 'powershell', 'sql', 'prompt', 'html', 'css', 'makefile', 'haskell', 'dart', 'elixir', 'clojure', 'julia', 'erlang', 'fortran', 'nim', 'ocaml', 'groovy', 'coffeescript', 'fish', 'zsh', - 'prisma', 'lean', 'agda', 'lisp', 'scheme', 'ada', + 'prisma', 'lean', 'agda', + 'lisp', 'scheme', 'ada', 'svelte', 'vue', 'scss', 'sass', 'less', 'jinja', 'handlebars', 'pug', 'ejs', 'twig', 'zig', 'mojo', 'solidity', @@ -584,43 +585,6 @@ def _is_known_language(language_name: str) -> bool: return language_name_lower in _get_known_languages() -def _levenshtein_distance(s1: str, s2: str) -> int: - """Compute the Levenshtein edit distance between two strings.""" - if len(s1) < len(s2): - return _levenshtein_distance(s2, s1) - if not s2: - return len(s1) - prev_row = list(range(len(s2) + 1)) - for i, c1 in enumerate(s1): - curr_row = [i + 1] - for j, c2 in enumerate(s2): - cost = 0 if c1 == c2 else 1 - curr_row.append(min(curr_row[j] + 1, prev_row[j + 1] + 1, prev_row[j] + cost)) - prev_row = curr_row - return prev_row[-1] - - -def _closest_known_language(token: str, max_distance: int = 2) -> str | None: - """Return the closest known language if within max_distance, else None. - - Only considers tokens with length >= 4 to avoid false positives on short - language names like 'r', 'd', 'go'. - """ - if len(token) < 4: - return None - token_lower = token.lower() - known = _get_known_languages() - best_lang = None - best_dist = max_distance + 1 - for lang in known: - if len(lang) < 4: - continue - dist = _levenshtein_distance(token_lower, lang) - if dist < best_dist: - best_dist = dist - best_lang = lang - return best_lang if best_dist <= max_distance else None - def _strip_language_suffix(path_like: os.PathLike[str]) -> str: """ @@ -790,14 +754,6 @@ def _determine_language( # Check if the token is a known language using the new helper if _is_known_language(token): return token.lower() - # Warn if token looks like a misspelled language - close_match = _closest_known_language(token) - if close_match: - click.echo( - f"Warning: '{token}' in prompt filename is not a recognized language. " - f"Did you mean '{close_match}'?", - err=True, - ) # 4 - Special handling for detect command - default to prompt for LLM prompts if command == "detect" and "change_file" in input_file_paths: diff --git a/tests/test_construct_paths.py b/tests/test_construct_paths.py index a442563b..7b79d341 100644 --- a/tests/test_construct_paths.py +++ b/tests/test_construct_paths.py @@ -3102,9 +3102,9 @@ def test_construct_paths_sync_mode_respects_env_prompts_dir(tmp_path, monkeypatc f"Expected prompts_dir='/custom/sync/prompts' from PDD_PROMPTS_DIR in sync mode, got '{resolved_config['prompts_dir']}'" -# --- Tests for _get_known_languages, _levenshtein_distance, _closest_known_language --- +# --- Tests for _get_known_languages --- -from pdd.construct_paths import _get_known_languages, _levenshtein_distance, _closest_known_language +from pdd.construct_paths import _get_known_languages class TestGetKnownLanguages: @@ -3125,142 +3125,4 @@ def test_all_lowercase(self): assert lang == lang.lower() -class TestLevenshteinDistance: - """Tests for _levenshtein_distance.""" - - def test_identical_strings(self): - assert _levenshtein_distance("python", "python") == 0 - - def test_single_deletion(self): - assert _levenshtein_distance("python", "pythn") == 1 - - def test_single_insertion(self): - assert _levenshtein_distance("pythn", "python") == 1 - - def test_single_substitution(self): - assert _levenshtein_distance("python", "pxthon") == 1 - - def test_empty_strings(self): - assert _levenshtein_distance("", "") == 0 - assert _levenshtein_distance("abc", "") == 3 - assert _levenshtein_distance("", "abc") == 3 - - def test_completely_different(self): - assert _levenshtein_distance("abc", "xyz") == 3 - - def test_typscript_typo(self): - assert _levenshtein_distance("typscript", "typescript") == 1 - - def test_javasript_typo(self): - assert _levenshtein_distance("javasript", "javascript") == 1 - - -class TestClosestKnownLanguage: - """Tests for _closest_known_language fuzzy matching.""" - - def test_typscript_matches_typescript(self): - assert _closest_known_language("typscript") == "typescript" - - def test_pythn_matches_python(self): - assert _closest_known_language("pythn") == "python" - - def test_javasript_matches_javascript(self): - assert _closest_known_language("javasript") == "javascript" - - def test_typescrit_matches_typescript(self): - assert _closest_known_language("typescrit") == "typescript" - - def test_exact_match_returns_none(self): - # Exact matches are handled by _is_known_language, not this function - # But if called, distance is 0 which is <= 2, so it returns the match - assert _closest_known_language("typescript") == "typescript" - - def test_gibberish_returns_none(self): - assert _closest_known_language("foobar") is None - assert _closest_known_language("xyzzy") is None - - def test_short_tokens_return_none(self): - assert _closest_known_language("go") is None - assert _closest_known_language("r") is None - assert _closest_known_language("d") is None - assert _closest_known_language("abc") is None - - def test_too_distant_returns_none(self): - assert _closest_known_language("abcdefgh") is None - - def test_case_insensitive(self): - assert _closest_known_language("Typscript") == "typescript" - assert _closest_known_language("PYTHN") == "python" - - -class TestMisspelledLanguageWarning: - """Tests for the warning when a prompt filename has a misspelled language suffix.""" - - def test_misspelled_suffix_warns_and_falls_back(self, tmpdir, capsys): - """A misspelled language suffix should warn and fall back to default_language.""" - tmp_path = Path(str(tmpdir)) - prompt_file = tmp_path / 'new_typscript.prompt' - prompt_file.write_text('write a hello function') - - input_file_paths = {'prompt_file': str(prompt_file)} - command_options = {'default_language': 'python'} - mock_output_paths = {'output': str(tmp_path / 'output.py')} - - with patch('pdd.construct_paths.get_extension', return_value='.py'), \ - patch('pdd.construct_paths.get_language', return_value=None), \ - patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths), \ - patch('pdd.construct_paths._find_pddrc_file', return_value=None): - _, _, _, language = construct_paths( - input_file_paths, True, True, 'generate', command_options - ) - # Falls back to default_language - assert language == 'python' - - captured = capsys.readouterr() - assert "typscript" in captured.err - assert "typescript" in captured.err - - def test_correct_suffix_no_warning(self, tmpdir, capsys): - """A correctly spelled language suffix should not produce a warning.""" - tmp_path = Path(str(tmpdir)) - prompt_file = tmp_path / 'new_typescript.prompt' - prompt_file.write_text('write a hello function') - - input_file_paths = {'prompt_file': str(prompt_file)} - command_options = {} - mock_output_paths = {'output': str(tmp_path / 'output.ts')} - - with patch('pdd.construct_paths.get_extension', side_effect=lambda l: '.ts' if l == 'typescript' else ''), \ - patch('pdd.construct_paths.get_language', return_value=None), \ - patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths), \ - patch('pdd.construct_paths._find_pddrc_file', return_value=None): - _, _, _, language = construct_paths( - input_file_paths, True, True, 'generate', command_options - ) - assert language == 'typescript' - - captured = capsys.readouterr() - assert "Did you mean" not in captured.err - - def test_no_warning_for_non_language_suffix(self, tmpdir, capsys): - """A suffix that's not close to any language should not warn.""" - tmp_path = Path(str(tmpdir)) - prompt_file = tmp_path / 'new_widgets.prompt' - prompt_file.write_text('write widgets') - - input_file_paths = {'prompt_file': str(prompt_file)} - command_options = {'default_language': 'python'} - mock_output_paths = {'output': str(tmp_path / 'output.py')} - - with patch('pdd.construct_paths.get_extension', return_value='.py'), \ - patch('pdd.construct_paths.get_language', return_value=None), \ - patch('pdd.construct_paths.generate_output_paths', return_value=mock_output_paths), \ - patch('pdd.construct_paths._find_pddrc_file', return_value=None): - _, _, _, language = construct_paths( - input_file_paths, True, True, 'generate', command_options - ) - assert language == 'python' - - captured = capsys.readouterr() - assert "Did you mean" not in captured.err