Conversation
There was a problem hiding this comment.
Code Review
This pull request introduces a new grep.py script for exact text and regex searches and refactors the existing search functionality into a dedicated semantic_search method. It also adds support for filtering search results by path, extension, and a maximum result count across the CLI tools and the underlying API client. Feedback focuses on improving the robustness of CLI argument parsing, specifically by adding error handling for non-integer inputs to the --max-results flag and ensuring consistent file path extraction logic in the new grep tool.
| location = result.get("location", {}) | ||
| file_path = location.get("path") or result.get("path") | ||
| matches = result.get("matches", []) | ||
|
|
||
| output.append(f"\n--- Result #{idx} [{result.get('kind', 'Artifact')}] ---") | ||
| if file_path: | ||
| output.append(f" File: {file_path}") | ||
| if result.get("identifier"): | ||
| output.append(f" Identifier: {result['identifier']}") |
There was a problem hiding this comment.
The result formatting in grep.py is missing logic to check for filePath and to extract the file path from the identifier if explicit path fields are missing. This logic is present in search.py and should be included here for consistency and to ensure the file path is displayed whenever possible.
location = result.get("location", {})
file_path = location.get("path") or result.get("filePath") or result.get("path")
identifier = result.get("identifier", "")
matches = result.get("matches", [])
if not file_path and identifier and "::" in identifier:
parts = identifier.split("::")
if len(parts) >= 2:
file_path = parts[1]
output.append(f"\n--- Result #{idx} [{result.get('kind', 'Artifact')}] ---")
if file_path:
output.append(f" File: {file_path}")
if identifier:
output.append(f" Identifier: {identifier}")| elif arg == "--max-results" and i + 1 < len(sys.argv): | ||
| max_results = int(sys.argv[i + 1]) | ||
| i += 2 |
There was a problem hiding this comment.
The int() conversion for --max-results will raise a ValueError and cause the script to crash with a stack trace if a non-integer value is provided. It is better to handle this gracefully with a user-friendly error message.
| elif arg == "--max-results" and i + 1 < len(sys.argv): | |
| max_results = int(sys.argv[i + 1]) | |
| i += 2 | |
| elif arg == "--max-results" and i + 1 < len(sys.argv): | |
| try: | |
| max_results = int(sys.argv[i + 1]) | |
| except ValueError: | |
| print(f"Error: --max-results must be an integer, got '{sys.argv[i + 1]}'", file=sys.stderr) | |
| sys.exit(1) | |
| i += 2 |
| else: | ||
| data_sources.append(arg) | ||
| i += 1 |
There was a problem hiding this comment.
The current argument parsing logic treats any unknown argument as a data source. This can lead to confusing behavior if a user makes a typo in a flag (e.g., --max-result instead of --max-results), as the typo will be added to the list of data sources and likely cause an API error later. It is safer to validate that unknown arguments do not start with --.
elif arg.startswith("--"):
print(f"Error: Unknown option '{arg}'", file=sys.stderr)
sys.exit(1)
else:
data_sources.append(arg)
i += 1| if arg == "--max-results" and i + 1 < len(sys.argv): | ||
| max_results = int(sys.argv[i + 1]) | ||
| i += 2 |
There was a problem hiding this comment.
The int() conversion for --max-results will raise a ValueError if a non-integer value is provided. Handling this gracefully provides a better user experience.
| if arg == "--max-results" and i + 1 < len(sys.argv): | |
| max_results = int(sys.argv[i + 1]) | |
| i += 2 | |
| if arg == "--max-results" and i + 1 < len(sys.argv): | |
| try: | |
| max_results = int(sys.argv[i + 1]) | |
| except ValueError: | |
| print(f"Error: --max-results must be an integer, got '{sys.argv[i + 1]}'", file=sys.stderr) | |
| sys.exit(1) | |
| i += 2 |
| elif arg == "--max-results" and i + 1 < len(sys.argv): | ||
| max_results = int(sys.argv[i + 1]) | ||
| i += 2 |
There was a problem hiding this comment.
The int() conversion for --max-results should be wrapped in a try-except block to avoid a stack trace on invalid user input.
| elif arg == "--max-results" and i + 1 < len(sys.argv): | |
| max_results = int(sys.argv[i + 1]) | |
| i += 2 | |
| elif arg == "--max-results" and i + 1 < len(sys.argv): | |
| try: | |
| max_results = int(sys.argv[i + 1]) | |
| except ValueError: | |
| print(f"Error: --max-results must be an integer, got '{sys.argv[i + 1]}'", file=sys.stderr) | |
| sys.exit(1) | |
| i += 2 |
| elif arg == "--max-results" and i + 1 < len(sys.argv): | ||
| max_results = int(sys.argv[i + 1]) | ||
| i += 2 |
There was a problem hiding this comment.
The int() conversion for --max-results should be wrapped in a try-except block to avoid a stack trace on invalid user input.
| elif arg == "--max-results" and i + 1 < len(sys.argv): | |
| max_results = int(sys.argv[i + 1]) | |
| i += 2 | |
| elif arg == "--max-results" and i + 1 < len(sys.argv): | |
| try: | |
| max_results = int(sys.argv[i + 1]) | |
| except ValueError: | |
| print(f"Error: --max-results must be an integer, got '{sys.argv[i + 1]}'", file=sys.stderr) | |
| sys.exit(1) | |
| i += 2 |
Summary
Testing