diff --git a/gcp/workers/importer/importer.py b/gcp/workers/importer/importer.py index 6385f193f09..a5300a6c023 100755 --- a/gcp/workers/importer/importer.py +++ b/gcp/workers/importer/importer.py @@ -230,7 +230,7 @@ def _infer_id_from_invalid_data(self, name: str, content: bytes) -> str: extension = os.path.splitext(name)[1] try: vulns = osv.parse_vulnerabilities_from_data( - content, extension, strict=False) + content, extension, strict=False, source_name=name) if vulns: return vulns[0].id except RuntimeError: @@ -342,7 +342,8 @@ def _vuln_ids_from_gcs_blob(self, client: storage.Client, vulns = osv.parse_vulnerabilities_from_data( blob_bytes, os.path.splitext(blob.name)[1], - strict=source_repo.strict_validation and self._strict_validation) + strict=source_repo.strict_validation and self._strict_validation, + source_name=blob.name) for vuln in vulns: vuln_ids.append(vuln.id) return vuln_ids @@ -409,7 +410,8 @@ def _convert_blob_to_vuln( vulns = osv.parse_vulnerabilities_from_data( blob_bytes, os.path.splitext(blob.name)[1], - strict=self._strict_validation) + strict=self._strict_validation, + source_name=blob.name) # TODO(andrewpollock): integrate with linter here. 
diff --git a/gcp/workers/worker/worker.py b/gcp/workers/worker/worker.py index 7ec2cce81e8..6d7a5dd5df4 100644 --- a/gcp/workers/worker/worker.py +++ b/gcp/workers/worker/worker.py @@ -389,7 +389,8 @@ def _source_update(self, message): vulnerabilities = osv.parse_vulnerabilities_from_data( blob, extension=os.path.splitext(path)[1], - key_path=source_repo.key_path) + key_path=source_repo.key_path, + source_name=path) except Exception: logging.exception('Failed to parse vulnerability %s', path) return diff --git a/osv/sources.py b/osv/sources.py index 8dde830fac1..6c09b30912e 100644 --- a/osv/sources.py +++ b/osv/sources.py @@ -143,18 +143,38 @@ def parse_vulnerabilities( _parse_vulnerability_dict(path), key_path, strict) -def parse_vulnerabilities_from_data( - data_text: str | bytes, - extension: str, - key_path=None, - strict=False) -> list[vulnerability_pb2.Vulnerability]: - """Parse vulnerabilities from data.""" - if extension in YAML_EXTENSIONS: - data = yaml.load(data_text, Loader=NoDatesSafeLoader) - elif extension in JSON_EXTENSIONS: - data = json.loads(data_text) - else: - raise RuntimeError('Unknown format ' + extension) +def parse_vulnerabilities_from_data(data_text: str | bytes, + extension: str, + key_path=None, + strict=False, + source_name=None) -> list[vulnerability_pb2.Vulnerability]: + """Parse vulnerabilities from data. + + Args: + data_text: The raw vulnerability data. + extension: File extension (.json, .yaml, .yml). + key_path: Optional key path for nested data. + strict: If True, raises on validation errors. + source_name: Optional source identifier for error context. + + Returns: + List of parsed vulnerabilities. + + Raises: + RuntimeError: If parsing fails, includes source_name if provided. 
+ """ + try: + if extension in YAML_EXTENSIONS: + data = yaml.load(data_text, Loader=NoDatesSafeLoader) + elif extension in JSON_EXTENSIONS: + data = json.loads(data_text) + else: + raise RuntimeError('Unknown format ' + extension) + except Exception as e: + if source_name: + raise RuntimeError( + f"Failed to parse vulnerability file '{source_name}'") from e + raise return _parse_vulnerabilities(data, key_path, strict)