Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions contributing/samples/bigquery/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ distributed via the `google.adk.tools.bigquery` module. These tools include:
5. `get_job_info`
Fetches metadata about a BigQuery job.

5. `execute_sql`
6. `execute_sql`

Runs or dry-runs a SQL query in BigQuery.

6. `ask_data_insights`
7. `ask_data_insights`

Natural language-in, natural language-out tool that answers questions
about structured data in BigQuery. Provides a one-stop solution for generating
Expand All @@ -38,23 +38,26 @@ distributed via the `google.adk.tools.bigquery` module. These tools include:
the official [Conversational Analytics API documentation](https://cloud.google.com/gemini/docs/conversational-analytics-api/overview)
for instructions.

7. `forecast`
8. `forecast`

Performs time series forecasting using BigQuery's `AI.FORECAST` function,
leveraging the TimesFM 2.0 model.

8. `analyze_contribution`
9. `analyze_contribution`

Performs contribution analysis in BigQuery by creating a temporary
`CONTRIBUTION_ANALYSIS` model and then querying it with
`ML.GET_INSIGHTS` to find top contributors for a given metric.

9. `detect_anomalies`
10. `detect_anomalies`

Performs time series anomaly detection in BigQuery by creating a temporary
`ARIMA_PLUS` model and then querying it with
`ML.DETECT_ANOMALIES` to detect time series data anomalies.

11. `search_catalog`

Searches for data entries across projects using the Dataplex Catalog. This
allows discovery of datasets, tables, and other assets.

## How to use

Set up environment variables in your `.env` file for using
Expand Down
9 changes: 5 additions & 4 deletions src/google/adk/tools/bigquery/bigquery_credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@
from .._google_credentials import BaseGoogleCredentialsConfig

BIGQUERY_TOKEN_CACHE_KEY = "bigquery_token_cache"
BIGQUERY_DEFAULT_SCOPE = ["https://www.googleapis.com/auth/bigquery"]

BIGQUERY_SCOPES = [
"https://www.googleapis.com/auth/bigquery",
"https://www.googleapis.com/auth/cloud-platform",
]

@experimental(FeatureName.GOOGLE_CREDENTIALS_CONFIG)
class BigQueryCredentialsConfig(BaseGoogleCredentialsConfig):
Expand All @@ -34,8 +36,7 @@ def __post_init__(self) -> BigQueryCredentialsConfig:
super().__post_init__()

if not self.scopes:
self.scopes = BIGQUERY_DEFAULT_SCOPE

self.scopes = BIGQUERY_SCOPES
# Set the token cache key
self._token_cache_key = BIGQUERY_TOKEN_CACHE_KEY

Expand Down
2 changes: 2 additions & 0 deletions src/google/adk/tools/bigquery/bigquery_toolset.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from . import data_insights_tool
from . import metadata_tool
from . import query_tool
from . import search_tool
from ...features import experimental
from ...features import FeatureName
from ...tools.base_tool import BaseTool
Expand Down Expand Up @@ -87,6 +88,7 @@ async def get_tools(
query_tool.analyze_contribution,
query_tool.detect_anomalies,
data_insights_tool.ask_data_insights,
search_tool.search_catalog,
]
]

Expand Down
41 changes: 39 additions & 2 deletions src/google/adk/tools/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@
import google.api_core.client_info
from google.auth.credentials import Credentials
from google.cloud import bigquery
from google.cloud import dataplex_v1
from google.api_core.gapic_v1 import client_info as gapic_client_info

from ... import version

USER_AGENT = f"adk-bigquery-tool google-adk/{version.__version__}"
USER_AGENT_BASE = f"google-adk/{version.__version__}"
BQ_USER_AGENT = f"adk-bigquery-tool {USER_AGENT_BASE}"
DP_USER_AGENT = f"adk-dataplex-tool {USER_AGENT_BASE}"


from typing import List
Expand All @@ -48,7 +52,7 @@ def get_bigquery_client(
A BigQuery client.
"""

user_agents = [USER_AGENT]
user_agents = [BQ_USER_AGENT]
if user_agent:
if isinstance(user_agent, str):
user_agents.append(user_agent)
Expand All @@ -67,3 +71,36 @@ def get_bigquery_client(
)

return bigquery_client

def get_dataplex_catalog_client(
    *,
    credentials: Credentials,
    user_agent: Optional[Union[str, List[str]]] = None,
) -> dataplex_v1.CatalogServiceClient:
  """Create a Dataplex CatalogServiceClient with minimal necessary arguments.

  Args:
    credentials: The credentials to use for the request.
    user_agent: Additional user agent string(s) to append.

  Returns:
    A Dataplex Client.
  """

  # Extra user-agent entries supplied by the caller; empty strings and
  # falsy values are dropped.
  if isinstance(user_agent, str):
    extras = [user_agent] if user_agent else []
  elif user_agent:
    extras = [ua for ua in user_agent if ua]
  else:
    extras = []

  # The base ADK Dataplex agent always comes first.
  joined_agent = " ".join([DP_USER_AGENT, *extras])

  return dataplex_v1.CatalogServiceClient(
      credentials=credentials,
      client_info=gapic_client_info.ClientInfo(user_agent=joined_agent),
  )
130 changes: 130 additions & 0 deletions src/google/adk/tools/bigquery/search_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import logging
from typing import Any, Dict, List, Optional

from google.api_core import exceptions as api_exceptions
from google.auth.credentials import Credentials
from google.cloud import dataplex_v1

from . import client
from .config import BigQueryToolConfig

def _construct_search_query_helper(predicate: str, operator: str, items: List[str]) -> str:
if not items:
return ""
if len(items) == 1:
return f'{predicate}{operator}"{items[0]}"'

clauses = [f'{predicate}{operator}"{item}"' for item in items]
return "(" + " OR ".join(clauses) + ")"

def search_catalog(
    prompt: str,
    project_id: str,
    credentials: Credentials,
    settings: BigQueryToolConfig,
    location: str,
    page_size: int = 10,
    project_ids_filter: Optional[List[str]] = None,
    dataset_ids_filter: Optional[List[str]] = None,
    types_filter: Optional[List[str]] = None,
) -> Dict[str, Any]:
  """Search for BigQuery assets within Dataplex.

  Builds a Dataplex search query from the prompt and the optional filters,
  always scoped to the BigQuery system, and returns the matching entries.

  Args:
    prompt (str): The base search query (natural language or keywords).
    project_id (str): The Google Cloud project ID to scope the search.
    credentials (Credentials): Credentials for the request.
    settings (BigQueryToolConfig): BigQuery tool settings.
    location (str): The Dataplex location to use.
    page_size (int): Maximum number of results.
    project_ids_filter (Optional[List[str]]): Specific project IDs to include
      in the search results. If None, defaults to the scoping project_id.
    dataset_ids_filter (Optional[List[str]]): BigQuery dataset IDs to filter by.
    types_filter (Optional[List[str]]): Entry types to filter by (e.g.,
      "TABLE", "DATASET").

  Returns:
    dict: On success, ``{"status": "SUCCESS", "results": [...]}``; on failure,
      ``{"status": "ERROR", "error_details": <message>}``.
  """
  try:
    if not project_id:
      return {"status": "ERROR", "error_details": "project_id must be provided."}

    dataplex_client = client.get_dataplex_catalog_client(
        credentials=credentials,
        user_agent=[settings.application_name, "search_catalog"],
    )

    query_parts = []
    if prompt:
      query_parts.append(f"({prompt})")

    # Filter by project IDs, falling back to the scoping project so results
    # never leak outside the caller's intended projects. The fallback
    # guarantees a non-empty list, so no emptiness check is needed here.
    projects_to_filter = project_ids_filter or [project_id]
    query_parts.append(
        _construct_search_query_helper("projectid", "=", projects_to_filter)
    )

    # Filter by dataset IDs via the linked BigQuery resource path, crossed
    # with every project in scope. Both input lists are non-empty here, so
    # the cross product is too.
    if dataset_ids_filter:
      dataset_resource_filters = [
          f'linked_resource:"//bigquery.googleapis.com/projects/{pid}/datasets/{did}/*"'
          for pid in projects_to_filter
          for did in dataset_ids_filter
      ]
      query_parts.append(f"({' OR '.join(dataset_resource_filters)})")

    # Filter by entry types.
    if types_filter:
      query_parts.append(_construct_search_query_helper("type", "=", types_filter))

    # Always scope to BigQuery system
    query_parts.append("system=BIGQUERY")

    full_query = " AND ".join(filter(None, query_parts))

    search_scope = f"projects/{project_id}/locations/{location}"

    request = dataplex_v1.SearchEntriesRequest(
        name=search_scope,
        query=full_query,
        page_size=page_size,
        semantic_search=True,
    )

    response = dataplex_client.search_entries(request=request)

    results = []
    for result in response.results:
      entry = result.dataplex_entry
      source = entry.entry_source
      results.append(
          {
              "name": entry.name,
              "display_name": source.display_name or "",
              "entry_type": entry.entry_type,
              "update_time": str(entry.update_time),
              "linked_resource": source.resource or "",
              "description": source.description or "",
              "location": source.location or "",
          }
      )
    return {"status": "SUCCESS", "results": results}

  except api_exceptions.GoogleAPICallError as e:
    # Known API failures are reported back to the agent as structured errors.
    logging.getLogger(__name__).exception("search_catalog tool: API call failed")
    return {"status": "ERROR", "error_details": f"Dataplex API Error: {str(e)}"}
  except Exception as ex:  # pylint: disable=broad-except
    # Deliberate catch-all: tool failures must surface to the model as an
    # error payload rather than crash the agent loop.
    logging.getLogger(__name__).exception("search_catalog tool: Unexpected error")
    return {"status": "ERROR", "error_details": str(ex)}

6 changes: 3 additions & 3 deletions tests/unittests/tools/bigquery/test_bigquery_credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_valid_credentials_object_auth_credentials(self):
assert config.credentials == auth_creds
assert config.client_id is None
assert config.client_secret is None
assert config.scopes == ["https://www.googleapis.com/auth/bigquery"]
assert config.scopes == ["https://www.googleapis.com/auth/bigquery","https://www.googleapis.com/auth/cloud-platform"]

def test_valid_credentials_object_oauth2_credentials(self):
"""Test that providing valid Credentials object works correctly with
Expand Down Expand Up @@ -86,7 +86,7 @@ def test_valid_client_id_secret_pair_default_scope(self):
assert config.credentials is None
assert config.client_id == "test_client_id"
assert config.client_secret == "test_client_secret"
assert config.scopes == ["https://www.googleapis.com/auth/bigquery"]
assert config.scopes == ["https://www.googleapis.com/auth/bigquery","https://www.googleapis.com/auth/cloud-platform",]

def test_valid_client_id_secret_pair_w_scope(self):
"""Test that providing client ID and secret with explicit scopes works.
Expand Down Expand Up @@ -128,7 +128,7 @@ def test_valid_client_id_secret_pair_w_empty_scope(self):
assert config.credentials is None
assert config.client_id == "test_client_id"
assert config.client_secret == "test_client_secret"
assert config.scopes == ["https://www.googleapis.com/auth/bigquery"]
assert config.scopes == ["https://www.googleapis.com/auth/bigquery","https://www.googleapis.com/auth/cloud-platform"]

def test_missing_client_secret_raises_error(self):
"""Test that missing client secret raises appropriate validation error.
Expand Down
Loading