Skip to content

Commit 1db13a0

Browse files
Merge main
Signed-off-by: Lukasz Gryglicki <lgryglicki@cncf.io> Assisted by [OpenAI](https://platform.openai.com/) Assisted by [GitHub Copilot](https://github.com/features/copilot)
2 parents 3722359 + cfce1ed commit 1db13a0

2 files changed

Lines changed: 280 additions & 0 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,4 @@ cla-backend-go/golang-api.log
269269
utils/otel_dd_go/otel_dd
270270
audit.json
271271
spans*.json
272+
api_usage.csv
Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Query Datadog span events and output per-route API usage statistics as CSV.
4+
5+
Default behavior:
6+
- Skips spans marked as attributes.custom.easycla.e2e == "true"
7+
- Groups by templated route attributes.custom.http.route
8+
- Outputs: api,n_calls,first,last (sorted by n_calls desc)
9+
10+
Env vars required:
11+
DD_SITE (e.g. datadoghq.com, datadoghq.eu, us3.datadoghq.com, ...)
12+
DD_API_KEY
13+
DD_APP_KEY
14+
15+
Example:
16+
./utils/otel_dd/api_usage_stats_ddog.py --from now-60m --to now > api_usage.csv
17+
./utils/otel_dd/api_usage_stats_ddog.py --no-skip-e2e | head
18+
./utils/otel_dd/api_usage_stats_ddog.py --from now-24h --to now > api_usage.csv
19+
./utils/otel_dd/api_usage_stats_ddog.py --verbose | head
20+
"""
21+
22+
from __future__ import annotations
23+
24+
import argparse
25+
import csv
26+
import datetime as dt
27+
import json
28+
import os
29+
import sys
30+
import urllib.error
31+
import urllib.request
32+
from typing import Any, Dict, List, Optional, Tuple
33+
34+
35+
def eprint(*args: Any) -> None:
    """Print ``args`` to standard error, formatted exactly as ``print`` would."""
    err_stream = sys.stderr
    print(*args, file=err_stream)
37+
38+
39+
def parse_ts(ts: str) -> dt.datetime:
    """Parse an ISO 8601 timestamp into a timezone-aware UTC datetime.

    Datadog returns timestamps such as ``"2026-02-26T08:25:15.686Z"``.

    Args:
        ts: ISO 8601 timestamp. Surrounding whitespace is ignored and a
            trailing ``Z``/``z`` is read as UTC.

    Returns:
        The same instant as an aware ``datetime`` in UTC. A timestamp that
        carries no offset at all is taken to already be in UTC.

    Raises:
        ValueError: If ``datetime.fromisoformat`` cannot parse the text.
    """
    text = ts.strip()
    # datetime.fromisoformat() only understands a "Z" suffix from Python 3.11
    # on, so spell the UTC offset out explicitly.
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"
    parsed = dt.datetime.fromisoformat(text)
    if parsed.tzinfo is None:
        # astimezone() on a naive value would assume the machine's local zone,
        # making the result depend on where the script runs.
        return parsed.replace(tzinfo=dt.timezone.utc)
    return parsed.astimezone(dt.timezone.utc)
46+
47+
48+
def fmt_ts(ts: dt.datetime) -> str:
    """Render ``ts`` in UTC as ``YYYY-MM-DD HH:MM:SS.mmm``.

    The fractional part is truncated (not rounded) to milliseconds.
    """
    in_utc = ts.astimezone(dt.timezone.utc)
    whole_seconds = in_utc.strftime("%Y-%m-%d %H:%M:%S")
    millis = in_utc.microsecond // 1000
    return f"{whole_seconds}.{millis:03d}"
52+
53+
54+
def is_e2e_true(span: Dict[str, Any]) -> bool:
    """Tell whether ``attributes.custom.easycla.e2e`` of ``span`` reads as "true".

    Missing or empty intermediate levels count as "not e2e". The final value is
    compared as text, case-insensitively and ignoring surrounding whitespace.
    """
    node: Any = span
    for key in ("attributes", "custom", "easycla"):
        node = node.get(key) or {}
    flag = node.get("e2e", False)
    return str(flag).strip().lower() == "true"
60+
61+
62+
def extract_route(span: Dict[str, Any]) -> Optional[str]:
    """
    Pick the API route a span belongs to.

    First choice is the templated HTTP route:
      attributes.custom.http.route -> "/v1/repository/{uuid}"

    Otherwise fall back to:
      attributes.resource_name -> "GET /v1/repository/{uuid}" (method removed)

    Returns None when neither yields a path.
    """
    attributes = span.get("attributes") or {}
    http_attrs = (attributes.get("custom") or {}).get("http") or {}

    templated = http_attrs.get("route")
    if isinstance(templated, str):
        templated = templated.strip()
        if templated:
            return templated

    resource = attributes.get("resource_name")
    if not isinstance(resource, str):
        return None

    resource = resource.strip()
    pieces = resource.split(None, 1)
    # Usual shape: "METHOD /path"
    if len(pieces) == 2 and pieces[1].startswith("/"):
        return pieces[1].strip()
    # Occasionally the resource name is the bare "/path"
    return resource if resource.startswith("/") else None
90+
91+
92+
def extract_event_time(span: Dict[str, Any]) -> Optional[dt.datetime]:
    """Return the span's event time, or None when it is absent or unparseable.

    Reads ``attributes.start_timestamp``, falling back to
    ``attributes.end_timestamp`` when the former is missing or empty, and
    hands the text to ``parse_ts``. Any parsing failure yields None.
    """
    attributes = span.get("attributes") or {}
    raw = attributes.get("start_timestamp") or attributes.get("end_timestamp")
    if not (isinstance(raw, str) and raw.strip()):
        return None
    try:
        parsed = parse_ts(raw)
    except Exception:
        # Best effort: an unreadable timestamp is treated the same as a missing one.
        parsed = None
    return parsed
101+
102+
103+
def datadog_post_json(url: str, headers: Dict[str, str], payload: Dict[str, Any], timeout_s: int = 30) -> Dict[str, Any]:
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON object.

    Args:
        url: Endpoint to call.
        headers: HTTP headers sent with the request.
        payload: Request body; serialized with ``json.dumps`` as UTF-8.
        timeout_s: Timeout in seconds passed to ``urlopen``.

    Returns:
        The response body decoded as a JSON object (``dict``).

    Raises:
        RuntimeError: On an HTTP error status (the message carries the status
            code and response body), on a connection/socket failure, or when
            the response body is not a UTF-8 JSON object.
    """
    body = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(url, data=body, headers=headers, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=timeout_s) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        detail = e.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"Datadog HTTP {e.code}: {detail}") from e
    except urllib.error.URLError as e:
        raise RuntimeError(f"Datadog request failed: {e}") from e
    except OSError as e:
        # Timeouts/resets while reading the body are not wrapped in URLError.
        raise RuntimeError(f"Datadog request failed: {e}") from e

    try:
        parsed = json.loads(raw.decode("utf-8"))
    except ValueError as e:
        # Covers both UnicodeDecodeError and json.JSONDecodeError.
        raise RuntimeError(f"Datadog returned a non-JSON response: {e}") from e
    if not isinstance(parsed, dict):
        raise RuntimeError(
            f"Unexpected Datadog response: expected a JSON object, got {type(parsed).__name__}"
        )
    return parsed
115+
116+
117+
def fetch_spans(
    dd_site: str,
    dd_api_key: str,
    dd_app_key: str,
    query: str,
    time_from: str,
    time_to: str,
    limit: int,
    verbose: bool,
) -> List[Dict[str, Any]]:
    """Collect every span event matching ``query`` by following page cursors.

    Posts search requests to
    ``https://api.{dd_site}/api/v2/spans/events/search`` until a response
    carries no next cursor or an empty ``data`` list.

    Args:
        dd_site: Datadog site domain used to build the API host.
        dd_api_key: Value sent in the ``DD-API-KEY`` header.
        dd_app_key: Value sent in the ``DD-APPLICATION-KEY`` header.
        query: Search query placed in the request filter.
        time_from: Start of the time range, passed through unchanged.
        time_to: End of the time range, passed through unchanged.
        limit: Page size requested for each call.
        verbose: When true, log each page fetch to stderr.

    Returns:
        The ``data`` entries of all fetched pages, in fetch order.

    Raises:
        RuntimeError: If a response's ``data`` is not a list. Errors raised by
            ``datadog_post_json`` propagate unchanged.
    """
    endpoint = f"https://api.{dd_site}/api/v2/spans/events/search"
    auth_headers = {
        "Content-Type": "application/json",
        "DD-API-KEY": dd_api_key,
        "DD-APPLICATION-KEY": dd_app_key,
    }

    # The same request body is reused for every page; only the cursor inside
    # its "page" section changes between calls.
    page_opts: Dict[str, Any] = {"limit": limit}
    request_body: Dict[str, Any] = {
        "data": {
            "type": "search_request",
            "attributes": {
                "filter": {"from": time_from, "to": time_to, "query": query},
                "sort": "timestamp",
                "page": page_opts,
            },
        }
    }

    collected: List[Dict[str, Any]] = []
    cursor: Optional[str] = None
    page_num = 0

    while True:
        page_num += 1
        if cursor:
            page_opts["cursor"] = cursor
        else:
            page_opts.pop("cursor", None)

        if verbose:
            eprint(f"[ddog] fetching page {page_num} (cursor={cursor!r}) ...")

        resp = datadog_post_json(endpoint, auth_headers, request_body)
        batch = resp.get("data") or []
        if not isinstance(batch, list):
            raise RuntimeError("Unexpected Datadog response: 'data' is not a list")
        collected += batch

        page_meta = (resp.get("meta") or {}).get("page") or {}

        # meta.page.after is tried first as the next cursor; the other two
        # keys are accepted as alternatives.
        next_cursor = None
        if isinstance(page_meta, dict):
            next_cursor = (
                page_meta.get("after")
                or page_meta.get("cursor")
                or page_meta.get("next_cursor")
            )

        # Stop when there is nowhere to go, or (as a safety net) when a page
        # came back empty despite advertising a cursor.
        if not next_cursor or not batch:
            break
        cursor = str(next_cursor)

    return collected
185+
186+
187+
def _collect_route_stats(
    spans: List[Dict[str, Any]],
    skip_e2e: bool,
) -> Tuple[Dict[str, Tuple[int, dt.datetime, dt.datetime]], Dict[str, int]]:
    """Fold spans into per-route (count, earliest, latest) plus skip counters."""
    stats: Dict[str, Tuple[int, dt.datetime, dt.datetime]] = {}
    counters = {"kept": 0, "e2e": 0, "no_route": 0, "no_ts": 0}

    for span in spans:
        if skip_e2e and is_e2e_true(span):
            counters["e2e"] += 1
            continue

        route = extract_route(span)
        if not route:
            counters["no_route"] += 1
            continue

        t = extract_event_time(span)
        if t is None:
            counters["no_ts"] += 1
            continue

        counters["kept"] += 1
        if route in stats:
            cnt, tmin, tmax = stats[route]
            stats[route] = (cnt + 1, min(tmin, t), max(tmax, t))
        else:
            stats[route] = (1, t, t)

    return stats, counters


def main() -> int:
    """Command-line entry point: fetch spans and print per-route usage as CSV.

    Reads DD_SITE, DD_API_KEY and DD_APP_KEY from the environment, fetches the
    spans matching the query and time range, aggregates them per route and
    writes ``api,n_calls,first,last`` rows to stdout, sorted by call count
    (descending) and then by route. With ``--verbose`` a summary goes to stderr.

    Returns:
        0 on success, 1 when fetching from Datadog fails, 2 when a required
        environment variable is missing. Invalid command-line arguments make
        argparse exit with status 2.
    """
    p = argparse.ArgumentParser(description="Datadog span API usage stats (CSV)")
    # Match your bash ergonomics, but default to SKIP
    g = p.add_mutually_exclusive_group()
    g.add_argument("--skip-e2e", action="store_true", help="Skip e2e spans (default)")
    g.add_argument("--no-skip-e2e", action="store_true", help="Include e2e spans")

    p.add_argument("--from", dest="time_from", default="now-60m", help='Time range start (Datadog format), default "now-60m"')
    p.add_argument("--to", dest="time_to", default="now", help='Time range end (Datadog format), default "now"')
    p.add_argument("--query", default="service:easycla-backend env:dev", help='Datadog query string (default: "service:easycla-backend env:dev")')
    p.add_argument("--limit", type=int, default=5000, help="Page limit per request (default: 5000)")
    p.add_argument("--verbose", action="store_true", help="Log progress to stderr")

    args = p.parse_args()
    if args.limit < 1:
        p.error("--limit must be a positive integer")

    # Skipping e2e spans is the default; --skip-e2e exists only so the choice
    # can be spelled out, --no-skip-e2e turns it off.
    skip_e2e = not args.no_skip_e2e

    dd_site = os.getenv("DD_SITE")
    dd_api_key = os.getenv("DD_API_KEY")
    dd_app_key = os.getenv("DD_APP_KEY")

    missing = [k for k, v in (("DD_SITE", dd_site), ("DD_API_KEY", dd_api_key), ("DD_APP_KEY", dd_app_key)) if not v]
    if missing:
        eprint(f"ERROR: missing env var(s): {', '.join(missing)}")
        return 2

    try:
        spans = fetch_spans(
            dd_site=dd_site,  # type: ignore[arg-type]
            dd_api_key=dd_api_key,  # type: ignore[arg-type]
            dd_app_key=dd_app_key,  # type: ignore[arg-type]
            query=args.query,
            time_from=args.time_from,
            time_to=args.time_to,
            limit=args.limit,
            verbose=args.verbose,
        )
    except RuntimeError as e:
        # Bad credentials, network trouble, malformed responses: report them
        # like the other user-facing errors instead of dumping a traceback.
        eprint(f"ERROR: {e}")
        return 1

    stats, counters = _collect_route_stats(spans, skip_e2e)

    # Sort by count desc, then route
    rows = sorted(
        ((route, cnt, tmin, tmax) for route, (cnt, tmin, tmax) in stats.items()),
        key=lambda x: (-x[1], x[0]),
    )

    w = csv.writer(sys.stdout, lineterminator="\n")
    w.writerow(["api", "n_calls", "first", "last"])
    for route, cnt, tmin, tmax in rows:
        w.writerow([route, cnt, fmt_ts(tmin), fmt_ts(tmax)])

    if args.verbose:
        eprint(f"[ddog] spans fetched: {len(spans)}")
        eprint(f"[ddog] spans kept: {counters['kept']}")
        if skip_e2e:
            eprint(f"[ddog] e2e skipped: {counters['e2e']}")
        eprint(f"[ddog] no-route: {counters['no_route']}")
        eprint(f"[ddog] no-ts: {counters['no_ts']}")
        eprint(f"[ddog] routes: {len(stats)}")

    return 0
276+
277+
278+
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())

0 commit comments

Comments
 (0)