Skip to content

Commit 5ef09a1

Browse files
committed
add tests for langchain and remove wrong model for guardrails agent
1 parent fe7eabd commit 5ef09a1

6 files changed

Lines changed: 232 additions & 5 deletions

File tree

javascript/ql/lib/ext/openai.model.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,5 @@ extensions:
2121
- ["@openai/agents", "Member[tool].Argument[0].Member[description]", "system-prompt-injection"]
2222
- ["@openai/guardrails", "Member[tool].Argument[0].Member[description]", "system-prompt-injection"]
2323
- ["@openai/guardrails", "Member[GuardrailAgent].Member[create].Argument[2]", "system-prompt-injection"]
24+
- ["@openai/agents", "Member[run].Argument[1]", "user-prompt-injection"]
25+
- ["@openai/agents", "Member[Runner].Instance.Member[run].Argument[1]", "user-prompt-injection"]

javascript/ql/src/experimental/semmle/javascript/frameworks/OpenAI.qll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -243,13 +243,10 @@ module AgentSDK {
243243
}
244244

245245
/**
246-
* Gets user prompt sinks for run(agent, input).
247-
* Covers string input and user-role array messages.
246+
* Gets the role-filtered user prompt sinks for `run(agent, input)`, i.e. the form where `input` is an array of messages.
247+
* The plain string-input case is covered by the Models-as-Data (MaD) rows in openai.model.yml, which mark the whole `input` argument as a sink.
248248
*/
249249
API::Node getUserPromptNode() {
250-
// run(agent, "string") — string input is the user prompt
251-
result = run().getParameter(1)
252-
or
253250
// run(agent, [{ role: "user", content: ... }])
254251
exists(API::Node msg |
255252
msg = run().getParameter(1).getArrayElement() and

javascript/ql/test/experimental/Security/CWE-1427/SystemPromptInjection/SystemPromptInjection.expected

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ edges
6565
| gemini_test.js:85:43:85:49 | persona | gemini_test.js:85:26:85:49 | "Talk l ... persona | provenance | |
6666
| gemini_test.js:95:43:95:49 | persona | gemini_test.js:95:26:95:49 | "Talk l ... persona | provenance | |
6767
| gemini_test.js:105:43:105:49 | persona | gemini_test.js:105:26:105:49 | "Talk l ... persona | provenance | |
68+
| langchain_test.js:9:9:9:15 | persona | langchain_test.js:16:54:16:60 | persona | provenance | |
69+
| langchain_test.js:9:9:9:15 | persona | langchain_test.js:19:31:19:37 | persona | provenance | |
70+
| langchain_test.js:9:9:9:15 | persona | langchain_test.js:25:36:25:42 | persona | provenance | |
71+
| langchain_test.js:9:19:9:35 | req.query.persona | langchain_test.js:9:9:9:15 | persona | provenance | |
72+
| langchain_test.js:16:54:16:60 | persona | langchain_test.js:16:37:16:60 | "Talk l ... persona | provenance | |
73+
| langchain_test.js:19:31:19:37 | persona | langchain_test.js:19:14:19:37 | "Talk l ... persona | provenance | |
74+
| langchain_test.js:25:36:25:42 | persona | langchain_test.js:25:19:25:42 | "Talk l ... persona | provenance | |
6875
| openai_test.js:11:9:11:15 | persona | openai_test.js:19:36:19:42 | persona | provenance | |
6976
| openai_test.js:11:9:11:15 | persona | openai_test.js:29:35:29:41 | persona | provenance | |
7077
| openai_test.js:11:9:11:15 | persona | openai_test.js:44:35:44:41 | persona | provenance | |
@@ -154,6 +161,14 @@ nodes
154161
| gemini_test.js:95:43:95:49 | persona | semmle.label | persona |
155162
| gemini_test.js:105:26:105:49 | "Talk l ... persona | semmle.label | "Talk l ... persona |
156163
| gemini_test.js:105:43:105:49 | persona | semmle.label | persona |
164+
| langchain_test.js:9:9:9:15 | persona | semmle.label | persona |
165+
| langchain_test.js:9:19:9:35 | req.query.persona | semmle.label | req.query.persona |
166+
| langchain_test.js:16:37:16:60 | "Talk l ... persona | semmle.label | "Talk l ... persona |
167+
| langchain_test.js:16:54:16:60 | persona | semmle.label | persona |
168+
| langchain_test.js:19:14:19:37 | "Talk l ... persona | semmle.label | "Talk l ... persona |
169+
| langchain_test.js:19:31:19:37 | persona | semmle.label | persona |
170+
| langchain_test.js:25:19:25:42 | "Talk l ... persona | semmle.label | "Talk l ... persona |
171+
| langchain_test.js:25:36:25:42 | persona | semmle.label | persona |
157172
| openai_test.js:11:9:11:15 | persona | semmle.label | persona |
158173
| openai_test.js:11:19:11:35 | req.query.persona | semmle.label | req.query.persona |
159174
| openai_test.js:19:19:19:42 | "Talk l ... persona | semmle.label | "Talk l ... persona |
@@ -206,6 +221,9 @@ subpaths
206221
| gemini_test.js:85:26:85:49 | "Talk l ... persona | gemini_test.js:8:19:8:35 | req.query.persona | gemini_test.js:85:26:85:49 | "Talk l ... persona | This prompt construction depends on a $@. | gemini_test.js:8:19:8:35 | req.query.persona | user-provided value |
207222
| gemini_test.js:95:26:95:49 | "Talk l ... persona | gemini_test.js:8:19:8:35 | req.query.persona | gemini_test.js:95:26:95:49 | "Talk l ... persona | This prompt construction depends on a $@. | gemini_test.js:8:19:8:35 | req.query.persona | user-provided value |
208223
| gemini_test.js:105:26:105:49 | "Talk l ... persona | gemini_test.js:8:19:8:35 | req.query.persona | gemini_test.js:105:26:105:49 | "Talk l ... persona | This prompt construction depends on a $@. | gemini_test.js:8:19:8:35 | req.query.persona | user-provided value |
224+
| langchain_test.js:16:37:16:60 | "Talk l ... persona | langchain_test.js:9:19:9:35 | req.query.persona | langchain_test.js:16:37:16:60 | "Talk l ... persona | This prompt construction depends on a $@. | langchain_test.js:9:19:9:35 | req.query.persona | user-provided value |
225+
| langchain_test.js:19:14:19:37 | "Talk l ... persona | langchain_test.js:9:19:9:35 | req.query.persona | langchain_test.js:19:14:19:37 | "Talk l ... persona | This prompt construction depends on a $@. | langchain_test.js:9:19:9:35 | req.query.persona | user-provided value |
226+
| langchain_test.js:25:19:25:42 | "Talk l ... persona | langchain_test.js:9:19:9:35 | req.query.persona | langchain_test.js:25:19:25:42 | "Talk l ... persona | This prompt construction depends on a $@. | langchain_test.js:9:19:9:35 | req.query.persona | user-provided value |
209227
| openai_test.js:19:19:19:42 | "Talk l ... persona | openai_test.js:11:19:11:35 | req.query.persona | openai_test.js:19:19:19:42 | "Talk l ... persona | This prompt construction depends on a $@. | openai_test.js:11:19:11:35 | req.query.persona | user-provided value |
210228
| openai_test.js:29:18:29:41 | "Talk l ... persona | openai_test.js:11:19:11:35 | req.query.persona | openai_test.js:29:18:29:41 | "Talk l ... persona | This prompt construction depends on a $@. | openai_test.js:11:19:11:35 | req.query.persona | user-provided value |
211229
| openai_test.js:44:18:44:41 | "Talk l ... persona | openai_test.js:11:19:11:35 | req.query.persona | openai_test.js:44:18:44:41 | "Talk l ... persona | This prompt construction depends on a $@. | openai_test.js:11:19:11:35 | req.query.persona | user-provided value |

javascript/ql/test/experimental/Security/CWE-1427/SystemPromptInjection/langchain_test.js

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
const express = require("express");
2+
const { ChatOpenAI } = require("@langchain/openai");
3+
const { HumanMessage, SystemMessage } = require("@langchain/core/messages");
4+
const { createAgent } = require("langchain");
5+
6+
const app = express();
7+
8+
app.get("/test", async (req, res) => {
9+
const persona = req.query.persona;
10+
const query = req.query.query;
11+
12+
const chatModel = new ChatOpenAI({ model: "gpt-4" });
13+
14+
// === SystemMessage (SHOULD ALERT) ===
15+
16+
const sysMsg1 = new SystemMessage("Talk like a " + persona); // $ Alert[js/prompt-injection]
17+
18+
const sysMsg2 = new SystemMessage({
19+
content: "Talk like a " + persona, // $ Alert[js/prompt-injection]
20+
});
21+
22+
// === createAgent with systemPrompt (SHOULD ALERT) ===
23+
24+
const agent = createAgent({
25+
systemPrompt: "Talk like a " + persona, // $ Alert[js/prompt-injection]
26+
});
27+
28+
// === Barrier test: user role content in shared array (SHOULD NOT ALERT) ===
29+
// When user input goes into a HumanMessage alongside a SystemMessage,
30+
// the system prompt query should NOT alert on the HumanMessage content.
31+
32+
await chatModel.invoke([
33+
new SystemMessage("You are a helpful assistant"),
34+
new HumanMessage({ role: "user", content: query }), // OK - user role content is not a system prompt
35+
]);
36+
37+
// Same pattern with raw message objects passed to invoke
38+
await chatModel.invoke([
39+
{ role: "system", content: "You are a helpful assistant" },
40+
{ role: "user", content: query }, // OK - user role content blocked by barrier
41+
]);
42+
43+
// === Constant comparison sanitizer (SHOULD NOT ALERT) ===
44+
45+
if (persona === "pirate") {
46+
const sysMsg3 = new SystemMessage("Talk like a " + persona); // OK - sanitized
47+
}
48+
49+
res.send("done");
50+
});

javascript/ql/test/experimental/Security/CWE-1427/UserPromptInjection/UserPromptInjection.expected

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,24 @@ edges
99
| gemini_user_test.js:8:9:8:17 | userInput | gemini_user_test.js:51:13:51:21 | userInput | provenance | |
1010
| gemini_user_test.js:8:9:8:17 | userInput | gemini_user_test.js:58:13:58:21 | userInput | provenance | |
1111
| gemini_user_test.js:8:21:8:39 | req.query.userInput | gemini_user_test.js:8:9:8:17 | userInput | provenance | |
12+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:18:26:18:34 | userInput | provenance | |
13+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:22:26:22:34 | userInput | provenance | |
14+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:26:24:26:32 | userInput | provenance | |
15+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:30:27:30:35 | userInput | provenance | |
16+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:34:26:34:34 | userInput | provenance | |
17+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:38:30:38:38 | userInput | provenance | |
18+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:42:33:42:41 | userInput | provenance | |
19+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:44:44:44:52 | userInput | provenance | |
20+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:49:31:49:39 | userInput | provenance | |
21+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:54:29:54:37 | userInput | provenance | |
22+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:59:34:59:42 | userInput | provenance | |
23+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:65:27:65:35 | userInput | provenance | |
24+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:71:27:71:35 | userInput | provenance | |
25+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:77:29:77:37 | userInput | provenance | |
26+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:81:31:81:39 | userInput | provenance | |
27+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:85:37:85:45 | userInput | provenance | |
28+
| langchain_user_test.js:13:9:13:17 | userInput | langchain_user_test.js:90:21:90:29 | userInput | provenance | |
29+
| langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:13:9:13:17 | userInput | provenance | |
1230
| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:23:12:23:20 | userInput | provenance | |
1331
| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:32:18:32:26 | userInput | provenance | |
1432
| openai_user_test.js:15:9:15:17 | userInput | openai_user_test.js:43:18:43:26 | userInput | provenance | |
@@ -39,6 +57,25 @@ nodes
3957
| gemini_user_test.js:44:13:44:21 | userInput | semmle.label | userInput |
4058
| gemini_user_test.js:51:13:51:21 | userInput | semmle.label | userInput |
4159
| gemini_user_test.js:58:13:58:21 | userInput | semmle.label | userInput |
60+
| langchain_user_test.js:13:9:13:17 | userInput | semmle.label | userInput |
61+
| langchain_user_test.js:13:21:13:39 | req.query.userInput | semmle.label | req.query.userInput |
62+
| langchain_user_test.js:18:26:18:34 | userInput | semmle.label | userInput |
63+
| langchain_user_test.js:22:26:22:34 | userInput | semmle.label | userInput |
64+
| langchain_user_test.js:26:24:26:32 | userInput | semmle.label | userInput |
65+
| langchain_user_test.js:30:27:30:35 | userInput | semmle.label | userInput |
66+
| langchain_user_test.js:34:26:34:34 | userInput | semmle.label | userInput |
67+
| langchain_user_test.js:38:30:38:38 | userInput | semmle.label | userInput |
68+
| langchain_user_test.js:42:33:42:41 | userInput | semmle.label | userInput |
69+
| langchain_user_test.js:44:44:44:52 | userInput | semmle.label | userInput |
70+
| langchain_user_test.js:49:31:49:39 | userInput | semmle.label | userInput |
71+
| langchain_user_test.js:54:29:54:37 | userInput | semmle.label | userInput |
72+
| langchain_user_test.js:59:34:59:42 | userInput | semmle.label | userInput |
73+
| langchain_user_test.js:65:27:65:35 | userInput | semmle.label | userInput |
74+
| langchain_user_test.js:71:27:71:35 | userInput | semmle.label | userInput |
75+
| langchain_user_test.js:77:29:77:37 | userInput | semmle.label | userInput |
76+
| langchain_user_test.js:81:31:81:39 | userInput | semmle.label | userInput |
77+
| langchain_user_test.js:85:37:85:45 | userInput | semmle.label | userInput |
78+
| langchain_user_test.js:90:21:90:29 | userInput | semmle.label | userInput |
4279
| openai_user_test.js:15:9:15:17 | userInput | semmle.label | userInput |
4380
| openai_user_test.js:15:21:15:39 | req.query.userInput | semmle.label | req.query.userInput |
4481
| openai_user_test.js:23:12:23:20 | userInput | semmle.label | userInput |
@@ -67,6 +104,23 @@ subpaths
67104
| gemini_user_test.js:44:13:44:21 | userInput | gemini_user_test.js:8:21:8:39 | req.query.userInput | gemini_user_test.js:44:13:44:21 | userInput | This prompt construction depends on a $@. | gemini_user_test.js:8:21:8:39 | req.query.userInput | user-provided value |
68105
| gemini_user_test.js:51:13:51:21 | userInput | gemini_user_test.js:8:21:8:39 | req.query.userInput | gemini_user_test.js:51:13:51:21 | userInput | This prompt construction depends on a $@. | gemini_user_test.js:8:21:8:39 | req.query.userInput | user-provided value |
69106
| gemini_user_test.js:58:13:58:21 | userInput | gemini_user_test.js:8:21:8:39 | req.query.userInput | gemini_user_test.js:58:13:58:21 | userInput | This prompt construction depends on a $@. | gemini_user_test.js:8:21:8:39 | req.query.userInput | user-provided value |
107+
| langchain_user_test.js:18:26:18:34 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:18:26:18:34 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
108+
| langchain_user_test.js:22:26:22:34 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:22:26:22:34 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
109+
| langchain_user_test.js:26:24:26:32 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:26:24:26:32 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
110+
| langchain_user_test.js:30:27:30:35 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:30:27:30:35 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
111+
| langchain_user_test.js:34:26:34:34 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:34:26:34:34 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
112+
| langchain_user_test.js:38:30:38:38 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:38:30:38:38 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
113+
| langchain_user_test.js:42:33:42:41 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:42:33:42:41 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
114+
| langchain_user_test.js:44:44:44:52 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:44:44:44:52 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
115+
| langchain_user_test.js:49:31:49:39 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:49:31:49:39 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
116+
| langchain_user_test.js:54:29:54:37 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:54:29:54:37 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
117+
| langchain_user_test.js:59:34:59:42 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:59:34:59:42 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
118+
| langchain_user_test.js:65:27:65:35 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:65:27:65:35 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
119+
| langchain_user_test.js:71:27:71:35 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:71:27:71:35 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
120+
| langchain_user_test.js:77:29:77:37 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:77:29:77:37 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
121+
| langchain_user_test.js:81:31:81:39 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:81:31:81:39 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
122+
| langchain_user_test.js:85:37:85:45 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:85:37:85:45 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
123+
| langchain_user_test.js:90:21:90:29 | userInput | langchain_user_test.js:13:21:13:39 | req.query.userInput | langchain_user_test.js:90:21:90:29 | userInput | This prompt construction depends on a $@. | langchain_user_test.js:13:21:13:39 | req.query.userInput | user-provided value |
70124
| openai_user_test.js:23:12:23:20 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:23:12:23:20 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value |
71125
| openai_user_test.js:32:18:32:26 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:32:18:32:26 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value |
72126
| openai_user_test.js:43:18:43:26 | userInput | openai_user_test.js:15:21:15:39 | req.query.userInput | openai_user_test.js:43:18:43:26 | userInput | This prompt construction depends on a $@. | openai_user_test.js:15:21:15:39 | req.query.userInput | user-provided value |

0 commit comments

Comments
 (0)