Skip to content

Commit ef7742c

Browse files
authored
Fix origin_server_auth URL encoding for mixed-encoding URLs (#12802)
isUriEncoded() incorrectly returned true on first %XX found, skipping re-encoding of unencoded chars like (). URLs like /app/(channel)/%5B%5B... caused S3 signature mismatch (403). Fix: Renamed to isCanonical(), now checks entire string. canonicalEncode() decodes then re-encodes for consistent output. Also normalizes lowercase hex (%2f → %2F).
1 parent 102abf2 commit ef7742c

5 files changed

Lines changed: 608 additions & 70 deletions

File tree

plugins/origin_server_auth/aws_auth_v4.cc

Lines changed: 93 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,35 @@ base16Encode(const char *in, size_t inLen)
6666
return result.str();
6767
}
6868

69+
/**
70+
* @brief URI-decode a character string
71+
*
72+
* Decodes percent-encoded characters (e.g., %20 -> space, %2F -> /)
73+
*
74+
* @param in string to be URI decoded
75+
* @return decoded string
76+
*/
77+
String
78+
uriDecode(const String &in)
79+
{
80+
String result;
81+
result.reserve(in.length()); /* Decoded string will be same size or smaller */
82+
83+
for (size_t i = 0; i < in.length(); i++) {
84+
if (in[i] == '%' && i + 2 < in.length() && std::isxdigit(static_cast<unsigned char>(in[i + 1])) &&
85+
std::isxdigit(static_cast<unsigned char>(in[i + 2]))) {
86+
/* Decode %XX to character */
87+
char hex[3] = {in[i + 1], in[i + 2], '\0'};
88+
result += static_cast<char>(std::strtol(hex, nullptr, 16));
89+
i += 2; /* Skip past the hex digits */
90+
} else {
91+
result += in[i];
92+
}
93+
}
94+
95+
return result;
96+
}
97+
6998
/**
7099
* @brief URI-encode a character string (AWS specific version, see spec)
71100
*
@@ -108,67 +137,95 @@ uriEncode(const String &in, bool isObjectName)
108137
}
109138

110139
/**
111-
* @brief checks if the string is URI-encoded (AWS specific encoding version, see spec)
140+
* @brief Check if a string is already in AWS SigV4 canonical form.
112141
*
113-
* @see AWS spec: http://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
142+
* A string is canonical if it either:
143+
* 1. Contains only unreserved characters (A-Z, a-z, 0-9, '-', '.', '_', '~')
144+
* and optionally '/' for object names - no encoding needed
145+
* 2. Is properly percent-encoded with UPPERCASE hex digits
114146
*
115-
* @note According to the following RFC if the string is encoded and contains '%' it should
116-
* be followed by 2 hexadecimal symbols otherwise '%' should be encoded with %25:
117-
* https://tools.ietf.org/html/rfc3986#section-2.1
147+
* @see AWS spec: http://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
148+
* @see https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
118149
*
119-
* @param in string to be URI checked
120-
* @param isObjectName if true encoding didn't encode '/', kept it as it is.
121-
* @return true if encoded, false not encoded.
150+
* @param in string to check
151+
* @param isObjectName if true, '/' is allowed unencoded (object name context).
152+
* @return true if already canonical (no processing needed), false if normalization required.
122153
*/
123154
bool
124-
isUriEncoded(const String &in, bool isObjectName)
155+
isCanonical(const String &in, bool isObjectName)
125156
{
126157
for (size_t pos = 0; pos < in.length(); pos++) {
127-
char c = in[pos];
158+
unsigned char c = static_cast<unsigned char>(in[pos]);
128159

129-
if (isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') {
130-
/* found a unreserved character which should not have been be encoded regardless
131-
* 'A'-'Z', 'a'-'z', '0'-'9', '-', '.', '_', and '~'. */
160+
if (std::isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') {
161+
/* Unreserved characters don't need encoding:
162+
* 'A'-'Z', 'a'-'z', '0'-'9', '-', '.', '_', and '~' */
132163
continue;
133164
}
134165

135-
if (' ' == c) {
136-
/* space should have been encoded with %20 if the string was encoded */
137-
return false;
138-
}
139-
140-
if ('/' == c && !isObjectName) {
141-
/* if this is not an object name '/' should have been encoded */
142-
return false;
166+
if ('/' == c && isObjectName) {
167+
/* '/' is allowed unencoded in object names */
168+
continue;
143169
}
144170

145171
if ('%' == c) {
146-
if (pos + 2 < in.length() && std::isxdigit(in[pos + 1]) && std::isxdigit(in[pos + 2])) {
147-
/* if string was encoded we should have exactly 2 hexadecimal chars following it */
148-
return true;
149-
} else {
150-
/* lonely '%' should have been encoded with %25 according to the RFC so likely not encoded */
151-
return false;
172+
if (pos + 2 < in.length()) {
173+
unsigned char c1 = static_cast<unsigned char>(in[pos + 1]);
174+
unsigned char c2 = static_cast<unsigned char>(in[pos + 2]);
175+
if (std::isxdigit(c1) && std::isxdigit(c2)) {
176+
/* Valid percent-encoded sequence found. AWS SigV4 requires UPPERCASE hex digits.
177+
* If lowercase hex is found, return false to trigger normalization.
178+
* See: https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
179+
* "Letters in the hexadecimal value must be uppercase, for example "%1A"." */
180+
if (std::islower(c1) || std::islower(c2)) {
181+
return false; /* Lowercase hex needs normalization to uppercase */
182+
}
183+
pos += 2; /* Skip past the hex digits */
184+
continue;
185+
}
152186
}
187+
/* Lone '%' or incomplete sequence - needs encoding as %25 */
188+
return false;
153189
}
190+
191+
/* Any other character needs encoding (space, '(', ')', '[', ']', etc.) */
192+
return false;
154193
}
155194

156-
return false;
195+
/* String is already in canonical form:
196+
* - Only contains unreserved chars, slashes (for object names), or
197+
* - Properly percent-encoded sequences with uppercase hex
198+
* No decode/re-encode needed. */
199+
return true;
157200
}
158201

159202
String
160203
canonicalEncode(const String &in, bool isObjectName)
161204
{
162-
String canonical;
163-
if (!isUriEncoded(in, isObjectName)) {
164-
/* Not URI-encoded */
165-
canonical = uriEncode(in, isObjectName);
166-
} else {
167-
/* URI-encoded, then don't encode since AWS does not encode which is not mentioned in the spec,
168-
* asked AWS, still waiting for confirmation */
169-
canonical = in;
205+
if (isCanonical(in, isObjectName)) {
206+
/* Fully URI-encoded with uppercase hex, return as-is */
207+
return in;
170208
}
171209

210+
/* Input needs normalization. This handles:
211+
* 1. Unencoded strings - encode all reserved characters
212+
* 2. Partially encoded - some chars encoded, some not (e.g., parentheses vs brackets)
213+
* 3. Lowercase hex - convert %2f to %2F per AWS SigV4 requirement
214+
*
215+
* Decode first to get the raw string, then re-encode with AWS canonical rules.
216+
*
217+
* Example (mixed encoding):
218+
* Input: /app/(channel)/%5B%5Bparts%5D%5D/page.js (parentheses not encoded)
219+
* Decode: /app/(channel)/[[parts]]/page.js
220+
* Encode: /app/%28channel%29/%5B%5Bparts%5D%5D/page.js
221+
*
222+
* Example (lowercase hex):
223+
* Input: /path/%5btest%5d/file.js (lowercase hex)
224+
* Decode: /path/[test]/file.js
225+
* Encode: /path/%5Btest%5D/file.js (uppercase hex)
226+
*/
227+
String decoded = uriDecode(in);
228+
String canonical = uriEncode(decoded, isObjectName);
172229
return canonical;
173230
}
174231

0 commit comments

Comments
 (0)