@@ -66,6 +66,35 @@ base16Encode(const char *in, size_t inLen)
6666 return result.str ();
6767}
6868
69+ /* *
70+ * @brief URI-decode a character string
71+ *
72+ * Decodes percent-encoded characters (e.g., %20 -> space, %2F -> /)
73+ *
74+ * @param in string to be URI decoded
75+ * @return decoded string
76+ */
77+ String
78+ uriDecode (const String &in)
79+ {
80+ String result;
81+ result.reserve (in.length ()); /* Decoded string will be same size or smaller */
82+
83+ for (size_t i = 0 ; i < in.length (); i++) {
84+ if (in[i] == ' %' && i + 2 < in.length () && std::isxdigit (static_cast <unsigned char >(in[i + 1 ])) &&
85+ std::isxdigit (static_cast <unsigned char >(in[i + 2 ]))) {
86+ /* Decode %XX to character */
87+ char hex[3 ] = {in[i + 1 ], in[i + 2 ], ' \0 ' };
88+ result += static_cast <char >(std::strtol (hex, nullptr , 16 ));
89+ i += 2 ; /* Skip past the hex digits */
90+ } else {
91+ result += in[i];
92+ }
93+ }
94+
95+ return result;
96+ }
97+
6998/* *
7099 * @brief URI-encode a character string (AWS specific version, see spec)
71100 *
@@ -108,67 +137,95 @@ uriEncode(const String &in, bool isObjectName)
108137}
109138
110139/* *
111- * @brief checks if the string is URI-encoded ( AWS specific encoding version, see spec)
140+ * @brief Check if a string is already in AWS SigV4 canonical form.
112141 *
113- * @see AWS spec: http://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
142+ * A string is canonical if it either:
143+ * 1. Contains only unreserved characters (A-Z, a-z, 0-9, '-', '.', '_', '~')
144+ * and optionally '/' for object names - no encoding needed
145+ * 2. Is properly percent-encoded with UPPERCASE hex digits
114146 *
115- * @note According to the following RFC if the string is encoded and contains '%' it should
116- * be followed by 2 hexadecimal symbols otherwise '%' should be encoded with %25:
117- * https://tools.ietf.org/html/rfc3986#section-2.1
147+ * @see AWS spec: http://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
148+ * @see https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
118149 *
119- * @param in string to be URI checked
120- * @param isObjectName if true encoding didn't encode '/', kept it as it is .
121- * @return true if encoded , false not encoded .
150+ * @param in string to check
151+ * @param isObjectName if true, '/' is allowed unencoded (object name context) .
152+ * @return true if already canonical (no processing needed) , false if normalization required .
122153 */
123154bool
124- isUriEncoded (const String &in, bool isObjectName)
155+ isCanonical (const String &in, bool isObjectName)
125156{
126157 for (size_t pos = 0 ; pos < in.length (); pos++) {
127- char c = in[pos];
158+ unsigned char c = static_cast < unsigned char >( in[pos]) ;
128159
129- if (isalnum (c) || c == ' -' || c == ' _' || c == ' .' || c == ' ~' ) {
130- /* found a unreserved character which should not have been be encoded regardless
131- * 'A'-'Z', 'a'-'z', '0'-'9', '-', '.', '_', and '~'. */
160+ if (std:: isalnum (c) || c == ' -' || c == ' _' || c == ' .' || c == ' ~' ) {
161+ /* Unreserved characters don't need encoding:
162+ * 'A'-'Z', 'a'-'z', '0'-'9', '-', '.', '_', and '~' */
132163 continue ;
133164 }
134165
135- if (' ' == c) {
136- /* space should have been encoded with %20 if the string was encoded */
137- return false ;
138- }
139-
140- if (' /' == c && !isObjectName) {
141- /* if this is not an object name '/' should have been encoded */
142- return false ;
166+ if (' /' == c && isObjectName) {
167+ /* '/' is allowed unencoded in object names */
168+ continue ;
143169 }
144170
145171 if (' %' == c) {
146- if (pos + 2 < in.length () && std::isxdigit (in[pos + 1 ]) && std::isxdigit (in[pos + 2 ])) {
147- /* if string was encoded we should have exactly 2 hexadecimal chars following it */
148- return true ;
149- } else {
150- /* lonely '%' should have been encoded with %25 according to the RFC so likely not encoded */
151- return false ;
172+ if (pos + 2 < in.length ()) {
173+ unsigned char c1 = static_cast <unsigned char >(in[pos + 1 ]);
174+ unsigned char c2 = static_cast <unsigned char >(in[pos + 2 ]);
175+ if (std::isxdigit (c1) && std::isxdigit (c2)) {
176+ /* Valid percent-encoded sequence found. AWS SigV4 requires UPPERCASE hex digits.
177+ * If lowercase hex is found, return false to trigger normalization.
178+ * See: https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
179+ * "Letters in the hexadecimal value must be uppercase, for example "%1A"." */
180+ if (std::islower (c1) || std::islower (c2)) {
181+ return false ; /* Lowercase hex needs normalization to uppercase */
182+ }
183+ pos += 2 ; /* Skip past the hex digits */
184+ continue ;
185+ }
152186 }
187+ /* Lone '%' or incomplete sequence - needs encoding as %25 */
188+ return false ;
153189 }
190+
191+ /* Any other character needs encoding (space, '(', ')', '[', ']', etc.) */
192+ return false ;
154193 }
155194
156- return false ;
195+ /* String is already in canonical form:
196+ * - Only contains unreserved chars, slashes (for object names), or
197+ * - Properly percent-encoded sequences with uppercase hex
198+ * No decode/re-encode needed. */
199+ return true ;
157200}
158201
159202String
160203canonicalEncode (const String &in, bool isObjectName)
161204{
162- String canonical;
163- if (!isUriEncoded (in, isObjectName)) {
164- /* Not URI-encoded */
165- canonical = uriEncode (in, isObjectName);
166- } else {
167- /* URI-encoded, then don't encode since AWS does not encode which is not mentioned in the spec,
168- * asked AWS, still waiting for confirmation */
169- canonical = in;
205+ if (isCanonical (in, isObjectName)) {
206+ /* Fully URI-encoded with uppercase hex, return as-is */
207+ return in;
170208 }
171209
210+ /* Input needs normalization. This handles:
211+ * 1. Unencoded strings - encode all reserved characters
212+ * 2. Partially encoded - some chars encoded, some not (e.g., parentheses vs brackets)
213+ * 3. Lowercase hex - convert %2f to %2F per AWS SigV4 requirement
214+ *
215+ * Decode first to get the raw string, then re-encode with AWS canonical rules.
216+ *
217+ * Example (mixed encoding):
218+ * Input: /app/(channel)/%5B%5Bparts%5D%5D/page.js (parentheses not encoded)
219+ * Decode: /app/(channel)/[[parts]]/page.js
220+ * Encode: /app/%28channel%29/%5B%5Bparts%5D%5D/page.js
221+ *
222+ * Example (lowercase hex):
223+ * Input: /path/%5btest%5d/file.js (lowercase hex)
224+ * Decode: /path/[test]/file.js
225+ * Encode: /path/%5Btest%5D/file.js (uppercase hex)
226+ */
227+ String decoded = uriDecode (in);
228+ String canonical = uriEncode (decoded, isObjectName);
172229 return canonical;
173230}
174231
0 commit comments