1 /*
2 * Copyright 2012 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.handler.codec.http;
17
18 import io.netty.util.CharsetUtil;
19 import io.netty.util.internal.PlatformDependent;
20
21 import java.net.URI;
22 import java.net.URLDecoder;
23 import java.nio.charset.Charset;
24 import java.util.ArrayList;
25 import java.util.Collections;
26 import java.util.LinkedHashMap;
27 import java.util.List;
28 import java.util.Map;
29
30 import static io.netty.util.internal.ObjectUtil.checkNotNull;
31 import static io.netty.util.internal.ObjectUtil.checkPositive;
32 import static io.netty.util.internal.StringUtil.EMPTY_STRING;
33 import static io.netty.util.internal.StringUtil.SPACE;
34 import static io.netty.util.internal.StringUtil.decodeHexByte;
35
36 /**
37 * Splits an HTTP query string into a path string and key-value parameter pairs.
38 * This decoder is for one time use only. Create a new instance for each URI:
39 * <pre>
40 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world&x=1;y=2");
41 * assert decoder.path().equals("/hello");
42 * assert decoder.parameters().get("recipient").get(0).equals("world");
43 * assert decoder.parameters().get("x").get(0).equals("1");
44 * assert decoder.parameters().get("y").get(0).equals("2");
45 * </pre>
46 *
47 * This decoder can also decode the content of an HTTP POST request whose
48 * content type is <tt>application/x-www-form-urlencoded</tt>:
49 * <pre>
50 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("recipient=world&x=1;y=2", false);
51 * ...
52 * </pre>
53 *
54 * <h3>HashDOS vulnerability fix</h3>
55 *
56 * As a workaround to the <a href="https://netty.io/s/hashdos">HashDOS</a> vulnerability, the decoder
57 * limits the maximum number of decoded key-value parameter pairs, up to {@literal 1024} by
58 * default, and you can configure it when you construct the decoder by passing an additional
59 * integer parameter.
60 *
61 * @see QueryStringEncoder
62 */
63 public class QueryStringDecoder {
64
65 private static final int DEFAULT_MAX_PARAMS = 1024;
66
67 private final Charset charset;
68 private final String uri;
69 private final int maxParams;
70 private final boolean semicolonIsNormalChar;
71 private int pathEndIdx;
72 private String path;
73 private Map<String, List<String>> params;
74
75 /**
76 * Creates a new decoder that decodes the specified URI. The decoder will
77 * assume that the query string is encoded in UTF-8.
78 */
79 public QueryStringDecoder(String uri) {
80 this(uri, HttpConstants.DEFAULT_CHARSET);
81 }
82
83 /**
84 * Creates a new decoder that decodes the specified URI encoded in the
85 * specified charset.
86 */
87 public QueryStringDecoder(String uri, boolean hasPath) {
88 this(uri, HttpConstants.DEFAULT_CHARSET, hasPath);
89 }
90
91 /**
92 * Creates a new decoder that decodes the specified URI encoded in the
93 * specified charset.
94 */
95 public QueryStringDecoder(String uri, Charset charset) {
96 this(uri, charset, true);
97 }
98
99 /**
100 * Creates a new decoder that decodes the specified URI encoded in the
101 * specified charset.
102 */
103 public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
104 this(uri, charset, hasPath, DEFAULT_MAX_PARAMS);
105 }
106
107 /**
108 * Creates a new decoder that decodes the specified URI encoded in the
109 * specified charset.
110 */
111 public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
112 this(uri, charset, hasPath, maxParams, false);
113 }
114
115 /**
116 * Creates a new decoder that decodes the specified URI encoded in the
117 * specified charset.
118 */
119 public QueryStringDecoder(String uri, Charset charset, boolean hasPath,
120 int maxParams, boolean semicolonIsNormalChar) {
121 this.uri = checkNotNull(uri, "uri");
122 this.charset = checkNotNull(charset, "charset");
123 this.maxParams = checkPositive(maxParams, "maxParams");
124 this.semicolonIsNormalChar = semicolonIsNormalChar;
125
126 // `-1` means that path end index will be initialized lazily
127 pathEndIdx = hasPath ? -1 : 0;
128 }
129
130 /**
131 * Creates a new decoder that decodes the specified URI. The decoder will
132 * assume that the query string is encoded in UTF-8.
133 */
134 public QueryStringDecoder(URI uri) {
135 this(uri, HttpConstants.DEFAULT_CHARSET);
136 }
137
138 /**
139 * Creates a new decoder that decodes the specified URI encoded in the
140 * specified charset.
141 */
142 public QueryStringDecoder(URI uri, Charset charset) {
143 this(uri, charset, DEFAULT_MAX_PARAMS);
144 }
145
146 /**
147 * Creates a new decoder that decodes the specified URI encoded in the
148 * specified charset.
149 */
150 public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
151 this(uri, charset, maxParams, false);
152 }
153
154 /**
155 * Creates a new decoder that decodes the specified URI encoded in the
156 * specified charset.
157 */
158 public QueryStringDecoder(URI uri, Charset charset, int maxParams, boolean semicolonIsNormalChar) {
159 String rawPath = uri.getRawPath();
160 if (rawPath == null) {
161 rawPath = EMPTY_STRING;
162 }
163 String rawQuery = uri.getRawQuery();
164 // Also take care of cut of things like "http://localhost"
165 this.uri = rawQuery == null? rawPath : rawPath + '?' + rawQuery;
166 this.charset = checkNotNull(charset, "charset");
167 this.maxParams = checkPositive(maxParams, "maxParams");
168 this.semicolonIsNormalChar = semicolonIsNormalChar;
169 pathEndIdx = rawPath.length();
170 }
171
172 @Override
173 public String toString() {
174 return uri();
175 }
176
177 /**
178 * Returns the uri used to initialize this {@link QueryStringDecoder}.
179 */
180 public String uri() {
181 return uri;
182 }
183
184 /**
185 * Returns the decoded path string of the URI.
186 */
187 public String path() {
188 if (path == null) {
189 path = decodeComponent(uri, 0, pathEndIdx(), charset, true);
190 }
191 return path;
192 }
193
194 /**
195 * Returns the decoded key-value parameter pairs of the URI.
196 */
197 public Map<String, List<String>> parameters() {
198 if (params == null) {
199 params = decodeParams(uri, pathEndIdx(), charset, maxParams, semicolonIsNormalChar);
200 }
201 return params;
202 }
203
204 /**
205 * Returns the raw path string of the URI.
206 */
207 public String rawPath() {
208 return uri.substring(0, pathEndIdx());
209 }
210
211 /**
212 * Returns raw query string of the URI.
213 */
214 public String rawQuery() {
215 int start = pathEndIdx() + 1;
216 return start < uri.length() ? uri.substring(start) : EMPTY_STRING;
217 }
218
219 private int pathEndIdx() {
220 if (pathEndIdx == -1) {
221 pathEndIdx = findPathEndIndex(uri);
222 }
223 return pathEndIdx;
224 }
225
226 private static Map<String, List<String>> decodeParams(String s, int from, Charset charset, int paramsLimit,
227 boolean semicolonIsNormalChar) {
228 int len = s.length();
229 if (from >= len) {
230 return Collections.emptyMap();
231 }
232 if (s.charAt(from) == '?') {
233 from++;
234 }
235 Map<String, List<String>> params = new LinkedHashMap<String, List<String>>();
236 int nameStart = from;
237 int valueStart = -1;
238 int i;
239 loop:
240 for (i = from; i < len; i++) {
241 switch (s.charAt(i)) {
242 case '=':
243 if (nameStart == i) {
244 nameStart = i + 1;
245 } else if (valueStart < nameStart) {
246 valueStart = i + 1;
247 }
248 break;
249 case ';':
250 if (semicolonIsNormalChar) {
251 continue;
252 }
253 // fall-through
254 case '&':
255 if (addParam(s, nameStart, valueStart, i, params, charset)) {
256 paramsLimit--;
257 if (paramsLimit == 0) {
258 return params;
259 }
260 }
261 nameStart = i + 1;
262 break;
263 case '#':
264 break loop;
265 default:
266 // continue
267 }
268 }
269 addParam(s, nameStart, valueStart, i, params, charset);
270 return params;
271 }
272
273 private static boolean addParam(String s, int nameStart, int valueStart, int valueEnd,
274 Map<String, List<String>> params, Charset charset) {
275 if (nameStart >= valueEnd) {
276 return false;
277 }
278 if (valueStart <= nameStart) {
279 valueStart = valueEnd + 1;
280 }
281 String name = decodeComponent(s, nameStart, valueStart - 1, charset, false);
282 String value = decodeComponent(s, valueStart, valueEnd, charset, false);
283 List<String> values = params.get(name);
284 if (values == null) {
285 values = new ArrayList<String>(1); // Often there's only 1 value.
286 params.put(name, values);
287 }
288 values.add(value);
289 return true;
290 }
291
292 /**
293 * Decodes a bit of a URL encoded by a browser.
294 * <p>
295 * This is equivalent to calling {@link #decodeComponent(String, Charset)}
296 * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
297 * @param s The string to decode (can be empty).
298 * @return The decoded string, or {@code s} if there's nothing to decode.
299 * If the string to decode is {@code null}, returns an empty string.
300 * @throws IllegalArgumentException if the string contains a malformed
301 * escape sequence.
302 */
303 public static String decodeComponent(final String s) {
304 return decodeComponent(s, HttpConstants.DEFAULT_CHARSET);
305 }
306
307 /**
308 * Decodes a bit of a URL encoded by a browser.
309 * <p>
310 * The string is expected to be encoded as per RFC 3986, Section 2.
311 * This is the encoding used by JavaScript functions {@code encodeURI}
312 * and {@code encodeURIComponent}, but not {@code escape}. For example
313 * in this encoding, é (in Unicode {@code U+00E9} or in UTF-8
314 * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
315 * <p>
316 * This is essentially equivalent to calling
317 * {@link URLDecoder#decode(String, String)}
318 * except that it's over 2x faster and generates less garbage for the GC.
319 * Actually this function doesn't allocate any memory if there's nothing
320 * to decode, the argument itself is returned.
321 * @param s The string to decode (can be empty).
322 * @param charset The charset to use to decode the string (should really
323 * be {@link CharsetUtil#UTF_8}.
324 * @return The decoded string, or {@code s} if there's nothing to decode.
325 * If the string to decode is {@code null}, returns an empty string.
326 * @throws IllegalArgumentException if the string contains a malformed
327 * escape sequence.
328 */
329 public static String decodeComponent(final String s, final Charset charset) {
330 if (s == null) {
331 return EMPTY_STRING;
332 }
333 return decodeComponent(s, 0, s.length(), charset, false);
334 }
335
336 private static String decodeComponent(String s, int from, int toExcluded, Charset charset, boolean isPath) {
337 int len = toExcluded - from;
338 if (len <= 0) {
339 return EMPTY_STRING;
340 }
341 int firstEscaped = -1;
342 for (int i = from; i < toExcluded; i++) {
343 char c = s.charAt(i);
344 if (c == '%' || c == '+' && !isPath) {
345 firstEscaped = i;
346 break;
347 }
348 }
349 if (firstEscaped == -1) {
350 return s.substring(from, toExcluded);
351 }
352
353 // Each encoded byte takes 3 characters (e.g. "%20")
354 int decodedCapacity = (toExcluded - firstEscaped) / 3;
355 byte[] buf = PlatformDependent.allocateUninitializedArray(decodedCapacity);
356 int bufIdx;
357
358 StringBuilder strBuf = new StringBuilder(len);
359 strBuf.append(s, from, firstEscaped);
360
361 for (int i = firstEscaped; i < toExcluded; i++) {
362 char c = s.charAt(i);
363 if (c != '%') {
364 strBuf.append(c != '+' || isPath? c : SPACE);
365 continue;
366 }
367
368 bufIdx = 0;
369 do {
370 if (i + 3 > toExcluded) {
371 throw new IllegalArgumentException("unterminated escape sequence at index " + i + " of: " + s);
372 }
373 buf[bufIdx++] = decodeHexByte(s, i + 1);
374 i += 3;
375 } while (i < toExcluded && s.charAt(i) == '%');
376 i--;
377
378 strBuf.append(new String(buf, 0, bufIdx, charset));
379 }
380 return strBuf.toString();
381 }
382
383 private static int findPathEndIndex(String uri) {
384 int len = uri.length();
385 for (int i = 0; i < len; i++) {
386 char c = uri.charAt(i);
387 if (c == '?' || c == '#') {
388 return i;
389 }
390 }
391 return len;
392 }
393 }