1 /*
2 * Copyright 2022 The Netty Project
3 *
4 * The Netty Project licenses this file to you under the Apache License,
5 * version 2.0 (the "License"); you may not use this file except in compliance
6 * with the License. You may obtain a copy of the License at:
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package io.netty.handler.codec.http;
17
18 import io.netty.util.AsciiString;
19 import io.netty.util.internal.UnstableApi;
20
21 import static io.netty.util.AsciiString.contentEqualsIgnoreCase;
22
23 /**
24 * Functions used to perform various validations of HTTP header names and values.
25 */
26 @UnstableApi
27 public final class HttpHeaderValidationUtil {
/**
 * Private constructor: this class only exposes static helpers.
 */
private HttpHeaderValidationUtil() {
    // Intentionally empty.
}
30
31 /**
32 * Check if a header name is "connection related".
33 * <p>
34 * The <a href="https://datatracker.ietf.org/doc/html/rfc9110#section-7.6.1">RFC9110</a> only specify an incomplete
35 * list of the following headers:
36 *
37 * <ul>
38 * <li><tt>Connection</tt></li>
39 * <li><tt>Proxy-Connection</tt></li>
40 * <li><tt>Keep-Alive</tt></li>
41 * <li><tt>TE</tt></li>
42 * <li><tt>Transfer-Encoding</tt></li>
43 * <li><tt>Upgrade</tt></li>
44 * </ul>
45 *
46 * @param name the name of the header to check. The check is case-insensitive.
47 * @param ignoreTeHeader {@code true} if the <tt>TE</tt> header should be ignored by this check.
48 * This is relevant for HTTP/2 header validation, where the <tt>TE</tt> header has special rules.
49 * @return {@code true} if the given header name is one of the specified connection-related headers.
50 */
51 @SuppressWarnings("deprecation") // We need to check for deprecated headers as well.
52 public static boolean isConnectionHeader(CharSequence name, boolean ignoreTeHeader) {
53 // These are the known standard and non-standard connection related headers:
54 // - upgrade (7 chars)
55 // - connection (10 chars)
56 // - keep-alive (10 chars)
57 // - proxy-connection (16 chars)
58 // - transfer-encoding (17 chars)
59 //
60 // See https://datatracker.ietf.org/doc/html/rfc9113#section-8.2.2
61 // and https://datatracker.ietf.org/doc/html/rfc9110#section-7.6.1
62 // for the list of connection related headers.
63 //
64 // We scan for these based on the length, then double-check any matching name.
65 int len = name.length();
66 switch (len) {
67 case 2: return ignoreTeHeader? false : contentEqualsIgnoreCase(name, HttpHeaderNames.TE);
68 case 7: return contentEqualsIgnoreCase(name, HttpHeaderNames.UPGRADE);
69 case 10: return contentEqualsIgnoreCase(name, HttpHeaderNames.CONNECTION) ||
70 contentEqualsIgnoreCase(name, HttpHeaderNames.KEEP_ALIVE);
71 case 16: return contentEqualsIgnoreCase(name, HttpHeaderNames.PROXY_CONNECTION);
72 case 17: return contentEqualsIgnoreCase(name, HttpHeaderNames.TRANSFER_ENCODING);
73 default:
74 return false;
75 }
76 }
77
78 /**
79 * If the given header is {@link HttpHeaderNames#TE} and the given header value is <em>not</em>
80 * {@link HttpHeaderValues#TRAILERS}, then return {@code true}. Otherwie, {@code false}.
81 * <p>
82 * The string comparisons are case-insensitive.
83 * <p>
84 * This check is important for HTTP/2 header validation.
85 *
86 * @param name the header name to check if it is <tt>TE</tt> or not.
87 * @param value the header value to check if it is something other than <tt>TRAILERS</tt>.
88 * @return {@code true} only if the header name is <tt>TE</tt>, and the header value is <em>not</em>
89 * <tt>TRAILERS</tt>. Otherwise, {@code false}.
90 */
91 public static boolean isTeNotTrailers(CharSequence name, CharSequence value) {
92 if (name.length() == 2) {
93 return contentEqualsIgnoreCase(name, HttpHeaderNames.TE) &&
94 !contentEqualsIgnoreCase(value, HttpHeaderValues.TRAILERS);
95 }
96 return false;
97 }
98
99 /**
100 * Validate the given HTTP header value by searching for any illegal characters.
101 *
102 * @param value the HTTP header value to validate.
103 * @return the index of the first illegal character found, or {@code -1} if there are none and the header value is
104 * valid.
105 */
106 public static int validateValidHeaderValue(CharSequence value) {
107 int length = value.length();
108 if (length == 0) {
109 return -1;
110 }
111 if (value instanceof AsciiString) {
112 return verifyValidHeaderValueAsciiString((AsciiString) value);
113 }
114 return verifyValidHeaderValueCharSequence(value);
115 }
116
117 private static int verifyValidHeaderValueAsciiString(AsciiString value) {
118 // Validate value to field-content rule.
119 // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
120 // field-vchar = VCHAR / obs-text
121 // VCHAR = %x21-7E ; visible (printing) characters
122 // obs-text = %x80-FF
123 // SP = %x20
124 // HTAB = %x09 ; horizontal tab
125 // See: https://datatracker.ietf.org/doc/html/rfc7230#section-3.2
126 // And: https://datatracker.ietf.org/doc/html/rfc5234#appendix-B.1
127 final byte[] array = value.array();
128 final int start = value.arrayOffset();
129 int b = array[start] & 0xFF;
130 if (b < 0x21 || b == 0x7F) {
131 return 0;
132 }
133 int length = value.length();
134 for (int i = start + 1; i < length; i++) {
135 b = array[i] & 0xFF;
136 if (b < 0x20 && b != 0x09 || b == 0x7F) {
137 return i - start;
138 }
139 }
140 return -1;
141 }
142
143 private static int verifyValidHeaderValueCharSequence(CharSequence value) {
144 // Validate value to field-content rule.
145 // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
146 // field-vchar = VCHAR / obs-text
147 // VCHAR = %x21-7E ; visible (printing) characters
148 // obs-text = %x80-FF
149 // SP = %x20
150 // HTAB = %x09 ; horizontal tab
151 // See: https://datatracker.ietf.org/doc/html/rfc7230#section-3.2
152 // And: https://datatracker.ietf.org/doc/html/rfc5234#appendix-B.1
153 int b = value.charAt(0);
154 if (b < 0x21 || b == 0x7F) {
155 return 0;
156 }
157 int length = value.length();
158 for (int i = 1; i < length; i++) {
159 b = value.charAt(i);
160 if (b < 0x20 && b != 0x09 || b == 0x7F) {
161 return i;
162 }
163 }
164 return -1;
165 }
166
167 /**
168 * Validate a <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> contains only allowed
169 * characters.
170 * <p>
171 * The <a href="https://tools.ietf.org/html/rfc2616#section-2.2">token</a> format is used for variety of HTTP
172 * components, like <a href="https://tools.ietf.org/html/rfc6265#section-4.1.1">cookie-name</a>,
173 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">field-name</a> of a
174 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2">header-field</a>, or
175 * <a href="https://tools.ietf.org/html/rfc7231#section-4">request method</a>.
176 *
177 * @param token the token to validate.
178 * @return the index of the first invalid token character found, or {@code -1} if there are none.
179 */
180 public static int validateToken(CharSequence token) {
181 if (token instanceof AsciiString) {
182 return validateAsciiStringToken((AsciiString) token);
183 }
184 return validateCharSequenceToken(token);
185 }
186
187 /**
188 * Validate that an {@link AsciiString} contain onlu valid
189 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> characters.
190 *
191 * @param token the ascii string to validate.
192 */
193 private static int validateAsciiStringToken(AsciiString token) {
194 byte[] array = token.array();
195 for (int i = token.arrayOffset(), len = token.arrayOffset() + token.length(); i < len; i++) {
196 if (!BitSet128.contains(array[i], TOKEN_CHARS_HIGH, TOKEN_CHARS_LOW)) {
197 return i - token.arrayOffset();
198 }
199 }
200 return -1;
201 }
202
203 /**
204 * Validate that a {@link CharSequence} contain onlu valid
205 * <a href="https://tools.ietf.org/html/rfc7230#section-3.2.6">token</a> characters.
206 *
207 * @param token the character sequence to validate.
208 */
209 private static int validateCharSequenceToken(CharSequence token) {
210 for (int i = 0, len = token.length(); i < len; i++) {
211 byte value = (byte) token.charAt(i);
212 if (!BitSet128.contains(value, TOKEN_CHARS_HIGH, TOKEN_CHARS_LOW)) {
213 return i;
214 }
215 }
216 return -1;
217 }
218
/** Bit mask of the allowed token characters with code points 64 to 127. */
private static final long TOKEN_CHARS_HIGH;
/** Bit mask of the allowed token characters with code points 0 to 63. */
private static final long TOKEN_CHARS_LOW;

static {
    // HEADER
    //  header-field   = field-name ":" OWS field-value OWS
    //
    //  field-name     = token
    //  token          = 1*tchar
    //
    //  tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
    //                 / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
    //                 / DIGIT / ALPHA
    //                 ; any VCHAR, except delimiters.
    //  Delimiters are chosen
    //  from the set of US-ASCII visual characters not allowed in a token
    //  (DQUOTE and "(),/:;<=>?@[\]{}")
    //
    // COOKIE
    //  cookie-pair       = cookie-name "=" cookie-value
    //  cookie-name       = token
    //  token          = 1*<any CHAR except CTLs or separators>
    //  CTL            = <any US-ASCII control character
    //                   (octets 0 - 31) and DEL (127)>
    //  separators     = "(" | ")" | "<" | ">" | "@"
    //                 | "," | ";" | ":" | "\" | <">
    //                 | "/" | "[" | "]" | "?" | "="
    //                 | "{" | "}" | SP | HT
    //
    // field-name's token is equivalent to cookie-name's token, so one tchar mask serves both.
    final BitSet128 tchars = new BitSet128();
    // Alphanumeric.
    tchars.range('A', 'Z');
    tchars.range('a', 'z');
    tchars.range('0', '9');
    // Unreserved characters.
    tchars.bits('-', '.', '_', '~');
    // Token special characters.
    tchars.bits('!', '#', '$', '%', '&', '\'', '*', '+', '^', '`', '|');

    TOKEN_CHARS_LOW = tchars.low();
    TOKEN_CHARS_HIGH = tchars.high();
}
255
256 private static final class BitSet128 {
257 private long high;
258 private long low;
259
260 BitSet128 range(char fromInc, char toInc) {
261 for (int bit = fromInc; bit <= toInc; bit++) {
262 if (bit < 64) {
263 low |= 1L << bit;
264 } else {
265 high |= 1L << bit - 64;
266 }
267 }
268 return this;
269 }
270
271 BitSet128 bits(char... bits) {
272 for (char bit : bits) {
273 if (bit < 64) {
274 low |= 1L << bit;
275 } else {
276 high |= 1L << bit - 64;
277 }
278 }
279 return this;
280 }
281
282 long high() {
283 return high;
284 }
285
286 long low() {
287 return low;
288 }
289
290 static boolean contains(byte bit, long high, long low) {
291 if (bit < 0) {
292 return false;
293 }
294 if (bit < 64) {
295 return 0 != (low & 1L << bit);
296 }
297 return 0 != (high & 1L << bit - 64);
298 }
299 }
300 }