xref: /unit/src/nxt_http_parse.c (revision 0:a63ceefd6ab0)
1 
2 /*
3  * Copyright (C) Igor Sysoev
4  * Copyright (C) NGINX, Inc.
5  */
6 
7 #include <nxt_main.h>
8 
9 
/*
 * File-local helpers of nxt_http_split_header_parse(), defined below:
 * nxt_http_split_header_part() records one fragment of a header line and
 * nxt_http_split_header_join() assembles the recorded fragments and parses
 * the resulting line.
 */
static nxt_int_t nxt_http_split_header_part(nxt_http_split_header_parse_t *shp,
    u_char *start, u_char *end);
static nxt_int_t nxt_http_split_header_join(nxt_http_split_header_parse_t *shp);
13 
14 
15 nxt_int_t
16 nxt_http_status_parse(nxt_http_status_parse_t *sp, nxt_buf_mem_t *b)
17 {
18     u_char  ch, *p;
19     enum {
20         sw_start = 0,
21         sw_H,
22         sw_HT,
23         sw_HTT,
24         sw_HTTP,
25         sw_major_digit,
26         sw_dot,
27         sw_minor_digit,
28         sw_space_after_version,
29         sw_status_start,
30         sw_status_code,
31         sw_status_text,
32         sw_end,
33     } state;
34 
35     state = sp->state;
36 
37     for (p = b->pos; p < b->free; p++) {
38 
39         ch = *p;
40 
41         switch (state) {
42 
43         /* "HTTP/" */
44         case sw_start:
45             if (nxt_fast_path(ch == 'H')) {
46                 state = sw_H;
47                 continue;
48             }
49 
50             return NXT_ERROR;
51 
52         case sw_H:
53             if (nxt_fast_path(ch == 'T')) {
54                 state = sw_HT;
55                 continue;
56             }
57 
58             return NXT_ERROR;
59 
60         case sw_HT:
61             if (nxt_fast_path(ch == 'T')) {
62                 state = sw_HTT;
63                 continue;
64             }
65 
66             return NXT_ERROR;
67 
68         case sw_HTT:
69             if (nxt_fast_path(ch == 'P')) {
70                 state = sw_HTTP;
71                 continue;
72             }
73 
74             return NXT_ERROR;
75 
76         case sw_HTTP:
77             if (nxt_fast_path(ch == '/')) {
78                 state = sw_major_digit;
79                 continue;
80             }
81 
82             return NXT_ERROR;
83 
84         /*
85          * Only HTTP/x.x format is tested because it
86          * is unlikely that other formats will appear.
87          */
88         case sw_major_digit:
89             if (nxt_fast_path(ch >= '1' && ch <= '9')) {
90                 sp->http_version = 10 * (ch - '0');
91                 state = sw_dot;
92                 continue;
93             }
94 
95             return NXT_ERROR;
96 
97         case sw_dot:
98             if (nxt_fast_path(ch == '.')) {
99                 state = sw_minor_digit;
100                 continue;
101             }
102 
103             return NXT_ERROR;
104 
105         case sw_minor_digit:
106             if (nxt_fast_path(ch >= '0' && ch <= '9')) {
107                 sp->http_version += ch - '0';
108                 state = sw_space_after_version;
109                 continue;
110             }
111 
112             return NXT_ERROR;
113 
114         case sw_space_after_version:
115             if (nxt_fast_path(ch == ' ')) {
116                 state = sw_status_start;
117                 continue;
118             }
119 
120             return NXT_ERROR;
121 
122         case sw_status_start:
123             if (nxt_slow_path(ch == ' ')) {
124                 continue;
125             }
126 
127             sp->start = p;
128             state = sw_status_code;
129 
130             /* Fall through. */
131 
132         /* HTTP status code. */
133         case sw_status_code:
134             if (nxt_fast_path(ch >= '0' && ch <= '9')) {
135                 sp->code = sp->code * 10 + (ch - '0');
136                 continue;
137             }
138 
139             switch (ch) {
140             case ' ':
141                 state = sw_status_text;
142                 continue;
143             case '.':                    /* IIS may send 403.1, 403.2, etc. */
144                 state = sw_status_text;
145                 continue;
146             case NXT_CR:
147                 sp->end = p;
148                 state = sw_end;
149                 continue;
150             case NXT_LF:
151                 sp->end = p;
152                 goto done;
153             default:
154                 return NXT_ERROR;
155             }
156 
157         /* Any text until end of line. */
158         case sw_status_text:
159             switch (ch) {
160             case NXT_CR:
161                 sp->end = p;
162                 state = sw_end;
163                 continue;
164             case NXT_LF:
165                 sp->end = p;
166                 goto done;
167             }
168             continue;
169 
170         /* End of status line. */
171         case sw_end:
172             if (nxt_fast_path(ch == NXT_LF)) {
173                 goto done;
174             }
175 
176             return NXT_ERROR;
177         }
178     }
179 
180     b->pos = p;
181     sp->state = state;
182 
183     return NXT_AGAIN;
184 
185 done:
186 
187     b->pos = p + 1;
188 
189     return NXT_OK;
190 }
191 
192 
193 nxt_int_t
194 nxt_http_header_parse(nxt_http_header_parse_t *hp, nxt_buf_mem_t *b)
195 {
196     u_char    c, ch, *p;
197     uint32_t  hash;
198     enum {
199         sw_start = 0,
200         sw_name,
201         sw_space_before_value,
202         sw_value,
203         sw_space_after_value,
204         sw_ignore_line,
205         sw_almost_done,
206         sw_header_almost_done,
207     } state;
208 
209     static const u_char  normal[256]  nxt_aligned(64) =
210         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
211         "\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789\0\0\0\0\0\0"
212 
213         /* These 64 bytes should reside in one cache line */
214         "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
215         "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
216 
217         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
218         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
219         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
220         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
221 
222     nxt_prefetch(&normal[0]);
223     nxt_prefetch(&normal[64]);
224 
225     state = hp->state;
226     hash = hp->header_hash;
227 
228     for (p = b->pos; p < b->free; p++) {
229         ch = *p;
230 
231         switch (state) {
232 
233         /* first char */
234         case sw_start:
235             hp->header_name_start = p;
236             hp->invalid_header = 0;
237 
238             switch (ch) {
239             case NXT_CR:
240                 hp->header_end = p;
241                 state = sw_header_almost_done;
242                 break;
243             case NXT_LF:
244                 hp->header_end = p;
245                 goto header_done;
246             default:
247                 state = sw_name;
248 
249                 c = normal[ch];
250 
251                 if (c) {
252                     hash = nxt_djb_hash_add(NXT_DJB_HASH_INIT, c);
253                     break;
254                 }
255 
256                 if (ch == '_') {
257                     hash = nxt_djb_hash_add(NXT_DJB_HASH_INIT, ch);
258                     hp->underscore = 1;
259                     break;
260                 }
261 
262                 hp->invalid_header = 1;
263                 break;
264             }
265             break;
266 
267         /* header name */
268         case sw_name:
269             c = normal[ch];
270 
271             if (c) {
272                 hash = nxt_djb_hash_add(hash, c);
273                 break;
274             }
275 
276             if (ch == ':') {
277                 hp->header_name_end = p;
278                 state = sw_space_before_value;
279                 break;
280             }
281 
282             if (ch == NXT_CR) {
283                 hp->header_name_end = p;
284                 hp->header_start = p;
285                 hp->header_end = p;
286                 state = sw_almost_done;
287                 break;
288             }
289 
290             if (ch == NXT_LF) {
291                 hp->header_name_end = p;
292                 hp->header_start = p;
293                 hp->header_end = p;
294                 goto done;
295             }
296 
297             if (ch == '_') {
298                 hash = nxt_djb_hash_add(hash, ch);
299                 hp->underscore = 1;
300                 break;
301             }
302 
303             /* IIS may send the duplicate "HTTP/1.1 ..." lines */
304             if (ch == '/'
305                 && hp->upstream
306                 && p - hp->header_name_start == 4
307                 && nxt_memcmp(hp->header_name_start, "HTTP", 4) == 0)
308             {
309                 state = sw_ignore_line;
310                 break;
311             }
312 
313             hp->invalid_header = 1;
314             break;
315 
316         /* space* before header value */
317         case sw_space_before_value:
318             switch (ch) {
319             case ' ':
320                 break;
321             case NXT_CR:
322                 hp->header_start = p;
323                 hp->header_end = p;
324                 state = sw_almost_done;
325                 break;
326             case NXT_LF:
327                 hp->header_start = p;
328                 hp->header_end = p;
329                 goto done;
330             case '\0':
331                 hp->invalid_header = 1;
332                 /* Fall through. */
333             default:
334                 hp->header_start = p;
335                 state = sw_value;
336                 break;
337             }
338             break;
339 
340         /* header value */
341         case sw_value:
342             switch (ch) {
343             case ' ':
344                 hp->header_end = p;
345                 state = sw_space_after_value;
346                 break;
347             case NXT_CR:
348                 hp->header_end = p;
349                 state = sw_almost_done;
350                 break;
351             case NXT_LF:
352                 hp->header_end = p;
353                 goto done;
354             case '\0':
355                 hp->invalid_header = 1;
356                 break;
357             }
358             break;
359 
360         /* space* before end of header line */
361         case sw_space_after_value:
362             switch (ch) {
363             case ' ':
364                 break;
365             case NXT_CR:
366                 state = sw_almost_done;
367                 break;
368             case NXT_LF:
369                 goto done;
370             case '\0':
371                 hp->invalid_header = 1;
372                 /* Fall through. */
373             default:
374                 state = sw_value;
375                 break;
376             }
377             break;
378 
379         /* ignore header line */
380         case sw_ignore_line:
381             switch (ch) {
382             case NXT_LF:
383                 state = sw_start;
384                 break;
385             default:
386                 break;
387             }
388             break;
389 
390         /* end of header line */
391         case sw_almost_done:
392             switch (ch) {
393             case NXT_LF:
394                 goto done;
395             case NXT_CR:
396                 break;
397             default:
398                 return NXT_DECLINED;
399             }
400             break;
401 
402         /* end of header */
403         case sw_header_almost_done:
404             switch (ch) {
405             case NXT_LF:
406                 goto header_done;
407             default:
408                 return NXT_DECLINED;
409             }
410         }
411     }
412 
413     b->pos = p;
414     hp->state = state;
415     hp->header_hash = hash;
416 
417     return NXT_AGAIN;
418 
419 done:
420 
421     b->pos = p + 1;
422     hp->state = sw_start;
423     hp->header_hash = hash;
424 
425     return NXT_OK;
426 
427 header_done:
428 
429     b->pos = p + 1;
430     hp->state = sw_start;
431 
432     return NXT_DONE;
433 }
434 
435 
436 nxt_int_t
437 nxt_http_split_header_parse(nxt_http_split_header_parse_t *shp,
438     nxt_buf_mem_t *b)
439 {
440     u_char     *end;
441     nxt_int_t  ret;
442 
443     if (shp->parts == NULL || nxt_array_is_empty(shp->parts)) {
444 
445         ret = nxt_http_header_parse(&shp->parse, b);
446 
447         if (nxt_fast_path(ret == NXT_OK)) {
448             return ret;
449         }
450 
451         if (nxt_fast_path(ret == NXT_AGAIN)) {
452             /* A buffer is over. */
453 
454             if (shp->parse.state == 0) {
455                 /*
456                  * A previous parsed header line is
457                  * over right on the end of the buffer.
458                  */
459                 return ret;
460             }
461             /*
462              * Add the first header line part and return NXT_AGAIN on success.
463              */
464             return nxt_http_split_header_part(shp, shp->parse.header_name_start,
465                                               b->pos);
466         }
467 
468         return ret;
469     }
470 
471     /* A header line is split in buffers. */
472 
473     end = nxt_memchr(b->pos, NXT_LF, b->free - b->pos);
474 
475     if (end != NULL) {
476 
477         /* The last header line part found. */
478         end++;
479 
480         ret = nxt_http_split_header_part(shp, b->pos, end);
481 
482         if (nxt_fast_path(ret != NXT_ERROR)) {
483             /* ret == NXT_AGAIN: success, mark the part if it were parsed. */
484             b->pos = end;
485 
486             return nxt_http_split_header_join(shp);
487         }
488 
489         return ret;
490     }
491 
492     /* Add another header line part and return NXT_AGAIN on success. */
493 
494     return nxt_http_split_header_part(shp, b->pos, b->free);
495 }
496 
497 
498 static nxt_int_t
499 nxt_http_split_header_part(nxt_http_split_header_parse_t *shp, u_char *start,
500     u_char *end)
501 {
502     nxt_http_header_part_t  *part;
503 
504     nxt_thread_log_debug("http source header part: \"%*s\"",
505                          end - start, start);
506 
507     if (shp->parts == NULL) {
508         shp->parts = nxt_array_create(shp->mem_pool, 2,
509                                      sizeof(nxt_http_header_part_t));
510         if (nxt_slow_path(shp->parts == NULL)) {
511             return NXT_ERROR;
512         }
513     }
514 
515     if (!nxt_array_is_empty(shp->parts)) {
516 
517         part = nxt_array_last(shp->parts);
518 
519         if (part->end == end) {
520             part->end = end;
521             return NXT_AGAIN;
522         }
523     }
524 
525     part = nxt_array_add(shp->parts);
526 
527     if (nxt_fast_path(part != NULL)) {
528         part->start = start;
529         part->end = end;
530         return NXT_AGAIN;
531     }
532 
533     return NXT_ERROR;
534 }
535 
536 
537 static nxt_int_t
538 nxt_http_split_header_join(nxt_http_split_header_parse_t *shp)
539 {
540     u_char                  *p;
541     size_t                  size;
542     nxt_uint_t              n;
543     nxt_buf_mem_t           b;
544     nxt_http_header_part_t  *part;
545 
546     part = shp->parts->elts;
547     n = shp->parts->nelts;
548 
549     if (n == 1) {
550         /*
551          * A header line was read by parts, but resides continuously in a
552          * stream source buffer, so use disposition in the original buffer.
553          */
554         b.pos = part->start;
555         b.free = part->end;
556 
557     } else {
558         /* Join header line parts to store the header line and ot parse it. */
559 
560         size = 0;
561 
562         do {
563             size += part->end - part->start;
564             part++;
565             n--;
566         } while (n != 0);
567 
568         p = nxt_mem_alloc(shp->mem_pool, size);
569         if (nxt_slow_path(p == NULL)) {
570             return NXT_ERROR;
571         }
572 
573         b.pos = p;
574 
575         part = shp->parts->elts;
576         n = shp->parts->nelts;
577 
578         do {
579             p = nxt_cpymem(p, part->start, part->end - part->start);
580             part++;
581             n--;
582         } while (n != 0);
583 
584         b.free = p;
585     }
586 
587     /* b.start and b.end are not required for parsing. */
588 
589     nxt_array_reset(shp->parts);
590 
591     /* Reset a header parse state to the sw_start. */
592     shp->parse.state = 0;
593 
594     return nxt_http_header_parse(&shp->parse, &b);
595 }
596