xref: /unit/src/nxt_http_parse.c (revision 2448:243735980417)
1 
2 /*
3  * Copyright (C) NGINX, Inc.
4  * Copyright (C) Valentin V. Bartenev
5  */
6 
7 #include <nxt_main.h>
8 
9 
10 static nxt_int_t nxt_http_parse_unusual_target(nxt_http_request_parse_t *rp,
11     u_char **pos, const u_char *end);
12 static nxt_int_t nxt_http_parse_request_line(nxt_http_request_parse_t *rp,
13     u_char **pos, const u_char *end);
14 static nxt_int_t nxt_http_parse_field_name(nxt_http_request_parse_t *rp,
15     u_char **pos, const u_char *end);
16 static nxt_int_t nxt_http_parse_field_value(nxt_http_request_parse_t *rp,
17     u_char **pos, const u_char *end);
18 static u_char *nxt_http_lookup_field_end(u_char *p, const u_char *end);
19 static nxt_int_t nxt_http_parse_field_end(nxt_http_request_parse_t *rp,
20     u_char **pos, const u_char *end);
21 
22 static nxt_int_t nxt_http_field_hash_test(nxt_lvlhsh_query_t *lhq, void *data);
23 
24 static nxt_int_t nxt_http_field_hash_collision(nxt_lvlhsh_query_t *lhq,
25     void *data);
26 
27 
/* Upper bounds, in bytes, for a header field name and a header field value. */
#define NXT_HTTP_MAX_FIELD_NAME         255
#define NXT_HTTP_MAX_FIELD_VALUE        NXT_INT32_T_MAX

/* Shift placed first in the shift array of nxt_http_fields_hash_proto. */
#define NXT_HTTP_FIELD_LVLHSH_SHIFT     5
32 
33 
/*
 * Result codes of the request target scanner.  Non-zero entries of
 * nxt_http_target_chars[] are values of this enumeration.
 */
typedef enum {
    NXT_HTTP_TARGET_SPACE      = 1,  /* ' '                                */
    NXT_HTTP_TARGET_HASH       = 2,  /* '#'                                */
    NXT_HTTP_TARGET_AGAIN      = 3,  /* input ended, no trap byte was met  */
    NXT_HTTP_TARGET_BAD        = 4,  /* '\0', '\r' or '\n'                 */

    /* The traps below are used for the extended check only. */

    NXT_HTTP_TARGET_SLASH      = 5,  /* '/'                                */
    NXT_HTTP_TARGET_DOT        = 6,  /* '.'                                */
    NXT_HTTP_TARGET_ARGS_MARK  = 7,  /* '?'                                */
    NXT_HTTP_TARGET_QUOTE_MARK = 8,  /* '%'                                */
} nxt_http_target_traps_e;
47 
48 
49 static const uint8_t  nxt_http_target_chars[256] nxt_aligned(64) = {
50     /* \0                               \n        \r       */
51         4, 0, 0, 0,  0, 0, 0, 0,   0, 0, 4, 0,  0, 4, 0, 0,
52         0, 0, 0, 0,  0, 0, 0, 0,   0, 0, 0, 0,  0, 0, 0, 0,
53 
54     /* \s  !  "  #   $  %  &  '    (  )  *  +   ,  -  .  / */
55         1, 0, 0, 2,  0, 8, 0, 0,   0, 0, 0, 0,  0, 0, 6, 5,
56 
57     /*  0  1  2  3   4  5  6  7    8  9  :  ;   <  =  >  ? */
58         0, 0, 0, 0,  0, 0, 0, 0,   0, 0, 0, 0,  0, 0, 0, 7,
59 };
60 
61 
62 nxt_inline nxt_http_target_traps_e
nxt_http_parse_target(u_char ** pos,const u_char * end)63 nxt_http_parse_target(u_char **pos, const u_char *end)
64 {
65     u_char      *p;
66     nxt_uint_t  trap;
67 
68     p = *pos;
69 
70     while (nxt_fast_path(end - p >= 10)) {
71 
72 #define nxt_target_test_char(ch)                                              \
73                                                                               \
74         trap = nxt_http_target_chars[ch];                                     \
75                                                                               \
76         if (nxt_slow_path(trap != 0)) {                                       \
77             *pos = &(ch);                                                     \
78             return trap;                                                      \
79         }
80 
81 /* enddef */
82 
83         nxt_target_test_char(p[0]);
84         nxt_target_test_char(p[1]);
85         nxt_target_test_char(p[2]);
86         nxt_target_test_char(p[3]);
87 
88         nxt_target_test_char(p[4]);
89         nxt_target_test_char(p[5]);
90         nxt_target_test_char(p[6]);
91         nxt_target_test_char(p[7]);
92 
93         nxt_target_test_char(p[8]);
94         nxt_target_test_char(p[9]);
95 
96         p += 10;
97     }
98 
99     while (p != end) {
100         nxt_target_test_char(*p); p++;
101     }
102 
103     return NXT_HTTP_TARGET_AGAIN;
104 }
105 
106 
107 nxt_int_t
nxt_http_parse_request_init(nxt_http_request_parse_t * rp,nxt_mp_t * mp)108 nxt_http_parse_request_init(nxt_http_request_parse_t *rp, nxt_mp_t *mp)
109 {
110     rp->mem_pool = mp;
111 
112     rp->fields = nxt_list_create(mp, 8, sizeof(nxt_http_field_t));
113     if (nxt_slow_path(rp->fields == NULL)) {
114         return NXT_ERROR;
115     }
116 
117     rp->field_hash = NXT_HTTP_FIELD_HASH_INIT;
118 
119     return NXT_OK;
120 }
121 
122 
123 nxt_int_t
nxt_http_parse_request(nxt_http_request_parse_t * rp,nxt_buf_mem_t * b)124 nxt_http_parse_request(nxt_http_request_parse_t *rp, nxt_buf_mem_t *b)
125 {
126     nxt_int_t  rc;
127 
128     if (rp->handler == NULL) {
129         rp->handler = &nxt_http_parse_request_line;
130     }
131 
132     do {
133         rc = rp->handler(rp, &b->pos, b->free);
134     } while (rc == NXT_OK);
135 
136     return rc;
137 }
138 
139 
140 nxt_int_t
nxt_http_parse_fields(nxt_http_request_parse_t * rp,nxt_buf_mem_t * b)141 nxt_http_parse_fields(nxt_http_request_parse_t *rp, nxt_buf_mem_t *b)
142 {
143     nxt_int_t  rc;
144 
145     if (rp->handler == NULL) {
146         rp->handler = &nxt_http_parse_field_name;
147     }
148 
149     do {
150         rc = rp->handler(rp, &b->pos, b->free);
151     } while (rc == NXT_OK);
152 
153     return rc;
154 }
155 
156 
/*
 * Parses the request line: method, request target and HTTP version.
 *
 *   rp   parser state; method, target_start, target_end, complex_target,
 *        version, request_line_end, path and args are filled in here.
 *   pos  start of the line on entry.  It is written only after the whole
 *        line has been recognized, and then points just past the LF.
 *   end  one past the last available byte.
 *
 * Returns NXT_AGAIN when the buffer ends before the line is complete,
 * NXT_HTTP_PARSE_INVALID for a malformed line,
 * NXT_HTTP_PARSE_UNSUPPORTED_VERSION for a well-formed "HTTP/d.d" that is
 * not HTTP/1.x, a non-NXT_OK result of nxt_http_parse_complex_target(),
 * or otherwise whatever nxt_http_parse_field_name() returns for the data
 * that follows the line.
 */
157 static nxt_int_t
nxt_http_parse_request_line(nxt_http_request_parse_t * rp,u_char ** pos,const u_char * end)158 nxt_http_parse_request_line(nxt_http_request_parse_t *rp, u_char **pos,
159     const u_char *end)
160 {
161     u_char                   *p, ch, *after_slash, *args;
162     nxt_int_t                rc;
163     nxt_bool_t               rest;
164     nxt_http_ver_t           ver;
165     nxt_http_target_traps_e  trap;
166 
167     static const nxt_http_ver_t  http11 = { "HTTP/1.1" };
168     static const nxt_http_ver_t  http10 = { "HTTP/1.0" };
169 
170     p = *pos;
171 
172     rp->method.start = p;
173 
/*
 * Method: a run of 'A'..'Z' scanned eight bytes at a time.  Any other
 * byte is examined at method_unusual_char, which may resume this loop.
 */
174     for ( ;; ) {
175 
176         while (nxt_fast_path(end - p >= 8)) {
177 
178 #define nxt_method_test_char(ch)                                              \
179                                                                               \
180             if (nxt_slow_path((ch) < 'A' || (ch) > 'Z')) {                    \
181                 p = &(ch);                                                    \
182                 goto method_unusual_char;                                     \
183             }
184 
185 /* enddef */
186 
187             nxt_method_test_char(p[0]);
188             nxt_method_test_char(p[1]);
189             nxt_method_test_char(p[2]);
190             nxt_method_test_char(p[3]);
191 
192             nxt_method_test_char(p[4]);
193             nxt_method_test_char(p[5]);
194             nxt_method_test_char(p[6]);
195             nxt_method_test_char(p[7]);
196 
197             p += 8;
198         }
199 
200         while (p != end) {
201             nxt_method_test_char(*p); p++;
202         }
203 
        /* The buffer ended inside the method. */
204         rp->method.length = p - rp->method.start;
205 
206         return NXT_AGAIN;
207 
208     method_unusual_char:
209 
210         ch = *p;
211 
        /* A space terminates the method. */
212         if (nxt_fast_path(ch == ' ')) {
213             rp->method.length = p - rp->method.start;
214             break;
215         }
216 
        /* '_' and '-' are accepted inside a method name. */
217         if (ch == '_' || ch == '-') {
218             p++;
219             continue;
220         }
221 
        /* CR and LF in front of the method are skipped. */
222         if (rp->method.start == p && (ch == '\r' || ch == '\n')) {
223             rp->method.start++;
224             p++;
225             continue;
226         }
227 
228         rp->method.length = p - rp->method.start;
229 
230         return NXT_HTTP_PARSE_INVALID;
231     }
232 
    /* Step over the space that ended the method. */
233     p++;
234 
235     if (nxt_slow_path(p == end)) {
236         return NXT_AGAIN;
237     }
238 
239     /* target */
240 
241     ch = *p;
242 
    /* Anything but a leading '/' is delegated to the helper. */
243     if (nxt_slow_path(ch != '/')) {
244         rc = nxt_http_parse_unusual_target(rp, &p, end);
245 
246         if (nxt_slow_path(rc != NXT_OK)) {
247             return rc;
248         }
249     }
250 
251     rp->target_start = p;
252 
    /*
     * after_slash is the position right behind the most recent '/';
     * a '/' or '.' found exactly there makes the target complex.
     * args is the first byte after '?', if one is seen.
     * rest records which of the two scan loops to resume after a space
     * that turns out not to end the target.
     */
253     after_slash = p + 1;
254     args = NULL;
255     rest = 0;
256 
257 continue_target:
258 
    /* First scan loop: the path part, with the extended traps in effect. */
259     for ( ;; ) {
260         p++;
261 
262         trap = nxt_http_parse_target(&p, end);
263 
264         switch (trap) {
265         case NXT_HTTP_TARGET_SLASH:
266             if (nxt_slow_path(after_slash == p)) {
267                 rp->complex_target = 1;
268                 goto rest_of_target;
269             }
270 
271             after_slash = p + 1;
272             continue;
273 
274         case NXT_HTTP_TARGET_DOT:
275             if (nxt_slow_path(after_slash == p)) {
276                 rp->complex_target = 1;
277                 goto rest_of_target;
278             }
279 
280             continue;
281 
282         case NXT_HTTP_TARGET_ARGS_MARK:
283             args = p + 1;
284             goto rest_of_target;
285 
286         case NXT_HTTP_TARGET_SPACE:
287             rp->target_end = p;
288             goto space_after_target;
289 #if 0
290         case NXT_HTTP_TARGET_QUOTE_MARK:
291             rp->quoted_target = 1;
292             goto rest_of_target;
293 #else
294         case NXT_HTTP_TARGET_QUOTE_MARK:
295 #endif
296         case NXT_HTTP_TARGET_HASH:
297             rp->complex_target = 1;
298             goto rest_of_target;
299 
300         case NXT_HTTP_TARGET_AGAIN:
301             rp->target_end = p;
302             return NXT_AGAIN;
303 
304         case NXT_HTTP_TARGET_BAD:
305             rp->target_end = p;
306             return NXT_HTTP_PARSE_INVALID;
307         }
308 
309         nxt_unreachable();
310     }
311 
312 rest_of_target:
313 
314     rest = 1;
315 
    /*
     * Second scan loop: only the space, '#', end of input and bad bytes
     * are acted upon; every other trap is stepped over.
     */
316     for ( ;; ) {
317         p++;
318 
319         trap = nxt_http_parse_target(&p, end);
320 
321         switch (trap) {
322         case NXT_HTTP_TARGET_SPACE:
323             rp->target_end = p;
324             goto space_after_target;
325 
326         case NXT_HTTP_TARGET_HASH:
327             rp->complex_target = 1;
328             continue;
329 
330         case NXT_HTTP_TARGET_AGAIN:
331             rp->target_end = p;
332             return NXT_AGAIN;
333 
334         case NXT_HTTP_TARGET_BAD:
335             rp->target_end = p;
336             return NXT_HTTP_PARSE_INVALID;
337 
338         default:
339             continue;
340         }
341 
342         nxt_unreachable();
343     }
344 
345 space_after_target:
346 
    /*
     * p points at a space.  With fewer than ten bytes left the complete
     * " HTTP/x.y" plus line end cannot be present yet: skip the spaces and
     * ask for more data if what follows is still a prefix of "HTTP/d.d".
     * Otherwise scanning of the target is resumed.
     */
347     if (nxt_slow_path(end - p < 10)) {
348 
349         do {
350             p++;
351 
352             if (p == end) {
353                 return NXT_AGAIN;
354             }
355 
356         } while (*p == ' ');
357 
358         if (memcmp(p, "HTTP/", nxt_min(end - p, 5)) == 0) {
359 
360             switch (end - p) {
361             case 8:
362                 if (p[7] < '0' || p[7] > '9') {
363                     break;
364                 }
365                 /* Fall through. */
366             case 7:
367                 if (p[6] != '.') {
368                     break;
369                 }
370                 /* Fall through. */
371             case 6:
372                 if (p[5] < '0' || p[5] > '9') {
373                     break;
374                 }
375                 /* Fall through. */
376             default:
377                 return NXT_AGAIN;
378             }
379         }
380 
381         //rp->space_in_target = 1;
382 
383         if (rest) {
384             goto rest_of_target;
385         }
386 
387         goto continue_target;
388     }
389 
390     /* " HTTP/1.1\r\n" or " HTTP/1.1\n" */
391 
    /*
     * p[0] is the space, p[1..8] the version candidate, and p[9] has to be
     * CR or LF.  If it is not, the space is either surplus or belongs to
     * the target.
     */
392     if (nxt_slow_path(p[9] != '\r' && p[9] != '\n')) {
393 
394         if (p[1] == ' ') {
395             /* surplus space after target */
396             p++;
397             goto space_after_target;
398         }
399 
400         //rp->space_in_target = 1;
401 
402         if (rest) {
403             goto rest_of_target;
404         }
405 
406         goto continue_target;
407     }
408 
409     nxt_memcpy(ver.str, &p[1], 8);
410 
    /* HTTP/1.1 and HTTP/1.0 are compared as one 64-bit value. */
411     if (nxt_fast_path(ver.ui64 == http11.ui64
412                       || ver.ui64 == http10.ui64
413                       || (memcmp(ver.str, "HTTP/1.", 7) == 0
414                           && ver.s.minor >= '0' && ver.s.minor <= '9')))
415     {
416         rp->version.ui64 = ver.ui64;
417 
418         p += 9;
419         if (nxt_fast_path(*p == '\r')) {
420 
421             if (nxt_slow_path(p + 1 == end)) {
422                 return NXT_AGAIN;
423             }
424 
425             if (nxt_slow_path(p[1] != '\n')) {
426                 return NXT_HTTP_PARSE_INVALID;
427             }
428 
429             *pos = p + 2;
430 
431         } else {
432             *pos = p + 1;
433         }
434 
435         rp->request_line_end = p;
436 
        /* A complex target is normalized into a separate path buffer. */
437         if (rp->complex_target != 0
438 #if 0
439             || rp->quoted_target != 0
440 #endif
441            )
442         {
443             rc = nxt_http_parse_complex_target(rp);
444 
445             if (nxt_slow_path(rc != NXT_OK)) {
446                 return rc;
447             }
448 
449             return nxt_http_parse_field_name(rp, pos, end);
450         }
451 
        /* A simple target is referenced in place. */
452         rp->path.start = rp->target_start;
453 
454         if (args != NULL) {
455             rp->path.length = args - rp->target_start - 1;
456 
457             rp->args.length = rp->target_end - args;
458             rp->args.start = args;
459 
460         } else {
461             rp->path.length = rp->target_end - rp->target_start;
462         }
463 
464         return nxt_http_parse_field_name(rp, pos, end);
465     }
466 
    /* Well-formed "HTTP/d.d", but not HTTP/1.x. */
467     if (memcmp(ver.s.prefix, "HTTP/", 5) == 0
468         && ver.s.major >= '0' && ver.s.major <= '9'
469         && ver.s.point == '.'
470         && ver.s.minor >= '0' && ver.s.minor <= '9')
471     {
472         rp->version.ui64 = ver.ui64;
473         return NXT_HTTP_PARSE_UNSUPPORTED_VERSION;
474     }
475 
476     return NXT_HTTP_PARSE_INVALID;
477 }
478 
479 
480 static nxt_int_t
nxt_http_parse_unusual_target(nxt_http_request_parse_t * rp,u_char ** pos,const u_char * end)481 nxt_http_parse_unusual_target(nxt_http_request_parse_t *rp, u_char **pos,
482     const u_char *end)
483 {
484     u_char  *p, ch;
485 
486     p = *pos;
487 
488     ch = *p;
489 
490     if (ch == ' ') {
491         /* skip surplus spaces before target */
492 
493         do {
494             p++;
495 
496             if (nxt_slow_path(p == end)) {
497                 return NXT_AGAIN;
498             }
499 
500             ch = *p;
501 
502         } while (ch == ' ');
503 
504         if (ch == '/') {
505             *pos = p;
506             return NXT_OK;
507         }
508     }
509 
510     /* absolute path or '*' */
511 
512     /* TODO */
513 
514     return NXT_HTTP_PARSE_INVALID;
515 }
516 
517 
518 static nxt_int_t
nxt_http_parse_field_name(nxt_http_request_parse_t * rp,u_char ** pos,const u_char * end)519 nxt_http_parse_field_name(nxt_http_request_parse_t *rp, u_char **pos,
520     const u_char *end)
521 {
522     u_char    *p, c;
523     size_t    len;
524     uint32_t  hash;
525 
526     static const u_char  normal[256]  nxt_aligned(64) =
527         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
528     /*   \s ! " # $ % & ' ( ) * + ,        . /                 : ; < = > ?   */
529         "\0\1\0\1\1\1\1\1\0\0\1\1\0" "-" "\1\0" "0123456789" "\0\0\0\0\0\0"
530 
531     /*    @                                 [ \ ] ^ _                        */
532         "\0" "abcdefghijklmnopqrstuvwxyz" "\0\0\0\1\1"
533     /*    `                                 { | } ~                          */
534         "\1" "abcdefghijklmnopqrstuvwxyz" "\0\1\0\1\0"
535 
536         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
537         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
538         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
539         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
540 
541     p = *pos + rp->field_name.length;
542     hash = rp->field_hash;
543 
544     while (nxt_fast_path(end - p >= 8)) {
545 
546 #define nxt_field_name_test_char(ch)                                          \
547                                                                               \
548         c = normal[ch];                                                       \
549                                                                               \
550         if (nxt_slow_path(c <= '\1')) {                                       \
551             if (c == '\0') {                                                  \
552                 p = &(ch);                                                    \
553                 goto name_end;                                                \
554             }                                                                 \
555                                                                               \
556             rp->skip_field = rp->discard_unsafe_fields;                       \
557             c = ch;                                                           \
558         }                                                                     \
559                                                                               \
560         hash = nxt_http_field_hash_char(hash, c);
561 
562 /* enddef */
563 
564         nxt_field_name_test_char(p[0]);
565         nxt_field_name_test_char(p[1]);
566         nxt_field_name_test_char(p[2]);
567         nxt_field_name_test_char(p[3]);
568 
569         nxt_field_name_test_char(p[4]);
570         nxt_field_name_test_char(p[5]);
571         nxt_field_name_test_char(p[6]);
572         nxt_field_name_test_char(p[7]);
573 
574         p += 8;
575     }
576 
577     while (nxt_fast_path(p != end)) {
578         nxt_field_name_test_char(*p); p++;
579     }
580 
581     len = p - *pos;
582 
583     if (nxt_slow_path(len > NXT_HTTP_MAX_FIELD_NAME)) {
584         return NXT_HTTP_PARSE_TOO_LARGE_FIELD;
585     }
586 
587     rp->field_hash = hash;
588     rp->field_name.length = len;
589 
590     rp->handler = &nxt_http_parse_field_name;
591 
592     return NXT_AGAIN;
593 
594 name_end:
595 
596     if (nxt_fast_path(*p == ':')) {
597         if (nxt_slow_path(p == *pos)) {
598             return NXT_HTTP_PARSE_INVALID;
599         }
600 
601         len = p - *pos;
602 
603         if (nxt_slow_path(len > NXT_HTTP_MAX_FIELD_NAME)) {
604             return NXT_HTTP_PARSE_TOO_LARGE_FIELD;
605         }
606 
607         rp->field_hash = hash;
608 
609         rp->field_name.length = len;
610         rp->field_name.start = *pos;
611 
612         *pos = p + 1;
613 
614         return nxt_http_parse_field_value(rp, pos, end);
615     }
616 
617     if (nxt_slow_path(p != *pos)) {
618         return NXT_HTTP_PARSE_INVALID;
619     }
620 
621     return nxt_http_parse_field_end(rp, pos, end);
622 }
623 
624 
625 static nxt_int_t
nxt_http_parse_field_value(nxt_http_request_parse_t * rp,u_char ** pos,const u_char * end)626 nxt_http_parse_field_value(nxt_http_request_parse_t *rp, u_char **pos,
627     const u_char *end)
628 {
629     u_char  *p, *start, ch;
630     size_t  len;
631 
632     p = *pos;
633 
634     for ( ;; ) {
635         if (nxt_slow_path(p == end)) {
636             *pos = p;
637             rp->handler = &nxt_http_parse_field_value;
638             return NXT_AGAIN;
639         }
640 
641         ch = *p;
642 
643         if (ch != ' ' && ch != '\t') {
644             break;
645         }
646 
647         p++;
648     }
649 
650     start = p;
651 
652     p += rp->field_value.length;
653 
654     for ( ;; ) {
655         p = nxt_http_lookup_field_end(p, end);
656 
657         if (nxt_slow_path(p == end)) {
658             *pos = start;
659 
660             len = p - start;
661 
662             if (nxt_slow_path(len > NXT_HTTP_MAX_FIELD_VALUE)) {
663                 return NXT_HTTP_PARSE_TOO_LARGE_FIELD;
664             }
665 
666             rp->field_value.length = len;
667             rp->handler = &nxt_http_parse_field_value;
668             return NXT_AGAIN;
669         }
670 
671         ch = *p;
672 
673         if (nxt_fast_path(ch == '\r' || ch == '\n')) {
674             break;
675         }
676 
677         if (ch != '\t') {
678             return NXT_HTTP_PARSE_INVALID;
679         }
680 
681         p++;
682     }
683 
684     *pos = p;
685 
686     if (nxt_fast_path(p != start)) {
687 
688         while (p[-1] == ' ' || p[-1] == '\t') {
689             p--;
690         }
691     }
692 
693     len = p - start;
694 
695     if (nxt_slow_path(len > NXT_HTTP_MAX_FIELD_VALUE)) {
696         return NXT_HTTP_PARSE_TOO_LARGE_FIELD;
697     }
698 
699     rp->field_value.length = len;
700     rp->field_value.start = start;
701 
702     return nxt_http_parse_field_end(rp, pos, end);
703 }
704 
705 
706 static u_char *
nxt_http_lookup_field_end(u_char * p,const u_char * end)707 nxt_http_lookup_field_end(u_char *p, const u_char *end)
708 {
709     while (nxt_fast_path(end - p >= 16)) {
710 
711 #define nxt_field_end_test_char(ch)                                           \
712                                                                               \
713         if (nxt_slow_path((ch) < 0x20)) {                                     \
714             return &(ch);                                                     \
715         }
716 
717 /* enddef */
718 
719         nxt_field_end_test_char(p[0]);
720         nxt_field_end_test_char(p[1]);
721         nxt_field_end_test_char(p[2]);
722         nxt_field_end_test_char(p[3]);
723 
724         nxt_field_end_test_char(p[4]);
725         nxt_field_end_test_char(p[5]);
726         nxt_field_end_test_char(p[6]);
727         nxt_field_end_test_char(p[7]);
728 
729         nxt_field_end_test_char(p[8]);
730         nxt_field_end_test_char(p[9]);
731         nxt_field_end_test_char(p[10]);
732         nxt_field_end_test_char(p[11]);
733 
734         nxt_field_end_test_char(p[12]);
735         nxt_field_end_test_char(p[13]);
736         nxt_field_end_test_char(p[14]);
737         nxt_field_end_test_char(p[15]);
738 
739         p += 16;
740     }
741 
742     while (nxt_fast_path(end - p >= 4)) {
743 
744         nxt_field_end_test_char(p[0]);
745         nxt_field_end_test_char(p[1]);
746         nxt_field_end_test_char(p[2]);
747         nxt_field_end_test_char(p[3]);
748 
749         p += 4;
750     }
751 
752     switch (end - p) {
753     case 3:
754         nxt_field_end_test_char(*p); p++;
755         /* Fall through. */
756     case 2:
757         nxt_field_end_test_char(*p); p++;
758         /* Fall through. */
759     case 1:
760         nxt_field_end_test_char(*p); p++;
761         /* Fall through. */
762     case 0:
763         break;
764     default:
765         nxt_unreachable();
766     }
767 
768     return p;
769 }
770 
771 
772 static nxt_int_t
nxt_http_parse_field_end(nxt_http_request_parse_t * rp,u_char ** pos,const u_char * end)773 nxt_http_parse_field_end(nxt_http_request_parse_t *rp, u_char **pos,
774     const u_char *end)
775 {
776     u_char            *p;
777     nxt_http_field_t  *field;
778 
779     p = *pos;
780 
781     if (nxt_fast_path(*p == '\r')) {
782         p++;
783 
784         if (nxt_slow_path(p == end)) {
785             rp->handler = &nxt_http_parse_field_end;
786             return NXT_AGAIN;
787         }
788     }
789 
790     if (nxt_fast_path(*p == '\n')) {
791         *pos = p + 1;
792 
793         if (rp->field_name.length != 0) {
794             if (rp->skip_field) {
795                 rp->skip_field = 0;
796 
797             } else {
798                 field = nxt_list_add(rp->fields);
799 
800                 if (nxt_slow_path(field == NULL)) {
801                     return NXT_ERROR;
802                 }
803 
804                 field->hash = nxt_http_field_hash_end(rp->field_hash);
805                 field->skip = 0;
806                 field->hopbyhop = 0;
807 
808                 field->name_length = rp->field_name.length;
809                 field->value_length = rp->field_value.length;
810                 field->name = rp->field_name.start;
811                 field->value = rp->field_value.start;
812             }
813 
814             rp->field_hash = NXT_HTTP_FIELD_HASH_INIT;
815 
816             rp->field_name.length = 0;
817             rp->field_value.length = 0;
818 
819             rp->handler = &nxt_http_parse_field_name;
820             return NXT_OK;
821         }
822 
823         return NXT_DONE;
824     }
825 
826     return NXT_HTTP_PARSE_INVALID;
827 }
828 
829 
/*
 * Tests the bit for byte value "c" in the nxt_http_normal[] bitmap: byte
 * c / 8, bit c & 7.  A set bit means the byte is copied to the output by
 * nxt_http_parse_complex_target() without any further inspection.
 *
 * The argument is parenthesized so that an expression such as "a + b"
 * selects the intended byte and bit; it is evaluated twice.
 */
#define nxt_http_is_normal(c)                                                 \
    (nxt_fast_path((nxt_http_normal[(c) / 8] & (1 << ((c) & 7))) != 0))
832 
833 
834 static const uint8_t  nxt_http_normal[32]  nxt_aligned(32) = {
835 
836                              /*        \0   \r  \n                         */
837     0xFE, 0xDB, 0xFF, 0xFF,  /* 1111 1110  1101 1011  1111 1111  1111 1111 */
838 
839                              /* '&%$ #"!   /.-, |*)(  7654 3210  ?>=< ;:98 */
840     0xD6, 0x37, 0xFF, 0x7F,  /* 1101 0110  0011 0111  1111 1111  0111 1111 */
841 
842                              /* GFED CBA@  ONML KJIH  WVUT SRQP  _^]\ [ZYX */
843     0xFF, 0xFF, 0xFF, 0xFF,  /* 1111 1111  1111 1111  1111 1111  1111 1111 */
844 
845                              /* gfed cba`  onml kjih  wvut srqp   ~}| {zyx */
846     0xFF, 0xFF, 0xFF, 0xFF,  /* 1111 1111  1111 1111  1111 1111  1111 1111 */
847 
848     0xFF, 0xFF, 0xFF, 0xFF,  /* 1111 1111  1111 1111  1111 1111  1111 1111 */
849     0xFF, 0xFF, 0xFF, 0xFF,  /* 1111 1111  1111 1111  1111 1111  1111 1111 */
850     0xFF, 0xFF, 0xFF, 0xFF,  /* 1111 1111  1111 1111  1111 1111  1111 1111 */
851     0xFF, 0xFF, 0xFF, 0xFF,  /* 1111 1111  1111 1111  1111 1111  1111 1111 */
852 };
853 
854 
/*
 * Normalizes the request target in [rp->target_start, rp->target_end)
 * into a buffer allocated from rp->mem_pool and stores the result in
 * rp->path; the query string, if any, is stored in rp->args and points
 * into the original target.
 *
 * While copying, the function decodes "%XX" escapes, collapses runs of
 * '/', removes "/." segments and resolves "/.." segments against the
 * output written so far.  The path ends at '?' or '#'.
 *
 * Returns NXT_OK on success, NXT_ERROR if the buffer cannot be allocated,
 * and NXT_HTTP_PARSE_INVALID for a malformed or unfinished escape, an
 * escape decoding to '\0', or a "/.." segment with no segment before it
 * to remove.
 */
855 nxt_int_t
nxt_http_parse_complex_target(nxt_http_request_parse_t * rp)856 nxt_http_parse_complex_target(nxt_http_request_parse_t *rp)
857 {
858     u_char  *p, *u, c, ch, high, *args;
859 
    /*
     * State of the output with respect to the last bytes written:
     * after '/', after "/.", after "/..", or inside a "%XX" escape.
     */
860     enum {
861         sw_normal = 0,
862         sw_slash,
863         sw_dot,
864         sw_dot_dot,
865         sw_quoted,
866         sw_quoted_second,
867     } state, saved_state;
868 
869     nxt_prefetch(nxt_http_normal);
870 
871     state = sw_normal;
872     saved_state = sw_normal;
873     p = rp->target_start;
874 
    /* p reads the target, u writes the normalized path. */
875     u = nxt_mp_alloc(rp->mem_pool, rp->target_end - p + 1);
876     if (nxt_slow_path(u == NULL)) {
877         return NXT_ERROR;
878     }
879 
880     rp->path.length = 0;
881     rp->path.start = u;
882 
883     high = '\0';
884     args = NULL;
885 
886     while (p < rp->target_end) {
887 
888         ch = *p++;
889 
    /*
     * Re-dispatch point: entered with a decoded escape in ch and the state
     * restored from saved_state, and once more after the loop with a
     * synthetic '/' to finish a trailing "/." or "/..".
     */
890     again:
891 
892         switch (state) {
893 
894         case sw_normal:
895 
896             if (nxt_http_is_normal(ch)) {
897                 *u++ = ch;
898                 continue;
899             }
900 
901             switch (ch) {
902             case '/':
903                 state = sw_slash;
904                 *u++ = ch;
905                 continue;
906             case '%':
907                 saved_state = state;
908                 state = sw_quoted;
909                 continue;
910             case '?':
911                 args = p;
912                 goto args;
913             case '#':
914                 goto done;
915             default:
916                 *u++ = ch;
917                 continue;
918             }
919 
920             break;
921 
922         case sw_slash:
923 
924             if (nxt_http_is_normal(ch)) {
925                 state = sw_normal;
926                 *u++ = ch;
927                 continue;
928             }
929 
930             switch (ch) {
931             case '/':
                /* A repeated '/' is dropped. */
932                 continue;
933             case '.':
934                 state = sw_dot;
935                 *u++ = ch;
936                 continue;
937             case '%':
938                 saved_state = state;
939                 state = sw_quoted;
940                 continue;
941             case '?':
942                 args = p;
943                 goto args;
944             case '#':
945                 goto done;
946             default:
947                 state = sw_normal;
948                 *u++ = ch;
949                 continue;
950             }
951 
952             break;
953 
954         case sw_dot:
955 
956             if (nxt_http_is_normal(ch)) {
957                 state = sw_normal;
958                 *u++ = ch;
959                 continue;
960             }
961 
962             switch (ch) {
963             case '/':
                /* "/./": take back the '.' already written. */
964                 state = sw_slash;
965                 u--;
966                 continue;
967             case '.':
968                 state = sw_dot_dot;
969                 *u++ = ch;
970                 continue;
971             case '%':
972                 saved_state = state;
973                 state = sw_quoted;
974                 continue;
975             case '?':
976                 u--;
977                 args = p;
978                 goto args;
979             case '#':
980                 u--;
981                 goto done;
982             default:
983                 state = sw_normal;
984                 *u++ = ch;
985                 continue;
986             }
987 
988             break;
989 
990         case sw_dot_dot:
991 
992             if (nxt_http_is_normal(ch)) {
993                 state = sw_normal;
994                 *u++ = ch;
995                 continue;
996             }
997 
998             switch (ch) {
999 
1000             case '/':
1001             case '?':
1002             case '#':
                /*
                 * "/..": step back over the ".." and the '/' before it,
                 * then search backwards for the '/' that starts the
                 * previous segment.  Moving before the start of the
                 * output means there is no segment to remove.
                 */
1003                 u -= 5;
1004 
1005                 for ( ;; ) {
1006                     if (u < rp->path.start) {
1007                         return NXT_HTTP_PARSE_INVALID;
1008                     }
1009 
1010                     if (*u == '/') {
1011                         u++;
1012                         break;
1013                     }
1014 
1015                     u--;
1016                 }
1017 
1018                 if (ch == '?') {
1019                     args = p;
1020                     goto args;
1021                 }
1022 
1023                 if (ch == '#') {
1024                     goto done;
1025                 }
1026 
1027                 state = sw_slash;
1028                 break;
1029 
1030             case '%':
1031                 saved_state = state;
1032                 state = sw_quoted;
1033                 continue;
1034 
1035             default:
1036                 state = sw_normal;
1037                 *u++ = ch;
1038                 continue;
1039             }
1040 
1041             break;
1042 
        /* First hexadecimal digit of an escape. */
1043         case sw_quoted:
1044             //rp->quoted_target = 1;
1045 
1046             if (ch >= '0' && ch <= '9') {
1047                 high = (u_char) (ch - '0');
1048                 state = sw_quoted_second;
1049                 continue;
1050             }
1051 
1052             c = (u_char) (ch | 0x20);
1053             if (c >= 'a' && c <= 'f') {
1054                 high = (u_char) (c - 'a' + 10);
1055                 state = sw_quoted_second;
1056                 continue;
1057             }
1058 
1059             return NXT_HTTP_PARSE_INVALID;
1060 
        /*
         * Second hexadecimal digit.  A decoded '%', '#' or '?' is written
         * as a literal byte; with rp->encoded_slashes set, "%25" and
         * "%2F" are written back in their encoded form.  Any other
         * decoded byte is processed in the state that was active before
         * the escape.
         */
1061         case sw_quoted_second:
1062             if (ch >= '0' && ch <= '9') {
1063                 ch = (u_char) ((high << 4) + ch - '0');
1064 
1065                 if (ch == '%') {
1066                     state = sw_normal;
1067                     *u++ = '%';
1068 
1069                     if (rp->encoded_slashes) {
1070                         *u++ = '2';
1071                         *u++ = '5';
1072                     }
1073 
1074                     continue;
1075                 }
1076 
1077                 if (ch == '#') {
1078                     state = sw_normal;
1079                     *u++ = '#';
1080                     continue;
1081                 }
1082 
1083                 if (ch == '\0') {
1084                     return NXT_HTTP_PARSE_INVALID;
1085                 }
1086 
1087                 state = saved_state;
1088                 goto again;
1089             }
1090 
1091             c = (u_char) (ch | 0x20);
1092             if (c >= 'a' && c <= 'f') {
1093                 ch = (u_char) ((high << 4) + c - 'a' + 10);
1094 
1095                 if (ch == '?') {
1096                     state = sw_normal;
1097                     *u++ = ch;
1098                     continue;
1099                 }
1100 
1101                 if (ch == '/' && rp->encoded_slashes) {
1102                     state = sw_normal;
1103                     *u++ = '%';
1104                     *u++ = '2';
1105                     *u++ = p[-1];  /* 'f' or 'F' */
1106                     continue;
1107                 }
1108 
1109                 state = saved_state;
1110                 goto again;
1111             }
1112 
1113             return NXT_HTTP_PARSE_INVALID;
1114         }
1115     }
1116 
    /* The target ended inside "/.", "/.." or an unfinished escape. */
1117     if (state >= sw_dot) {
1118         if (state >= sw_quoted) {
1119             return NXT_HTTP_PARSE_INVALID;
1120         }
1121 
1122         /* "/." and "/.." must be normalized similar to "/./" and "/../". */
1123         ch = '/';
1124         goto again;
1125     }
1126 
1127 args:
1128 
    /* The query string runs up to '#' or the end of the target. */
1129     for (/* void */; p < rp->target_end; p++) {
1130         if (*p == '#') {
1131             break;
1132         }
1133     }
1134 
1135     if (args != NULL) {
1136         rp->args.length = p - args;
1137         rp->args.start = args;
1138     }
1139 
1140 done:
1141 
1142     rp->path.length = u - rp->path.start;
1143 
1144     return NXT_OK;
1145 }
1146 
1147 
1148 const nxt_lvlhsh_proto_t  nxt_http_fields_hash_proto  nxt_aligned(64) = {
1149     NXT_LVLHSH_BUCKET_SIZE(64),
1150     { NXT_HTTP_FIELD_LVLHSH_SHIFT, 0, 0, 0, 0, 0, 0, 0 },
1151     nxt_http_field_hash_test,
1152     nxt_lvlhsh_alloc,
1153     nxt_lvlhsh_free,
1154 };
1155 
1156 
1157 static nxt_int_t
nxt_http_field_hash_test(nxt_lvlhsh_query_t * lhq,void * data)1158 nxt_http_field_hash_test(nxt_lvlhsh_query_t *lhq, void *data)
1159 {
1160     nxt_http_field_proc_t  *field;
1161 
1162     field = data;
1163 
1164     if (nxt_strcasestr_eq(&lhq->key, &field->name)) {
1165         return NXT_OK;
1166     }
1167 
1168     return NXT_DECLINED;
1169 }
1170 
1171 
1172 static nxt_int_t
nxt_http_field_hash_collision(nxt_lvlhsh_query_t * lhq,void * data)1173 nxt_http_field_hash_collision(nxt_lvlhsh_query_t *lhq, void *data)
1174 {
1175     return NXT_OK;
1176 }
1177 
1178 
1179 nxt_int_t
nxt_http_fields_hash(nxt_lvlhsh_t * hash,nxt_http_field_proc_t items[],nxt_uint_t count)1180 nxt_http_fields_hash(nxt_lvlhsh_t *hash,
1181     nxt_http_field_proc_t items[], nxt_uint_t count)
1182 {
1183     u_char              ch;
1184     uint32_t            key;
1185     nxt_str_t           *name;
1186     nxt_int_t           ret;
1187     nxt_uint_t          i, j;
1188     nxt_lvlhsh_query_t  lhq;
1189 
1190     lhq.replace = 0;
1191     lhq.proto = &nxt_http_fields_hash_proto;
1192 
1193     for (i = 0; i < count; i++) {
1194         key = NXT_HTTP_FIELD_HASH_INIT;
1195         name = &items[i].name;
1196 
1197         for (j = 0; j < name->length; j++) {
1198             ch = nxt_lowcase(name->start[j]);
1199             key = nxt_http_field_hash_char(key, ch);
1200         }
1201 
1202         lhq.key_hash = nxt_http_field_hash_end(key) & 0xFFFF;
1203         lhq.key = *name;
1204         lhq.value = &items[i];
1205 
1206         ret = nxt_lvlhsh_insert(hash, &lhq);
1207 
1208         if (nxt_slow_path(ret != NXT_OK)) {
1209             return NXT_ERROR;
1210         }
1211     }
1212 
1213     return NXT_OK;
1214 }
1215 
1216 
1217 nxt_uint_t
nxt_http_fields_hash_collisions(nxt_lvlhsh_t * hash,nxt_http_field_proc_t items[],nxt_uint_t count,nxt_bool_t level)1218 nxt_http_fields_hash_collisions(nxt_lvlhsh_t *hash,
1219     nxt_http_field_proc_t items[], nxt_uint_t count, nxt_bool_t level)
1220 {
1221     u_char              ch;
1222     uint32_t            key, mask;
1223     nxt_str_t           *name;
1224     nxt_uint_t          colls, i, j;
1225     nxt_lvlhsh_proto_t  proto;
1226     nxt_lvlhsh_query_t  lhq;
1227 
1228     proto = nxt_http_fields_hash_proto;
1229     proto.test = nxt_http_field_hash_collision;
1230 
1231     lhq.replace = 0;
1232     lhq.proto = &proto;
1233 
1234     mask = level ? (1 << NXT_HTTP_FIELD_LVLHSH_SHIFT) - 1 : 0xFFFF;
1235 
1236     colls = 0;
1237 
1238     for (i = 0; i < count; i++) {
1239         key = NXT_HTTP_FIELD_HASH_INIT;
1240         name = &items[i].name;
1241 
1242         for (j = 0; j < name->length; j++) {
1243             ch = nxt_lowcase(name->start[j]);
1244             key = nxt_http_field_hash_char(key, ch);
1245         }
1246 
1247         lhq.key_hash = nxt_http_field_hash_end(key) & mask;
1248         lhq.value = &items[i];
1249 
1250         if (nxt_lvlhsh_insert(hash, &lhq) == NXT_DECLINED) {
1251             colls++;
1252         }
1253     }
1254 
1255     return colls;
1256 }
1257 
1258 
1259 nxt_int_t
nxt_http_fields_process(nxt_list_t * fields,nxt_lvlhsh_t * hash,void * ctx)1260 nxt_http_fields_process(nxt_list_t *fields, nxt_lvlhsh_t *hash, void *ctx)
1261 {
1262     nxt_int_t         ret;
1263     nxt_http_field_t  *field;
1264 
1265     nxt_list_each(field, fields) {
1266 
1267         ret = nxt_http_field_process(field, hash, ctx);
1268         if (nxt_slow_path(ret != NXT_OK)) {
1269             return ret;
1270         }
1271 
1272     } nxt_list_loop;
1273 
1274     return NXT_OK;
1275 }
1276