xref: /unit/src/nxt_http_parse.c (revision 1170:830d6af7d24c)
1 
2 /*
3  * Copyright (C) NGINX, Inc.
4  * Copyright (C) Valentin V. Bartenev
5  */
6 
7 #include <nxt_main.h>
8 
9 
/*
 * Request parsing stages.  Each takes the parser state, a cursor (*pos)
 * and the end of the currently available input.
 */
static nxt_int_t nxt_http_parse_unusual_target(nxt_http_request_parse_t *rp,
    u_char **pos, u_char *end);
static nxt_int_t nxt_http_parse_request_line(nxt_http_request_parse_t *rp,
    u_char **pos, u_char *end);
static nxt_int_t nxt_http_parse_field_name(nxt_http_request_parse_t *rp,
    u_char **pos, u_char *end);
static nxt_int_t nxt_http_parse_field_value(nxt_http_request_parse_t *rp,
    u_char **pos, u_char *end);
/* Scanner used by nxt_http_parse_field_value() to find control bytes. */
static u_char *nxt_http_lookup_field_end(u_char *p, u_char *end);
static nxt_int_t nxt_http_parse_field_end(nxt_http_request_parse_t *rp,
    u_char **pos, u_char *end);

/* Slow path for targets flagged as complex or quoted by the request line. */
static nxt_int_t nxt_http_parse_complex_target(nxt_http_request_parse_t *rp);

/* Callbacks referenced by nxt_http_fields_hash_proto. */
static nxt_int_t nxt_http_field_hash_test(nxt_lvlhsh_query_t *lhq, void *data);
static void *nxt_http_field_hash_alloc(void *pool, size_t size);
static void nxt_http_field_hash_free(void *pool, void *p);

/* Test callback substituted by nxt_http_fields_hash_collisions(). */
static nxt_int_t nxt_http_field_hash_collision(nxt_lvlhsh_query_t *lhq,
    void *data);
30 
31 
/* Longest header field name accepted by nxt_http_parse_field_name(). */
#define NXT_HTTP_MAX_FIELD_NAME         0xFF
/* Longest header field value accepted by nxt_http_parse_field_value(). */
#define NXT_HTTP_MAX_FIELD_VALUE        NXT_INT32_T_MAX

/*
 * Shift value stored in nxt_http_fields_hash_proto; it also defines the
 * key mask used by nxt_http_fields_hash_collisions() when "level" is set.
 */
#define NXT_HTTP_FIELD_LVLHSH_SHIFT     5
36 
37 
/*
 * Trap codes reported by nxt_http_parse_target().  The numeric values are
 * the ones stored in the nxt_http_target_chars[] lookup table.
 */
typedef enum {
    NXT_HTTP_TARGET_SPACE      = 1,  /* \s  */
    NXT_HTTP_TARGET_HASH       = 2,  /*  #  */
    NXT_HTTP_TARGET_AGAIN      = 3,  /* input ended before any trap */
    NXT_HTTP_TARGET_BAD        = 4,  /* \0\r\n */

    /* traps below are used for extended check only */

    NXT_HTTP_TARGET_SLASH      = 5,  /*  /  */
    NXT_HTTP_TARGET_DOT        = 6,  /*  .  */
    NXT_HTTP_TARGET_ARGS_MARK  = 7,  /*  ?  */
    NXT_HTTP_TARGET_QUOTE_MARK = 8,  /*  %  */
} nxt_http_target_traps_e;
51 
52 
53 static const uint8_t  nxt_http_target_chars[256] nxt_aligned(64) = {
54     /* \0                               \n        \r       */
55         4, 0, 0, 0,  0, 0, 0, 0,   0, 0, 4, 0,  0, 4, 0, 0,
56         0, 0, 0, 0,  0, 0, 0, 0,   0, 0, 0, 0,  0, 0, 0, 0,
57 
58     /* \s  !  "  #   $  %  &  '    (  )  *  +   ,  -  .  / */
59         1, 0, 0, 2,  0, 8, 0, 0,   0, 0, 0, 0,  0, 0, 6, 5,
60 
61     /*  0  1  2  3   4  5  6  7    8  9  :  ;   <  =  >  ? */
62         0, 0, 0, 0,  0, 0, 0, 0,   0, 0, 0, 0,  0, 0, 0, 7,
63 };
64 
65 
66 nxt_inline nxt_http_target_traps_e
67 nxt_http_parse_target(u_char **pos, u_char *end)
68 {
69     u_char      *p;
70     nxt_uint_t  trap;
71 
72     p = *pos;
73 
74     while (nxt_fast_path(end - p >= 10)) {
75 
76 #define nxt_target_test_char(ch)                                              \
77                                                                               \
78         trap = nxt_http_target_chars[ch];                                     \
79                                                                               \
80         if (nxt_slow_path(trap != 0)) {                                       \
81             *pos = &(ch);                                                     \
82             return trap;                                                      \
83         }
84 
85 /* enddef */
86 
87         nxt_target_test_char(p[0]);
88         nxt_target_test_char(p[1]);
89         nxt_target_test_char(p[2]);
90         nxt_target_test_char(p[3]);
91 
92         nxt_target_test_char(p[4]);
93         nxt_target_test_char(p[5]);
94         nxt_target_test_char(p[6]);
95         nxt_target_test_char(p[7]);
96 
97         nxt_target_test_char(p[8]);
98         nxt_target_test_char(p[9]);
99 
100         p += 10;
101     }
102 
103     while (p != end) {
104         nxt_target_test_char(*p); p++;
105     }
106 
107     return NXT_HTTP_TARGET_AGAIN;
108 }
109 
110 
111 nxt_int_t
112 nxt_http_parse_request_init(nxt_http_request_parse_t *rp, nxt_mp_t *mp)
113 {
114     rp->mem_pool = mp;
115 
116     rp->fields = nxt_list_create(mp, 8, sizeof(nxt_http_field_t));
117     if (nxt_slow_path(rp->fields == NULL)) {
118         return NXT_ERROR;
119     }
120 
121     rp->field_hash = NXT_HTTP_FIELD_HASH_INIT;
122 
123     return NXT_OK;
124 }
125 
126 
127 nxt_int_t
128 nxt_http_parse_request(nxt_http_request_parse_t *rp, nxt_buf_mem_t *b)
129 {
130     nxt_int_t  rc;
131 
132     if (rp->handler == NULL) {
133         rp->handler = &nxt_http_parse_request_line;
134     }
135 
136     do {
137         rc = rp->handler(rp, &b->pos, b->free);
138     } while (rc == NXT_OK);
139 
140     return rc;
141 }
142 
143 
144 nxt_int_t
145 nxt_http_parse_fields(nxt_http_request_parse_t *rp, nxt_buf_mem_t *b)
146 {
147     nxt_int_t  rc;
148 
149     if (rp->handler == NULL) {
150         rp->handler = &nxt_http_parse_field_name;
151     }
152 
153     do {
154         rc = rp->handler(rp, &b->pos, b->free);
155     } while (rc == NXT_OK);
156 
157     return rc;
158 }
159 
160 
/*
 * Parses the request line: method, request target and HTTP version.
 *
 * *pos is written only once the whole line, including its line ending,
 * has been seen.  Every NXT_AGAIN returned by this stage itself leaves
 * *pos untouched, so the next call scans the line again from its start.
 *
 * Returns:
 *   NXT_AGAIN                           more input is needed;
 *   NXT_HTTP_PARSE_INVALID              malformed method, target or version;
 *   NXT_HTTP_PARSE_UNSUPPORTED_VERSION  "HTTP/<digit>.<digit>" that is not
 *                                       HTTP/1.<digit>;
 *   a non-NXT_OK result of nxt_http_parse_unusual_target() or
 *   nxt_http_parse_complex_target();
 *   otherwise the result of nxt_http_parse_field_name(), which is entered
 *   directly after a complete, valid request line.
 */
static nxt_int_t
nxt_http_parse_request_line(nxt_http_request_parse_t *rp, u_char **pos,
    u_char *end)
{
    u_char                   *p, ch, *after_slash, *exten, *args;
    nxt_int_t                rc;
    nxt_http_ver_t           ver;
    nxt_http_target_traps_e  trap;

    static const nxt_http_ver_t  http11 = { "HTTP/1.1" };
    static const nxt_http_ver_t  http10 = { "HTTP/1.0" };

    p = *pos;

    rp->method.start = p;

    /* method: a run of 'A'..'Z'; other bytes are handled one by one below */

    for ( ;; ) {

        /* unrolled scan, eight bytes per iteration */

        while (nxt_fast_path(end - p >= 8)) {

#define nxt_method_test_char(ch)                                              \
                                                                              \
            if (nxt_slow_path((ch) < 'A' || (ch) > 'Z')) {                    \
                p = &(ch);                                                    \
                goto method_unusual_char;                                     \
            }

/* enddef */

            nxt_method_test_char(p[0]);
            nxt_method_test_char(p[1]);
            nxt_method_test_char(p[2]);
            nxt_method_test_char(p[3]);

            nxt_method_test_char(p[4]);
            nxt_method_test_char(p[5]);
            nxt_method_test_char(p[6]);
            nxt_method_test_char(p[7]);

            p += 8;
        }

        while (p != end) {
            nxt_method_test_char(*p); p++;
        }

        /* input ended inside the method */

        rp->method.length = p - rp->method.start;

        return NXT_AGAIN;

    method_unusual_char:

        ch = *p;

        /* a space terminates the method */

        if (nxt_fast_path(ch == ' ')) {
            rp->method.length = p - rp->method.start;
            break;
        }

        /* '_' and '-' are accepted as part of the method */

        if (ch == '_' || ch == '-') {
            p++;
            continue;
        }

        /* CR and LF in front of the method are skipped */

        if (rp->method.start == p && (ch == '\r' || ch == '\n')) {
            rp->method.start++;
            p++;
            continue;
        }

        rp->method.length = p - rp->method.start;

        return NXT_HTTP_PARSE_INVALID;
    }

    /* step over the space that ended the method */

    p++;

    if (nxt_slow_path(p == end)) {
        return NXT_AGAIN;
    }

    /* target */

    ch = *p;

    /* anything but '/' here goes through the unusual-target helper */

    if (nxt_slow_path(ch != '/')) {
        rc = nxt_http_parse_unusual_target(rp, &p, end);

        if (nxt_slow_path(rc != NXT_OK)) {
            return rc;
        }
    }

    rp->target_start = p;

    after_slash = p + 1;
    exten = NULL;
    args = NULL;

    /*
     * First pass over the target: tracks the start of the current path
     * segment, the last '.' in it and the start of the arguments, so that
     * a simple target needs no further processing.
     */

    for ( ;; ) {
        p++;

        trap = nxt_http_parse_target(&p, end);

        switch (trap) {
        case NXT_HTTP_TARGET_SLASH:
            /* a '/' directly after a '/' needs normalization */
            if (nxt_slow_path(after_slash == p)) {
                rp->complex_target = 1;
                goto rest_of_target;
            }

            after_slash = p + 1;
            exten = NULL;
            continue;

        case NXT_HTTP_TARGET_DOT:
            /* a segment that starts with '.' needs normalization */
            if (nxt_slow_path(after_slash == p)) {
                rp->complex_target = 1;
                goto rest_of_target;
            }

            exten = p + 1;
            continue;

        case NXT_HTTP_TARGET_ARGS_MARK:
            args = p + 1;
            goto rest_of_target;

        case NXT_HTTP_TARGET_SPACE:
            rp->target_end = p;
            goto space_after_target;

        case NXT_HTTP_TARGET_QUOTE_MARK:
            rp->quoted_target = 1;
            goto rest_of_target;

        case NXT_HTTP_TARGET_HASH:
            rp->complex_target = 1;
            goto rest_of_target;

        case NXT_HTTP_TARGET_AGAIN:
            rp->target_end = p;
            return NXT_AGAIN;

        case NXT_HTTP_TARGET_BAD:
            rp->target_end = p;
            return NXT_HTTP_PARSE_INVALID;
        }

        nxt_unreachable();
    }

rest_of_target:

    /*
     * Second pass: only the end of the target, '#' and invalid bytes are
     * acted upon; all other traps are skipped.
     */

    for ( ;; ) {
        p++;

        trap = nxt_http_parse_target(&p, end);

        switch (trap) {
        case NXT_HTTP_TARGET_SPACE:
            rp->target_end = p;
            goto space_after_target;

        case NXT_HTTP_TARGET_HASH:
            rp->complex_target = 1;
            continue;

        case NXT_HTTP_TARGET_AGAIN:
            rp->target_end = p;
            return NXT_AGAIN;

        case NXT_HTTP_TARGET_BAD:
            rp->target_end = p;
            return NXT_HTTP_PARSE_INVALID;

        default:
            continue;
        }

        nxt_unreachable();
    }

space_after_target:

    /*
     * p points at a space.  With fewer than 10 bytes left, the space, the
     * 8-byte version and a line-ending byte cannot all be present yet.
     */

    if (nxt_slow_path(end - p < 10)) {

        do {
            p++;

            if (p == end) {
                return NXT_AGAIN;
            }

        } while (*p == ' ');

        /*
         * If the bytes seen so far can still turn into "HTTP/<d>.<d>",
         * wait for more input; otherwise the space is treated as part of
         * the target.
         */

        if (nxt_memcmp(p, "HTTP/", nxt_min(end - p, 5)) == 0) {

            switch (end - p) {
            case 8:
                if (p[7] < '0' || p[7] > '9') {
                    break;
                }
                /* Fall through. */
            case 7:
                if (p[6] != '.') {
                    break;
                }
                /* Fall through. */
            case 6:
                if (p[5] < '0' || p[5] > '9') {
                    break;
                }
                /* Fall through. */
            default:
                return NXT_AGAIN;
            }
        }

        rp->space_in_target = 1;
        goto rest_of_target;
    }

    /* " HTTP/1.1\r\n" or " HTTP/1.1\n" */

    if (nxt_slow_path(p[9] != '\r' && p[9] != '\n')) {

        if (p[1] == ' ') {
            /* surplus space after target */
            p++;
            goto space_after_target;
        }

        rp->space_in_target = 1;
        goto rest_of_target;
    }

    /* the eight bytes after the space are the version candidate */

    nxt_memcpy(ver.str, &p[1], 8);

    if (nxt_fast_path(ver.ui64 == http11.ui64
                      || ver.ui64 == http10.ui64
                      || (nxt_memcmp(ver.str, "HTTP/1.", 7) == 0
                          && ver.s.minor >= '0' && ver.s.minor <= '9')))
    {
        rp->version.ui64 = ver.ui64;

        /* the line ends with either CR LF or a bare LF */

        if (nxt_fast_path(p[9] == '\r')) {
            p += 10;

            if (nxt_slow_path(p == end)) {
                return NXT_AGAIN;
            }

            if (nxt_slow_path(*p != '\n')) {
                return NXT_HTTP_PARSE_INVALID;
            }

            *pos = p + 1;

        } else {
            *pos = p + 10;
        }

        /* targets flagged above are normalized by the slow path */

        if (rp->complex_target != 0 || rp->quoted_target != 0) {
            rc = nxt_http_parse_complex_target(rp);

            if (nxt_slow_path(rc != NXT_OK)) {
                return rc;
            }

            return nxt_http_parse_field_name(rp, pos, end);
        }

        /* simple target: path, args and exten point into the input */

        rp->path.start = rp->target_start;

        if (args != NULL) {
            /* args points just past '?', hence the extra -1 for the path */
            rp->path.length = args - rp->target_start - 1;

            rp->args.length = rp->target_end - args;
            rp->args.start = args;

        } else {
            rp->path.length = rp->target_end - rp->target_start;
        }

        if (exten != NULL) {
            rp->exten.length = (rp->path.start + rp->path.length) - exten;
            rp->exten.start = exten;
        }

        return nxt_http_parse_field_name(rp, pos, end);
    }

    /* well-formed version, but not HTTP/1.<digit> */

    if (nxt_memcmp(ver.s.prefix, "HTTP/", 5) == 0
        && ver.s.major >= '0' && ver.s.major <= '9'
        && ver.s.point == '.'
        && ver.s.minor >= '0' && ver.s.minor <= '9')
    {
        rp->version.ui64 = ver.ui64;
        return NXT_HTTP_PARSE_UNSUPPORTED_VERSION;
    }

    return NXT_HTTP_PARSE_INVALID;
}
465 
466 
467 static nxt_int_t
468 nxt_http_parse_unusual_target(nxt_http_request_parse_t *rp, u_char **pos,
469     u_char *end)
470 {
471     u_char  *p, ch;
472 
473     p = *pos;
474 
475     ch = *p;
476 
477     if (ch == ' ') {
478         /* skip surplus spaces before target */
479 
480         do {
481             p++;
482 
483             if (nxt_slow_path(p == end)) {
484                 return NXT_AGAIN;
485             }
486 
487             ch = *p;
488 
489         } while (ch == ' ');
490 
491         if (ch == '/') {
492             *pos = p;
493             return NXT_OK;
494         }
495     }
496 
497     /* absolute path or '*' */
498 
499     /* TODO */
500 
501     return NXT_HTTP_PARSE_INVALID;
502 }
503 
504 
505 static nxt_int_t
506 nxt_http_parse_field_name(nxt_http_request_parse_t *rp, u_char **pos,
507     u_char *end)
508 {
509     u_char    *p, c;
510     size_t    len;
511     uint32_t  hash;
512 
513     static const u_char  normal[256]  nxt_aligned(64) =
514         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
515         "\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789\0\0\0\0\0\0"
516 
517         /* These 64 bytes should reside in one cache line. */
518         "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0_"
519         "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
520 
521         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
522         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
523         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
524         "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
525 
526     p = *pos + rp->field_name.length;
527     hash = rp->field_hash;
528 
529     while (nxt_fast_path(end - p >= 8)) {
530 
531 #define nxt_field_name_test_char(ch)                                          \
532                                                                               \
533         c = normal[ch];                                                       \
534                                                                               \
535         if (nxt_slow_path(c == '\0')) {                                       \
536             p = &(ch);                                                        \
537             goto name_end;                                                    \
538         }                                                                     \
539                                                                               \
540         hash = nxt_http_field_hash_char(hash, c);
541 
542 /* enddef */
543 
544         nxt_field_name_test_char(p[0]);
545         nxt_field_name_test_char(p[1]);
546         nxt_field_name_test_char(p[2]);
547         nxt_field_name_test_char(p[3]);
548 
549         nxt_field_name_test_char(p[4]);
550         nxt_field_name_test_char(p[5]);
551         nxt_field_name_test_char(p[6]);
552         nxt_field_name_test_char(p[7]);
553 
554         p += 8;
555     }
556 
557     while (nxt_fast_path(p != end)) {
558         nxt_field_name_test_char(*p); p++;
559     }
560 
561     len = p - *pos;
562 
563     if (nxt_slow_path(len > NXT_HTTP_MAX_FIELD_NAME)) {
564         return NXT_HTTP_PARSE_TOO_LARGE_FIELD;
565     }
566 
567     rp->field_hash = hash;
568     rp->field_name.length = len;
569 
570     rp->handler = &nxt_http_parse_field_name;
571 
572     return NXT_AGAIN;
573 
574 name_end:
575 
576     if (nxt_fast_path(*p == ':')) {
577         if (nxt_slow_path(p == *pos)) {
578             return NXT_HTTP_PARSE_INVALID;
579         }
580 
581         len = p - *pos;
582 
583         if (nxt_slow_path(len > NXT_HTTP_MAX_FIELD_NAME)) {
584             return NXT_HTTP_PARSE_TOO_LARGE_FIELD;
585         }
586 
587         rp->field_hash = hash;
588 
589         rp->field_name.length = len;
590         rp->field_name.start = *pos;
591 
592         *pos = p + 1;
593 
594         return nxt_http_parse_field_value(rp, pos, end);
595     }
596 
597     if (nxt_slow_path(p != *pos)) {
598         return NXT_HTTP_PARSE_INVALID;
599     }
600 
601     return nxt_http_parse_field_end(rp, pos, end);
602 }
603 
604 
605 static nxt_int_t
606 nxt_http_parse_field_value(nxt_http_request_parse_t *rp, u_char **pos,
607     u_char *end)
608 {
609     u_char  *p, *start, ch;
610     size_t  len;
611 
612     p = *pos;
613 
614     for ( ;; ) {
615         if (nxt_slow_path(p == end)) {
616             *pos = p;
617             rp->handler = &nxt_http_parse_field_value;
618             return NXT_AGAIN;
619         }
620 
621         ch = *p;
622 
623         if (ch != ' ' && ch != '\t') {
624             break;
625         }
626 
627         p++;
628     }
629 
630     start = p;
631 
632     p += rp->field_value.length;
633 
634     for ( ;; ) {
635         p = nxt_http_lookup_field_end(p, end);
636 
637         if (nxt_slow_path(p == end)) {
638             *pos = start;
639 
640             len = p - start;
641 
642             if (nxt_slow_path(len > NXT_HTTP_MAX_FIELD_VALUE)) {
643                 return NXT_HTTP_PARSE_TOO_LARGE_FIELD;
644             }
645 
646             rp->field_value.length = len;
647             rp->handler = &nxt_http_parse_field_value;
648             return NXT_AGAIN;
649         }
650 
651         ch = *p;
652 
653         if (nxt_fast_path(ch == '\r' || ch == '\n')) {
654             break;
655         }
656 
657         if (ch != '\t') {
658             return NXT_HTTP_PARSE_INVALID;
659         }
660 
661         p++;
662     }
663 
664     *pos = p;
665 
666     if (nxt_fast_path(p != start)) {
667 
668         while (p[-1] == ' ' || p[-1] == '\t') {
669             p--;
670         }
671     }
672 
673     len = p - start;
674 
675     if (nxt_slow_path(len > NXT_HTTP_MAX_FIELD_VALUE)) {
676         return NXT_HTTP_PARSE_TOO_LARGE_FIELD;
677     }
678 
679     rp->field_value.length = len;
680     rp->field_value.start = start;
681 
682     return nxt_http_parse_field_end(rp, pos, end);
683 }
684 
685 
686 static u_char *
687 nxt_http_lookup_field_end(u_char *p, u_char *end)
688 {
689     while (nxt_fast_path(end - p >= 16)) {
690 
691 #define nxt_field_end_test_char(ch)                                           \
692                                                                               \
693         if (nxt_slow_path((ch) < 0x20)) {                                     \
694             return &(ch);                                                     \
695         }
696 
697 /* enddef */
698 
699         nxt_field_end_test_char(p[0]);
700         nxt_field_end_test_char(p[1]);
701         nxt_field_end_test_char(p[2]);
702         nxt_field_end_test_char(p[3]);
703 
704         nxt_field_end_test_char(p[4]);
705         nxt_field_end_test_char(p[5]);
706         nxt_field_end_test_char(p[6]);
707         nxt_field_end_test_char(p[7]);
708 
709         nxt_field_end_test_char(p[8]);
710         nxt_field_end_test_char(p[9]);
711         nxt_field_end_test_char(p[10]);
712         nxt_field_end_test_char(p[11]);
713 
714         nxt_field_end_test_char(p[12]);
715         nxt_field_end_test_char(p[13]);
716         nxt_field_end_test_char(p[14]);
717         nxt_field_end_test_char(p[15]);
718 
719         p += 16;
720     }
721 
722     while (nxt_fast_path(end - p >= 4)) {
723 
724         nxt_field_end_test_char(p[0]);
725         nxt_field_end_test_char(p[1]);
726         nxt_field_end_test_char(p[2]);
727         nxt_field_end_test_char(p[3]);
728 
729         p += 4;
730     }
731 
732     switch (end - p) {
733     case 3:
734         nxt_field_end_test_char(*p); p++;
735         /* Fall through. */
736     case 2:
737         nxt_field_end_test_char(*p); p++;
738         /* Fall through. */
739     case 1:
740         nxt_field_end_test_char(*p); p++;
741         /* Fall through. */
742     case 0:
743         break;
744     default:
745         nxt_unreachable();
746     }
747 
748     return p;
749 }
750 
751 
752 static nxt_int_t
753 nxt_http_parse_field_end(nxt_http_request_parse_t *rp, u_char **pos,
754     u_char *end)
755 {
756     u_char            *p;
757     nxt_http_field_t  *field;
758 
759     p = *pos;
760 
761     if (nxt_fast_path(*p == '\r')) {
762         p++;
763 
764         if (nxt_slow_path(p == end)) {
765             rp->handler = &nxt_http_parse_field_end;
766             return NXT_AGAIN;
767         }
768     }
769 
770     if (nxt_fast_path(*p == '\n')) {
771         *pos = p + 1;
772 
773         if (rp->field_name.length != 0) {
774             field = nxt_list_add(rp->fields);
775 
776             if (nxt_slow_path(field == NULL)) {
777                 return NXT_ERROR;
778             }
779 
780             field->hash = nxt_http_field_hash_end(rp->field_hash);
781             field->skip = 0;
782 
783             field->name_length = rp->field_name.length;
784             field->value_length = rp->field_value.length;
785             field->name = rp->field_name.start;
786             field->value = rp->field_value.start;
787 
788             rp->field_hash = NXT_HTTP_FIELD_HASH_INIT;
789 
790             rp->field_name.length = 0;
791             rp->field_value.length = 0;
792 
793             rp->handler = &nxt_http_parse_field_name;
794             return NXT_OK;
795         }
796 
797         return NXT_DONE;
798     }
799 
800     return NXT_HTTP_PARSE_INVALID;
801 }
802 
803 
/*
 * Tests whether byte c has its bit set in the nxt_http_normal[] bitmap.
 * The argument is used unparenthesized and more than once, so it must be
 * a plain variable.
 */
#define                                                                       \
nxt_http_is_normal(c)                                                         \
    (nxt_fast_path((nxt_http_normal[c / 8] & (1 << (c & 7))) != 0))


/*
 * Bitmap with one bit per byte value: bit (c & 7) of element (c / 8).
 * A set bit marks a byte that nxt_http_parse_complex_target() copies to
 * the output without looking at it further.  The cleared bits are
 * \0, \n, \r, space, '#', '%', '+', '.', '/' and '?'.
 *
 * The comments on the right list each byte most significant bit first;
 * the '|' in the second row stands at the position of '+' (0x2B).
 */
static const uint8_t  nxt_http_normal[32]  nxt_aligned(32) = {

                             /*        \0   \r  \n                         */
    0xFE, 0xDB, 0xFF, 0xFF,  /* 1111 1110  1101 1011  1111 1111  1111 1111 */

                             /* '&%$ #"!   /.-, |*)(  7654 3210  ?>=< ;:98 */
    0xD6, 0x37, 0xFF, 0x7F,  /* 1101 0110  0011 0111  1111 1111  0111 1111 */

                             /* GFED CBA@  ONML KJIH  WVUT SRQP  _^]\ [ZYX */
    0xFF, 0xFF, 0xFF, 0xFF,  /* 1111 1111  1111 1111  1111 1111  1111 1111 */

                             /* gfed cba`  onml kjih  wvut srqp   ~}| {zyx */
    0xFF, 0xFF, 0xFF, 0xFF,  /* 1111 1111  1111 1111  1111 1111  1111 1111 */

    0xFF, 0xFF, 0xFF, 0xFF,  /* 1111 1111  1111 1111  1111 1111  1111 1111 */
    0xFF, 0xFF, 0xFF, 0xFF,  /* 1111 1111  1111 1111  1111 1111  1111 1111 */
    0xFF, 0xFF, 0xFF, 0xFF,  /* 1111 1111  1111 1111  1111 1111  1111 1111 */
    0xFF, 0xFF, 0xFF, 0xFF,  /* 1111 1111  1111 1111  1111 1111  1111 1111 */
};
828 
829 
/*
 * Slow-path processing of the request target between rp->target_start and
 * rp->target_end.  A normalized copy of the path is written into a buffer
 * of (target length + 1) bytes taken from rp->mem_pool:
 *
 *   - repeated slashes are collapsed into one;
 *   - "/./" segments are removed;
 *   - "/../" removes the preceding segment, and is rejected if there is
 *     none to remove;
 *   - %XX sequences in the path are decoded and the decoded byte is then
 *     processed as if it had appeared literally, except that a decoded
 *     '%', '#' or '?' is stored as an ordinary byte, a decoded NUL is
 *     rejected, and with rp->encoded_slashes set a decoded '/' is stored
 *     as "%2F" and a decoded '%' as "%25";
 *   - '?' ends the path and starts the arguments, '#' ends processing.
 *
 * On success rp->path is set, plus rp->args and rp->exten when present,
 * and NXT_OK is returned.  NXT_ERROR means the allocation failed;
 * NXT_HTTP_PARSE_INVALID means a bad or truncated %XX sequence, a decoded
 * NUL, or ".." climbing above the start of the path.
 */
static nxt_int_t
nxt_http_parse_complex_target(nxt_http_request_parse_t *rp)
{
    u_char  *p, *u, c, ch, high, *exten, *args;

    enum {
        sw_normal = 0,      /* inside a path segment */
        sw_slash,           /* just after '/' */
        sw_dot,             /* just after "/." */
        sw_dot_dot,         /* just after "/.." */
        sw_quoted,          /* after '%', expecting the high hex digit */
        sw_quoted_second,   /* expecting the low hex digit */
    } state, saved_state;

    nxt_prefetch(nxt_http_normal);

    state = sw_normal;
    saved_state = sw_normal;
    p = rp->target_start;

    u = nxt_mp_alloc(rp->mem_pool, rp->target_end - p + 1);
    if (nxt_slow_path(u == NULL)) {
        return NXT_ERROR;
    }

    rp->path.length = 0;
    rp->path.start = u;

    high = '\0';
    exten = NULL;
    args = NULL;

    /* p reads the raw target, u writes the normalized path */

    while (p < rp->target_end) {

        ch = *p++;

    again:

        /* a decoded %XX byte re-enters here in the state saved at '%' */

        switch (state) {

        case sw_normal:

            if (nxt_http_is_normal(ch)) {
                *u++ = ch;
                continue;
            }

            switch (ch) {
            case '/':
                exten = NULL;
                state = sw_slash;
                *u++ = ch;
                continue;
            case '%':
                saved_state = state;
                state = sw_quoted;
                continue;
            case '?':
                /* p is already past '?', i.e. at the first args byte */
                args = p;
                goto args;
            case '#':
                goto done;
            case '.':
                /* the extension starts after the dot */
                exten = u + 1;
                *u++ = ch;
                continue;
            default:
                *u++ = ch;
                continue;
            }

            break;

        case sw_slash:

            if (nxt_http_is_normal(ch)) {
                state = sw_normal;
                *u++ = ch;
                continue;
            }

            switch (ch) {
            case '/':
                /* collapse repeated slashes */
                continue;
            case '.':
                state = sw_dot;
                *u++ = ch;
                continue;
            case '%':
                saved_state = state;
                state = sw_quoted;
                continue;
            case '?':
                args = p;
                goto args;
            case '#':
                goto done;
            default:
                state = sw_normal;
                *u++ = ch;
                continue;
            }

            break;

        case sw_dot:

            if (nxt_http_is_normal(ch)) {
                state = sw_normal;
                *u++ = ch;
                continue;
            }

            switch (ch) {
            case '/':
                /* "/./": take back the '.' that was already written */
                state = sw_slash;
                u--;
                continue;
            case '.':
                state = sw_dot_dot;
                *u++ = ch;
                continue;
            case '%':
                saved_state = state;
                state = sw_quoted;
                continue;
            case '?':
                args = p;
                goto args;
            case '#':
                goto done;
            default:
                state = sw_normal;
                *u++ = ch;
                continue;
            }

            break;

        case sw_dot_dot:

            if (nxt_http_is_normal(ch)) {
                state = sw_normal;
                *u++ = ch;
                continue;
            }

            switch (ch) {
            case '/':
                /*
                 * "/../": the output ends with "/..".  Step back over
                 * those three bytes and the last byte of the preceding
                 * segment, then search backwards for the slash that
                 * starts that segment and continue right after it.
                 *
                 * NOTE(review): u can be moved before rp->path.start
                 * here before the bounds check below is reached.
                 */
                state = sw_slash;
                u -= 5;
                for ( ;; ) {
                    if (u < rp->path.start) {
                        return NXT_HTTP_PARSE_INVALID;
                    }
                    if (*u == '/') {
                        u++;
                        break;
                    }
                    u--;
                }
                /* leaves the inner switch; the outer break follows */
                break;

            case '%':
                saved_state = state;
                state = sw_quoted;
                continue;
            case '?':
                args = p;
                goto args;
            case '#':
                goto done;
            default:
                state = sw_normal;
                *u++ = ch;
                continue;
            }

            break;

        case sw_quoted:
            rp->quoted_target = 1;

            if (ch >= '0' && ch <= '9') {
                high = (u_char) (ch - '0');
                state = sw_quoted_second;
                continue;
            }

            /* fold 'A'..'F' to lowercase */
            c = (u_char) (ch | 0x20);
            if (c >= 'a' && c <= 'f') {
                high = (u_char) (c - 'a' + 10);
                state = sw_quoted_second;
                continue;
            }

            return NXT_HTTP_PARSE_INVALID;

        case sw_quoted_second:
            if (ch >= '0' && ch <= '9') {
                ch = (u_char) ((high << 4) + ch - '0');

                /* %25: stored as '%', or as "%25" with encoded_slashes */
                if (ch == '%') {
                    state = sw_normal;
                    *u++ = '%';

                    if (rp->encoded_slashes) {
                        *u++ = '2';
                        *u++ = '5';
                    }

                    continue;
                }

                /* %23: a literal '#' that does not end the path */
                if (ch == '#') {
                    state = sw_normal;
                    *u++ = '#';
                    continue;
                }

                /* %00 is rejected */
                if (ch == '\0') {
                    return NXT_HTTP_PARSE_INVALID;
                }

                state = saved_state;
                goto again;
            }

            c = (u_char) (ch | 0x20);
            if (c >= 'a' && c <= 'f') {
                ch = (u_char) ((high << 4) + c - 'a' + 10);

                /* %3F: a literal '?' that does not start the arguments */
                if (ch == '?') {
                    state = sw_normal;
                    *u++ = ch;
                    continue;
                }

                /* %2F is kept encoded when encoded_slashes is set */
                if (ch == '/' && rp->encoded_slashes) {
                    state = sw_normal;
                    *u++ = '%';
                    *u++ = '2';
                    *u++ = p[-1];  /* 'f' or 'F' */
                    continue;
                }

                state = saved_state;
                goto again;
            }

            return NXT_HTTP_PARSE_INVALID;
        }
    }

    /* the target ended in the middle of a %XX sequence */

    if (state >= sw_quoted) {
        return NXT_HTTP_PARSE_INVALID;
    }

args:

    /* the arguments run up to '#' or the end of the target */

    for (/* void */; p < rp->target_end; p++) {
        if (*p == '#') {
            break;
        }
    }

    if (args != NULL) {
        rp->args.length = p - args;
        rp->args.start = args;
    }

done:

    rp->path.length = u - rp->path.start;

    if (exten) {
        rp->exten.length = u - exten;
        rp->exten.start = exten;
    }

    return NXT_OK;
}
1112 
1113 
/*
 * Level hash prototype used by nxt_http_fields_hash() for tables of header
 * field processors.  Entries are matched with nxt_http_field_hash_test()
 * and hash memory is obtained and released through the two pool wrappers
 * listed below.
 */
const nxt_lvlhsh_proto_t  nxt_http_fields_hash_proto  nxt_aligned(64) = {
    NXT_LVLHSH_BUCKET_SIZE(64),
    { NXT_HTTP_FIELD_LVLHSH_SHIFT, 0, 0, 0, 0, 0, 0, 0 },
    nxt_http_field_hash_test,
    nxt_http_field_hash_alloc,
    nxt_http_field_hash_free,
};
1121 
1122 
1123 static nxt_int_t
1124 nxt_http_field_hash_test(nxt_lvlhsh_query_t *lhq, void *data)
1125 {
1126     nxt_http_field_proc_t  *field;
1127 
1128     field = data;
1129 
1130     if (nxt_strcasestr_eq(&lhq->key, &field->name)) {
1131         return NXT_OK;
1132     }
1133 
1134     return NXT_DECLINED;
1135 }
1136 
1137 
/*
 * Hash allocation callback: forwards to nxt_mp_align() on the given pool,
 * passing "size" for both of its remaining arguments.
 */
static void *
nxt_http_field_hash_alloc(void *pool, size_t size)
{
    return nxt_mp_align(pool, size, size);
}
1143 
1144 
/* Hash release callback: hands the block back to the pool via nxt_mp_free(). */
static void
nxt_http_field_hash_free(void *pool, void *p)
{
    nxt_mp_free(pool, p);
}
1150 
1151 
/*
 * Test callback installed by nxt_http_fields_hash_collisions(): it ignores
 * both arguments and always returns NXT_OK.
 * NOTE(review): presumably this makes any stored entry with the same key
 * hash count as a match, so the insert is declined - confirm against the
 * level hash implementation.
 */
static nxt_int_t
nxt_http_field_hash_collision(nxt_lvlhsh_query_t *lhq, void *data)
{
    return NXT_OK;
}
1157 
1158 
1159 nxt_int_t
1160 nxt_http_fields_hash(nxt_lvlhsh_t *hash, nxt_mp_t *mp,
1161     nxt_http_field_proc_t items[], nxt_uint_t count)
1162 {
1163     u_char              ch;
1164     uint32_t            key;
1165     nxt_str_t           *name;
1166     nxt_int_t           ret;
1167     nxt_uint_t          i, j;
1168     nxt_lvlhsh_query_t  lhq;
1169 
1170     lhq.replace = 0;
1171     lhq.proto = &nxt_http_fields_hash_proto;
1172     lhq.pool = mp;
1173 
1174     for (i = 0; i < count; i++) {
1175         key = NXT_HTTP_FIELD_HASH_INIT;
1176         name = &items[i].name;
1177 
1178         for (j = 0; j < name->length; j++) {
1179             ch = nxt_lowcase(name->start[j]);
1180             key = nxt_http_field_hash_char(key, ch);
1181         }
1182 
1183         lhq.key_hash = nxt_http_field_hash_end(key) & 0xFFFF;
1184         lhq.key = *name;
1185         lhq.value = &items[i];
1186 
1187         ret = nxt_lvlhsh_insert(hash, &lhq);
1188 
1189         if (nxt_slow_path(ret != NXT_OK)) {
1190             return NXT_ERROR;
1191         }
1192     }
1193 
1194     return NXT_OK;
1195 }
1196 
1197 
1198 nxt_uint_t
1199 nxt_http_fields_hash_collisions(nxt_lvlhsh_t *hash, nxt_mp_t *mp,
1200     nxt_http_field_proc_t items[], nxt_uint_t count, nxt_bool_t level)
1201 {
1202     u_char              ch;
1203     uint32_t            key, mask;
1204     nxt_str_t           *name;
1205     nxt_uint_t          colls, i, j;
1206     nxt_lvlhsh_proto_t  proto;
1207     nxt_lvlhsh_query_t  lhq;
1208 
1209     proto = nxt_http_fields_hash_proto;
1210     proto.test = nxt_http_field_hash_collision;
1211 
1212     lhq.replace = 0;
1213     lhq.proto = &proto;
1214     lhq.pool = mp;
1215 
1216     mask = level ? (1 << NXT_HTTP_FIELD_LVLHSH_SHIFT) - 1 : 0xFFFF;
1217 
1218     colls = 0;
1219 
1220     for (i = 0; i < count; i++) {
1221         key = NXT_HTTP_FIELD_HASH_INIT;
1222         name = &items[i].name;
1223 
1224         for (j = 0; j < name->length; j++) {
1225             ch = nxt_lowcase(name->start[j]);
1226             key = nxt_http_field_hash_char(key, ch);
1227         }
1228 
1229         lhq.key_hash = nxt_http_field_hash_end(key) & mask;
1230         lhq.value = &items[i];
1231 
1232         if (nxt_lvlhsh_insert(hash, &lhq) == NXT_DECLINED) {
1233             colls++;
1234         }
1235     }
1236 
1237     return colls;
1238 }
1239 
1240 
/*
 * Runs nxt_http_field_process() for every field in the list, passing the
 * processor hash and the caller's context through.  Stops at the first
 * field whose result is not NXT_OK and returns that result; returns NXT_OK
 * when every field was processed successfully.
 */
nxt_int_t
nxt_http_fields_process(nxt_list_t *fields, nxt_lvlhsh_t *hash, void *ctx)
{
    nxt_int_t         ret;
    nxt_http_field_t  *field;

    nxt_list_each(field, fields) {

        ret = nxt_http_field_process(field, hash, ctx);
        if (nxt_slow_path(ret != NXT_OK)) {
            return ret;
        }

    } nxt_list_loop;

    return NXT_OK;
}
1258