xref: /unit/src/nxt_string.c (revision 1474:9af10e099d09)
1 
2 /*
3  * Copyright (C) Igor Sysoev
4  * Copyright (C) NGINX, Inc.
5  */
6 
7 #include <nxt_main.h>
8 
9 
10 nxt_str_t *
11 nxt_str_alloc(nxt_mp_t *mp, size_t length)
12 {
13     nxt_str_t  *s;
14 
15     /* The string start is allocated aligned to be close to nxt_str_t. */
16     s = nxt_mp_get(mp, sizeof(nxt_str_t) + length);
17 
18     if (nxt_fast_path(s != NULL)) {
19         s->length = length;
20         s->start = nxt_pointer_to(s, sizeof(nxt_str_t));
21     }
22 
23     return s;
24 }
25 
26 
27 /*
28  * nxt_str_dup() creates a new string with a copy of a source string.
29  * If length of the source string is zero, then the new string anyway
30  * gets a pointer somewhere in mem_pool.
31  */
32 
33 nxt_str_t *
34 nxt_str_dup(nxt_mp_t *mp, nxt_str_t *dst, const nxt_str_t *src)
35 {
36     u_char  *p;
37 
38     if (dst == NULL) {
39         /* The string start is allocated aligned to be close to nxt_str_t. */
40         dst = nxt_mp_get(mp, sizeof(nxt_str_t) + src->length);
41         if (nxt_slow_path(dst == NULL)) {
42             return NULL;
43         }
44 
45         p = (u_char *) dst;
46         p += sizeof(nxt_str_t);
47         dst->start = p;
48 
49     } else {
50         dst->start = nxt_mp_nget(mp, src->length);
51         if (nxt_slow_path(dst->start == NULL)) {
52             return NULL;
53         }
54     }
55 
56     nxt_memcpy(dst->start, src->start, src->length);
57     dst->length = src->length;
58 
59     return dst;
60 }
61 
62 
63 /*
64  * nxt_str_cstrz() creates a C style zero-terminated copy of a source
65  * nxt_str_t.  The function is intended to create strings suitable
66  * for libc and kernel interfaces so result is pointer to char instead
67  * of u_char to minimize casts.
68  */
69 
70 char *
71 nxt_str_cstrz(nxt_mp_t *mp, const nxt_str_t *src)
72 {
73     char  *p, *dst;
74 
75     dst = nxt_mp_alloc(mp, src->length + 1);
76 
77     if (nxt_fast_path(dst != NULL)) {
78         p = nxt_cpymem(dst, src->start, src->length);
79         *p = '\0';
80     }
81 
82     return dst;
83 }
84 
85 
86 void
87 nxt_memcpy_lowcase(u_char *dst, const u_char *src, size_t length)
88 {
89     u_char  c;
90 
91     while (length != 0) {
92         c = *src++;
93         *dst++ = nxt_lowcase(c);
94         length--;
95     }
96 }
97 
98 
99 void
100 nxt_memcpy_upcase(u_char *dst, const u_char *src, size_t length)
101 {
102     u_char  c;
103 
104     while (length != 0) {
105         c = *src++;
106         *dst++ = nxt_upcase(c);
107         length--;
108     }
109 }
110 
111 
112 u_char *
113 nxt_cpystr(u_char *dst, const u_char *src)
114 {
115     for ( ;; ) {
116         *dst = *src;
117 
118         if (*dst == '\0') {
119             break;
120         }
121 
122         dst++;
123         src++;
124     }
125 
126     return dst;
127 }
128 
129 
130 u_char *
131 nxt_cpystrn(u_char *dst, const u_char *src, size_t length)
132 {
133     if (length == 0) {
134         return dst;
135     }
136 
137     while (--length != 0) {
138         *dst = *src;
139 
140         if (*dst == '\0') {
141             return dst;
142         }
143 
144         dst++;
145         src++;
146     }
147 
148     *dst = '\0';
149 
150     return dst;
151 }
152 
153 
154 nxt_int_t
155 nxt_strcasecmp(const u_char *s1, const u_char *s2)
156 {
157     u_char     c1, c2;
158     nxt_int_t  n;
159 
160     for ( ;; ) {
161         c1 = *s1++;
162         c2 = *s2++;
163 
164         c1 = nxt_lowcase(c1);
165         c2 = nxt_lowcase(c2);
166 
167         n = c1 - c2;
168 
169         if (n != 0) {
170             return n;
171         }
172 
173         if (c1 == 0) {
174             return 0;
175         }
176     }
177 }
178 
179 
180 nxt_int_t
181 nxt_strncasecmp(const u_char *s1, const u_char *s2, size_t length)
182 {
183     u_char     c1, c2;
184     nxt_int_t  n;
185 
186     while (length-- != 0) {
187         c1 = *s1++;
188         c2 = *s2++;
189 
190         c1 = nxt_lowcase(c1);
191         c2 = nxt_lowcase(c2);
192 
193         n = c1 - c2;
194 
195         if (n != 0) {
196             return n;
197         }
198 
199         if (c1 == 0) {
200             return 0;
201         }
202     }
203 
204     return 0;
205 }
206 
207 
208 nxt_int_t
209 nxt_memcasecmp(const void *p1, const void *p2, size_t length)
210 {
211     u_char        c1, c2;
212     nxt_int_t     n;
213     const u_char  *s1, *s2;
214 
215     s1 = p1;
216     s2 = p2;
217 
218     while (length-- != 0) {
219         c1 = *s1++;
220         c2 = *s2++;
221 
222         c1 = nxt_lowcase(c1);
223         c2 = nxt_lowcase(c2);
224 
225         n = c1 - c2;
226 
227         if (n != 0) {
228             return n;
229         }
230     }
231 
232     return 0;
233 }
234 
235 
236 /*
237  * nxt_memstrn() is intended for search of static substring "ss"
238  * with known length "length" in string "s" limited by parameter "end".
239  * Zeros are ignored in both strings.
240  */
241 
242 u_char *
243 nxt_memstrn(const u_char *s, const u_char *end, const char *ss, size_t length)
244 {
245     u_char  c1, c2, *s2;
246 
247     s2 = (u_char *) ss;
248     c2 = *s2++;
249     length--;
250 
251     while (s < end) {
252         c1 = *s++;
253 
254         if (c1 == c2) {
255 
256             if (s + length > end) {
257                 return NULL;
258             }
259 
260             if (nxt_memcmp(s, s2, length) == 0) {
261                 return (u_char *) s - 1;
262             }
263         }
264     }
265 
266     return NULL;
267 }
268 
269 
270 /*
271  * nxt_strcasestrn() is intended for caseless search of static substring
272  * "ss" with known length "length" in string "s" limited by parameter "end".
273  * Zeros are ignored in both strings.
274  */
275 
276 u_char *
277 nxt_memcasestrn(const u_char *s, const u_char *end, const char *ss,
278     size_t length)
279 {
280     u_char  c1, c2, *s2;
281 
282     s2 = (u_char *) ss;
283     c2 = *s2++;
284     c2 = nxt_lowcase(c2);
285     length--;
286 
287     while (s < end) {
288         c1 = *s++;
289         c1 = nxt_lowcase(c1);
290 
291         if (c1 == c2) {
292 
293             if (s + length > end) {
294                 return NULL;
295             }
296 
297             if (nxt_memcasecmp(s, s2, length) == 0) {
298                 return (u_char *) s - 1;
299             }
300         }
301     }
302 
303     return NULL;
304 }
305 
306 
307 /*
308  * nxt_rstrstrn() is intended to search for static substring "ss"
309  * with known length "length" in string "s" limited by parameter "end"
310  * in reverse order.  Zeros are ignored in both strings.
311  */
312 
313 u_char *
314 nxt_rmemstrn(const u_char *s, const u_char *end, const char *ss, size_t length)
315 {
316     u_char        c1, c2;
317     const u_char  *s1, *s2;
318 
319     s1 = end - length;
320     s2 = (u_char *) ss;
321     c2 = *s2++;
322     length--;
323 
324     while (s < s1) {
325         c1 = *s1;
326 
327         if (c1 == c2) {
328             if (nxt_memcmp(s1 + 1, s2, length) == 0) {
329                 return (u_char *) s1;
330             }
331         }
332 
333         s1--;
334     }
335 
336     return NULL;
337 }
338 
339 
340 size_t
341 nxt_str_strip(u_char *start, u_char *end)
342 {
343     u_char  *p;
344 
345     for (p = end - 1; p >= start; p--) {
346         if (*p != '\r' && *p != '\n') {
347             break;
348         }
349     }
350 
351     return (p + 1) - start;
352 }
353 
354 
355 nxt_int_t
356 nxt_strverscmp(const u_char *s1, const u_char *s2)
357 {
358     u_char     c1, c2;
359     nxt_int_t  diff;
360 
361     enum {
362         st_str = 0,
363         st_num,
364         st_zero,
365         st_frac,
366     } state;
367 
368     state = st_str;
369 
370     for ( ;; ) {
371         c1 = *s1++;
372         c2 = *s2++;
373 
374         diff = c1 - c2;
375 
376         if (diff != 0) {
377             break;
378         }
379 
380         if (c1 == '\0') {
381             return 0;
382         }
383 
384         if (!nxt_isdigit(c1)) {
385             state = st_str;
386             continue;
387         }
388 
389         if (state == st_str) {
390             state = (c1 != '0') ? st_num : st_zero;
391             continue;
392         }
393 
394         if (state == st_zero && c1 != '0') {
395             state = st_frac;
396             continue;
397         }
398     }
399 
400     switch (state) {
401 
402     case st_str:
403 
404         if ((u_char) (c1 - '1') > 8 || (u_char) (c2 - '1') > 8) {
405             return diff;
406         }
407 
408         c1 = *s1++;
409         c2 = *s2++;
410 
411         /* Fall through. */
412 
413     case st_num:
414 
415         while (nxt_isdigit(c1) && nxt_isdigit(c2)) {
416             c1 = *s1++;
417             c2 = *s2++;
418         }
419 
420         if (nxt_isdigit(c1)) {
421             return 1;
422         }
423 
424         if (nxt_isdigit(c2)) {
425             return -1;
426         }
427 
428         return diff;
429 
430     case st_zero:
431 
432         if (c1 == '0' || c2 == '\0') {
433             return -1;
434         }
435 
436         if (c2 == '0' || c1 == '\0') {
437             return 1;
438         }
439 
440         /* Fall through. */
441 
442     case st_frac:
443     default:
444         return diff;
445     }
446 }
447 
448 
449 nxt_bool_t
450 nxt_strvers_match(u_char *version, u_char *prefix, size_t length)
451 {
452     u_char  next, last;
453 
454     if (length == 0) {
455         return 1;
456     }
457 
458     if (nxt_strncmp(version, prefix, length) == 0) {
459 
460         next = version[length];
461 
462         if (next == '\0') {
463             return 1;
464         }
465 
466         last = version[length - 1];
467 
468         if (nxt_isdigit(last) != nxt_isdigit(next)) {
469             /* This is a version part boundary. */
470             return 1;
471         }
472     }
473 
474     return 0;
475 }
476 
477 
/*
 * nxt_hex2int[] maps a byte to the value of the hexadecimal digit it
 * denotes: '0'-'9' give 0-9, both 'A'-'F' and 'a'-'f' give 10-15.
 * All other bytes map to 16, so the result of OR-ing two looked up
 * values is 16 or greater if at least one of the bytes is not
 * a hexadecimal digit.
 */

const uint8_t  nxt_hex2int[256]
    nxt_aligned(32) =
{
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 16, 16, 16, 16, 16, 16,
    16, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
};
498 
499 
/*
 * nxt_uri_escape[] is a bitmap of 256 bits indexed by a byte value:
 * the byte "c" has to be percent-encoded if the bit (c & 0x1f) is set
 * in the word nxt_uri_escape[c >> 5].  The rows of characters in the
 * comments list the bytes of a word from its most significant bit
 * to its least significant bit.
 */

static const uint32_t  nxt_uri_escape[] = {
                /* Bytes 0x00-0x1f: all of them are escaped. */
    0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */

                /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
    0xd000002d, /* 1101 0000 0000 0000 0000 0000 0010 1101 */

                /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
    0x50000000, /* 0101 0000 0000 0000 0000 0000 0000 0000 */

                /*  ~}| {zyx wvut srqp onml kjih gfed cba` */
    0xb8000001, /* 1011 1000 0000 0000 0000 0000 0000 0001 */

                /* Bytes 0x80-0xff: all of them are escaped. */
    0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
    0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
    0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
    0xffffffff  /* 1111 1111 1111 1111 1111 1111 1111 1111 */
};
517 
518 
519 u_char *
520 nxt_decode_uri(u_char *dst, u_char *src, size_t length)
521 {
522     u_char   *end, ch;
523     uint8_t  d0, d1;
524 
525     nxt_prefetch(&nxt_hex2int['0']);
526 
527     end = src + length;
528 
529     while (src < end) {
530         ch = *src++;
531 
532         if (ch == '%') {
533             if (nxt_slow_path(end - src < 2)) {
534                 return NULL;
535             }
536 
537             d0 = nxt_hex2int[*src++];
538             d1 = nxt_hex2int[*src++];
539 
540             if (nxt_slow_path((d0 | d1) >= 16)) {
541                 return NULL;
542             }
543 
544             ch = (d0 << 4) + d1;
545         }
546 
547         *dst++ = ch;
548     }
549 
550     return dst;
551 }
552 
553 
554 u_char *
555 nxt_decode_uri_plus(u_char *dst, u_char *src, size_t length)
556 {
557     u_char   *end, ch;
558     uint8_t  d0, d1;
559 
560     nxt_prefetch(&nxt_hex2int['0']);
561 
562     end = src + length;
563 
564     while (src < end) {
565         ch = *src++;
566 
567         switch (ch) {
568         case '%':
569             if (nxt_slow_path(end - src < 2)) {
570                 return NULL;
571             }
572 
573             d0 = nxt_hex2int[*src++];
574             d1 = nxt_hex2int[*src++];
575 
576             if (nxt_slow_path((d0 | d1) >= 16)) {
577                 return NULL;
578             }
579 
580             ch = (d0 << 4) + d1;
581             break;
582 
583         case '+':
584             ch = ' ';
585             break;
586         }
587 
588         *dst++ = ch;
589     }
590 
591     return dst;
592 }
593 
594 
595 uintptr_t
596 nxt_encode_uri(u_char *dst, u_char *src, size_t length)
597 {
598     u_char      *end;
599     nxt_uint_t  n;
600 
601     static const u_char  hex[16] = "0123456789ABCDEF";
602 
603     end = src + length;
604 
605     if (dst == NULL) {
606 
607         /* Find the number of the characters to be escaped. */
608 
609         n = 0;
610 
611         while (src < end) {
612 
613             if (nxt_uri_escape[*src >> 5] & (1U << (*src & 0x1f))) {
614                 n++;
615             }
616 
617             src++;
618         }
619 
620         return (uintptr_t) n;
621     }
622 
623     while (src < end) {
624 
625         if (nxt_uri_escape[*src >> 5] & (1U << (*src & 0x1f))) {
626             *dst++ = '%';
627             *dst++ = hex[*src >> 4];
628             *dst++ = hex[*src & 0xf];
629 
630         } else {
631             *dst++ = *src;
632         }
633 
634         src++;
635     }
636 
637     return (uintptr_t) dst;
638 }
639 
640 
641 uintptr_t
642 nxt_encode_complex_uri(u_char *dst, u_char *src, size_t length)
643 {
644     u_char      *reserved, *end, ch;
645     nxt_uint_t  n;
646 
647     static const u_char  hex[16] = "0123456789ABCDEF";
648 
649     reserved = (u_char *) "?#\0";
650 
651     end = src + length;
652 
653     if (dst == NULL) {
654 
655         /* Find the number of the characters to be escaped. */
656 
657         n = 0;
658 
659         while (src < end) {
660             ch = *src++;
661 
662             if (nxt_uri_escape[ch >> 5] & (1U << (ch & 0x1f))) {
663                 if (ch == reserved[0]) {
664                     reserved++;
665                     continue;
666                 }
667 
668                 if (ch == reserved[1]) {
669                     reserved += 2;
670                     continue;
671                 }
672 
673                 n++;
674             }
675         }
676 
677         return (uintptr_t) n;
678     }
679 
680     while (src < end) {
681         ch = *src++;
682 
683         if (nxt_uri_escape[ch >> 5] & (1U << (ch & 0x1f))) {
684             if (ch == reserved[0]) {
685                 reserved++;
686 
687             } else if (ch == reserved[1]) {
688                 reserved += 2;
689 
690             } else {
691                 *dst++ = '%';
692                 *dst++ = hex[ch >> 4];
693                 *dst++ = hex[ch & 0xf];
694                 continue;
695             }
696         }
697 
698         *dst++ = ch;
699     }
700 
701     return (uintptr_t) dst;
702 }
703 
704 
705 nxt_bool_t
706 nxt_is_complex_uri_encoded(u_char *src, size_t length)
707 {
708     u_char   *reserved, *end, ch;
709     uint8_t  d0, d1;
710 
711     reserved = (u_char *) "?#\0";
712 
713     for (end = src + length; src < end; src++) {
714         ch = *src;
715 
716         if (nxt_uri_escape[ch >> 5] & (1U << (ch & 0x1f))) {
717             if (ch == '%') {
718                 if (end - src < 2) {
719                     return 0;
720                 }
721 
722                 d0 = nxt_hex2int[*++src];
723                 d1 = nxt_hex2int[*++src];
724 
725                 if ((d0 | d1) >= 16) {
726                     return 0;
727                 }
728 
729                 continue;
730             }
731 
732             if (ch == reserved[0]) {
733                 reserved++;
734                 continue;
735             }
736 
737             if (ch == reserved[1]) {
738                 reserved += 2;
739                 continue;
740             }
741 
742             return 0;
743         }
744     }
745 
746     return 1;
747 }
748