xref: /unit/src/nxt_string.c (revision 1234:e56683f325ab)
1 
2 /*
3  * Copyright (C) Igor Sysoev
4  * Copyright (C) NGINX, Inc.
5  */
6 
7 #include <nxt_main.h>
8 
9 
10 nxt_str_t *
11 nxt_str_alloc(nxt_mp_t *mp, size_t length)
12 {
13     nxt_str_t  *s;
14 
15     /* The string start is allocated aligned to be close to nxt_str_t. */
16     s = nxt_mp_get(mp, sizeof(nxt_str_t) + length);
17 
18     if (nxt_fast_path(s != NULL)) {
19         s->length = length;
20         s->start = nxt_pointer_to(s, sizeof(nxt_str_t));
21     }
22 
23     return s;
24 }
25 
26 
27 /*
28  * nxt_str_dup() creates a new string with a copy of a source string.
29  * If length of the source string is zero, then the new string anyway
30  * gets a pointer somewhere in mem_pool.
31  */
32 
33 nxt_str_t *
34 nxt_str_dup(nxt_mp_t *mp, nxt_str_t *dst, const nxt_str_t *src)
35 {
36     u_char  *p;
37 
38     if (dst == NULL) {
39         /* The string start is allocated aligned to be close to nxt_str_t. */
40         dst = nxt_mp_get(mp, sizeof(nxt_str_t) + src->length);
41         if (nxt_slow_path(dst == NULL)) {
42             return NULL;
43         }
44 
45         p = (u_char *) dst;
46         p += sizeof(nxt_str_t);
47         dst->start = p;
48 
49     } else {
50         dst->start = nxt_mp_nget(mp, src->length);
51         if (nxt_slow_path(dst->start == NULL)) {
52             return NULL;
53         }
54     }
55 
56     nxt_memcpy(dst->start, src->start, src->length);
57     dst->length = src->length;
58 
59     return dst;
60 }
61 
62 
63 /*
64  * nxt_str_cstrz() creates a C style zero-terminated copy of a source
65  * nxt_str_t.  The function is intended to create strings suitable
66  * for libc and kernel interfaces so result is pointer to char instead
67  * of u_char to minimize casts.
68  */
69 
70 char *
71 nxt_str_cstrz(nxt_mp_t *mp, const nxt_str_t *src)
72 {
73     char  *p, *dst;
74 
75     dst = nxt_mp_alloc(mp, src->length + 1);
76 
77     if (nxt_fast_path(dst != NULL)) {
78         p = nxt_cpymem(dst, src->start, src->length);
79         *p = '\0';
80     }
81 
82     return dst;
83 }
84 
85 
86 void
87 nxt_memcpy_lowcase(u_char *dst, const u_char *src, size_t length)
88 {
89     u_char  c;
90 
91     while (length != 0) {
92         c = *src++;
93         *dst++ = nxt_lowcase(c);
94         length--;
95     }
96 }
97 
98 
99 void
100 nxt_memcpy_upcase(u_char *dst, const u_char *src, size_t length)
101 {
102     u_char  c;
103 
104     while (length != 0) {
105         c = *src++;
106         *dst++ = nxt_upcase(c);
107         length--;
108     }
109 }
110 
111 
112 u_char *
113 nxt_cpystrn(u_char *dst, const u_char *src, size_t length)
114 {
115     if (length == 0) {
116         return dst;
117     }
118 
119     while (--length != 0) {
120         *dst = *src;
121 
122         if (*dst == '\0') {
123             return dst;
124         }
125 
126         dst++;
127         src++;
128     }
129 
130     *dst = '\0';
131 
132     return dst;
133 }
134 
135 
136 nxt_int_t
137 nxt_strcasecmp(const u_char *s1, const u_char *s2)
138 {
139     u_char     c1, c2;
140     nxt_int_t  n;
141 
142     for ( ;; ) {
143         c1 = *s1++;
144         c2 = *s2++;
145 
146         c1 = nxt_lowcase(c1);
147         c2 = nxt_lowcase(c2);
148 
149         n = c1 - c2;
150 
151         if (n != 0) {
152             return n;
153         }
154 
155         if (c1 == 0) {
156             return 0;
157         }
158     }
159 }
160 
161 
162 nxt_int_t
163 nxt_strncasecmp(const u_char *s1, const u_char *s2, size_t length)
164 {
165     u_char     c1, c2;
166     nxt_int_t  n;
167 
168     while (length-- != 0) {
169         c1 = *s1++;
170         c2 = *s2++;
171 
172         c1 = nxt_lowcase(c1);
173         c2 = nxt_lowcase(c2);
174 
175         n = c1 - c2;
176 
177         if (n != 0) {
178             return n;
179         }
180 
181         if (c1 == 0) {
182             return 0;
183         }
184     }
185 
186     return 0;
187 }
188 
189 
190 nxt_int_t
191 nxt_memcasecmp(const void *p1, const void *p2, size_t length)
192 {
193     u_char        c1, c2;
194     nxt_int_t     n;
195     const u_char  *s1, *s2;
196 
197     s1 = p1;
198     s2 = p2;
199 
200     while (length-- != 0) {
201         c1 = *s1++;
202         c2 = *s2++;
203 
204         c1 = nxt_lowcase(c1);
205         c2 = nxt_lowcase(c2);
206 
207         n = c1 - c2;
208 
209         if (n != 0) {
210             return n;
211         }
212     }
213 
214     return 0;
215 }
216 
217 
218 /*
219  * nxt_memstrn() is intended for search of static substring "ss"
220  * with known length "length" in string "s" limited by parameter "end".
221  * Zeros are ignored in both strings.
222  */
223 
224 u_char *
225 nxt_memstrn(const u_char *s, const u_char *end, const char *ss, size_t length)
226 {
227     u_char  c1, c2, *s2;
228 
229     s2 = (u_char *) ss;
230     c2 = *s2++;
231     length--;
232 
233     while (s < end) {
234         c1 = *s++;
235 
236         if (c1 == c2) {
237 
238             if (s + length > end) {
239                 return NULL;
240             }
241 
242             if (nxt_memcmp(s, s2, length) == 0) {
243                 return (u_char *) s - 1;
244             }
245         }
246     }
247 
248     return NULL;
249 }
250 
251 
252 /*
253  * nxt_strcasestrn() is intended for caseless search of static substring
254  * "ss" with known length "length" in string "s" limited by parameter "end".
255  * Zeros are ignored in both strings.
256  */
257 
258 u_char *
259 nxt_memcasestrn(const u_char *s, const u_char *end, const char *ss,
260     size_t length)
261 {
262     u_char  c1, c2, *s2;
263 
264     s2 = (u_char *) ss;
265     c2 = *s2++;
266     c2 = nxt_lowcase(c2);
267     length--;
268 
269     while (s < end) {
270         c1 = *s++;
271         c1 = nxt_lowcase(c1);
272 
273         if (c1 == c2) {
274 
275             if (s + length > end) {
276                 return NULL;
277             }
278 
279             if (nxt_memcasecmp(s, s2, length) == 0) {
280                 return (u_char *) s - 1;
281             }
282         }
283     }
284 
285     return NULL;
286 }
287 
288 
289 /*
290  * nxt_rstrstrn() is intended to search for static substring "ss"
291  * with known length "length" in string "s" limited by parameter "end"
292  * in reverse order.  Zeros are ignored in both strings.
293  */
294 
295 u_char *
296 nxt_rmemstrn(const u_char *s, const u_char *end, const char *ss, size_t length)
297 {
298     u_char        c1, c2;
299     const u_char  *s1, *s2;
300 
301     s1 = end - length;
302     s2 = (u_char *) ss;
303     c2 = *s2++;
304     length--;
305 
306     while (s < s1) {
307         c1 = *s1;
308 
309         if (c1 == c2) {
310             if (nxt_memcmp(s1 + 1, s2, length) == 0) {
311                 return (u_char *) s1;
312             }
313         }
314 
315         s1--;
316     }
317 
318     return NULL;
319 }
320 
321 
322 size_t
323 nxt_str_strip(u_char *start, u_char *end)
324 {
325     u_char  *p;
326 
327     for (p = end - 1; p >= start; p--) {
328         if (*p != '\r' && *p != '\n') {
329             break;
330         }
331     }
332 
333     return (p + 1) - start;
334 }
335 
336 
337 nxt_int_t
338 nxt_strverscmp(const u_char *s1, const u_char *s2)
339 {
340     u_char     c1, c2;
341     nxt_int_t  diff;
342 
343     enum {
344         st_str = 0,
345         st_num,
346         st_zero,
347         st_frac,
348     } state;
349 
350     state = st_str;
351 
352     for ( ;; ) {
353         c1 = *s1++;
354         c2 = *s2++;
355 
356         diff = c1 - c2;
357 
358         if (diff != 0) {
359             break;
360         }
361 
362         if (c1 == '\0') {
363             return 0;
364         }
365 
366         if (!nxt_isdigit(c1)) {
367             state = st_str;
368             continue;
369         }
370 
371         if (state == st_str) {
372             state = (c1 != '0') ? st_num : st_zero;
373             continue;
374         }
375 
376         if (state == st_zero && c1 != '0') {
377             state = st_frac;
378             continue;
379         }
380     }
381 
382     switch (state) {
383 
384     case st_str:
385 
386         if ((u_char) (c1 - '1') > 8 || (u_char) (c2 - '1') > 8) {
387             return diff;
388         }
389 
390         c1 = *s1++;
391         c2 = *s2++;
392 
393         /* Fall through. */
394 
395     case st_num:
396 
397         while (nxt_isdigit(c1) && nxt_isdigit(c2)) {
398             c1 = *s1++;
399             c2 = *s2++;
400         }
401 
402         if (nxt_isdigit(c1)) {
403             return 1;
404         }
405 
406         if (nxt_isdigit(c2)) {
407             return -1;
408         }
409 
410         return diff;
411 
412     case st_zero:
413 
414         if (c1 == '0' || c2 == '\0') {
415             return -1;
416         }
417 
418         if (c2 == '0' || c1 == '\0') {
419             return 1;
420         }
421 
422         /* Fall through. */
423 
424     case st_frac:
425     default:
426         return diff;
427     }
428 }
429 
430 
431 nxt_bool_t
432 nxt_strvers_match(u_char *version, u_char *prefix, size_t length)
433 {
434     u_char  next, last;
435 
436     if (length == 0) {
437         return 1;
438     }
439 
440     if (nxt_strncmp(version, prefix, length) == 0) {
441 
442         next = version[length];
443 
444         if (next == '\0') {
445             return 1;
446         }
447 
448         last = version[length - 1];
449 
450         if (nxt_isdigit(last) != nxt_isdigit(next)) {
451             /* This is a version part boundary. */
452             return 1;
453         }
454     }
455 
456     return 0;
457 }
458 
459 
460 u_char *
461 nxt_decode_uri(u_char *dst, u_char *src, size_t length)
462 {
463     u_char   *end, ch;
464     uint8_t  d0, d1;
465 
466     static const uint8_t  hex[256]
467         nxt_aligned(32) =
468     {
469         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
470         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
471         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
472          0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 16, 16, 16, 16, 16, 16,
473         16, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
474         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
475         16, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
476         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
477         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
478         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
479         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
480         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
481         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
482         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
483         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
484         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
485     };
486 
487     nxt_prefetch(&hex['0']);
488 
489     end = src + length;
490 
491     while (src < end) {
492         ch = *src++;
493 
494         if (ch == '%') {
495             if (nxt_slow_path(end - src < 2)) {
496                 return NULL;
497             }
498 
499             d0 = hex[*src++];
500             d1 = hex[*src++];
501 
502             if (nxt_slow_path((d0 | d1) >= 16)) {
503                 return NULL;
504             }
505 
506             ch = (d0 << 4) + d1;
507         }
508 
509         *dst++ = ch;
510     }
511 
512     return dst;
513 }
514 
515 
516 uintptr_t
517 nxt_encode_uri(u_char *dst, u_char *src, size_t length)
518 {
519     u_char      *end;
520     nxt_uint_t  n;
521 
522     static const u_char  hex[16] = "0123456789ABCDEF";
523 
524                     /* " ", "#", "%", "?", %00-%1F, %7F-%FF */
525 
526     static const uint32_t  escape[] = {
527         0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
528 
529                     /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
530         0x80000029, /* 1000 0000 0000 0000 0000 0000 0010 1001 */
531 
532                     /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
533         0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
534 
535                     /* ~}| {zyx wvut srqp onml kjih gfed cba` */
536         0x80000000, /* 1000 0000 0000 0000 0000 0000 0000 0000 */
537 
538         0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
539         0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
540         0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
541         0xffffffff  /* 1111 1111 1111 1111 1111 1111 1111 1111 */
542     };
543 
544     end = src + length;
545 
546     if (dst == NULL) {
547 
548         /* Find the number of the characters to be escaped. */
549 
550         n = 0;
551 
552         while (src < end) {
553 
554             if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
555                 n++;
556             }
557 
558             src++;
559         }
560 
561         return (uintptr_t) n;
562     }
563 
564     while (src < end) {
565 
566         if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
567             *dst++ = '%';
568             *dst++ = hex[*src >> 4];
569             *dst++ = hex[*src & 0xf];
570 
571         } else {
572             *dst++ = *src;
573         }
574 
575         src++;
576     }
577 
578     return (uintptr_t) dst;
579 }
580