xref: /unit/src/nxt_string.c (revision 1183:6008abf06746)
1 
2 /*
3  * Copyright (C) Igor Sysoev
4  * Copyright (C) NGINX, Inc.
5  */
6 
7 #include <nxt_main.h>
8 
9 
10 nxt_str_t *
11 nxt_str_alloc(nxt_mp_t *mp, size_t length)
12 {
13     nxt_str_t  *s;
14 
15     /* The string start is allocated aligned to be close to nxt_str_t. */
16     s = nxt_mp_get(mp, sizeof(nxt_str_t) + length);
17 
18     if (nxt_fast_path(s != NULL)) {
19         s->length = length;
20         s->start = nxt_pointer_to(s, sizeof(nxt_str_t));
21     }
22 
23     return s;
24 }
25 
26 
27 /*
28  * nxt_str_dup() creates a new string with a copy of a source string.
29  * If length of the source string is zero, then the new string anyway
30  * gets a pointer somewhere in mem_pool.
31  */
32 
33 nxt_str_t *
34 nxt_str_dup(nxt_mp_t *mp, nxt_str_t *dst, const nxt_str_t *src)
35 {
36     u_char  *p;
37 
38     if (dst == NULL) {
39         /* The string start is allocated aligned to be close to nxt_str_t. */
40         dst = nxt_mp_get(mp, sizeof(nxt_str_t) + src->length);
41         if (nxt_slow_path(dst == NULL)) {
42             return NULL;
43         }
44 
45         p = (u_char *) dst;
46         p += sizeof(nxt_str_t);
47         dst->start = p;
48 
49     } else {
50         dst->start = nxt_mp_nget(mp, src->length);
51         if (nxt_slow_path(dst->start == NULL)) {
52             return NULL;
53         }
54     }
55 
56     nxt_memcpy(dst->start, src->start, src->length);
57     dst->length = src->length;
58 
59     return dst;
60 }
61 
62 
63 /*
64  * nxt_str_cstrz() creates a C style zero-terminated copy of a source
65  * nxt_str_t.  The function is intended to create strings suitable
66  * for libc and kernel interfaces so result is pointer to char instead
67  * of u_char to minimize casts.
68  */
69 
70 char *
71 nxt_str_cstrz(nxt_mp_t *mp, const nxt_str_t *src)
72 {
73     char  *p, *dst;
74 
75     dst = nxt_mp_alloc(mp, src->length + 1);
76 
77     if (nxt_fast_path(dst != NULL)) {
78         p = nxt_cpymem(dst, src->start, src->length);
79         *p = '\0';
80     }
81 
82     return dst;
83 }
84 
85 
86 void
87 nxt_memcpy_lowcase(u_char *dst, const u_char *src, size_t length)
88 {
89     u_char  c;
90 
91     while (length != 0) {
92         c = *src++;
93         *dst++ = nxt_lowcase(c);
94         length--;
95     }
96 }
97 
98 
99 void
100 nxt_memcpy_upcase(u_char *dst, const u_char *src, size_t length)
101 {
102     u_char  c;
103 
104     while (length != 0) {
105         c = *src++;
106         *dst++ = nxt_upcase(c);
107         length--;
108     }
109 }
110 
111 
112 u_char *
113 nxt_cpystrn(u_char *dst, const u_char *src, size_t length)
114 {
115     if (length == 0) {
116         return dst;
117     }
118 
119     while (--length != 0) {
120         *dst = *src;
121 
122         if (*dst == '\0') {
123             return dst;
124         }
125 
126         dst++;
127         src++;
128     }
129 
130     *dst = '\0';
131 
132     return dst;
133 }
134 
135 
136 nxt_int_t
137 nxt_strcasecmp(const u_char *s1, const u_char *s2)
138 {
139     u_char     c1, c2;
140     nxt_int_t  n;
141 
142     for ( ;; ) {
143         c1 = *s1++;
144         c2 = *s2++;
145 
146         c1 = nxt_lowcase(c1);
147         c2 = nxt_lowcase(c2);
148 
149         n = c1 - c2;
150 
151         if (n != 0) {
152             return n;
153         }
154 
155         if (c1 == 0) {
156             return 0;
157         }
158     }
159 }
160 
161 
162 nxt_int_t
163 nxt_strncasecmp(const u_char *s1, const u_char *s2, size_t length)
164 {
165     u_char     c1, c2;
166     nxt_int_t  n;
167 
168     while (length-- != 0) {
169         c1 = *s1++;
170         c2 = *s2++;
171 
172         c1 = nxt_lowcase(c1);
173         c2 = nxt_lowcase(c2);
174 
175         n = c1 - c2;
176 
177         if (n != 0) {
178             return n;
179         }
180 
181         if (c1 == 0) {
182             return 0;
183         }
184     }
185 
186     return 0;
187 }
188 
189 
190 nxt_int_t
191 nxt_memcasecmp(const u_char *s1, const u_char *s2, size_t length)
192 {
193     u_char     c1, c2;
194     nxt_int_t  n;
195 
196     while (length-- != 0) {
197         c1 = *s1++;
198         c2 = *s2++;
199 
200         c1 = nxt_lowcase(c1);
201         c2 = nxt_lowcase(c2);
202 
203         n = c1 - c2;
204 
205         if (n != 0) {
206             return n;
207         }
208     }
209 
210     return 0;
211 }
212 
213 
214 /*
215  * nxt_memstrn() is intended for search of static substring "ss"
216  * with known length "length" in string "s" limited by parameter "end".
217  * Zeros are ignored in both strings.
218  */
219 
220 u_char *
221 nxt_memstrn(const u_char *s, const u_char *end, const char *ss, size_t length)
222 {
223     u_char  c1, c2, *s2;
224 
225     s2 = (u_char *) ss;
226     c2 = *s2++;
227     length--;
228 
229     while (s < end) {
230         c1 = *s++;
231 
232         if (c1 == c2) {
233 
234             if (s + length > end) {
235                 return NULL;
236             }
237 
238             if (nxt_memcmp(s, s2, length) == 0) {
239                 return (u_char *) s - 1;
240             }
241         }
242     }
243 
244     return NULL;
245 }
246 
247 
248 /*
249  * nxt_strcasestrn() is intended for caseless search of static substring
250  * "ss" with known length "length" in string "s" limited by parameter "end".
251  * Zeros are ignored in both strings.
252  */
253 
254 u_char *
255 nxt_memcasestrn(const u_char *s, const u_char *end, const char *ss,
256     size_t length)
257 {
258     u_char  c1, c2, *s2;
259 
260     s2 = (u_char *) ss;
261     c2 = *s2++;
262     c2 = nxt_lowcase(c2);
263     length--;
264 
265     while (s < end) {
266         c1 = *s++;
267         c1 = nxt_lowcase(c1);
268 
269         if (c1 == c2) {
270 
271             if (s + length > end) {
272                 return NULL;
273             }
274 
275             if (nxt_memcasecmp(s, s2, length) == 0) {
276                 return (u_char *) s - 1;
277             }
278         }
279     }
280 
281     return NULL;
282 }
283 
284 
285 /*
286  * nxt_rstrstrn() is intended to search for static substring "ss"
287  * with known length "length" in string "s" limited by parameter "end"
288  * in reverse order.  Zeros are ignored in both strings.
289  */
290 
291 u_char *
292 nxt_rmemstrn(const u_char *s, const u_char *end, const char *ss, size_t length)
293 {
294     u_char        c1, c2;
295     const u_char  *s1, *s2;
296 
297     s1 = end - length;
298     s2 = (u_char *) ss;
299     c2 = *s2++;
300     length--;
301 
302     while (s < s1) {
303         c1 = *s1;
304 
305         if (c1 == c2) {
306             if (nxt_memcmp(s1 + 1, s2, length) == 0) {
307                 return (u_char *) s1;
308             }
309         }
310 
311         s1--;
312     }
313 
314     return NULL;
315 }
316 
317 
318 size_t
319 nxt_str_strip(u_char *start, u_char *end)
320 {
321     u_char  *p;
322 
323     for (p = end - 1; p >= start; p--) {
324         if (*p != '\r' && *p != '\n') {
325             break;
326         }
327     }
328 
329     return (p + 1) - start;
330 }
331 
332 
333 nxt_int_t
334 nxt_strverscmp(const u_char *s1, const u_char *s2)
335 {
336     u_char     c1, c2;
337     nxt_int_t  diff;
338 
339     enum {
340         st_str = 0,
341         st_num,
342         st_zero,
343         st_frac,
344     } state;
345 
346     state = st_str;
347 
348     for ( ;; ) {
349         c1 = *s1++;
350         c2 = *s2++;
351 
352         diff = c1 - c2;
353 
354         if (diff != 0) {
355             break;
356         }
357 
358         if (c1 == '\0') {
359             return 0;
360         }
361 
362         if (!nxt_isdigit(c1)) {
363             state = st_str;
364             continue;
365         }
366 
367         if (state == st_str) {
368             state = (c1 != '0') ? st_num : st_zero;
369             continue;
370         }
371 
372         if (state == st_zero && c1 != '0') {
373             state = st_frac;
374             continue;
375         }
376     }
377 
378     switch (state) {
379 
380     case st_str:
381 
382         if ((u_char) (c1 - '1') > 8 || (u_char) (c2 - '1') > 8) {
383             return diff;
384         }
385 
386         c1 = *s1++;
387         c2 = *s2++;
388 
389         /* Fall through. */
390 
391     case st_num:
392 
393         while (nxt_isdigit(c1) && nxt_isdigit(c2)) {
394             c1 = *s1++;
395             c2 = *s2++;
396         }
397 
398         if (nxt_isdigit(c1)) {
399             return 1;
400         }
401 
402         if (nxt_isdigit(c2)) {
403             return -1;
404         }
405 
406         return diff;
407 
408     case st_zero:
409 
410         if (c1 == '0' || c2 == '\0') {
411             return -1;
412         }
413 
414         if (c2 == '0' || c1 == '\0') {
415             return 1;
416         }
417 
418         /* Fall through. */
419 
420     case st_frac:
421     default:
422         return diff;
423     }
424 }
425 
426 
427 nxt_bool_t
428 nxt_strvers_match(u_char *version, u_char *prefix, size_t length)
429 {
430     u_char  next, last;
431 
432     if (length == 0) {
433         return 1;
434     }
435 
436     if (nxt_strncmp(version, prefix, length) == 0) {
437 
438         next = version[length];
439 
440         if (next == '\0') {
441             return 1;
442         }
443 
444         last = version[length - 1];
445 
446         if (nxt_isdigit(last) != nxt_isdigit(next)) {
447             /* This is a version part boundary. */
448             return 1;
449         }
450     }
451 
452     return 0;
453 }
454 
455 
456 u_char *
457 nxt_decode_uri(u_char *dst, u_char *src, size_t length)
458 {
459     u_char   *end, ch;
460     uint8_t  d0, d1;
461 
462     static const uint8_t  hex[256]
463         nxt_aligned(32) =
464     {
465         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
466         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
467         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
468          0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 16, 16, 16, 16, 16, 16,
469         16, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
470         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
471         16, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16,
472         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
473         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
474         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
475         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
476         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
477         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
478         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
479         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
480         16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
481     };
482 
483     nxt_prefetch(&hex['0']);
484 
485     end = src + length;
486 
487     while (src < end) {
488         ch = *src++;
489 
490         if (ch == '%') {
491             if (nxt_slow_path(end - src < 2)) {
492                 return NULL;
493             }
494 
495             d0 = hex[*src++];
496             d1 = hex[*src++];
497 
498             if (nxt_slow_path((d0 | d1) >= 16)) {
499                 return NULL;
500             }
501 
502             ch = (d0 << 4) + d1;
503         }
504 
505         *dst++ = ch;
506     }
507 
508     return dst;
509 }
510 
511 
512 uintptr_t
513 nxt_encode_uri(u_char *dst, u_char *src, size_t length)
514 {
515     u_char      *end;
516     nxt_uint_t  n;
517 
518     static const u_char  hex[16] = "0123456789ABCDEF";
519 
520                     /* " ", "#", "%", "?", %00-%1F, %7F-%FF */
521 
522     static const uint32_t  escape[] = {
523         0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
524 
525                     /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
526         0x80000029, /* 1000 0000 0000 0000 0000 0000 0010 1001 */
527 
528                     /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
529         0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
530 
531                     /* ~}| {zyx wvut srqp onml kjih gfed cba` */
532         0x80000000, /* 1000 0000 0000 0000 0000 0000 0000 0000 */
533 
534         0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
535         0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
536         0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
537         0xffffffff  /* 1111 1111 1111 1111 1111 1111 1111 1111 */
538     };
539 
540     end = src + length;
541 
542     if (dst == NULL) {
543 
544         /* Find the number of the characters to be escaped. */
545 
546         n = 0;
547 
548         while (src < end) {
549 
550             if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
551                 n++;
552             }
553 
554             src++;
555         }
556 
557         return (uintptr_t) n;
558     }
559 
560     while (src < end) {
561 
562         if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
563             *dst++ = '%';
564             *dst++ = hex[*src >> 4];
565             *dst++ = hex[*src & 0xf];
566 
567         } else {
568             *dst++ = *src;
569         }
570 
571         src++;
572     }
573 
574     return (uintptr_t) dst;
575 }
576