xref: /unit/src/test/nxt_utf8_test.c (revision 611:323e11065f83)
1 
2 /*
3  * Copyright (C) Igor Sysoev
4  * Copyright (C) NGINX, Inc.
5  */
6 
7 #include <nxt_main.h>
8 #include "nxt_tests.h"
9 
10 
/*
 * First value of the leading byte tried by the exhaustive decode/re-encode
 * loop in nxt_utf8_test().  The default skips every leading byte below 0xC2.
 * Build with -DNXT_UTF8_START_TEST=0 to run that loop over all 256 leading
 * byte values instead.
 */
#ifndef NXT_UTF8_START_TEST
#define NXT_UTF8_START_TEST  0xC2
#endif
13 
14 
15 static u_char  invalid[] = {
16 
17     /* Invalid first byte less than 0xC2. */
18     1, 0x80, 0x00, 0x00, 0x00,
19     1, 0xC0, 0x00, 0x00, 0x00,
20     2, 0xC0, 0x00, 0x00, 0x00,
21     3, 0xC0, 0x00, 0x00, 0x00,
22     4, 0xC0, 0x00, 0x00, 0x00,
23 
24     /* Invalid 0x0x110000 value. */
25     4, 0xF4, 0x90, 0x80, 0x80,
26 
27     /* Incomplete length. */
28     2, 0xE0, 0xAF, 0xB5, 0x00,
29 
30     /* Overlong values. */
31     2, 0xC0, 0x80, 0x00, 0x00,
32     2, 0xC1, 0xB3, 0x00, 0x00,
33     3, 0xE0, 0x80, 0x80, 0x00,
34     3, 0xE0, 0x81, 0xB3, 0x00,
35     3, 0xE0, 0x90, 0x9A, 0x00,
36     4, 0xF0, 0x80, 0x8A, 0x80,
37     4, 0xF0, 0x80, 0x81, 0xB3,
38     4, 0xF0, 0x80, 0xAF, 0xB5,
39 };
40 
41 
42 static nxt_int_t
43 nxt_utf8_overlong(nxt_thread_t *thr, u_char *overlong, size_t len)
44 {
45     u_char        *p, utf8[4];
46     size_t        size;
47     uint32_t      u, d;
48     nxt_uint_t    i;
49     const u_char  *pp;
50 
51     pp = overlong;
52 
53     d = nxt_utf8_decode(&pp, overlong + len);
54 
55     len = pp - overlong;
56 
57     if (d != 0xFFFFFFFF) {
58         p = nxt_utf8_encode(utf8, d);
59 
60         size = (p != NULL) ? p - utf8 : 0;
61 
62         if (len != size || nxt_memcmp(overlong, utf8, size) != 0) {
63 
64             u = 0;
65             for (i = 0; i < len; i++) {
66                 u = (u << 8) + overlong[i];
67             }
68 
69             nxt_log_alert(thr->log,
70                           "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD, %uz",
71                           u, len, d, size);
72 
73             return NXT_ERROR;
74         }
75     }
76 
77     return NXT_OK;
78 }
79 
80 
81 nxt_int_t
82 nxt_utf8_test(nxt_thread_t *thr)
83 {
84     u_char        *p, utf8[4];
85     size_t        len;
86     int32_t       n;
87     uint32_t      u, d;
88     nxt_uint_t    i, k, l, m;
89     const u_char  *pp;
90 
91     nxt_thread_time_update(thr);
92 
93     nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 test started");
94 
95     /* Test valid UTF-8. */
96 
97     for (u = 0; u < 0x110000; u++) {
98 
99         p = nxt_utf8_encode(utf8, u);
100 
101         if (p == NULL) {
102             nxt_log_alert(thr->log, "nxt_utf8_encode(%05uxD) failed", u);
103             return NXT_ERROR;
104         }
105 
106         pp = utf8;
107 
108         d = nxt_utf8_decode(&pp, p);
109 
110         if (u != d) {
111             nxt_log_alert(thr->log, "nxt_utf8_decode(%05uxD) failed: %05uxD",
112                           u, d);
113             return NXT_ERROR;
114         }
115     }
116 
117     /* Test some invalid UTF-8. */
118 
119     for (i = 0; i < sizeof(invalid); i += 5) {
120 
121         len = invalid[i];
122         utf8[0] = invalid[i + 1];
123         utf8[1] = invalid[i + 2];
124         utf8[2] = invalid[i + 3];
125         utf8[3] = invalid[i + 4];
126 
127         pp = utf8;
128 
129         d = nxt_utf8_decode(&pp, utf8 + len);
130 
131         if (d != 0xFFFFFFFF) {
132 
133             u = 0;
134             for (i = 0; i < len; i++) {
135                 u = (u << 8) + utf8[i];
136             }
137 
138             nxt_log_alert(thr->log,
139                           "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD",
140                           u, len, d);
141             return NXT_ERROR;
142         }
143     }
144 
145     /* Test all overlong UTF-8. */
146 
147     for (i = NXT_UTF8_START_TEST; i < 256; i++) {
148         utf8[0] = i;
149 
150         if (nxt_utf8_overlong(thr, utf8, 1) != NXT_OK) {
151             return NXT_ERROR;
152         }
153 
154         for (k = 0; k < 256; k++) {
155             utf8[1] = k;
156 
157             if (nxt_utf8_overlong(thr, utf8, 2) != NXT_OK) {
158                 return NXT_ERROR;
159             }
160 
161             for (l = 0; l < 256; l++) {
162                 utf8[2] = l;
163 
164                 if (nxt_utf8_overlong(thr, utf8, 3) != NXT_OK) {
165                     return NXT_ERROR;
166                 }
167 
168                 for (m = 0; m < 256; m++) {
169                     utf8[3] = m;
170 
171                     if (nxt_utf8_overlong(thr, utf8, 4) != NXT_OK) {
172                         return NXT_ERROR;
173                     }
174                 }
175             }
176         }
177     }
178 
179     n = nxt_utf8_casecmp((u_char *) "ABC АБВ ΑΒΓ",
180                          (u_char *) "abc абв αβγ",
181                              sizeof("ABC АБВ ΑΒΓ") - 1,
182                              sizeof("abc абв αβγ") - 1);
183 
184     if (n != 0) {
185         nxt_log_alert(thr->log, "nxt_utf8_casecmp() failed");
186         return NXT_ERROR;
187     }
188 
189     nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 test passed");
190     return NXT_OK;
191 }
192