1
2 /*
3 * Copyright (C) Igor Sysoev
4 * Copyright (C) NGINX, Inc.
5 */
6
7 #include <nxt_main.h>
8 #include "nxt_tests.h"
9
10
/*
 * First value of the leading byte used by the exhaustive byte-sequence
 * sweep in nxt_utf8_test().  Starting at 0xC2 skips the single-byte range
 * and the leading bytes below 0xC2, which the "invalid" table samples
 * separately.
 */
#define NXT_UTF8_START_TEST 0xC2
/* Alternative for local debugging: start the sweep at 0 to try every byte. */
//#define NXT_UTF8_START_TEST 0
13
14
15 static u_char invalid[] = {
16
17 /* Invalid first byte less than 0xC2. */
18 1, 0x80, 0x00, 0x00, 0x00,
19 1, 0xC0, 0x00, 0x00, 0x00,
20 2, 0xC0, 0x00, 0x00, 0x00,
21 3, 0xC0, 0x00, 0x00, 0x00,
22 4, 0xC0, 0x00, 0x00, 0x00,
23
24 /* Invalid 0x110000 value. */
25 4, 0xF4, 0x90, 0x80, 0x80,
26
27 /* Incomplete length. */
28 2, 0xE0, 0xAF, 0xB5, 0x00,
29
30 /* Overlong values. */
31 2, 0xC0, 0x80, 0x00, 0x00,
32 2, 0xC1, 0xB3, 0x00, 0x00,
33 3, 0xE0, 0x80, 0x80, 0x00,
34 3, 0xE0, 0x81, 0xB3, 0x00,
35 3, 0xE0, 0x90, 0x9A, 0x00,
36 4, 0xF0, 0x80, 0x8A, 0x80,
37 4, 0xF0, 0x80, 0x81, 0xB3,
38 4, 0xF0, 0x80, 0xAF, 0xB5,
39 };
40
41
42 static nxt_int_t
nxt_utf8_overlong(nxt_thread_t * thr,u_char * overlong,size_t len)43 nxt_utf8_overlong(nxt_thread_t *thr, u_char *overlong, size_t len)
44 {
45 u_char *p, utf8[4];
46 size_t size;
47 uint32_t u, d;
48 nxt_uint_t i;
49 const u_char *pp;
50
51 pp = overlong;
52
53 d = nxt_utf8_decode(&pp, overlong + len);
54
55 len = pp - overlong;
56
57 if (d != 0xFFFFFFFF) {
58 p = nxt_utf8_encode(utf8, d);
59
60 size = (p != NULL) ? p - utf8 : 0;
61
62 if (len != size || memcmp(overlong, utf8, size) != 0) {
63
64 u = 0;
65 for (i = 0; i < len; i++) {
66 u = (u << 8) + overlong[i];
67 }
68
69 nxt_log_alert(thr->log,
70 "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD, %uz",
71 u, len, d, size);
72
73 return NXT_ERROR;
74 }
75 }
76
77 return NXT_OK;
78 }
79
80
81 nxt_int_t
nxt_utf8_test(nxt_thread_t * thr)82 nxt_utf8_test(nxt_thread_t *thr)
83 {
84 u_char *p, utf8[4];
85 size_t len;
86 int32_t n;
87 uint32_t u, d;
88 nxt_uint_t i, k, l, m;
89 const u_char *pp;
90
91 nxt_thread_time_update(thr);
92
93 nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 test started");
94
95 /* Test valid UTF-8. */
96
97 for (u = 0; u < 0x110000; u++) {
98
99 p = nxt_utf8_encode(utf8, u);
100
101 if (p == NULL) {
102 nxt_log_alert(thr->log, "nxt_utf8_encode(%05uxD) failed", u);
103 return NXT_ERROR;
104 }
105
106 pp = utf8;
107
108 d = nxt_utf8_decode(&pp, p);
109
110 if (u != d) {
111 nxt_log_alert(thr->log, "nxt_utf8_decode(%05uxD) failed: %05uxD",
112 u, d);
113 return NXT_ERROR;
114 }
115 }
116
117 /* Test some invalid UTF-8. */
118
119 for (i = 0; i < sizeof(invalid); i += 5) {
120
121 len = invalid[i];
122 utf8[0] = invalid[i + 1];
123 utf8[1] = invalid[i + 2];
124 utf8[2] = invalid[i + 3];
125 utf8[3] = invalid[i + 4];
126
127 pp = utf8;
128
129 d = nxt_utf8_decode(&pp, utf8 + len);
130
131 if (d != 0xFFFFFFFF) {
132
133 u = 0;
134 for (i = 0; i < len; i++) {
135 u = (u << 8) + utf8[i];
136 }
137
138 nxt_log_alert(thr->log,
139 "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD",
140 u, len, d);
141 return NXT_ERROR;
142 }
143 }
144
145 /* Test all overlong UTF-8. */
146
147 for (i = NXT_UTF8_START_TEST; i < 256; i++) {
148 utf8[0] = i;
149
150 if (nxt_utf8_overlong(thr, utf8, 1) != NXT_OK) {
151 return NXT_ERROR;
152 }
153
154 for (k = 0; k < 256; k++) {
155 utf8[1] = k;
156
157 if (nxt_utf8_overlong(thr, utf8, 2) != NXT_OK) {
158 return NXT_ERROR;
159 }
160
161 for (l = 0; l < 256; l++) {
162 utf8[2] = l;
163
164 if (nxt_utf8_overlong(thr, utf8, 3) != NXT_OK) {
165 return NXT_ERROR;
166 }
167
168 for (m = 0; m < 256; m++) {
169 utf8[3] = m;
170
171 if (nxt_utf8_overlong(thr, utf8, 4) != NXT_OK) {
172 return NXT_ERROR;
173 }
174 }
175 }
176 }
177 }
178
179 n = nxt_utf8_casecmp((u_char *) "ABC АБВ ΑΒΓ",
180 (u_char *) "abc абв αβγ",
181 nxt_length("ABC АБВ ΑΒΓ"),
182 nxt_length("abc абв αβγ"));
183
184 if (n != 0) {
185 nxt_log_alert(thr->log, "nxt_utf8_casecmp() failed");
186 return NXT_ERROR;
187 }
188
189 nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 test passed");
190 return NXT_OK;
191 }
192