1 2 /* 3 * Copyright (C) Igor Sysoev 4 * Copyright (C) NGINX, Inc. 5 */ 6 7 #include <nxt_main.h> 8 #include "nxt_tests.h" 9 10 11 #define NXT_UTF8_START_TEST 0xC2 12 //#define NXT_UTF8_START_TEST 0 13 14 15 static u_char invalid[] = { 16 17 /* Invalid first byte less than 0xC2. */ 18 1, 0x80, 0x00, 0x00, 0x00, 19 1, 0xC0, 0x00, 0x00, 0x00, 20 2, 0xC0, 0x00, 0x00, 0x00, 21 3, 0xC0, 0x00, 0x00, 0x00, 22 4, 0xC0, 0x00, 0x00, 0x00, 23 24 /* Invalid 0x0x110000 value. */ 25 4, 0xF4, 0x90, 0x80, 0x80, 26 27 /* Incomplete length. */ 28 2, 0xE0, 0xAF, 0xB5, 0x00, 29 30 /* Overlong values. */ 31 2, 0xC0, 0x80, 0x00, 0x00, 32 2, 0xC1, 0xB3, 0x00, 0x00, 33 3, 0xE0, 0x80, 0x80, 0x00, 34 3, 0xE0, 0x81, 0xB3, 0x00, 35 3, 0xE0, 0x90, 0x9A, 0x00, 36 4, 0xF0, 0x80, 0x8A, 0x80, 37 4, 0xF0, 0x80, 0x81, 0xB3, 38 4, 0xF0, 0x80, 0xAF, 0xB5, 39 }; 40 41 42 static nxt_int_t 43 nxt_utf8_overlong(nxt_thread_t *thr, u_char *overlong, size_t len) 44 { 45 u_char *p, utf8[4]; 46 size_t size; 47 uint32_t u, d; 48 nxt_uint_t i; 49 const u_char *pp; 50 51 pp = overlong; 52 53 d = nxt_utf8_decode(&pp, overlong + len); 54 55 len = pp - overlong; 56 57 if (d != 0xFFFFFFFF) { 58 p = nxt_utf8_encode(utf8, d); 59 60 size = (p != NULL) ? p - utf8 : 0; 61 62 if (len != size || nxt_memcmp(overlong, utf8, size) != 0) { 63 64 u = 0; 65 for (i = 0; i < len; i++) { 66 u = (u << 8) + overlong[i]; 67 } 68 69 nxt_log_alert(thr->log, 70 "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD, %uz", 71 u, len, d, size); 72 73 return NXT_ERROR; 74 } 75 } 76 77 return NXT_OK; 78 } 79 80 81 nxt_int_t 82 nxt_utf8_test(nxt_thread_t *thr) 83 { 84 u_char *p, utf8[4]; 85 size_t len; 86 int32_t n; 87 uint32_t u, d; 88 nxt_uint_t i, k, l, m; 89 const u_char *pp; 90 91 nxt_thread_time_update(thr); 92 93 nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 test started"); 94 95 /* Test valid UTF-8. */ 96 97 for (u = 0; u < 0x110000; u++) { 98 99 p = nxt_utf8_encode(utf8, u); 100 101 if (p == NULL) { 102 nxt_log_alert(thr->log, "nxt_utf8_encode(%05uxD) failed", u); 103 return NXT_ERROR; 104 } 105 106 pp = utf8; 107 108 d = nxt_utf8_decode(&pp, p); 109 110 if (u != d) { 111 nxt_log_alert(thr->log, "nxt_utf8_decode(%05uxD) failed: %05uxD", 112 u, d); 113 return NXT_ERROR; 114 } 115 } 116 117 /* Test some invalid UTF-8. */ 118 119 for (i = 0; i < sizeof(invalid); i += 5) { 120 121 len = invalid[i]; 122 utf8[0] = invalid[i + 1]; 123 utf8[1] = invalid[i + 2]; 124 utf8[2] = invalid[i + 3]; 125 utf8[3] = invalid[i + 4]; 126 127 pp = utf8; 128 129 d = nxt_utf8_decode(&pp, utf8 + len); 130 131 if (d != 0xFFFFFFFF) { 132 133 u = 0; 134 for (i = 0; i < len; i++) { 135 u = (u << 8) + utf8[i]; 136 } 137 138 nxt_log_alert(thr->log, 139 "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD", 140 u, len, d); 141 return NXT_ERROR; 142 } 143 } 144 145 /* Test all overlong UTF-8. */ 146 147 for (i = NXT_UTF8_START_TEST; i < 256; i++) { 148 utf8[0] = i; 149 150 if (nxt_utf8_overlong(thr, utf8, 1) != NXT_OK) { 151 return NXT_ERROR; 152 } 153 154 for (k = 0; k < 256; k++) { 155 utf8[1] = k; 156 157 if (nxt_utf8_overlong(thr, utf8, 2) != NXT_OK) { 158 return NXT_ERROR; 159 } 160 161 for (l = 0; l < 256; l++) { 162 utf8[2] = l; 163 164 if (nxt_utf8_overlong(thr, utf8, 3) != NXT_OK) { 165 return NXT_ERROR; 166 } 167 168 for (m = 0; m < 256; m++) { 169 utf8[3] = m; 170 171 if (nxt_utf8_overlong(thr, utf8, 4) != NXT_OK) { 172 return NXT_ERROR; 173 } 174 } 175 } 176 } 177 } 178 179 n = nxt_utf8_casecmp((u_char *) "ABC АБВ ΑΒΓ", 180 (u_char *) "abc абв αβγ", 181 sizeof("ABC АБВ ΑΒΓ") - 1, 182 sizeof("abc абв αβγ") - 1); 183 184 if (n != 0) { 185 nxt_log_alert(thr->log, "nxt_utf8_casecmp() failed"); 186 return NXT_ERROR; 187 } 188 189 nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 test passed"); 190 return NXT_OK; 191 } 192