1384Szelenkov@nginx.com 2384Szelenkov@nginx.com /* 3384Szelenkov@nginx.com * Copyright (C) Igor Sysoev 4384Szelenkov@nginx.com * Copyright (C) NGINX, Inc. 5384Szelenkov@nginx.com */ 6384Szelenkov@nginx.com 7384Szelenkov@nginx.com #include <nxt_main.h> 8384Szelenkov@nginx.com #include "nxt_tests.h" 9384Szelenkov@nginx.com 10384Szelenkov@nginx.com 11611Svbart@nginx.com #define NXT_UTF8_START_TEST 0xC2 12384Szelenkov@nginx.com //#define NXT_UTF8_START_TEST 0 13384Szelenkov@nginx.com 14384Szelenkov@nginx.com 15384Szelenkov@nginx.com static u_char invalid[] = { 16384Szelenkov@nginx.com 17611Svbart@nginx.com /* Invalid first byte less than 0xC2. */ 18384Szelenkov@nginx.com 1, 0x80, 0x00, 0x00, 0x00, 19611Svbart@nginx.com 1, 0xC0, 0x00, 0x00, 0x00, 20611Svbart@nginx.com 2, 0xC0, 0x00, 0x00, 0x00, 21611Svbart@nginx.com 3, 0xC0, 0x00, 0x00, 0x00, 22611Svbart@nginx.com 4, 0xC0, 0x00, 0x00, 0x00, 23384Szelenkov@nginx.com 24*613Svbart@nginx.com /* Invalid 0x110000 value. */ 25611Svbart@nginx.com 4, 0xF4, 0x90, 0x80, 0x80, 26384Szelenkov@nginx.com 27384Szelenkov@nginx.com /* Incomplete length. */ 28611Svbart@nginx.com 2, 0xE0, 0xAF, 0xB5, 0x00, 29384Szelenkov@nginx.com 30384Szelenkov@nginx.com /* Overlong values. */ 31611Svbart@nginx.com 2, 0xC0, 0x80, 0x00, 0x00, 32611Svbart@nginx.com 2, 0xC1, 0xB3, 0x00, 0x00, 33611Svbart@nginx.com 3, 0xE0, 0x80, 0x80, 0x00, 34611Svbart@nginx.com 3, 0xE0, 0x81, 0xB3, 0x00, 35611Svbart@nginx.com 3, 0xE0, 0x90, 0x9A, 0x00, 36611Svbart@nginx.com 4, 0xF0, 0x80, 0x8A, 0x80, 37611Svbart@nginx.com 4, 0xF0, 0x80, 0x81, 0xB3, 38611Svbart@nginx.com 4, 0xF0, 0x80, 0xAF, 0xB5, 39384Szelenkov@nginx.com }; 40384Szelenkov@nginx.com 41384Szelenkov@nginx.com 42384Szelenkov@nginx.com static nxt_int_t 43384Szelenkov@nginx.com nxt_utf8_overlong(nxt_thread_t *thr, u_char *overlong, size_t len) 44384Szelenkov@nginx.com { 45384Szelenkov@nginx.com u_char *p, utf8[4]; 46384Szelenkov@nginx.com size_t size; 47384Szelenkov@nginx.com uint32_t u, d; 48384Szelenkov@nginx.com nxt_uint_t i; 49384Szelenkov@nginx.com const u_char *pp; 50384Szelenkov@nginx.com 51384Szelenkov@nginx.com pp = overlong; 52384Szelenkov@nginx.com 53384Szelenkov@nginx.com d = nxt_utf8_decode(&pp, overlong + len); 54384Szelenkov@nginx.com 55384Szelenkov@nginx.com len = pp - overlong; 56384Szelenkov@nginx.com 57611Svbart@nginx.com if (d != 0xFFFFFFFF) { 58384Szelenkov@nginx.com p = nxt_utf8_encode(utf8, d); 59384Szelenkov@nginx.com 60384Szelenkov@nginx.com size = (p != NULL) ? p - utf8 : 0; 61384Szelenkov@nginx.com 62384Szelenkov@nginx.com if (len != size || nxt_memcmp(overlong, utf8, size) != 0) { 63384Szelenkov@nginx.com 64384Szelenkov@nginx.com u = 0; 65384Szelenkov@nginx.com for (i = 0; i < len; i++) { 66384Szelenkov@nginx.com u = (u << 8) + overlong[i]; 67384Szelenkov@nginx.com } 68384Szelenkov@nginx.com 69384Szelenkov@nginx.com nxt_log_alert(thr->log, 70384Szelenkov@nginx.com "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD, %uz", 71384Szelenkov@nginx.com u, len, d, size); 72384Szelenkov@nginx.com 73384Szelenkov@nginx.com return NXT_ERROR; 74384Szelenkov@nginx.com } 75384Szelenkov@nginx.com } 76384Szelenkov@nginx.com 77384Szelenkov@nginx.com return NXT_OK; 78384Szelenkov@nginx.com } 79384Szelenkov@nginx.com 80384Szelenkov@nginx.com 81384Szelenkov@nginx.com nxt_int_t 82384Szelenkov@nginx.com nxt_utf8_test(nxt_thread_t *thr) 83384Szelenkov@nginx.com { 84384Szelenkov@nginx.com u_char *p, utf8[4]; 85384Szelenkov@nginx.com size_t len; 86384Szelenkov@nginx.com int32_t n; 87384Szelenkov@nginx.com uint32_t u, d; 88384Szelenkov@nginx.com nxt_uint_t i, k, l, m; 89384Szelenkov@nginx.com const u_char *pp; 90384Szelenkov@nginx.com 91384Szelenkov@nginx.com nxt_thread_time_update(thr); 92384Szelenkov@nginx.com 93384Szelenkov@nginx.com nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 test started"); 94384Szelenkov@nginx.com 95384Szelenkov@nginx.com /* Test valid UTF-8. */ 96384Szelenkov@nginx.com 97384Szelenkov@nginx.com for (u = 0; u < 0x110000; u++) { 98384Szelenkov@nginx.com 99384Szelenkov@nginx.com p = nxt_utf8_encode(utf8, u); 100384Szelenkov@nginx.com 101384Szelenkov@nginx.com if (p == NULL) { 102384Szelenkov@nginx.com nxt_log_alert(thr->log, "nxt_utf8_encode(%05uxD) failed", u); 103384Szelenkov@nginx.com return NXT_ERROR; 104384Szelenkov@nginx.com } 105384Szelenkov@nginx.com 106384Szelenkov@nginx.com pp = utf8; 107384Szelenkov@nginx.com 108384Szelenkov@nginx.com d = nxt_utf8_decode(&pp, p); 109384Szelenkov@nginx.com 110384Szelenkov@nginx.com if (u != d) { 111384Szelenkov@nginx.com nxt_log_alert(thr->log, "nxt_utf8_decode(%05uxD) failed: %05uxD", 112384Szelenkov@nginx.com u, d); 113384Szelenkov@nginx.com return NXT_ERROR; 114384Szelenkov@nginx.com } 115384Szelenkov@nginx.com } 116384Szelenkov@nginx.com 117384Szelenkov@nginx.com /* Test some invalid UTF-8. */ 118384Szelenkov@nginx.com 119384Szelenkov@nginx.com for (i = 0; i < sizeof(invalid); i += 5) { 120384Szelenkov@nginx.com 121384Szelenkov@nginx.com len = invalid[i]; 122384Szelenkov@nginx.com utf8[0] = invalid[i + 1]; 123384Szelenkov@nginx.com utf8[1] = invalid[i + 2]; 124384Szelenkov@nginx.com utf8[2] = invalid[i + 3]; 125384Szelenkov@nginx.com utf8[3] = invalid[i + 4]; 126384Szelenkov@nginx.com 127384Szelenkov@nginx.com pp = utf8; 128384Szelenkov@nginx.com 129384Szelenkov@nginx.com d = nxt_utf8_decode(&pp, utf8 + len); 130384Szelenkov@nginx.com 131611Svbart@nginx.com if (d != 0xFFFFFFFF) { 132384Szelenkov@nginx.com 133384Szelenkov@nginx.com u = 0; 134384Szelenkov@nginx.com for (i = 0; i < len; i++) { 135384Szelenkov@nginx.com u = (u << 8) + utf8[i]; 136384Szelenkov@nginx.com } 137384Szelenkov@nginx.com 138384Szelenkov@nginx.com nxt_log_alert(thr->log, 139384Szelenkov@nginx.com "nxt_utf8_decode(%05uxD, %uz) failed: %05uxD", 140384Szelenkov@nginx.com u, len, d); 141384Szelenkov@nginx.com return NXT_ERROR; 142384Szelenkov@nginx.com } 143384Szelenkov@nginx.com } 144384Szelenkov@nginx.com 145384Szelenkov@nginx.com /* Test all overlong UTF-8. */ 146384Szelenkov@nginx.com 147384Szelenkov@nginx.com for (i = NXT_UTF8_START_TEST; i < 256; i++) { 148384Szelenkov@nginx.com utf8[0] = i; 149384Szelenkov@nginx.com 150384Szelenkov@nginx.com if (nxt_utf8_overlong(thr, utf8, 1) != NXT_OK) { 151384Szelenkov@nginx.com return NXT_ERROR; 152384Szelenkov@nginx.com } 153384Szelenkov@nginx.com 154384Szelenkov@nginx.com for (k = 0; k < 256; k++) { 155384Szelenkov@nginx.com utf8[1] = k; 156384Szelenkov@nginx.com 157384Szelenkov@nginx.com if (nxt_utf8_overlong(thr, utf8, 2) != NXT_OK) { 158384Szelenkov@nginx.com return NXT_ERROR; 159384Szelenkov@nginx.com } 160384Szelenkov@nginx.com 161384Szelenkov@nginx.com for (l = 0; l < 256; l++) { 162384Szelenkov@nginx.com utf8[2] = l; 163384Szelenkov@nginx.com 164384Szelenkov@nginx.com if (nxt_utf8_overlong(thr, utf8, 3) != NXT_OK) { 165384Szelenkov@nginx.com return NXT_ERROR; 166384Szelenkov@nginx.com } 167384Szelenkov@nginx.com 168384Szelenkov@nginx.com for (m = 0; m < 256; m++) { 169384Szelenkov@nginx.com utf8[3] = m; 170384Szelenkov@nginx.com 171384Szelenkov@nginx.com if (nxt_utf8_overlong(thr, utf8, 4) != NXT_OK) { 172384Szelenkov@nginx.com return NXT_ERROR; 173384Szelenkov@nginx.com } 174384Szelenkov@nginx.com } 175384Szelenkov@nginx.com } 176384Szelenkov@nginx.com } 177384Szelenkov@nginx.com } 178384Szelenkov@nginx.com 179384Szelenkov@nginx.com n = nxt_utf8_casecmp((u_char *) "ABC АБВ ΑΒΓ", 180384Szelenkov@nginx.com (u_char *) "abc абв αβγ", 181384Szelenkov@nginx.com sizeof("ABC АБВ ΑΒΓ") - 1, 182384Szelenkov@nginx.com sizeof("abc абв αβγ") - 1); 183384Szelenkov@nginx.com 184384Szelenkov@nginx.com if (n != 0) { 185384Szelenkov@nginx.com nxt_log_alert(thr->log, "nxt_utf8_casecmp() failed"); 186384Szelenkov@nginx.com return NXT_ERROR; 187384Szelenkov@nginx.com } 188384Szelenkov@nginx.com 189384Szelenkov@nginx.com nxt_log_error(NXT_LOG_NOTICE, thr->log, "utf8 test passed"); 190384Szelenkov@nginx.com return NXT_OK; 191384Szelenkov@nginx.com } 192