1
2 /*
3 * Copyright (C) Igor Sysoev
4 * Copyright (C) NGINX, Inc.
5 */
6
7 #ifndef _NXT_UTF8_H_INCLUDED_
8 #define _NXT_UTF8_H_INCLUDED_
9
10
11 /*
 * Since the maximum valid Unicode character is 0x0010FFFF, the difference
 * between any two Unicode characters cannot exceed 0x0010FFFF, so the
 * larger value 0x0EEE0EEE can never be a real difference and can be used
 * to indicate a UTF-8 encoding error.
15 */
16 #define NXT_UTF8_SORT_INVALID 0x0EEE0EEE
17
18
19 NXT_EXPORT u_char *nxt_utf8_encode(u_char *p, uint32_t u);
20 NXT_EXPORT uint32_t nxt_utf8_decode(const u_char **start, const u_char *end);
21 NXT_EXPORT uint32_t nxt_utf8_decode2(const u_char **start, const u_char *end);
22 NXT_EXPORT nxt_int_t nxt_utf8_casecmp(const u_char *start1,
23 const u_char *start2, size_t len1, size_t len2);
24 NXT_EXPORT uint32_t nxt_utf8_lowcase(const u_char **start, const u_char *end);
25 NXT_EXPORT ssize_t nxt_utf8_length(const u_char *p, size_t len);
26 NXT_EXPORT nxt_bool_t nxt_utf8_is_valid(const u_char *p, size_t len);
27
28
29 /* nxt_utf8_next() expects a valid UTF-8 string. */
30
31 nxt_inline const u_char *
nxt_utf8_next(const u_char * p,const u_char * end)32 nxt_utf8_next(const u_char *p, const u_char *end)
33 {
34 u_char c;
35
36 c = *p++;
37
38 if ((c & 0x80) != 0) {
39
40 do {
41 /*
42 * The first UTF-8 byte is either 0xxxxxxx or 11xxxxxx.
43 * The next UTF-8 bytes are 10xxxxxx.
44 */
45 c = *p;
46
47 if ((c & 0xC0) != 0x80) {
48 return p;
49 }
50
51 p++;
52
53 } while (p < end);
54 }
55
56 return p;
57 }
58
59
60 #endif /* _NXT_UTF8_H_INCLUDED_ */
61