xref: /unit/src/nxt_utf8.h (revision 611:323e11065f83)
1 
2 /*
3  * Copyright (C) Igor Sysoev
4  * Copyright (C) NGINX, Inc.
5  */
6 
7 #ifndef _NXT_UTF8_H_INCLUDED_
8 #define _NXT_UTF8_H_INCLUDED_
9 
10 
/*
 * The maximum valid Unicode character is 0x0010FFFF, so the difference
 * between any two Unicode characters cannot exceed 0x0010FFFF.  The value
 * 0x0EEE0EEE lies outside that range and therefore can be used to indicate
 * a UTF-8 encoding error without colliding with a real difference.
 */
#define NXT_UTF8_SORT_INVALID  0x0EEE0EEE
17 
18 
19 NXT_EXPORT u_char *nxt_utf8_encode(u_char *p, uint32_t u);
20 NXT_EXPORT uint32_t nxt_utf8_decode(const u_char **start, const u_char *end);
21 NXT_EXPORT uint32_t nxt_utf8_decode2(const u_char **start, const u_char *end);
22 NXT_EXPORT nxt_int_t nxt_utf8_casecmp(const u_char *start1,
23     const u_char *start2, size_t len1, size_t len2);
24 NXT_EXPORT uint32_t nxt_utf8_lowcase(const u_char **start, const u_char *end);
25 NXT_EXPORT ssize_t nxt_utf8_length(const u_char *p, size_t len);
26 NXT_EXPORT nxt_bool_t nxt_utf8_is_valid(const u_char *p, size_t len);
27 
28 
29 /* nxt_utf8_next() expects a valid UTF-8 string. */
30 
31 nxt_inline const u_char *
nxt_utf8_next(const u_char * p,const u_char * end)32 nxt_utf8_next(const u_char *p, const u_char *end)
33 {
34     u_char  c;
35 
36     c = *p++;
37 
38     if ((c & 0x80) != 0) {
39 
40         do {
41             /*
42              * The first UTF-8 byte is either 0xxxxxxx or 11xxxxxx.
43              * The next UTF-8 bytes are 10xxxxxx.
44              */
45             c = *p;
46 
47             if ((c & 0xC0) != 0x80) {
48                 return p;
49             }
50 
51             p++;
52 
53         } while (p < end);
54     }
55 
56     return p;
57 }
58 
59 
60 #endif /* _NXT_UTF8_H_INCLUDED_ */
61