UTF-8字符串对应的辅助函数

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
/**
 * Classify a single byte of a UTF-8 stream by its leading bits.
 *
 * @param c  The byte to inspect.
 * @return   1 when the top bit is clear (single-byte character),
 *           0 when the byte has the form 10xxxxxx (continuation byte),
 *           otherwise the number of consecutive 1 bits counted from the
 *           most significant bit (2 for 110xxxxx, 3 for 1110xxxx, ...).
 *           No upper bound is enforced, so 0xFF yields 8.
 */
int
utf8_char_size(const unsigned char c)
{
    int length = 0;
    unsigned int bit;

    /* 0xxxxxxx: a one-byte character. */
    if ((c & 0x80) == 0) {
        return 1;
    }

    /* 10xxxxxx: a continuation byte, not the start of a character. */
    if ((c & 0x40) == 0) {
        return 0;
    }

    /* Count the run of 1 bits starting at the top of the byte. */
    for (bit = 0x80; (c & bit) != 0; bit >>= 1) {
        length++;
    }

    return length;
}
 
/**
 * Count the characters (not bytes) in a NUL-terminated UTF-8 string.
 *
 * Every byte that is not a continuation byte (10xxxxxx) is counted as the
 * start of one character. The scan advances one byte at a time, so it
 * always stops exactly at the terminator, even when the string ends with a
 * multi-byte character or with a truncated sequence.
 *
 * @param utf8  NUL-terminated UTF-8 string; a null pointer is treated as
 *              an empty string.
 * @return      Number of characters in the string.
 */
int
utf8_len(const char *utf8)
{
    const unsigned char *input = (const unsigned char *)utf8;
    int count = 0;

    if (!utf8) {
        return 0;
    }

    for (; *input; ++input) {
        /* Continuation bytes belong to the character already counted. */
        if ((*input & 0xc0) != 0x80) {
            ++count;
        }
    }

    return count;
}
 
/**
 * Find the byte offset of the character at a given character index in a
 * NUL-terminated UTF-8 string.
 *
 * Characters are identified by their first byte: any byte that is not a
 * continuation byte (10xxxxxx) starts a character. The scan moves one byte
 * at a time instead of jumping by a decoded sequence length, so it cannot
 * stall on a stray continuation byte and cannot step over the terminator
 * when the final sequence is truncated.
 *
 * @param utf8   NUL-terminated UTF-8 string; a null pointer yields -1.
 * @param index  Zero-based character index.
 * @return       Byte offset from the start of the string of the first byte
 *               of that character, or -1 if the index is negative or not
 *               less than the number of characters in the string.
 */
int
utf8_char_at(const char *utf8, int index)
{
    const unsigned char *start = (const unsigned char *)utf8;
    const unsigned char *input = start;
    int count = 0;

    if (!utf8) {
        return -1;
    }

    for (; *input; ++input) {
        /* Continuation bytes never start a character; skip them. */
        if ((*input & 0xc0) == 0x80) {
            continue;
        }
        if (count == index) {
            return (int)(input - start);
        }
        ++count;
    }

    return -1;
}

 

 

int utf8_char_size(const unsigned char c)  //取得utf-8字符的长度.

例子: char *str="中文";utf8_char_size(*str)=3;utf8_char_size(*++str)=0

int utf8_len(const char *utf8)  //取得字符串的长度(按字符计数),类似wcslen(wchar_t)

例子:  utf8_len("hello你好,world") = 13 (按字符计数,不是字节数)

int utf8_char_at(const char *utf8, int index) // 取得第index个字符在UTF-8字符串中的字节偏移值,越界时返回-1

例子:utf8_char_at("hello你好,world", 7) = 11