UTF-8字符串对应的辅助函数

 

/*
 * Report the byte length of a UTF-8 sequence, judged from one byte of it.
 *
 * c: the byte to classify.
 *
 * Returns 1 for an ASCII byte (high bit clear), 0 for a continuation byte
 * (bit pattern 10xxxxxx), and otherwise the number of consecutive one-bits
 * counted from the most significant bit downwards (2 for 110xxxxx,
 * 3 for 1110xxxx, and so on, up to 8 for 0xFF).
 */
int
utf8_char_size(const unsigned char c)
{
    int ones = 0;
    unsigned int probe;

    if ((c & 0x80) == 0) {
        return 1;
    }
    if ((c & 0xc0) == 0x80) {
        return 0;
    }

    /* Walk a single-bit probe down from bit 7 until it meets a zero bit
       in c, or runs off the low end of the byte. */
    for (probe = 0x80; (probe & c) != 0; probe >>= 1) {
        ones++;
    }

    return ones;
}

/*
 * Count the characters in a UTF-8 string.
 *
 * utf8: NUL-terminated UTF-8 string; a null pointer is treated as empty.
 *
 * Returns the number of bytes in the string that are not continuation bytes
 * (10xxxxxx). For well-formed UTF-8 that is the number of encoded
 * characters, not the number of bytes.
 *
 * Every byte is tested against the terminator before it is examined, so the
 * scan stops exactly at the NUL even when the string ends with a multi-byte
 * character.
 */
int
utf8_len(const char *utf8)
{
    const unsigned char *input = (const unsigned char *)utf8;
    int count = 0;

    if (!utf8) {
        return 0;
    }

    for (; *input != '\0'; ++input) {
        /* Only the first byte of each sequence contributes to the count. */
        if ((*input & 0xc0) != 0x80) {
            ++count;
        }
    }

    return count;
}

/*
 * Translate a character index into a byte offset within a UTF-8 string.
 *
 * utf8:  NUL-terminated UTF-8 string; a null pointer yields -1.
 * index: zero-based character index.
 *
 * Returns the byte offset of the first byte of character number `index`,
 * or -1 when the string holds fewer than index + 1 characters (which
 * includes every negative index).
 *
 * A character starts at every byte that is not a continuation byte
 * (10xxxxxx). The scan advances one byte at a time instead of jumping by the
 * length announced in the lead byte, so a truncated sequence at the end of
 * the string cannot carry the cursor past the terminating NUL, and a stray
 * continuation byte cannot stall the loop.
 */
int
utf8_char_at(const char *utf8, int index)
{
    const unsigned char *start = (const unsigned char *)utf8;
    const unsigned char *input;
    int count = 0;

    if (!utf8 || index < 0) {
        return -1;
    }

    for (input = start; *input != '\0'; ++input) {
        if ((*input & 0xc0) == 0x80) {
            continue;   /* still inside the previous character */
        }
        if (count == index) {
            return (int)(input - start);
        }
        ++count;
    }

    return -1;
}
 

 

 

int utf8_char_size(const unsigned char c)  //根据首字节取得utf-8字符的字节长度;对后续字节(10xxxxxx)返回0.

例子: char *str="中文";utf8_char_size(*str)=3;utf8_char_size(*++str)=0

int utf8_len(const char *utf8)  //取得字符串的字符个数(而非字节数),类似wcslen(wchar_t)

例子:  utf8_len("hello你好,world") = 13 (该字符串共13个字符,占17个字节)

int utf8_char_at(const char *utf8, int index) // 取得第index个字符(从0开始)在utf-8字符串中的字节偏移值,index超出范围时返回-1

例子:utf8_char_at("hello你好,world", 7) = 11