/*
 * Classify a single byte of a UTF-8 stream by its high bits.
 *
 * Returns:
 *   1  if the top bit is clear (single-byte / ASCII character),
 *   0  if the byte is a continuation byte (bit pattern 10xxxxxx),
 *   otherwise the number of consecutive 1 bits at the top of the byte,
 *   which for a lead byte is the length in bytes of the whole sequence.
 *
 * No validity check is performed: bytes such as 0xFE and 0xFF simply
 * yield their leading-one count (7 and 8).
 */
int
utf8_char_size(const unsigned char c)
{
    unsigned int bit;
    int length = 0;

    if ((c & 0x80) == 0)
        return 1;
    if ((c & 0x40) == 0)
        return 0;

    /* Walk down from the top bit until the first 0 bit (or all 8 bits are used). */
    for (bit = 0x80; (c & bit) != 0; bit >>= 1)
        length++;

    return length;
}
/*
 * Count the characters (not bytes) in a NUL-terminated UTF-8 string.
 *
 * Every byte that is not a continuation byte (10xxxxxx) starts a new
 * character, so the length is the number of such bytes before the
 * terminator. Continuation bytes never add to the count, including stray
 * ones in malformed input.
 *
 * The scan advances exactly one byte per step and tests the terminator on
 * every step, so it never counts the NUL or reads past it, even when the
 * string ends with a multi-byte character.
 *
 * Returns 0 for an empty string or a null pointer.
 */
int
utf8_len(const char *utf8)
{
    const unsigned char *input = (const unsigned char *)utf8;
    int count = 0;

    if (!input)
        return 0;

    for (; *input; ++input) {
        if ((*input & 0xc0) != 0x80)
            ++count;
    }
    return count;
}
/*
 * Return the byte offset of the character at position `index` (0-based,
 * counted in characters) within a NUL-terminated UTF-8 string.
 *
 * Returns -1 when `index` is negative, when the string has no character at
 * that position (including index == character count), or when `utf8` is a
 * null pointer.
 *
 * Each step moves past one byte and then past any continuation bytes
 * (10xxxxxx) that follow it. The terminator is not a continuation byte, so
 * the scan cannot run beyond the end of the string even if the last
 * sequence is truncated, and it always makes progress on stray
 * continuation bytes.
 */
int
utf8_char_at(const char *utf8, int index)
{
    const unsigned char *start = (const unsigned char *)utf8;
    const unsigned char *input = start;
    int count = 0;

    if (!start || index < 0)
        return -1;

    while (*input) {
        if (count == index)
            return (int)(input - start);

        /* Skip the first byte of this character, then its trailing bytes. */
        ++input;
        while ((*input & 0xc0) == 0x80)
            ++input;
        ++count;
    }
    return -1;
}
int utf8_char_size(const unsigned char c) //取得utf-8字符的长度.
例子: char *str="中文";utf8_char_size(*str)=3;utf8_char_size(*++str)=0
int utf8_len(const char *utf8) //取得字符串的长度(字符数,而非字节数),类似wcslen(wchar_t)
例子: utf8_len("hello你好,world") = 13
int utf8_char_at(const char *utf8, int index) // 取得utf-8字符串中第index个字符(从0开始)的真实字节偏移值
例子:utf8_char_at("hello你好,world", 7) = 11