/*
 * Minimal UTF-8 helpers for NUL-terminated strings.
 *
 * A "character" here is one encoded code point: a lead byte followed by
 * zero or more continuation bytes (bit pattern 10xxxxxx). The input is not
 * validated; both scanners simply treat every non-continuation byte as the
 * start of a character and never read past the terminating NUL.
 */

/**
 * Return the encoded length, in bytes, announced by a UTF-8 lead byte.
 *
 * @param c  First byte of an encoded character.
 * @return 1 for an ASCII byte (< 0x80), 0 when c is a continuation byte
 *         (10xxxxxx), otherwise the number of leading 1 bits in c
 *         (2 for 110xxxxx, 3 for 1110xxxx, 4 for 11110xxx). Bytes 0xF8..0xFF
 *         are not rejected and yield 5..8.
 */
int utf8_char_size(const unsigned char c)
{
    if (c < 0x80) {
        return 1;
    }
    if ((c & 0xc0) == 0x80) {
        return 0;
    }

    /* Count the run of 1 bits starting at the most significant bit. */
    int num = 0;
    for (int mask = 0x80; mask & c; mask >>= 1) {
        ++num;
    }
    return num;
}

/**
 * Count the characters (not bytes) in a NUL-terminated UTF-8 string.
 *
 * @param utf8  String to measure; a null pointer is treated as empty.
 * @return Number of non-continuation bytes before the terminator.
 */
int utf8_len(const char *utf8)
{
    if (!utf8) {
        return 0;
    }

    int count = 0;
    /*
     * Every byte is visited exactly once, so trailing continuation bytes
     * can neither inflate the count nor push the cursor past the NUL.
     */
    for (const unsigned char *p = (const unsigned char *)utf8; *p; ++p) {
        if ((*p & 0xc0) != 0x80) {
            ++count;
        }
    }
    return count;
}

/**
 * Find the byte offset at which the index-th character starts.
 *
 * @param utf8   NUL-terminated UTF-8 string; may be a null pointer.
 * @param index  Zero-based character index.
 * @return Byte offset from utf8 of the character's first byte, or -1 when
 *         utf8 is null, index is negative, or index is not less than the
 *         number of characters in the string.
 */
int utf8_char_at(const char *utf8, int index)
{
    if (!utf8 || index < 0) {
        return -1;
    }

    const unsigned char *start = (const unsigned char *)utf8;
    int count = 0;
    for (const unsigned char *p = start; *p; ++p) {
        /*
         * Stepping one byte at a time (instead of jumping by the length the
         * lead byte announces) guarantees progress on stray continuation
         * bytes and stops at the NUL even if a sequence is truncated.
         */
        if ((*p & 0xc0) == 0x80) {
            continue;
        }
        if (count == index) {
            return (int)(p - start);
        }
        ++count;
    }
    return -1;
}
int utf8_char_size(const unsigned char c) // 根据首字节取得该 UTF-8 字符的字节长度;若传入的是续字节(10xxxxxx)则返回 0。
例子: char *str="中文"; utf8_char_size(*str) = 3; utf8_char_size(*++str) = 0
int utf8_len(const char *utf8) // 取得字符串的长度(按字符计数,而不是字节数),类似 wcslen(wchar_t)
例子: utf8_len("hello你好,world") = 13(该字符串的字节长度 strlen 为 17)
int utf8_char_at(const char *utf8, int index) // 取得 UTF-8 字符串中第 index 个字符(从 0 开始)的字节偏移值;index 超出范围时返回 -1
例子: utf8_char_at("hello你好,world", 7) = 11