部分参考:
www.360doc.com/content/12/0926/12/1072296_238242301.shtml
https://blog.csdn.net/wyingquan/article/details/3882432
涉及到的文件unicode_to_gb2312_table.bin,请前往我的csdn资源下载中寻找
源码如下(使用方法:将代码直接拷贝到一个 .c 文件中,用 VC 编译即可;运行时需要把 unicode_to_gb2312_table.bin 放在程序所在的本地文件夹下):
// utf8_to_gb2312_in_c.cpp : defines the entry point for the console application.
//
// UTF-8 -> GB2312 conversion driven by a Unicode->GB2312 lookup table that is
// loaded at run time from TABLE_FILE.
//
// The standard headers below replace "stdafx.h" (which only supplied
// <stdio.h>/<tchar.h>) and the non-standard "malloc.h".  If the project is
// built with precompiled headers, add #include "stdafx.h" as the first include.

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#include <tchar.h>
#endif

typedef unsigned char u8;
typedef unsigned short u16;

// One entry of the conversion table: a Unicode code unit and its GB2312 code.
typedef struct unicode_gb
{
    unsigned short unicode;
    unsigned short gb;
} UNICODE_GB;

#define utf8_malloc malloc
#define utf8_free free

#define UTF8_DEBUG 1
#if defined(UTF8_DEBUG) && UTF8_DEBUG
#define APP_PRINT printf
#else
// Swallow every argument; valid even when only a format string is passed.
#define APP_PRINT(...) ((void)0)
#endif

// Emitted for every character that has no GB2312 mapping.
#define GB2312_REPLACEMENT_CHAR '?'

// Global code table handle; set up by Utf8ToGb2312_init().
UNICODE_GB *code_table = NULL;
int code_table_size_in_item = 0;

// Returns the number of leading 1 bits of the first byte of a UTF-8 sequence.
// 0 means a single-byte (ASCII) character, 2..6 is the length of a multi-byte
// sequence; 1 (continuation byte) and 7/8 are not valid lead bytes.
int GetUtf8ByteNumForWord(u8 firstCh)
{
    u8 mask = 0x80;
    int num = 0;

    while (mask & firstCh)
    {
        num++;
        mask = (u8)(mask >> 1);
    }
    APP_PRINT("\r\nthe num is: %d", num);
    return num;
}

// Binary-searches the GB2312 code for a Unicode value.
// Parameters: unicodeKey      - Unicode value to look up
//             code_table      - Unicode->GB2312 table, searched by bisection
//                               (so it must be sorted ascending by .unicode)
//             CODE_TABLE_SIZE - number of entries in the table
// Returns:    0     - no entry for unicodeKey (or no/empty table)
//             non-0 - the GB2312 code stored in the matching entry
u16 SearchCodeTable(u16 unicodeKey, UNICODE_GB *code_table, int CODE_TABLE_SIZE)
{
    int first = 0;
    int end = CODE_TABLE_SIZE - 1;

    if (!code_table || CODE_TABLE_SIZE <= 0)
        return 0;

    while (first <= end)
    {
        // Written this way so the midpoint cannot overflow int.
        const int mid = first + (end - first) / 2;

        if (code_table[mid].unicode == unicodeKey)
            return code_table[mid].gb;

        if (code_table[mid].unicode > unicodeKey)
            end = mid - 1;
        else
            first = mid + 1;
    }
    return 0;
}

// True when the byte has the 10xxxxxx form of a UTF-8 continuation byte.
static int IsUtf8Continuation(u8 ch)
{
    return (ch & 0xC0) == 0x80;
}

// Converts a UTF-8 string to GB2312 using the global code table.
// Parameters: utf8 - UTF-8 input
//             len  - length of the input in bytes
//             temp - output buffer; the result is NUL-terminated and never
//                    longer than the input, so len + 1 bytes are sufficient
// Returns:    0 - success, others - fail (NULL argument, invalid lead byte,
//             truncated sequence or bad continuation byte)
// ASCII bytes are copied through.  2- and 3-byte sequences are decoded and
// looked up in the table; the GB2312 code is written high byte first.
// Characters without a mapping, including every 4..6-byte sequence, are
// written as a single GB2312_REPLACEMENT_CHAR.
int Utf8ToGb2312(const char* utf8, int len, char *temp)
{
    int i = 0;
    int j = 0;
    int ret = 0;

    if (!utf8 || !temp)
        return -1;

    APP_PRINT("\r\nutf8->unicode: \n");
    APP_PRINT("utf8: [");
    for (int k = 0; k < len; k++)
    {
        // Go through u8 so bytes >= 0x80 are not printed sign-extended.
        APP_PRINT("0x%02x ", (unsigned)(u8)utf8[k]);
    }
    APP_PRINT("]\n");

    while (i < len)
    {
        const u8 lead = (u8)utf8[i];
        const int seqLen = GetUtf8ByteNumForWord(lead);
        u16 unicodeKey = 0;
        u16 gbKey = 0;
        int wellFormed = 1;

        if (seqLen == 0)
        {
            // Plain ASCII is identical in GB2312.
            temp[j++] = utf8[i++];
            continue;
        }

        if (seqLen < 2 || seqLen > 6)
        {
            APP_PRINT("\r\nthe len is more than 6, error\n");
            ret = -1;
            break;
        }

        // Never read past the end of the input for a truncated sequence.
        if (seqLen > len - i)
        {
            ret = -1;
            break;
        }

        for (int k = 1; k < seqLen; k++)
        {
            if (!IsUtf8Continuation((u8)utf8[i + k]))
                wellFormed = 0;
        }
        if (!wellFormed)
        {
            ret = -1;
            break;
        }

        // Only 2- and 3-byte sequences fit the 16-bit keys of the table;
        // longer sequences keep gbKey == 0 and fall through to the
        // replacement character.
        if (seqLen <= 3)
        {
            if (seqLen == 2)
            {
                unicodeKey = (u16)(((lead & 0x1F) << 6) |
                                   ((u8)utf8[i + 1] & 0x3F));
            }
            else
            {
                unicodeKey = (u16)(((lead & 0x0F) << 12) |
                                   (((u8)utf8[i + 1] & 0x3F) << 6) |
                                   ((u8)utf8[i + 2] & 0x3F));
            }
            APP_PRINT("\r\nunicode key is: 0x%04X\n", (unsigned)unicodeKey);

            gbKey = SearchCodeTable(unicodeKey, code_table, code_table_size_in_item);
            APP_PRINT("\r\ngb2312 key is: 0x%04X\n", (unsigned)gbKey);
        }

        if (gbKey != 0)
        {
            // High byte first, independent of the host byte order.
            temp[j++] = (char)(gbKey >> 8);
            temp[j++] = (char)(gbKey & 0xFF);
        }
        else
        {
            temp[j++] = GB2312_REPLACEMENT_CHAR;
        }
        i += seqLen;
    }

    // Terminate whatever was produced, also on the error path.
    temp[j] = '\0';
    if (ret != 0)
        return ret;

    APP_PRINT("\r\ngb2312: [");
    for (int k = 0; k < j; k++)
    {
        APP_PRINT("0x%02x ", (unsigned)(u8)temp[k]);
    }
    APP_PRINT("]\n");

    return 0;
}

static FILE *fp = NULL;
#define TABLE_FILE "./unicode_to_gb2312_table.bin"

// Opens TABLE_FILE for binary reading; returns NULL on failure.
// Read-only mode is enough, so a write-protected table file still loads.
static FILE *OpenTableFile(void)
{
    FILE *file = NULL;

#ifdef _MSC_VER
    if (fopen_s(&file, TABLE_FILE, "rb") != 0)
        file = NULL;
#else
    file = fopen(TABLE_FILE, "rb");
#endif
    return file;
}

// Initializes the conversion environment: loads TABLE_FILE into code_table
// and sets code_table_size_in_item.  The file is read as raw UNICODE_GB
// records, so it has to match the host's struct layout and byte order.
// Parameters: none
// Returns:    0  - success
//             -1 - table file could not be opened
//             -2 - a code table is already loaded
//             -3 - file is empty, too large, or could not be read completely
//             -4 - out of memory
// On any failure no new table is left installed.
int Utf8ToGb2312_init(void)
{
    long file_size_in_byte = 0;
    int ret = 0;

    fp = OpenTableFile();
    if (!fp)
    {
        APP_PRINT("\r\nUtf8ToGb2312_init open file fail");
        return -1;
    }

    if (code_table)
    {
        APP_PRINT("\r\ncode table handle is exists error");
        ret = -2;
    }
    else if (fseek(fp, 0, SEEK_END) != 0 ||
             (file_size_in_byte = ftell(fp)) < (long)sizeof(UNICODE_GB) ||
             fseek(fp, 0, SEEK_SET) != 0)
    {
        APP_PRINT("\r\nfile read error, len ret=%d", 0);
        ret = -3;
    }
    else if ((unsigned long)file_size_in_byte / sizeof(UNICODE_GB) > (unsigned long)INT_MAX)
    {
        // The item count is kept in an int.
        APP_PRINT("\r\nfile read error, len ret=%d", 0);
        ret = -3;
    }
    else
    {
        const int item_count = (int)((unsigned long)file_size_in_byte / sizeof(UNICODE_GB));
        UNICODE_GB *table = (UNICODE_GB*)utf8_malloc((size_t)item_count * sizeof(UNICODE_GB));

        if (!table)
        {
            ret = -4;
        }
        else
        {
            const size_t items_read = fread(table, sizeof(table[0]), (size_t)item_count, fp);

            APP_PRINT("\r\nopen file ok, size_in_byte=%ld, size_in_item=%d", file_size_in_byte, item_count);
            if (items_read != (size_t)item_count)
            {
                APP_PRINT("\r\nfile read error, len ret=%d", (int)items_read);
                // Do not keep a partially filled table around.
                utf8_free(table);
                ret = -3;
            }
            else
            {
                code_table = table;
                code_table_size_in_item = item_count;
            }
        }
    }

    fclose(fp);
    fp = NULL;
    return ret;
}

// Releases the conversion environment set up by Utf8ToGb2312_init().
// Parameters: none
// Returns:    0 - success, others - fail
int Utf8ToGb2312_deinit(void)
{
    utf8_free(code_table);
    code_table = NULL;
    code_table_size_in_item = 0;
    return 0;
}

#ifdef _MSC_VER
// Demo driver: converts one UTF-8 character and prints the GB2312 result.
// It uses the MSVC-specific _tmain/_TCHAR names, so it is only compiled
// there; other toolchains call the functions above from their own main().
int _tmain(int argc, _TCHAR* argv[])
{
    // UTF-8 bytes E4 BD A0 decode to U+4F60.
    const unsigned char utf8[] = { 0xe4, 0xbd, 0xa0, 0x00 };
    char gb2312[100];
    int ret;

    (void)argc;
    (void)argv;

    ret = Utf8ToGb2312_init();
    if (!ret)
    {
        memset(gb2312, 0, sizeof(gb2312));
        ret = Utf8ToGb2312((const char*)utf8, (int)strlen((const char*)utf8), gb2312);
    }

    if (!ret)
    {
        printf("\r\nutf8 to gb2312 ok\r\n");
        // Converted data must never be used as the format string.
        printf("%s", gb2312);
    }
    else
    {
        printf("\r\nutf8 to gb2312 fail\r\n");
    }

    Utf8ToGb2312_deinit();
    getchar();
    return 0;
}
#endif