嵌入式utf-8转码gb2312的c语言实现,附源码

部分参考:
www.360doc.com/content/12/0926/12/1072296_238242301.shtml
https://blog.csdn.net/wyingquan/article/details/3882432

涉及到的文件unicode_to_gb2312_table.bin,请前往我的csdn资源下载中寻找

源码如下(使用方法:直接拷贝到一个.c文件中,用vc编译即可;注意需要在程序所在的本地文件夹中放置unicode_to_gb2312_table.bin):

// utf8_to_gb2312_in_c.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include "malloc.h"
#include "string.h"

/* Short aliases for the unsigned integer types used in this file. */
typedef unsigned char u8;
typedef unsigned short u16;

/* One record of the Unicode -> GB2312 lookup table. */
struct unicode_gb
{
	u16 unicode;	/* Unicode value; the key SearchCodeTable compares against */
	u16 gb;		/* GB2312 value handed back when the key matches */
};
typedef struct unicode_gb UNICODE_GB;


/* Allocation wrappers; they currently map straight to malloc/free. */
#define utf8_malloc malloc
#define utf8_free   free

/* Debug trace switch. Defaults to 1 (tracing on); because of the #ifndef it
 * can be overridden from the build command line, e.g. -DUTF8_DEBUG=0. */
#ifndef UTF8_DEBUG
#define UTF8_DEBUG 1
#endif

/* APP_PRINT is printf while tracing is on. When tracing is off it expands to
 * a no-op expression, so a call remains a valid statement or expression and
 * may be given a format string with no further arguments. */
#if defined(UTF8_DEBUG) && UTF8_DEBUG
#define APP_PRINT printf
#else
#define APP_PRINT(...) ((void)0)
#endif


// Global handle to the Unicode -> GB2312 code table. It starts out NULL and
// must be initialised (Utf8ToGb2312_init fills it in) before lookups can succeed.
UNICODE_GB *code_table=NULL;
// Number of UNICODE_GB records in code_table; 0 while no table is loaded.
int code_table_size_in_item = 0;


// Count the consecutive 1 bits at the top of a UTF-8 byte, starting from the
// most significant bit and stopping at the first 0 bit.
// Parameter: firstCh - the byte to inspect (normally the first byte of a character)
// Returns: the number of leading 1 bits, 0..8. 0 means the top bit is clear
//          (a single-byte character), 1 is the 10xxxxxx pattern, and n >= 2 is
//          the pattern of the first byte of an n-byte sequence.
// The count is also written to the debug trace.
int GetUtf8ByteNumForWord(u8 firstCh)
{
	int leadingOnes = 0;
	u8 mask;

	// Walk a single-bit mask down from bit 7 until it meets a 0 bit in
	// firstCh or has been shifted out completely.
	for (mask = 0x80; (firstCh & mask) != 0; mask >>= 1)
	{
		leadingOnes++;
	}

	APP_PRINT("\r\nthe num is: %d", leadingOnes);
	return leadingOnes;
}

// Binary-search a Unicode -> GB2312 table for one Unicode value.
// The halving search is only correct when the records are ordered by
// ascending `unicode` value.
// Parameters:
//   unicodeKey      - Unicode value to look up
//   code_table      - table to search; may be NULL
//   CODE_TABLE_SIZE - number of records in code_table
// Returns: the `gb` field of the matching record, or 0 when code_table is
//          NULL, CODE_TABLE_SIZE is not positive, or no record matches.
u16 SearchCodeTable(u16 unicodeKey, UNICODE_GB *code_table, int CODE_TABLE_SIZE)
{
	int first;
	int end;

	// Validate before doing any arithmetic on the size.
	if (!code_table || CODE_TABLE_SIZE <= 0)
	{
		return 0;
	}

	first = 0;
	end = CODE_TABLE_SIZE - 1;

	while (first <= end)
	{
		// first + (end - first) / 2 cannot overflow, unlike (first + end) / 2.
		const int mid = first + (end - first) / 2;
		const u16 probe = code_table[mid].unicode;

		if (probe == unicodeKey)
		{
			return code_table[mid].gb;
		}

		if (probe > unicodeKey)
		{
			end = mid - 1;
		}
		else
		{
			first = mid + 1;
		}
	}

	return 0;
}


//utf8字符串转gb2312字符串
//参数: utf8- utf8字符串
//		len- utf8字符串长度,按字节
//		temp- 转化后的gb2312值
//返回值: 0- success, others-fail
int Utf8ToGb2312(const char* utf8, int len, char *temp)
{
	APP_PRINT("\r\nutf8->unicode: \n");
	APP_PRINT("utf8: [");
	for (int k = 0; k < len; k++)
	{
		APP_PRINT("0x%02x ", utf8[k]);
	}
	APP_PRINT("]\n");

	int byteCount = 0;
	int i = 0;
	int j = 0;

	u16 unicodeKey = 0;
	u16 gbKey = 0;


	//循环解析
	while (i < len)
	{
		switch (GetUtf8ByteNumForWord((u8)utf8[i]))
		{
		case 0:
			temp[j] = utf8[i];
			byteCount = 1;
			break;

		case 2:
			temp[j] = utf8[i];
			temp[j + 1] = utf8[i + 1];
			byteCount = 2;
			break;

		case 3:
			//这里就开始进行UTF8->Unicode
			temp[j + 1] = ((utf8[i] & 0x0F) << 4) | ((utf8[i + 1] >> 2) & 0x0F);
			temp[j] = ((utf8[i + 1] & 0x03) << 6) + (utf8[i + 2] & 0x3F);

			//取得Unicode的值
			memcpy(&unicodeKey, (temp + j), 2);
			APP_PRINT("\r\nunicode key is: 0x%04X\n", unicodeKey);

			//根据这个值查表取得对应的GB2312的值
			gbKey = SearchCodeTable(unicodeKey, code_table, code_table_size_in_item);
			APP_PRINT("\r\ngb2312 key is: 0x%04X\n", gbKey);

			if (gbKey != 0)
			{
				//here change the byte
				//不为0表示搜索到,将高低两个字节调换调成我要的形式
				gbKey = (gbKey >> 8) | (gbKey << 8);
				APP_PRINT("\r\nafter changing, gb2312 key is: 0x%04X\n", gbKey);
				memcpy((temp + j), &gbKey, 2);
			}
			byteCount = 3;
			break;

		case 4:
			byteCount = 4;
			break;
		case 5:
			byteCount = 5;
			break;
		case 6:
			byteCount = 6;
			break;

		default:
			APP_PRINT("\r\nthe len is more than 6, error\n");
			//break;
			return -1;
		}
		i += byteCount;
		if (byteCount == 1)
		{
			j++;
		}
		else
		{
			j += 2;
		}

	}
	APP_PRINT("\r\ngb2312: [");
	for (int k = 0; k < j; k++)
	{
		APP_PRINT("0x%02x ", temp[k]);
	}
	APP_PRINT("]\n");

	return 0;
}

//初始化utf8转gb2312转换环境,主要是初始化码表handle和码表大小
//参数:无
//返回值:0- 成功, 其他值- 失败

static FILE *fp=NULL;
#define TABLE_FILE "./unicode_to_gb2312_table.bin"

int Utf8ToGb2312_init(void)
{
	long file_size_in_byte;
	int ret;
	int len;

	ret = 0;

	fopen_s(&fp, TABLE_FILE, "rb+");
	if (!fp)
	{
		APP_PRINT("\r\nUtf8ToGb2312_init open file fail");
		return -1;
	}

	fseek(fp, 0, SEEK_END);
	file_size_in_byte=ftell(fp);
	rewind(fp);

	if (!code_table)
	{
		code_table = (UNICODE_GB*)utf8_malloc(file_size_in_byte);
		code_table_size_in_item = file_size_in_byte / sizeof(UNICODE_GB);

		APP_PRINT("\r\nopen file ok, size_in_byte=%d, size_in_item=%d", file_size_in_byte, code_table_size_in_item);

		len = fread(code_table, sizeof(code_table[0]), code_table_size_in_item, fp);
		if (len != code_table_size_in_item)
		{
			APP_PRINT("\r\nfile read error, len ret=%d", len);
			ret = -3;
		}
	}
	else
	{
		APP_PRINT("\r\ncode table handle is exists error");
		ret = -2;
	}

	fclose(fp);

	return ret;
}

//反初始化utf8转gb2312转换环境
//参数: 无
//返回值: 0- 成功, 其他值-失败
int Utf8ToGb2312_deinit(void)
{
	utf8_free(code_table);
	code_table = NULL;
	code_table_size_in_item = 0;

	return 0;
}


// Console demo: load the code table, convert the UTF-8 bytes E4 BD A0 to
// GB2312, print the outcome, and wait for a key press before exiting.
// Returns: 0 when initialisation and conversion both succeed, 1 otherwise.
int _tmain(int argc, _TCHAR* argv[])
{
	// One 3-byte UTF-8 character. A string literal is used so the bytes
	// above 0x7F need no narrowing int -> char conversion.
	char utf8[100] = "\xe4\xbd\xa0";
	char gb2312[100];
	int ret;

	(void)argc;
	(void)argv;

	ret = Utf8ToGb2312_init();
	if (ret)
	{
		// Without a table every lookup would fail, so do not convert.
		printf("\r\nutf8 to gb2312 init fail, ret=%d\r\n", ret);
	}
	else
	{
		// Zero-fill first: Utf8ToGb2312 does not terminate its output.
		memset(gb2312, 0, sizeof(gb2312));
		ret = Utf8ToGb2312(utf8, (int)strlen(utf8), gb2312);
		if (!ret)
		{
			printf("\r\nutf8 to gb2312 ok\r\n");
			// Print the converted bytes as data, never as the format string.
			printf("%s", gb2312);
		}
		else
		{
			printf("\r\nutf8 to gb2312 fail\r\n");
		}
	}
	Utf8ToGb2312_deinit();

	getchar();

	return ret ? 1 : 0;
}


猜你喜欢

转载自blog.csdn.net/oushaojun2/article/details/79793124