Protobuf utf-8 encoding and decoding

original

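Protobuf `string` fields must contain UTF-8 text, so in C++ any string held in the local (ANSI/locale) encoding has to be converted to UTF-8 before it is stored in a message and serialized, and converted back after parsing. The conversion helpers are as follows: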

#include <codecvt>
#include <cstdlib>
#include <cstring>
#include <cwchar>
#include <iostream>
#include <locale>
#include <string>
#include <type_traits>

using namespace std;
#pragma comment(lib, "libprotobuf.lib")

/// Converts a wide string to a multibyte string in the encoding of the user's
/// default locale (`std::locale("")`).
///
/// The conversion goes through that locale's codecvt facet, so the process-wide
/// global locale is never modified and no manually managed buffer is involved.
/// The whole of `ws` is converted, including any embedded NUL characters.
///
/// @param ws Wide string to convert.
/// @return The multibyte representation, or an empty string when `ws` is empty
///         or contains a character the locale's encoding cannot represent.
const std::string ws2s(const std::wstring& ws)
{
	if (ws.empty())
		return std::string();

	using Codec = std::codecvt<wchar_t, char, std::mbstate_t>;
	const std::locale user_locale("");
	const Codec& codec = std::use_facet<Codec>(user_locale);

	// max_length() is the most bytes a single wide character can expand to,
	// so this buffer is large enough for the full conversion.
	std::string bytes(ws.size() * static_cast<std::size_t>(codec.max_length()), '\0');

	std::mbstate_t state = std::mbstate_t();
	const wchar_t* const src_begin = ws.data();
	const wchar_t* src_next = src_begin;
	char* const dst_begin = &bytes[0];
	char* dst_next = dst_begin;

	const std::codecvt_base::result status = codec.out(
		state,
		src_begin, src_begin + ws.size(), src_next,
		dst_begin, dst_begin + bytes.size(), dst_next);

	// Anything other than a complete conversion is reported as an empty result.
	if (status != std::codecvt_base::ok)
		return std::string();

	bytes.resize(static_cast<std::size_t>(dst_next - dst_begin));
	return bytes;
}

/// Converts a multibyte string in the encoding of the user's default locale
/// (`std::locale("")`) to a wide string.
///
/// The conversion goes through that locale's codecvt facet, so the process-wide
/// global locale is never modified and no manually managed buffer is involved.
/// The whole of `s` is converted, including any embedded NUL bytes.
///
/// @param s Multibyte string to convert.
/// @return The wide representation, or an empty string when `s` is empty or is
///         not a complete, valid sequence in the locale's encoding.
const std::wstring s2ws(const std::string& s)
{
	if (s.empty())
		return std::wstring();

	using Codec = std::codecvt<wchar_t, char, std::mbstate_t>;
	const std::locale user_locale("");
	const Codec& codec = std::use_facet<Codec>(user_locale);

	// Every wide character consumes at least one input byte, so the output
	// never needs more elements than the input has bytes.
	std::wstring wide(s.size(), L'\0');

	std::mbstate_t state = std::mbstate_t();
	const char* const src_begin = s.data();
	const char* src_next = src_begin;
	wchar_t* const dst_begin = &wide[0];
	wchar_t* dst_next = dst_begin;

	const std::codecvt_base::result status = codec.in(
		state,
		src_begin, src_begin + s.size(), src_next,
		dst_begin, dst_begin + wide.size(), dst_next);

	// Invalid or truncated input is reported as an empty result.
	if (status != std::codecvt_base::ok)
		return std::wstring();

	wide.resize(static_cast<std::size_t>(dst_next - dst_begin));
	return wide;
}

/// Encodes a wide string as UTF-8.
///
/// @param src Wide string: UTF-16 where wchar_t is 16 bits wide, UTF-32 otherwise.
/// @return The UTF-8 encoded bytes.
/// @throws std::range_error if `src` cannot be converted (raised by
///         std::wstring_convert).
const std::string ws2utf8(const std::wstring& src)
{
	// codecvt_utf8 treats a 16-bit wchar_t as UCS-2 and would encode each half
	// of a surrogate pair separately, producing invalid UTF-8 for characters
	// outside the BMP. Use the UTF-16 aware facet when wchar_t is 16 bits wide.
	using Utf8Codec = std::conditional<sizeof(wchar_t) == 2,
		std::codecvt_utf8_utf16<wchar_t>,
		std::codecvt_utf8<wchar_t> >::type;

	std::wstring_convert<Utf8Codec> conv;
	return conv.to_bytes(src);
}

/// Decodes a UTF-8 byte string into a wide string.
///
/// @param src UTF-8 encoded bytes.
/// @return The decoded text: UTF-16 where wchar_t is 16 bits wide, UTF-32 otherwise.
/// @throws std::range_error if `src` is not valid UTF-8 (raised by
///         std::wstring_convert).
const std::wstring utf8_2_ws(const std::string& src)
{
	// codecvt_utf8 treats a 16-bit wchar_t as UCS-2 and cannot decode 4-byte
	// UTF-8 sequences (characters outside the BMP) into it. Use the UTF-16
	// aware facet when wchar_t is 16 bits wide so they become surrogate pairs.
	using Utf8Codec = std::conditional<sizeof(wchar_t) == 2,
		std::codecvt_utf8_utf16<wchar_t>,
		std::codecvt_utf8<wchar_t> >::type;

	std::wstring_convert<Utf8Codec> conv;
	return conv.from_bytes(src);
}

// Usage: convert each string to UTF-8 before storing it in a protobuf string
// field, and convert it back to the local encoding after parsing.
int main()
{
	TGS::MyInFo info;
	// NOTE(review): s2ws() decodes with the user's default locale, so this
	// literal is assumed to be compiled in that same encoding - confirm.
	std::string name = "姓名:TGS";
	info.set_mingzi(ws2utf8(s2ws(name)));
	info.set_nianling(24);
	std::string dh = "123456789";
	info.set_dianhua(ws2utf8(s2ws(dh)));

	// Serialize into a std::string so the exact encoded length travels with
	// the data; a fixed-size array would impose a size limit and make the
	// parser read its unused zero padding as part of the message.
	std::string wire;
	if (!info.SerializeToString(&wire))
	{
		std::cerr << "failed to serialize MyInFo" << std::endl;
		return 1;
	}
	// Raw wire bytes, printed for demonstration only (not human-readable text).
	std::cout << wire << std::endl;

	TGS::MyInFo tt;
	if (!tt.ParseFromString(wire))
	{
		std::cerr << "failed to parse MyInFo" << std::endl;
		return 1;
	}
	std::cout << ws2s(utf8_2_ws(tt.mingzi())) << " " << tt.nianling() << " " << ws2s(utf8_2_ws(tt.dianhua())) << std::endl;
	system("pause");
	return 0;
}

Converting a file's encoding to UTF-8 with Python

import chardet
import codecs

# Re-encode a text file as UTF-8, using chardet to identify its current encoding.
input_file = './some.h'
output_file = './some.h.new'
# Encoding assumed when chardet reports no encoding for the input.
fallback_encoding = 'gb2312'

# Read the raw bytes once; they feed both the detection and the decoding.
with open(input_file, 'rb') as f:
    raw = f.read()

encoding_type = chardet.detect(raw)
print(encoding_type["encoding"])

# Decode with the detected encoding instead of a hard-coded one.
source_encoding = encoding_type["encoding"] or fallback_encoding
data = raw.decode(source_encoding)

# newline='' writes line endings exactly as they appear in the input; without
# it, text mode would expand the '\n' of an existing '\r\n' again on Windows.
with open(output_file, mode='w', encoding='utf-8', newline='') as fo:
    fo.write(data)