分享web开发知识

注册/登录|最近发布|今日推荐

主页 IT知识网页技术软件开发前端开发代码编程运营维护技术分享教程案例
当前位置:首页 > 前端开发

js实现unicode码字符串与utf8字节数据互转

发布时间:2023-09-06 02:31 | 责任编辑:彭小芳 | 关键词:js

js的string变量在内存中以UTF-16编码(Unicode码元序列)存储字符串,保存或传输时必须先转换成具体的字节编码,比如utf-8、utf-32等。数据以utf-8编码存储到数据库后,读取出来如何还原成正确的字符串就成了问题。现在给出解决方案,可以正确支持中文、emoji表情、英文混合的字符串编码互转。

/**
 * Created by hdwang on 2019/1/28.
 *
 * Converts between JavaScript strings (sequences of UTF-16 code units) and
 * UTF-8 byte data. Public API:
 *   convertUtf8.toBytes(text)    -> Uint8Array of UTF-8 bytes
 *   convertUtf8.fromBytes(bytes) -> decoded string
 */
// `var` is kept on purpose: in a classic <script> it keeps `convertUtf8`
// reachable as a global property, as it was before.
var convertUtf8 = (function () {
  // Emitted in place of any malformed or out-of-range UTF-8 sequence.
  const REPLACEMENT_CHAR = '\uFFFD';
  // Highest valid Unicode code point; String.fromCodePoint throws above it.
  const MAX_CODE_POINT = 0x10FFFF;

  /**
   * Encodes a string as UTF-8.
   *
   * Relies on encodeURI: every non-ASCII character (and '%' itself) comes back
   * as one or more "%XX" escapes holding its UTF-8 bytes, while the characters
   * encodeURI leaves alone are plain ASCII and map to a single byte each.
   *
   * @param {string} text String to encode (non-strings are coerced by encodeURI).
   * @returns {Uint8Array} UTF-8 bytes of `text`.
   * @throws {URIError} If `text` contains a lone surrogate (thrown by encodeURI).
   */
  function toBytes(text) {
    const escaped = encodeURI(text);
    const result = [];
    let i = 0;
    while (i < escaped.length) {
      const c = escaped.charCodeAt(i++);
      if (c === 37) {
        // '%': the next two characters are the hex value of one byte.
        result.push(parseInt(escaped.slice(i, i + 2), 16));
        i += 2;
      } else {
        // Unescaped ASCII character: its char code is the byte.
        result.push(c);
      }
    }
    return coerceArray(result);
  }

  /**
   * Decodes UTF-8 bytes into a string.
   *
   * Well-formed 1- to 4-byte sequences decode as before. Each element is
   * masked to its low 8 bits, so signed byte values (e.g. -28 for 0xE4) are
   * accepted. Overlong forms and 3-byte encoded surrogate halves are still
   * decoded leniently rather than rejected.
   *
   * Malformed input yields U+FFFD instead of a wrong character or an
   * exception: a stray continuation byte, an obsolete 5/6-byte lead
   * (0xF8-0xFF), a sequence cut short by the end of input or by a
   * non-continuation byte, and any code point above U+10FFFF. After a
   * broken sequence, decoding resumes at the offending byte.
   *
   * @param {ArrayLike<number>} utf8Bytes Array, typed array or other indexable
   *   object with a `length`, holding the bytes.
   * @returns {string} The decoded string.
   */
  function utf8ByteToUnicodeStr(utf8Bytes) {
    const length = utf8Bytes.length;
    let unicodeStr = '';
    let pos = 0;

    while (pos < length) {
      const lead = utf8Bytes[pos] & 0xFF;

      // 0xxxxxxx: single-byte (ASCII) character.
      if (lead < 0x80) {
        unicodeStr += String.fromCharCode(lead);
        pos += 1;
        continue;
      }

      let trailing; // number of continuation bytes the lead byte announces
      let codePoint; // payload bits gathered so far
      if ((lead & 0xE0) === 0xC0) {
        trailing = 1; // 110xxxxx
        codePoint = lead & 0x1F;
      } else if ((lead & 0xF0) === 0xE0) {
        trailing = 2; // 1110xxxx
        codePoint = lead & 0x0F;
      } else if ((lead & 0xF8) === 0xF0) {
        trailing = 3; // 11110xxx
        codePoint = lead & 0x07;
      } else {
        // 10xxxxxx outside a sequence, or an obsolete 5/6-byte lead.
        unicodeStr += REPLACEMENT_CHAR;
        pos += 1;
        continue;
      }

      let consumed = 1;
      let wellFormed = true;
      while (consumed <= trailing) {
        if (pos + consumed >= length) {
          wellFormed = false; // input ends in the middle of the sequence
          break;
        }
        const next = utf8Bytes[pos + consumed] & 0xFF;
        if ((next & 0xC0) !== 0x80) {
          wellFormed = false; // not a 10xxxxxx continuation byte
          break;
        }
        codePoint = (codePoint << 6) | (next & 0x3F);
        consumed += 1;
      }

      // On failure `consumed` is the offset of the offending byte, so the
      // next iteration starts decoding from that byte.
      pos += consumed;
      unicodeStr += wellFormed && codePoint <= MAX_CODE_POINT
        ? String.fromCodePoint(codePoint)
        : REPLACEMENT_CHAR;
    }
    return unicodeStr;
  }

  /**
   * @param {*} value
   * @returns {boolean} True when `value` is an integer-valued number.
   */
  function checkInt(value) {
    return Number.isInteger(value);
  }

  /**
   * @param {ArrayLike<*>} arrayish
   * @returns {boolean} True when `arrayish` has an integer length and every
   *   element is an integer in the range 0..255.
   */
  function checkInts(arrayish) {
    if (!checkInt(arrayish.length)) {
      return false;
    }
    for (let i = 0; i < arrayish.length; i++) {
      const item = arrayish[i];
      if (!checkInt(item) || item < 0 || item > 255) {
        return false;
      }
    }
    return true;
  }

  /**
   * Normalizes byte input to a Uint8Array.
   *
   * @param {Uint8Array|Array<number>|ArrayLike<number>} arg Bytes to normalize.
   * @param {boolean} [copy] When truthy, a Uint8Array argument is copied
   *   instead of being returned as-is.
   * @returns {Uint8Array}
   * @throws {Error} If `arg` is an array holding a non-byte value, or is not
   *   array-like at all.
   */
  function coerceArray(arg, copy) {
    // Already a byte view: hand it back, or a copy of it if requested.
    if (arg instanceof Uint8Array) {
      return copy ? new Uint8Array(arg) : arg;
    }

    // It's an array; check it is a valid representation of bytes.
    if (Array.isArray(arg)) {
      if (!checkInts(arg)) {
        throw new Error('Array contains invalid value: ' + arg);
      }
      return new Uint8Array(arg);
    }

    // Something else, but behaves like an array (maybe a Buffer? Arguments?)
    if (arg != null && checkInt(arg.length) && checkInts(arg)) {
      return new Uint8Array(arg);
    }

    throw new Error('unsupported array-like object');
  }

  return {
    toBytes: toBytes,
    fromBytes: utf8ByteToUnicodeStr
  };
})();

参考链接:

https://www.oschina.net/question/1046342_2199669

js实现unicode码字符串与utf8字节数据互转

原文地址:https://www.cnblogs.com/hdwang/p/10331344.html

知识推荐

我的编程学习网——分享web前端后端开发技术知识。 垃圾信息处理邮箱 tousu563@163.com 网站地图
icp备案号 闽ICP备2023006418号-8 不良信息举报平台 互联网安全管理备案 Copyright 2023 www.wodecom.cn All Rights Reserved