zoukankan      html  css  js  c++  java
  • js解析emoji表情

    Emoji

    公司的产品之前只有网页端,并没有提供emoji表情,之后将某个模块整合到app中,里面有个评论功能,在手机端可以输入emoji,显示的时候是空白,说明数据库并没有存储成功,查阅资料后得知emoji是四个字节,而mysql5.5.3前的版本utf8编码最多只支持3个字节。

    js解析emoji

    先需要了解几个概念,js的编码方式、utf16、unicode

    1.JavaScript语言采用Unicode字符集,但是只支持一种编码方法ucs-2

    2.utf16编码

    utf16是ucs-2的超集

    3.Unicode只规定了每个字符的码点,到底用什么样的字节序表示这个码点,就涉及到编码方法

    由于JavaScript只能处理UCS-2编码,造成所有字符在这门语言中都是2个字节,如果是4个字节的字符,会当作两个双字节的字符处理。JavaScript的字符函数都受到这一点的影响,无法返回正确结果

    emoji表情是由utf16编码的,可能是2个字节,也可能是四个字节

    这里的解析我用的是twemoji库,原理是将utf16编码转为unicode码点的十六进制形式,并以此十六进制作为emoji图片的命名

    这里的关键是如何将utf16转为unicode十六进制

    UTF-16的转码公式

    将unicode转为utf16,官方给了公式

    Unicode码点转成UTF-16的时候,首先区分这是基本平面字符(2字节),还是辅助平面字符(4字节)。如果是前者,直接将码点转为对应的十六进制形式,长度为两字节。

    如果是辅助平面字符,Unicode 3.0版给出了转码公式。

    H= Math.floor((c-0x10000)/0x400)+0xD800 //高位

    L = (c-0x10000)%0x400+0xDC00 //低位

    将utf16转为unicode则是知道H,L,求c,学过方程组的应该都会解答吧:c = (H - 0xD800) * 0x400 + (L - 0xDC00) + 0x10000

    给出上述转化的函数

     /*
      * Unicode code point ranges:
      *   2-byte (basic plane) characters:         0x0000 - 0xffff
      *   4-byte (supplementary plane) characters: 0x010000 - 0x10ffff
      *   U+D800 to U+DFFF is the reserved surrogate range.
      * charCodeAt() returns one 16-bit code unit at a time, so a 4-byte
      * character shows up as two consecutive units (a surrogate pair).
      */

     /**
      * Converts a string into the hexadecimal code points of its characters,
      * joined by a separator.
      *
      * A high surrogate (0xD800-0xDBFF) immediately followed by a low surrogate
      * (0xDC00-0xDFFF) is combined into a single code point. A surrogate that is
      * not part of a valid pair is emitted as its own code unit instead of being
      * merged with the following unit or dropped.
      *
      * @param {string} unicodeSurrogates text to convert
      * @param {string} [sep] separator placed between code points; any falsy
      *        value (including '') falls back to '-'
      * @returns {string} lower-case hex code points, e.g. '1f604' or 'd83d-61'
      */
     function toCodePoint(unicodeSurrogates, sep) {
       var
         r = [],
         c = 0,
         p = 0, // pending high surrogate, 0 when there is none
         i = 0;
       while (i < unicodeSurrogates.length) {
         c = unicodeSurrogates.charCodeAt(i++);

         if (p) {
           if (0xDC00 <= c && c <= 0xDFFF) {
             // Valid pair: rebuild the supplementary-plane code point.
             r.push((0x10000 + ((p - 0xD800) << 10) + (c - 0xDC00)).toString(16));
             p = 0;
             continue;
           }
           // The pending high surrogate has no partner: emit it unchanged and
           // fall through so the current unit is handled on its own.
           r.push(p.toString(16));
           p = 0;
         }

         if (0xD800 <= c && c <= 0xDBFF) {
           p = c; // high surrogate: wait for the next unit before emitting
         } else {
           r.push(c.toString(16)); // single code unit: emit its hex value directly
         }
       }
       if (p) {
         // The input ended on a high surrogate; keep it rather than lose it.
         r.push(p.toString(16));
       }
       return r.join(sep || '-');
     }
    View Code

    emojipicker

    页面上选择emoji表情,插入input,发送给后端时需要转为utf16

    这里我用的库是jquery-emoji-picker,这里遇到一个问题,它的css中背景图片是datauri,我又需要兼容ie6,我需要将它的样式文件重写,并将图片保存起来。如果数量少,我会手动改下,结果一看,855个,果断写脚本

     <?php
     /*
      * One-off converter: reads rules from emojipicker.css line by line and
      * writes emojipicker.ff.css, in which each matched rule points at a PNG
      * file path instead of the inline url(...) value.
      */

     /**
      * Parses one CSS line of the form
      *   .emoji-NAME {background-size:100% !important; background-image: url('...');}
      *
      * @param string $str one line of the stylesheet
      * @return array|null array with keys 'filename' (NAME.png), 'base64' and
      *         'name', or null when the line does not match the expected rule
      */
     function formatData($str){
         $reg='/^\.emoji-([^\{]+)\s+\{background-size:100% !important; background-image: url\(\'(.+)\'\);\}/'; // matches one emoji rule
         if (preg_match($reg, $str, $matches) !== 1) {
             // Not an emoji rule (blank line, comment, trailing empty read at EOF).
             return null;
         }
         return array(
             'filename' => $matches[1].'.png',
             // Drops the first 22 characters of the url value; presumably the
             // "data:image/png;base64," prefix, which is exactly 22 characters long.
             'base64'   => substr($matches[2],22),
             'name'     => $matches[1],
         );
     }

     /**
      * Decodes a base64 string and writes the bytes to images/<filename>.
      *
      * @param string $base64   base64-encoded file content
      * @param string $filename file name to create inside images/
      */
     function basetopng($base64,$filename){
         $str=base64_decode($base64);
         file_put_contents('images/'.$filename, $str);
     }

     $css_file = 'emojipicker.css';
     $start = 0;        // start reading at line 0
     $num = 855;        // read at most 855 lines
     $str='';           // content of the generated css file
     $spl_object = new SplFileObject($css_file, 'rb');
     $spl_object->seek($start);
     while ($num-- && !$spl_object->eof()) {
         $tmpData=formatData($spl_object->fgets());
         if ($tmpData !== null) {
             // Only matched rules are rewritten; unmatched lines used to produce
             // a broken ".emoji- { ... /.png }" rule plus undefined-offset notices.
             $str.=".emoji-{$tmpData['name']} { background-size:100% !important; background-image: url('/Public/plugin/emojipicker/images/{$tmpData['filename']}');}\n";
         }
         $spl_object->next();
     }
     // NOTE(review): basetopng() is defined above but never called here, so this
     // script does not write any PNG files itself - confirm whether the images
     // are meant to be extracted in this loop via $tmpData['base64'].
     file_put_contents('emojipicker.ff.css', $str);

     ?>
    View Code

     点击icon获得emoji的name,将name转化为‘<微笑>’字样插入input,提交给后台的时候再将'<微笑>'字样转化为utf16(先转化为unicode,再转化为utf16)

    我又跑去微信界面盗了点资源过来,领导说做成微信类似就好了,原来的jquery.emojipicker.a.js中的数据结构是

    {
        "name": "sunny",
        "unicode": "2600",
        "shortcode": "sunny",
        "description": "BLACK SUN WITH RAYS",
        "category": "thing"
      }

    我需要给它加点东西,变成这样

    {
            "name": "sunny",
            "unicode": "2600",
            "shortcode": "sunny",
            "desc": "<太阳>",
            "title": "太阳",
            "category": "thing"
    }

    而微信的数据结构是这样 {"<太阳>" : "2600"},应该怎么对应,unicode与wx的value相等,以这个为基准

     // One-off data-preparation script: keeps only the picker emojis whose code
     // also appears in the WeChat face map, and attaches the WeChat label to them.
     // Reads the globals window.gQQFaceMap ({ '<label>': code }) and emojis
     // (picker entries with name / unicode / shortcode / category), then logs the
     // resulting array as JSON.
     var a = [],                        // WeChat codes kept, in first-seen order
         wxemojis = window.gQQFaceMap,
         b = [],                        // parallel to `a`: unicode, desc and title per code
         wxIndexByCode = {},            // lower-cased code -> index into a / b
         hasOwn = Object.prototype.hasOwnProperty;
     for (var i in wxemojis) {
       if (!hasOwn.call(wxemojis, i)) {
         continue; // ignore anything inherited through the prototype chain
       }
       var wxCode = wxemojis[i];
       // Only values longer than 3 characters are kept (presumably shorter ones
       // are not unicode code points - verify against the WeChat data).
       if (wxCode.length > 3) {
         // Codes are compared case-insensitively on both sides; the lookup table
         // replaces the repeated indexOf scans.
         var wxKey = String(wxCode).toLowerCase();
         if (!hasOwn.call(wxIndexByCode, wxKey)) {
           wxIndexByCode[wxKey] = a.length;
           a[a.length] = wxCode;
           b[b.length] = {
             'unicode': wxCode,
             'desc': i,
             'title': i.replace(/(<|>)/g, '') // label without the angle brackets
           };
         }
       }
     }
     var myEmojis = []; // the emoji entries this project needs
     for (var i in emojis) {
       if (!hasOwn.call(emojis, i)) {
         continue;
       }
       var emojiKey = emojis[i].unicode.toLowerCase();
       if (hasOwn.call(wxIndexByCode, emojiKey)) {
         var tmpIndex = wxIndexByCode[emojiKey];
         myEmojis[myEmojis.length] = {
           "name": emojis[i].name,
           "unicode": emojis[i].unicode,
           "shortcode": emojis[i].shortcode,
           "desc": b[tmpIndex].desc,
           "title": b[tmpIndex].title,
           "category": emojis[i].category
         };
       }
     }
     console.log(JSON.stringify(myEmojis));
    View Code

     生成了自己的表情数组。

    将中文字样转为utf16,传给后端

     /**
      * Builds a string from a dash-separated list of hexadecimal code points,
      * e.g. '1f604' or '1f1e8-1f1f3'.
      *
      * A plain loop is used instead of Array.prototype.map so the function also
      * runs in browsers without ES5 array methods.
      *
      * @param {string} code hexadecimal code points separated by '-'
      * @returns {string} the corresponding characters
      * @throws {RangeError} when a segment is not a hexadecimal number, so the
      *         native String.fromCodePoint and a shim behave the same way
      */
     function toUnicode(code) {
       var parts = code.split('-');
       var codes = [];
       for (var k = 0; k < parts.length; k++) {
         var point = parseInt(parts[k], 16);
         if (isNaN(point)) {
           throw new RangeError('Invalid code point ' + parts[k]);
         }
         codes.push(point);
       }
       return String.fromCodePoint.apply(null, codes);
     }
     7 
     /**
      * Fallback implementation of String.fromCodePoint for engines that lack it.
      *
      * Each argument is converted to a number. Values above 0xFFFF are written as
      * a surrogate pair, everything else as a single code unit.
      *
      * @param {...number} codePoints code points to convert
      * @returns {string} the concatenated characters ('' when called without arguments)
      * @throws {RangeError} for values that are not integers in 0..0x10FFFF,
      *         as the native function does
      */
     function fromCodePointShim() {
       var chars = [], point, offset, i;
       for (i = 0; i < arguments.length; ++i) {
         point = Number(arguments[i]);
         if (!isFinite(point) || point < 0 || point > 0x10FFFF || Math.floor(point) !== point) {
           throw new RangeError('Invalid code point ' + arguments[i]);
         }
         if (point > 0xFFFF) {
           // Supplementary plane: split into high and low surrogate.
           offset = point - 0x10000;
           chars.push(String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF)));
         } else {
           chars.push(String.fromCharCode(point));
         }
       }
       return chars.join("");
     }

     if (!String.fromCodePoint) {
       // ES6 Unicode Shims 0.1 , © 2012 Steven Levithan http://slevithan.com/ , MIT License
       String.fromCodePoint = fromCodePointShim;
     }
    /**
     * Escapes &, ", <, > and ' as HTML entities.
     *
     * @param {*} a value to escape
     * @returns {*} the escaped string; falsy values and values without a
     *          `replace` member are returned unchanged
     */
    function htmlEncode(a) {
      if (!a || !a.replace) {
        return a;
      }
      // The ampersand goes first so the entities added below are not escaped again.
      var escaped = a.replace(/&/g, "&amp;");
      escaped = escaped.replace(/\"/g, "&quot;");
      escaped = escaped.replace(/</g, "&lt;");
      escaped = escaped.replace(/>/g, "&gt;");
      escaped = escaped.replace(/\'/g, "&#39;");
      return escaped;
    }
    /**
     * Replaces every known '<label>' placeholder in the text with the emoji
     * character it stands for, then HTML-escapes the result with htmlEncode.
     * Placeholders that are not in the table are left untouched (and are
     * therefore escaped like any other '<...>' text).
     *
     * @param {*} str text typed by the user
     * @returns {*} the escaped text with emoji characters inserted; values that
     *          have no `replace` method (null, undefined, numbers, ...) are
     *          handed to htmlEncode unchanged instead of raising a TypeError
     */
    function afterEncodeEmoji(str){
      // '<label>' -> hexadecimal code point of the emoji
      var faceMap={
        "<笑脸>":"1f604","<开心>":"1f60a","<大笑>":"1f603","<热情>":"263a","<眨眼>":"1f609","<色>":"1f60d",
        "<接吻>":"1f618","<亲吻>":"1f61a","<脸红>":"1f633","<露齿笑>":"1f63c","<满意>":"1f60c","<戏弄>":"1f61c",
        "<吐舌>":"1f445","<无语>":"1f612","<得意>":"1f60f","<汗>":"1f613","<失望>":"1f640","<低落>":"1f61e",
        "<呸>":"1f616","<焦虑>":"1f625","<担心>":"1f630","<震惊>":"1f628","<悔恨>":"1f62b","<眼泪>":"1f622",
        "<哭>":"1f62d","<破涕为笑>":"1f602","<晕>":"1f632","<恐惧>":"1f631","<心烦>":"1f620","<生气>":"1f63e",
        "<睡觉>":"1f62a","<生病>":"1f637","<恶魔>":"1f47f","<外星人>":"1f47d","<心>":"2764","<心碎>":"1f494",
        "<丘比特>":"1f498","<闪烁>":"2728","<星星>":"1f31f","<叹号>":"2755","<问号>":"2754","<睡着>":"1f4a4",
        "<水滴>":"1f4a6","<音乐>":"1f3b5","<火>":"1f525","<便便>":"1f4a9","<强>":"1f44d","<弱>":"1f44e",
        "<拳头>":"1f44a","<胜利>":"270c","<上>":"1f446","<下>":"1f447","<右>":"1f449","<左>":"1f448",
        "<第一>":"261d","<强壮>":"1f4aa","<吻>":"1f48f","<热恋>":"1f491","<男孩>":"1f466","<女孩>":"1f467",
        "<女士>":"1f469","<男士>":"1f468","<天使>":"1f47c","<骷髅>":"1f480","<红唇>":"1f48b","<太阳>":"2600",
        "<下雨>":"2614","<多云>":"2601","<雪人>":"26c4","<月亮>":"1f319","<闪电>":"26a1","<海浪>":"1f30a",
        "<猫>":"1f431","<小狗>":"1f429","<老鼠>":"1f42d","<仓鼠>":"1f439","<兔子>":"1f430","<狗>":"1f43a",
        "<青蛙>":"1f438","<老虎>":"1f42f","<考拉>":"1f428","<熊>":"1f43b","<猪>":"1f437","<牛>":"1f42e",
        "<野猪>":"1f417","<猴子>":"1f435","<马>":"1f434","<蛇>":"1f40d","<鸽子>":"1f426","<鸡>":"1f414",
        "<企鹅>":"1f427","<毛虫>":"1f41b","<章鱼>":"1f419","<鱼>":"1f420","<鲸鱼>":"1f433","<海豚>":"1f42c",
        "<玫瑰>":"1f339","<花>":"1f33a","<棕榈树>":"1f334","<仙人掌>":"1f335","<礼盒>":"1f49d","<南瓜灯>":"1f383",
        "<鬼魂>":"1f47b","<圣诞老人>":"1f385","<圣诞树>":"1f384","<礼物>":"1f381","<铃>":"1f514","<庆祝>":"1f389",
        "<气球>":"1f388","<CD>":"1f4bf","<相机>":"1f4f7","<录像机>":"1f3a5","<电脑>":"1f4bb","<电视>":"1f4fa",
        "<电话>":"1f4de","<解锁>":"1f513","<锁>":"1f512","<钥匙>":"1f511","<成交>":"1f528","<灯泡>":"1f4a1",
        "<邮箱>":"1f4eb","<浴缸>":"1f6c0","<钱>":"1f4b2","<药丸>":"1f48a","<橄榄球>":"1f3c8","<篮球>":"1f3c0",
        "<足球>":"26bd","<棒球>":"26be","<高尔夫>":"26f3","<奖杯>":"1f3c6","<入侵者>":"1f47e","<唱歌>":"1f3a4",
        "<吉他>":"1f3b8","<比基尼>":"1f459","<皇冠>":"1f451","<雨伞>":"1f302","<手提包>":"1f45c","<口红>":"1f484",
        "<戒指>":"1f48d","<钻石>":"1f48e","<咖啡>":"2615","<啤酒>":"1f37a","<干杯>":"1f37b","<鸡尾酒>":"1f377",
        "<汉堡>":"1f354","<薯条>":"1f35f","<意面>":"1f35d","<寿司>":"1f363","<面条>":"1f35c","<煎蛋>":"1f373",
        "<冰激凌>":"1f366","<蛋糕>":"1f382","<苹果>":"1f34f","<飞机>":"2708","<火箭>":"1f680","<自行车>":"1f6b2",
        "<高铁>":"1f684","<警告>":"26a0","<旗>":"1f3c1","<男人>":"1f6b9","<女人>":"1f6ba","<O>":"2b55",
        "<X>":"274e","<商标>":"2122"
      };

      // htmlEncode passes such values through, so do the same here instead of
      // failing on str.replace.
      if (str == null || typeof str.replace !== 'function') {
        return htmlEncode(str);
      }

      var unicodeStr=str.replace(/<.*?>/g,function(a){
        // Own-property lookup so only labels listed in the table are replaced.
        if(Object.prototype.hasOwnProperty.call(faceMap, a)){
          return toUnicode(faceMap[a]);
        }
        return a;
      });
      return htmlEncode(unicodeStr);
    }

     已经基本完成我需要的功能了(兼容至ie6+)。

    结语

    献上本人拙劣的demo(css不行)

    [emoji picker demo](https://ceau.github.io/emojipicker/demo.html)

  • 相关阅读:
    JSTL笔记(胖先生版)
    EL表达式(胖先生版)
    包装类-Character
    String定义与方法
    冒泡排序(大熊版)
    tomcat Manger App
    第一天
    剑指offer:面试题5、从尾到头打印链表
    剑指offer:面试题4、替换空格
    剑指offer:面试题3、二维数组中的查找
  • 原文地址:https://www.cnblogs.com/mingao/p/5000114.html
Copyright © 2011-2022 走看看