zoukankan      html  css  js  c++  java
  • protobuf 无proto 解码 decode 语言 java python

    package com.example.demo.services;
    
    import com.google.common.base.Charsets;
    import com.google.protobuf.ByteString;
    import com.google.protobuf.CodedInputStream;
    import com.google.protobuf.InvalidProtocolBufferException;
    import com.google.protobuf.WireFormat;
    
    import java.io.ByteArrayOutputStream;
    import java.io.FileInputStream;
    import java.io.IOException;
    
    /**
     * Decodes raw protobuf bytes without a .proto schema and renders them as a
     * text tree, while picking chat and room-entry events out of the field stream.
     *
     * <p>Every decoded field is printed as {@code number.depth: value}. Selected
     * (field number, depth) pairs are additionally handed to
     * {@link #customProcess}, which prints chat lines and notifies a
     * {@link Listener} when somebody enters the room.
     *
     * <p>NOTE(review): the counter {@code i} is shared static state, so concurrent
     * decoding is presumably not intended; confirm before using from several threads.
     */
    public class DouyinRead {

        /** Callbacks raised while decoding; every method defaults to a no-op. */
        public static interface Listener{
            /**
             * Raised from the room-entry branch of {@code customProcess}.
             *
             * @param nickname     always passed as the empty string by this class
             * @param douyinNumber text of field 38 at depth 7
             * @param liveId       value remembered in {@code messageType[1]}
             */
            default void onComing(String nickname, String douyinNumber, String liveId){}

            /** Not invoked by the code in this class. */
            default void onSpeaking(String nickname, String douyinNumber){}
        }

        /** Running counter appended to every top-level message-type name. */
        private static int i = 0;

        /**
         * Reads a captured frame from a hard-coded path, decodes it and prints the tree.
         *
         * @throws IOException if the capture cannot be read or decoded
         */
        public static void main(String[] args) throws IOException {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            // try-with-resources: the capture file is closed even when a read fails.
            try (FileInputStream fis = new FileInputStream("F:\\dy抓包\\74_.txt")) {
                byte[] buf = new byte[2048];
                int len;
                while ((len = fis.read(buf)) > 0) {
                    baos.write(buf, 0, len);
                }
            }
            String s = decodeProto(baos.toByteArray(), false, new String[]{"", ""}, new Listener() {
                @Override
                public void onComing(String nickname, String douyinNumber, String liveId) {
                    System.out.println(nickname + "." + douyinNumber + "." + liveId + "来了");
                }
            });
            System.out.println(s);
        }

        /**
         * Main entry point: decodes {@code data} starting at depth 0.
         *
         * @param data        raw protobuf bytes
         * @param singleLine  {@code true} to separate fields with spaces instead of newlines and tabs
         * @param messageType two-slot scratch array: slot 0 tracks the current message type,
         *                    slot 1 the value later reported as {@code liveId}
         * @param listener    event sink; may be {@code null}
         * @return the rendered text tree
         * @throws IOException if the bytes cannot be read as protobuf fields
         */
        public static String decodeProto(byte[] data, boolean singleLine, String[] messageType, Listener listener) throws IOException {
            return decodeProto(ByteString.copyFrom(data), 0, singleLine, messageType, listener);
        }

        /**
         * Decodes {@code data} as a message nested {@code depth} levels deep.
         *
         * @throws IOException if the bytes cannot be read as protobuf fields
         */
        public static String decodeProto(ByteString data, int depth, boolean singleLine, String[] messageType, Listener listener) throws IOException {
            final CodedInputStream input = CodedInputStream.newInstance(data.asReadOnlyByteBuffer());
            return decodeProtoInput(input, depth, singleLine, messageType, listener);
        }

        /**
         * Custom per-field hook: inspects one decoded value and reacts to the
         * (field number, depth) pairs this tool cares about.
         *
         * @param number   field number
         * @param depth    nesting depth of the field
         * @param str      field value rendered as text
         * @param msgType  scratch array shared across the whole decode (see {@link #decodeProto})
         * @param listener event sink; may be {@code null}
         */
        private static void customProcess(int number, int depth, String str, String[] msgType, Listener listener){
            if (number == 2 && depth == 1) {
                System.out.println(str);
            }
            if (number == 12 && depth == 3) {
                System.out.println(str);
            }
            if (number == 1 && depth == 1) {
                // e.g. "WebcastChatMessage" followed by the running counter
                msgType[0] = str + i++;
            }

            if (msgType[0].startsWith("WebcastChatMessage")) {
                // nickname
                if (number == 3 && depth == 3) {
                    System.out.print(str);
                }
                // account
                if (number == 38 && depth == 3) {
                    System.out.print("("+str+")");
                }
                // chat text
                if (number == 3 && depth == 2) {
                    System.out.println(":" + str);
                }
            }

            // room-entry message
            if (msgType[0].startsWith("WebcastMemberMessage")) {
                if (number == 3 && depth == 3) {
                    msgType[1] = str;
                }
                if (number == 1 && depth == 4 && str.equals("live_room_enter_toast")) {
                    msgType[0] = "live_room_enter_toast";
                }
            }
            if (msgType[0].equals("live_room_enter_toast")) {
                // account
                if (number == 38 && depth == 7) {
                    // A caller that only wants the text tree may pass no listener.
                    if (listener != null) {
                        listener.onComing("", str, msgType[1]);
                    }
                }
                if (number == 68 && depth == 7) {
                    // Leave the room-entry state once field 68 is seen.
                    msgType[0] = "none";
                }
            }
        }

        /**
         * Reads fields from {@code input} until tag 0 or an end-group tag and renders them.
         *
         * <p>Varints are printed as signed 64-bit integers, fixed64 as doubles and
         * fixed32 as floats. A length-delimited field is first decoded as a nested
         * message; if that fails with an {@link IOException} its bytes are printed
         * as a quoted string instead.
         *
         * @throws IOException if a tag or value cannot be read, or the wire type is unknown
         */
        private static String decodeProtoInput(CodedInputStream input, int depth, boolean singleLine, String[] msgType, Listener listener) throws IOException {
            StringBuilder s = new StringBuilder("{ ");
            boolean foundFields = false;
            while (true) {
                final int tag = input.readTag();
                int type = WireFormat.getTagWireType(tag);
                if (tag == 0 || type == WireFormat.WIRETYPE_END_GROUP) {
                    break;
                }
                foundFields = true;
                protoNewline(depth, s, singleLine);

                final int number = WireFormat.getTagFieldNumber(tag);
                s.append(number).append(".").append(depth).append(": ");

                switch (type) {
                    case WireFormat.WIRETYPE_VARINT:
                        long lng = input.readInt64();
                        customProcess(number, depth, String.valueOf(lng), msgType, listener);
                        s.append(lng);
                        break;
                    case WireFormat.WIRETYPE_FIXED64:
                        s.append(Double.longBitsToDouble(input.readFixed64()));
                        break;
                    case WireFormat.WIRETYPE_LENGTH_DELIMITED:
                        ByteString data = input.readBytes();
                        try {
                            String submessage = decodeProto(data, depth + 1, singleLine, msgType, listener);
                            // Short payloads that also parse as a message may really be text.
                            if (data.size() < 30) {
                                boolean probablyString = true;
                                String str = new String(data.toByteArray(), Charsets.UTF_8);
                                for (char c : str.toCharArray()) {
                                    if (c < '\n') {
                                        probablyString = false;
                                        break;
                                    }
                                }
                                customProcess(number, depth, str, msgType, listener);
                                if (probablyString) {
                                    s.append("\"").append(str).append("\" ");
                                }
                            }
                            s.append(submessage);
                        } catch (IOException e) {
                            // Not a nested message: treat the payload as text. Decode as UTF-8,
                            // like the branch above, rather than with the platform charset.
                            String str = new String(data.toByteArray(), Charsets.UTF_8);
                            customProcess(number, depth, str, msgType, listener);
                            s.append('"').append(str).append('"');
                        }
                        break;
                    case WireFormat.WIRETYPE_START_GROUP:
                        s.append(decodeProtoInput(input, depth + 1, singleLine, msgType, listener));
                        break;
                    case WireFormat.WIRETYPE_FIXED32:
                        s.append(Float.intBitsToFloat(input.readFixed32()));
                        break;
                    default:
                        throw new InvalidProtocolBufferException("Invalid wire type");
                }

            }
            if (foundFields) {
                protoNewline(depth - 1, s, singleLine);
            }
            return s.append('}').toString();
        }

        /**
         * Appends the separator that precedes a field: one space in single-line
         * mode, otherwise a newline followed by {@code depth + 1} tabs.
         */
        private static void protoNewline(int depth, StringBuilder s, boolean noNewline) {
            if (noNewline) {
                s.append(" ");
                return;
            }
            s.append('\n');
            for (int i = 0; i <= depth; i++) {
                s.append("\t");
            }
        }
    }
    JAVA版

    maven依赖:

    <dependency>
    <groupId>com.google.guava</groupId>
    <artifactId>guava-base</artifactId>
    <version>r03</version>
    </dependency>

    <dependency>
    <groupId>com.google.protobuf</groupId>
    <artifactId>protobuf-javalite</artifactId>
    <version>3.8.0-rc-1</version>
    </dependency>

    python:

    原始源码(适用于 Python 2.7+):https://github.com/nevermoe/protobuf_decoder
    下面是修改为适配 Python 3.5+ 的 parse.py(解码只需要这一个文件就够了,其他文件其实没用)

    # -*- coding: utf-8 -*-
    import sys
    import codecs
    import struct
    import json
    import traceback
    
    # Human-readable decode log: ParseData appends formatted lines here as it visits fields.
    strings = []
    
    def GetDynamicWireFormat(data, start, end):
        """Decode the field tag that begins at ``data[start]``.

        Returns ``(next_offset, wire_type, field_number)``. When a multi-byte
        tag runs past ``end`` before its final byte, returns
        ``(None, None, None)``.
        """
        lead = data[start]
        wire_type = lead & 0x7
        if not lead & 0x80:
            # Single-byte tag: the field number is everything above the wire type.
            return (start + 1, wire_type, lead >> 3)

        # Multi-byte tag: fold the 7-bit groups together, least significant first.
        raw = 0
        shift = 0
        cursor = start
        while True:
            if cursor >= end:
                return (None, None, None)
            current = data[cursor]
            raw |= (current & 0x7F) << shift
            shift += 7
            cursor += 1
            if not current & 0x80:
                break

        # Drop the three wire-type bits to leave the field number.
        return (cursor, wire_type, raw >> 3)
    
    
    
    #return (num, newStart, success)
    def RetrieveInt(data, start, end):
        """Read one base-128 varint from ``data`` beginning at ``start``.

        Returns ``(value, next_offset, True)`` on success, or
        ``(None, None, False)`` when ``end`` is reached before the final byte.
        """
        value = 0
        shift = 0
        cursor = start
        while True:
            if cursor >= end:
                return (None, None, False)
            current = data[cursor]
            cursor += 1
            # Each byte contributes its low seven bits, least significant group first.
            value |= (current & 0x7F) << shift
            shift += 7
            if not current & 0x80:
                return (value, cursor, True)
    
    
    def ParseRepeatedField(data, start, end, message, depth = 0):
        """Read back-to-back varints from ``data[start:end]`` into ``message``.

        Returns ``True`` once the range is exhausted, or ``False`` as soon as a
        varint is truncated; values read before the failure stay in ``message``.
        ``depth`` is accepted but not used.
        """
        cursor = start
        while cursor < end:
            value, cursor, ok = RetrieveInt(data, cursor, end)
            if not ok:
                return False
            message.append(value)
        return True
    
    def ParseData(data, start, end, messages, depth = 0):
        """Decode the protobuf fields in ``data[start:end]`` into ``messages``.

        Each decoded field is stored under the key ``'FF:OO:kind'`` where ``FF``
        is the zero-padded field number, ``OO`` the zero-padded position of the
        field inside this message, and ``kind`` one of ``Varint``, ``64-bit``,
        ``32-bit``, ``embedded message``, ``string``, ``repeated`` or ``bytes``.
        A human-readable line per field is also appended to the module-level
        ``strings`` list, prefixed with ``depth`` tabs.

        Returns ``True`` when the loop reaches ``end``, and ``False`` when a tag
        or value is truncated or the wire type is not 0, 1, 2 or 5.
        """
        global strings
        #print strings
        # Position of the next field within this message; becomes the OO part of the key.
        ordinary = 0
        while start < end:
            (start, wire_type, field_number) = GetDynamicWireFormat(data, start, end)
            if start == None:
                return False
    
            if wire_type == 0x00:#Varint
                #(num, start, success) = RetrieveInt(data, start+1, end)
                (num, start, success) = RetrieveInt(data, start, end)
                if success == False:
                    return False
    
                if depth != 0:
                    strings.append('\t'*depth)
                strings.append("(%d) Varint: %d\n" % (field_number, num))
                messages['%02d:%02d:Varint' % (field_number,ordinary)] = num
                ordinary  = ordinary + 1
    
            elif wire_type == 0x01:#64-bit
                # Assemble the eight bytes little-endian: walk from the last byte
                # down so the first byte ends up least significant.
                num = 0
                pos = 7
                while pos >= 0:
                    #if start+1+pos >= end:
                    if start+pos >= end:
                        return False
                    #num = (num << 8) + ord(data[start+1+pos])
                    num = (num << 8) + data[start+pos]
                    pos = pos - 1
    
                #start = start + 9
                start = start + 8
                # Reinterpret the bit pattern as a double. 'q' is a signed format, so
                # a value with the top bit set makes pack() raise and the field is
                # kept as a plain integer instead.
                try:
                    floatNum = struct.unpack('d',struct.pack('q',int(hex(num),16)))
                    floatNum = floatNum[0]
                except:
                    floatNum = None
                    
                if depth != 0:
                    strings.append('\t'*depth)
                if floatNum != None:
                    strings.append("(%d) 64-bit: 0x%x / %f\n" % (field_number, num, floatNum))
                    messages['%02d:%02d:64-bit' % (field_number,ordinary)] = floatNum
                else:
                    strings.append("(%d) 64-bit: 0x%x\n" % (field_number, num))
                    messages['%02d:%02d:64-bit' % (field_number,ordinary)] = num
    
    
                ordinary = ordinary + 1
    
                
            elif wire_type == 0x02:#Length-delimited
                # Remember where this field's log lines begin so a failed guess can be rolled back.
                curStrIndex = len(strings)
                #(stringLen, start, success) = RetrieveInt(data, start+1, end)
                (stringLen, start, success) = RetrieveInt(data, start, end)
                if success == False:
                    return False
                #stringLen = ord(data[start+1])
                if depth != 0:
                    strings.append('\t'*depth)
                strings.append("(%d) embedded message:\n" % field_number)
                messages['%02d:%02d:embedded message' % (field_number, ordinary)] = {}
                # Declared length runs past the enclosing range: give up on this message.
                if start+stringLen > end:
                    del strings[curStrIndex + 1:]    #pop failed result
                    messages.pop('%02d:%02d:embedded message' % (field_number, ordinary), None)
                    return False
    
                # First guess: the payload is itself a message.
                ret = ParseData(data, start, start+stringLen, messages['%02d:%02d:embedded message' % (field_number, ordinary)], depth+1)
                #print '%d:%d:embedded message' % (field_number, ordinary)
                if ret == False:
                    del strings[curStrIndex + 1:]    #pop failed result
                    #print 'pop: %d:%d:embedded message' % (field_number, ordinary)
                    messages.pop('%02d:%02d:embedded message' % (field_number, ordinary), None)
                    #print messages
                    if depth != 0:
                        strings.append('\t'*depth)
    
                    # NOTE(review): this "repeated" header is logged before the string
                    # attempt, so it also precedes fields that end up as strings.
                    strings.append("(%d) repeated:\n" % field_number)
                    # Second guess: a UTF-8 string.
                    try:
                        data[start:start+stringLen].decode('utf-8')# .encode('utf-8')
                        strings.append("(%d) string: %s\n" % (field_number, data[start:start+stringLen]))
                        messages['%02d:%02d:string' % (field_number, ordinary)] = data[start:start+stringLen].decode('utf-8')
                    except:
                       # Third guess: a run of packed varints.
                       if depth != 0:
                           strings.append('\t'*depth)
    
                       strings.append("(%d) repeated:\n" % field_number)
                       messages['%02d:%02d:repeated' % (field_number, ordinary)] = []
                       ret = ParseRepeatedField(data, start, start+stringLen, messages['%02d:%02d:repeated' % (field_number, ordinary)], depth+1)
                       if ret == False:
                           # Last resort: keep the raw bytes as a ':'-joined list of hex literals.
                           del strings[curStrIndex + 1:]     #pop failed result
                           messages.pop('%02d:%02d:repeated' % (field_number, ordinary), None)
                           #print traceback.format_exc()
                           hexStr = ['0x%x' % x for x in data[start:start+stringLen]]
                           hexStr = ':'.join(hexStr)
                           strings.append("(%d) bytes: %s\n" % (field_number, hexStr))
                           messages['%02d:%02d:bytes' % (field_number, ordinary)] = hexStr
    
                ordinary = ordinary + 1
                #start = start+2+stringLen
                start = start+stringLen
    
            elif wire_type == 0x05:#32-bit
                # Same little-endian assembly as the 64-bit case, over four bytes.
                num = 0
                pos = 3
                while pos >= 0:
    
                    #if start+1+pos >= end:
                    if start+pos >= end:
                        return False
                    #num = (num << 8) + ord(data[start+1+pos])
                    num = (num << 8) + data[start+pos]
                    pos = pos - 1
    
                #start = start + 5
                start = start + 4
                # 'i' is signed: values with the top bit set fall back to the integer form.
                try:
                    floatNum = struct.unpack('f',struct.pack('i',int(hex(num),16)))
                    floatNum = floatNum[0]
                except:
                    floatNum = None
    
                    
                if depth != 0:
                    strings.append('\t'*depth)
                if floatNum != None:
                    strings.append("(%d) 32-bit: 0x%x / %f\n" % (field_number, num, floatNum))
                    messages['%02d:%02d:32-bit' % (field_number,ordinary)] = floatNum
                else:
                    strings.append("(%d) 32-bit: 0x%x\n" % (field_number, num))
                    messages['%02d:%02d:32-bit' % (field_number,ordinary)] = num 
    
                ordinary = ordinary + 1
    
    
            else:
                # Any other wire type is not handled.
                return False
    
        return True
    
    def ParseProto(fileName):
        """Read ``fileName`` as raw protobuf bytes and return the decoded dict.

        The result of ``ParseData`` is not checked, so the returned dict holds
        whatever fields were decoded before any failure.
        """
        # The context manager closes the handle even if read() raises.
        with open(fileName, "rb") as f:
            data = f.read()

        messages = {}
        ParseData(data, 0, len(data), messages)

        return messages
    
    def GenValueList(value):
        """Return the base-128 varint encoding of ``value`` as a list of byte values.

        A negative ``value`` yields an empty list.
        """
        encoded = []
        if value < 0:
            return encoded

        remaining = value
        while True:
            low = remaining & 0x7F
            remaining >>= 0x7
            # Every byte except the last carries the continuation bit.
            encoded.append((low | 0x80) if remaining else low)
            if not remaining:
                return encoded
    
    
    def WriteValue(value, output):
        """Append the base-128 varint encoding of ``value`` to ``output``.

        Returns the number of bytes appended; a negative ``value`` appends
        nothing and returns 0.
        """
        written = 0
        remaining = value
        while remaining >= 0:
            low = remaining & 0x7F
            remaining >>= 0x7
            written += 1
            if remaining == 0:
                # Final byte: no continuation bit.
                output.append(low)
                break
            output.append(low | 0x80)

        return written
    
    def WriteVarint(field_number, value, output):
        """Append a varint field (tag plus value) to ``output``.

        Returns the number of bytes appended. A negative ``value`` is written
        as its 64-bit two's-complement bit pattern (ten bytes), which is how
        protobuf encodes negative int32/int64 values. Previously only the tag
        was written for a negative value, leaving a corrupt stream.
        """
        wireFormat = (field_number << 3) | 0x00
        byteWritten = WriteValue(wireFormat, output)

        if value < 0:
            value &= 0xFFFFFFFFFFFFFFFF
        byteWritten += WriteValue(value, output)

        return byteWritten
    
    def Write64bitFloat(field_number, value, output):
        """Append a fixed64 field holding the double ``value`` to ``output``.

        Returns the number of bytes appended (tag bytes plus eight).
        """
        wireFormat = (field_number << 3) | 0x01
        byteWritten = WriteValue(wireFormat, output)

        # bytes has no .encode('hex') on Python 3; iterating the packed bytes
        # yields the integer byte values directly. '<d' fixes the byte order to
        # little-endian, as the wire format requires, regardless of host.
        payload = struct.pack('<d', value)
        output.extend(payload)
        byteWritten += len(payload)

        return byteWritten
    
    def Write64bit(field_number, value, output):
        """Append a fixed64 field holding the integer ``value`` to ``output``.

        The low 64 bits of ``value`` are written least significant byte first.
        Returns the number of bytes appended (tag bytes plus eight).
        """
        tag = (field_number << 3) | 0x01
        written = WriteValue(tag, output)

        output.extend((value >> shift) & 0xFF for shift in range(0, 64, 8))

        return written + 8
    
    def Write32bitFloat(field_number, value, output):
        """Append a fixed32 field holding the float ``value`` to ``output``.

        Returns the number of bytes appended (tag bytes plus four).
        """
        wireFormat = (field_number << 3) | 0x05
        byteWritten = WriteValue(wireFormat, output)

        # bytes has no .encode('hex') on Python 3; iterating the packed bytes
        # yields the integer byte values directly. '<f' fixes the byte order to
        # little-endian, as the wire format requires, regardless of host.
        payload = struct.pack('<f', value)
        output.extend(payload)
        byteWritten += len(payload)

        return byteWritten
    
    def Write32bit(field_number, value, output):
        """Append a fixed32 field holding the integer ``value`` to ``output``.

        The low 32 bits of ``value`` are written least significant byte first.
        Returns the number of bytes appended (tag bytes plus four).
        """
        tag = (field_number << 3) | 0x05
        written = WriteValue(tag, output)

        output.extend((value >> shift) & 0xFF for shift in range(0, 32, 8))

        return written + 4
    
    def WriteRepeatedField(message, output):
        """Append every value in ``message`` to ``output`` as a varint.

        Returns the total number of bytes appended.
        """
        return sum(WriteValue(item, output) for item in message)
    
    
    def Decode(binary):
        """Decode ``binary`` into a message dict.

        Returns the dict on success, or ``False`` when ``ParseData`` reports
        a failure.
        """
        parsed = {}
        if ParseData(binary, 0, len(binary), parsed) == False:
            return False
        return parsed
    
    
    def ReEncode(messages, output):
        """Serialize a decoded message dict back to protobuf bytes.

        ``messages`` uses keys of the form ``'FF:OO:kind'`` (field number,
        position within the message, value kind). Fields are emitted in
        position order and their byte values appended to the list ``output``.
        Keys whose kind is not recognised are skipped.

        Returns the number of bytes appended.
        """
        byteWritten = 0
        # dict.iterkeys() does not exist on Python 3; sorting the dict iterates its keys.
        for key in sorted(messages, key=lambda x: int(x.split(':')[1])):
            keyList = key.split(':')
            field_number = int(keyList[0])
            wire_type = keyList[2]
            value = messages[key]

            if wire_type == 'Varint':
                byteWritten += WriteVarint(field_number, value, output)
            elif wire_type == '32-bit':
                if isinstance(value, float):
                    byteWritten += Write32bitFloat(field_number, value, output)
                else:
                    byteWritten += Write32bit(field_number, value, output)
            elif wire_type == '64-bit':
                if isinstance(value, float):
                    byteWritten += Write64bitFloat(field_number, value, output)
                else:
                    byteWritten += Write64bit(field_number, value, output)
            elif wire_type in ('embedded message', 'repeated'):
                wireFormat = (field_number << 3) | 0x02
                byteWritten += WriteValue(wireFormat, output)
                # The payload length is only known after it is written, so
                # remember where the length prefix belongs and splice it in.
                index = len(output)
                if wire_type == 'embedded message':
                    tmpByteWritten = ReEncode(value, output)
                else:
                    tmpByteWritten = WriteRepeatedField(value, output)
                valueList = GenValueList(tmpByteWritten)
                output[index:index] = valueList
                byteWritten += tmpByteWritten + len(valueList)
            elif wire_type == 'string':
                wireFormat = (field_number << 3) | 0x02
                byteWritten += WriteValue(wireFormat, output)

                # Iterating bytes on Python 3 already yields integer byte values.
                payload = value.encode('utf-8')
                byteWritten += WriteValue(len(payload), output)

                output.extend(payload)
                byteWritten += len(payload)
            elif wire_type == 'bytes':
                wireFormat = (field_number << 3) | 0x02
                byteWritten += WriteValue(wireFormat, output)

                # Stored as ':'-joined hex literals such as '0x1:0xff'; an empty
                # string means an empty payload rather than one unparsable item.
                bytesStr = [int(byte, 16) for byte in value.split(':')] if value else []
                byteWritten += WriteValue(len(bytesStr), output)

                output.extend(bytesStr)
                byteWritten += len(bytesStr)

        return byteWritten
        
    
    def SaveModification(messages, fileName):
        """Re-encode ``messages`` and write the resulting bytes to ``fileName``."""
        output = list()
        ReEncode(messages, output)
        # The context manager closes the file even if the write raises.
        with open(fileName, 'wb') as f:
            f.write(bytearray(output))
        
    
    if __name__ == "__main__":
        # Command line:
        #   dec     decode tmp.pb into tmp.json
        #   enc     encode tmp.json back into tmp.pb
        #   <file>  decode <file>, print it, round-trip it through tmp.json and
        #           write the re-encoded bytes to "modified"
        if len(sys.argv) < 2:
            sys.exit("usage: %s dec|enc|<protobuf file>" % sys.argv[0])

        if sys.argv[1] == "dec":
            messages = ParseProto('tmp.pb')

            # json.dump() takes no `encoding` argument on Python 3; the text-mode
            # file object performs the UTF-8 encoding.
            with open('tmp.json', 'w', encoding='utf-8') as f:
                json.dump(messages, f, indent=4, sort_keys=True, ensure_ascii=False)

        elif sys.argv[1] == "enc":

            with open('tmp.json', 'r', encoding='utf-8') as f:
                messages = json.load(f)

            SaveModification(messages, "tmp.pb")

        else:
            messages = ParseProto(sys.argv[1])

            print(json.dumps(messages, indent=4, sort_keys=True, ensure_ascii=False))

            # modify any field you like
            #messages['01:00:embedded message']['01:00:string'] = "あなた"

            # Dump and reload the messages through JSON so the re-encoder sees
            # exactly what an edited tmp.json would contain.
            with open('tmp.json', 'w', encoding='utf-8') as f:
                json.dump(messages, f, indent=4, sort_keys=True, ensure_ascii=False)
            with open('tmp.json', 'r', encoding='utf-8') as f:
                messages = json.load(f)

            # the modification is saved in file named "modified"
            SaveModification(messages, "modified")

    调用示例:

    def pxprint(dict, indent=0):
        """Print a decoded message dict as an indented ``"key":"value"`` listing.

        Nested dicts are printed between ``"key": {`` and ``}`` lines, one
        four-space step deeper per level.
        """
        pad = "    " * (indent + 1)
        for key, value in dict.items():
            if not isinstance(value, Dict):
                try:
                    print(pad + f'"{key}":"{value}"')
                except UnicodeEncodeError:
                    # Some terminals cannot print multi-byte characters and raise this
                    # error, so print a placeholder instead. If the data is stored in
                    # a database it must support such characters too (for MySQL, set
                    # the database and table collation to utf8mb4_general_ci).
                    print(pad + f'"{key}":"error-v"')
                continue

            print(pad + f'"{key}": {{')
            pxprint(value, indent + 1)
            print(pad + '}')
    
    def main():
        """Decode the captured frame at the hard-coded path and pretty-print it."""
        decoded = pbparser.ParseProto(r"F:\dy抓包\74_.txt")
        pxprint(decoded)
    
    # Run the example when this snippet is executed.
    main()
    调用示例

    解析结果样例:

    完!

  • 相关阅读:
    【P000-004】交易费计算系统,功能类规划
    【P000-003】交易费计算系统,从股票信息网络接口获取信息
    ASP页面的执行顺序
    Python ImportError: DLL load failed: %1 不是有效的 Win32 应用程序
    VSCode运行已有代码
    WPF MVVM-TreeView数据源添加了节点,UI没有刷新
    MapGIS二次开发注意事项
    把echarts嵌入winform窗口注意事项
    host is not allowed to connect mysql解决方法
    SqlDbx连接Oracle数据库
  • 原文地址:https://www.cnblogs.com/Denny_Yang/p/15660376.html
Copyright © 2011-2022 走看看