zoukankan      html  css  js  c++  java
  • HTTP/1.1 chunked 解码

    0、简介

    1、定义

    RFC定义 https://tools.ietf.org/html/rfc2616#section-3.6.1

    Chunked-Body   = *chunk
                     last-chunk
                     trailer
                     CRLF
    
    chunk          = chunk-size [ chunk-extension ] CRLF
                     chunk-data CRLF
    chunk-size     = 1*HEX
    last-chunk     = 1*("0") [ chunk-extension ] CRLF
    
    chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
    chunk-ext-name = token
    chunk-ext-val  = token | quoted-string
    chunk-data     = chunk-size(OCTET)
    trailer        = *(entity-header CRLF)

    2.1、Entity Header Fields

    https://tools.ietf.org/html/rfc2616#section-7.1

    2、解析

    解码伪代码 https://tools.ietf.org/html/rfc2616#section-19.4.6

    length := 0  //body总长度初始化为0
    read chunk-size, chunk-extension (if any) and CRLF  //读取第一行 获取 第一块 chunked 数据的大小(chunk扩展项)
    while (chunk-size > 0) {
       read chunk-data and CRLF  //读取chunk-data, chunk-data 的长度为 chunk-size, 后面跟 CRLF 表示结束, chunk-size 不包含 CRLF
    
       append chunk-data to entity-body  //将chunk-data 追加到 实体body 中(解码后) 
       length := length + chunk-size  //body总长度更新
       read chunk-size and CRLF  //读取下一个 chunk头 获取chunk-size
    }
    //退出循环说明 chunk-size 为0, 即last-chunk, last-chunk后面可能会跟有trailer
    read entity-header  //读取 entity-header
    while (entity-header not empty) {  //读到空行(即整行内容只是 CRLF 这两个字节)时结束
       append entity-header to existing header fields
       read entity-header
    }
    Content-Length := length
    Remove "chunked" from Transfer-Encoding

    运行方式 ./a.out -u http://www.httpwatch.com/httpgallery/chunked/chunkedimage.aspx -t target.jpg

    #define _GNU_SOURCE /* for memmem */
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <netdb.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <stdlib.h>
    #include <string.h>
    
    /* chunked-encoding example URL
    GET http://www.httpwatch.com/httpgallery/chunked/chunkedimage.aspx
    */
    
    /* Read the -u (url) and -t (store path) command-line options into *url / *target. */
    void parse_paramters(int argc, char* argv[], char** url, char** target);
    /*
     * Derive the request pieces (*host, *service, *uri and, when target is
     * non-NULL, a default *target file name) from url. The url buffer is
     * modified in place.
     */
    void parse_req_url(char* url, char** host, char** service, char** uri, char** target);
    
    /* 
     * host can be domain-name or ip-address 
     * service can be well-known service name("http"/"ftp") or port number
     * Returns the connected socket descriptor; exits the process on failure.
     */
    int connect_to_server(char* host, char* service);
    
    /* Build the GET request for uri/host and send it on fd. */
    void send_req_to_server(int fd, char* uri, char* host, char* service);
    
    /* Receive the HTTP response from fd and write its body to the file store_path. */
    void recv_res_from_server(int fd, char* store_path);
    
    
    /*
     * Entry point: parse the command line, send a GET request for the given
     * URL and store the response body (chunked bodies are decoded) in the
     * target file.
     *
     * Usage: ./a.out -u <url> [-t <store_path>]
     */
    int main(int argc, char* argv[])
    {
        int sfd; /* socket file descriptor */
        /* every pointer starts as NULL so a missing option or an unset output
           parameter is detectable instead of being read uninitialized */
        char *url = NULL, *host = NULL, *service = NULL, *uri = NULL, *target = NULL;
    
        /* parse request */
        parse_paramters(argc, argv, &url, &target);
        if (NULL == url) { /* -u was not given: nothing to fetch */
            fprintf(stderr, "Usage: %s [-u url] [-t store_path]\n", argv[0]);
            exit(EXIT_FAILURE);
        }
        /* only ask for a default target name when -t was not supplied */
        parse_req_url(url, &host, &service, &uri, target ? NULL : &target);
        if (!(host && service && uri && target)) {
            fprintf(stderr, "could not parse url\n");
            exit(EXIT_FAILURE);
        }
        printf("Host   : [%s]\n", host);
        printf("Port   : [%s]\n", service);
        printf("Uri    : [%s]\n", uri);
        printf("Target : [%s]\n", target);
    
        /* create the connection to server */
        sfd = connect_to_server(host, service);
    
        /* send http req to server */
        send_req_to_server(sfd, uri, host, service);
        free(uri);
        free(host);
        free(service);
    
        /* get response from server */
        recv_res_from_server(sfd, target);
        free(target);
    
        /* cleanup */
        shutdown(sfd, SHUT_RDWR);
        close(sfd);
    
        exit(EXIT_SUCCESS);
    }
    
    
    /*
     * Parse the command-line options.
     *
     *   -u url         *url is pointed at the option argument (not copied)
     *   -t store_path  *target receives a strdup()'ed copy; the caller frees it
     *
     * Outputs that have no matching option are left untouched. Prints the
     * usage text and exits with EXIT_FAILURE on bad arguments or an unknown
     * option.
     */
    void parse_paramters(int argc, char* argv[], char** url, char** target)
    {
        int opt;
        /* argv may be NULL in the error path below, so never index it blindly */
        const char* prog = (argv && (argc > 0) && argv[0]) ? argv[0] : "a.out";
    
        if (!(url && target && (argc > 1) && argv)) {
            fprintf(stderr, "Usage: %s [-u url] [-t store_path]\n", prog);
            exit(EXIT_FAILURE);
        }
    
        while ((opt = getopt(argc, argv, "u:t:")) != -1) {
            switch (opt) {
            case 'u':
                *url = optarg;
                break;
            case 't':
                *target = strdup(optarg);
                if (NULL == *target) {
                    perror("strdup() failed");
                    exit(EXIT_FAILURE);
                }
                break;
            default: /* '?' */
                fprintf(stderr, "Usage: %s [-u url] [-t store_path]\n", prog);
                exit(EXIT_FAILURE);
            }
        }
    }
    
    
    /*
     * Split a request URL of the form [scheme://]host[:port][/path] into its parts.
     *
     * url     : modified in place (the host part gets NUL-terminated)
     * host    : out, strdup()'ed host name or address
     * service : out, strdup()'ed port string; "80" when the URL has no port
     * uri     : out, strdup()'ed request path; "/" when the URL has no path
     * target  : out, optional (may be NULL); strdup()'ed default file name taken
     *           from the last path segment, "index.html" when that is empty
     *
     * All returned strings are owned by the caller. Exits with EXIT_FAILURE
     * when a copy cannot be allocated.
     */
    void parse_req_url(char* url, char** host, char** service, char** uri, char** target)
    {
        char* token;
        const char* name;
    
        /* skip scheme */
        token = strstr(url, "://");
        if (token) {
            url = token + sizeof("://") - 1; 
        }
    
        /* find uri */
        token = strchr(url, '/');
        if (NULL == token) {
            *uri = strdup("/");
            if (target) {
                *target = strdup("index.html");
            }
        }
        else {
            *uri = strdup(token);
            *token = '\0'; /* cut the path off so url now holds host[:port] only */
            if (target) {
                name = (*uri) ? strrchr(*uri, '/') : NULL;
                /* a path ending in '/' has no file name: use the default one */
                *target = strdup((name && name[1] != '\0') ? name + 1 : "index.html");
            }
        }
        
        /* find port */
        token = strchr(url, ':');
        if (token) {
            *token = '\0'; /* terminate the host name */
        }
        if (token && token[1] != '\0') {
            *service = strdup(token + 1);
        }
        else { /* no port, or an empty one ("host:") */
            *service = strdup("80");
        }
        *host = strdup(url);
    
        if (!(*uri && *service && *host && (!target || *target))) {
            perror("strdup() failed");
            exit(EXIT_FAILURE);
        }
    }
    
    
    /*
     * Resolve host/service with getaddrinfo() and open a TCP connection to the
     * first address that accepts it.
     *
     * Returns the connected socket descriptor. Exits with EXIT_FAILURE when
     * resolution fails or no address can be connected.
     */
    int connect_to_server(char* host, char* service)
    {
        int ret;
        int sfd = -1;
        int connected;
        struct addrinfo hints;
        struct addrinfo *result, *rp;
    
        memset(&hints, 0, sizeof(struct addrinfo));
        hints.ai_family = AF_UNSPEC;     /* Allow IPv4 or IPv6 */
        hints.ai_socktype = SOCK_STREAM; /* stream socket */
        hints.ai_flags = AI_ADDRCONFIG; /* return only address families configured on the local system */
        hints.ai_protocol = IPPROTO_TCP; /* TCP protocol */
        ret = getaddrinfo(host, service, &hints, &result);
        if (ret != 0) {
            fprintf(stderr, "getaddrinfo() failed: %s\n", gai_strerror(ret));
            exit(EXIT_FAILURE);
        }
        
        /* getaddrinfo() returns a list of address structures.
            Try each address until we successfully connect(2).
            If socket(2) (or connect(2)) fails, we (close the socket and) try the next address. */
        for (rp = result; rp != NULL; rp = rp->ai_next) {
            sfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
            if (sfd == -1)
                continue;
        
            if (connect(sfd, rp->ai_addr, rp->ai_addrlen) != -1)
                break;                    /* Success */
        
            close(sfd);
        }
        
        /* remember the outcome before the list (and rp with it) is released */
        connected = (rp != NULL);
    
        /* No longer needed */
        freeaddrinfo(result);
    
        if (!connected) { /* No address succeeded */
            fprintf(stderr, "Could not connect to server %s:%s\n", host, service);
            exit(EXIT_FAILURE);
        }
    
        return sfd;
    }
    
    
    
    /*
     * Build a minimal HTTP/1.1 GET request ("Connection: close") for uri on
     * host, send all of it on fd and echo it to stdout.
     *
     * service is accepted for interface symmetry but is not used.
     * Exits with EXIT_FAILURE when the request cannot be built or sent.
     */
    void send_req_to_server(int fd, char* uri, char* host, char* service)
    {
        int sn_len;
        int sent;
        ssize_t n;
        char* send_buf;
        size_t buf_len;
        /* header lines end with CRLF; an empty line terminates the header */
        static const char req_header[] = "GET %s HTTP/1.1\r\n"
            "Host: %s\r\n"
            "Connection: close\r\n\r\n";
    
        (void)service;
    
        buf_len = sizeof(req_header) + strlen(uri) + strlen(host) + 5;
        send_buf = calloc(buf_len, 1);
        if (NULL == send_buf) {
            perror("calloc() failed");
            exit(EXIT_FAILURE);
        }
        sn_len = snprintf(send_buf, buf_len, req_header, uri, host);
        if ((sn_len < 0) || ((size_t)sn_len >= buf_len)) {
            fprintf(stderr, "snprintf() failed\n");
            exit(EXIT_FAILURE);
        }
        
        /* send() may accept only part of the buffer: loop until all is written */
        for (sent = 0; sent < sn_len; sent += (int)n) {
            n = send(fd, send_buf + sent, (size_t)(sn_len - sent), 0);
            if (n == -1) {
                perror("send() failed");
                exit(EXIT_FAILURE);
            }
        }
    
        /* print req header */
        printf("\n%s", send_buf);
        
        free(send_buf);
    }
    
    
    
    /*
     * Callback that processes len freshly received bytes; data is the
     * srv_res_t describing the response. The receive loop stops when the
     * callback returns 1.
     */
    typedef int (*DATA_PROC_CB)(void* data, ssize_t len);
    
    /* States of the response-header parser (proc_res_header). */
    typedef enum _srv_header_state {
        HEADER_STATE_LINE_START = 0, /* nothing parsed yet */
        HEADER_STATE_LINE_DATA, /* inside a header line */
        HEADER_STATE_LINE_END, /* met the '\r' that ends a header line */
        HEADER_STATE_LINE_DONE,  /* met the '\n' that ends a header line */
        HEADER_STATE_HEAD_END,  /* met the '\r' of the empty line that ends the header */
        HEADER_STATE_HEAD_DONE  /* met the '\n' of the empty line: header complete */
    } srv_header_state_t;
    
    /*
     * States of the chunked-body parser (proc_res_body).
     * NOTE(review): proc_res_body never enters BODY_STATE_CHUNK_LINE_END; it
     * moves from LINE_DATA straight to LINE_DONE on '\r' and then expects '\n'.
     */
    typedef enum _srv_body_state {
        BODY_STATE_CHUNK_LINE_START = 0, /* at the first byte of a chunk-size line */
        BODY_STATE_CHUNK_LINE_DATA, /* inside the chunk-size line:  chunk-size [ chunk-extension ] CRLF  */
        BODY_STATE_CHUNK_LINE_END,  /* meet '\r' */
        BODY_STATE_CHUNK_LINE_DONE, /* meet '\n' */
        BODY_STATE_CHUNK_DATA_START, /* copying chunk-data to the output file */
        BODY_STATE_CHUNK_DATA_END, /* chunk-data complete, expecting '\r' */
        BODY_STATE_CHUNK_DATA_DONE /* expecting the '\n' that closes the chunk */
    } srv_body_state_t;
    
    
    /* Receive buffer plus parser state for one HTTP response. */
    typedef struct _srv_res {
        /* recv buf */
        unsigned char* buf_ptr; /* where the next recv() stores data and where the parsers read it */
        unsigned char* buf_start; /* start of the allocation (the pointer that is freed) */
        size_t buf_len; /* total length */
        size_t buf_remain; /* unused length, passed to recv() as the read size */
    
        /* buf proc */
        DATA_PROC_CB data_proc; /* callback currently fed with received data */
        DATA_PROC_CB res_header_proc; /* header parser */
        srv_header_state_t header_state;
        unsigned char* header_line_start; /* first byte of the header line being parsed */
        
        DATA_PROC_CB res_body_proc; /* body parser, installed once the header is complete */
        srv_body_state_t body_state;
        unsigned char* body_chunk_start; /* first byte of the current chunk-size line */
        
        FILE* store_file; /* output file receiving the decoded body */
        /* body */
        int is_chunked_encoding; /* set to 1 when a header line equals CHUNKED_ENCODING */
        unsigned long chunked_size; /* bytes of the current chunk not yet written */
        unsigned long content_length; /* Content-Length value; for chunked bodies the running total of chunk sizes */
    } srv_res_t;
    
    /* Header line that marks a chunked body; compared byte-for-byte against a whole line. */
    #define CHUNKED_ENCODING "Transfer-Encoding: chunked"
    /* Header prefix preceding the decimal body length. NOTE(review): the macro
       name is misspelled ("CONTETN"); rename it together with its users. */
    #define CONTETN_LENGTH   "Content-Length: "
    
    int proc_res_header(void* data, ssize_t len)
    {
        int i;
        int field_len;
        unsigned char ch;
        unsigned char* content_length;
        srv_res_t* res = (srv_res_t*)data;
    
        for (i = 0; i < len && res->header_state != HEADER_STATE_HEAD_DONE; i++) {
            ch = res->buf_ptr[i];
            switch (res->header_state) {
            case HEADER_STATE_LINE_START:
                res->header_state = HEADER_STATE_LINE_DATA;
                res->header_line_start = res->buf_ptr + i;
                break;
            case HEADER_STATE_LINE_DATA:
                if (ch == '
    ') {
                    res->header_state = HEADER_STATE_LINE_END;
                }
                break;
            case HEADER_STATE_LINE_END:
                if (ch == '
    ') {
                    res->header_state = HEADER_STATE_LINE_DONE;
                    field_len = res->buf_ptr + i - res->header_line_start - 1;
                    /* search Transfer-Encoding */
                    if (!res->is_chunked_encoding && (field_len == sizeof(CHUNKED_ENCODING)-1) && 
                        !memcmp(res->header_line_start, CHUNKED_ENCODING, sizeof(CHUNKED_ENCODING)-1)) {
                        res->is_chunked_encoding = 1;
                    }
                    /* search Content-Length */
                    if (!res->content_length) {
                        content_length = memmem(res->header_line_start, field_len, CONTETN_LENGTH, sizeof(CONTETN_LENGTH)-1);
                        if (content_length) {
                            res->content_length = strtoul(content_length + sizeof(CONTETN_LENGTH) - 1, NULL, 10);
                        }
                    }
                        
                    /* print header line */
                    fprintf(stdout, "33[45m"); /* color start */
                    fwrite(res->header_line_start, 1, field_len, stdout);
                    fprintf(stdout, "33[0m
    "); /* color end */
                }
                else {
                    fprintf(stderr, "invalid header found
    ");
                    exit(EXIT_FAILURE);
                }
                break;
            case HEADER_STATE_LINE_DONE:
                if (ch == '
    ') {
                    res->header_state = HEADER_STATE_HEAD_END;
                }
                else {
                    res->header_state = HEADER_STATE_LINE_DATA;
                    res->header_line_start = res->buf_ptr + i;
                }
                break;
            case HEADER_STATE_HEAD_END:
                if (ch == '
    ') {
                    res->header_state = HEADER_STATE_HEAD_DONE;
                    fprintf(stdout, "
    33[31m=== parse header done, chunked[%d] content-length[%lu] === 33[0m
    
    ", 
                        res->is_chunked_encoding, res->content_length);
                }
            default:
                break;
            }
        }
    
        if (res->header_state == HEADER_STATE_HEAD_DONE) {
            res->data_proc = res->res_body_proc;
            if ((i + 1) < len) {/* found body data */
                res->buf_ptr += i;
                res->buf_remain -= len;
                return res->data_proc(res, len - i);
            }
        }
        else { /* header not finish */
            res->buf_remain -= len;
            if (res->buf_remain <= 0) {
                fprintf(stderr, "large header found
    ");
                exit(EXIT_FAILURE);
            }
            res->buf_ptr += len;
        }
    
        return 0;
    }
    
    int proc_res_body(void* data, ssize_t len)
    {
        int i;
        int data_left;
        unsigned char ch;
        srv_res_t* res = (srv_res_t*)data;
    
        /* not chunked encoding */
        if (!res->is_chunked_encoding) {
            fwrite(res->buf_ptr, 1, len, res->store_file);
            res->content_length -= len;
            if (res->content_length == 0) {/* get all body data */
                return 1;
            }
            return 0;
        }
    
    
        /* parse chunked-encoding */
        for (i = 0; i < len; i++) {
            ch = res->buf_ptr[i];
            switch (res->body_state) {
            case BODY_STATE_CHUNK_LINE_START:
                res->body_chunk_start = res->buf_ptr + i;
                res->body_state = BODY_STATE_CHUNK_LINE_DATA;
                break;
            case BODY_STATE_CHUNK_LINE_DATA:
                if (ch == '
    ') {
                    res->body_state = BODY_STATE_CHUNK_LINE_DONE;
                }
                break;
            case BODY_STATE_CHUNK_LINE_DONE:
                if (ch == '
    ') {/* ignore chunk-extension */
                    res->body_state = BODY_STATE_CHUNK_DATA_START;
                    res->chunked_size = strtoul(res->body_chunk_start, NULL, 16);
                    if (0 == res->chunked_size) { /* last chunk */
                        fprintf(stdout, "=== last-chunk found(total body size = %lu) ===
    ", res->content_length);
                        /* ignore trailer */
                        return 1;
                    }
                    res->content_length += res->chunked_size;
                    fprintf(stdout, "=== chunk-size %lu ===
    ", res->chunked_size);
                }
                else {
                    fprintf(stderr, "invalid chunk-body line found
    ");
                    exit(EXIT_FAILURE);
                }
                break;
            case BODY_STATE_CHUNK_DATA_START:
                data_left = len - i;
                if (data_left < res->chunked_size) {
                    fwrite(res->buf_ptr + i, 1, data_left, res->store_file);
                    res->chunked_size -= data_left;
                    i  = len; /* end loop */
                    /* reset buf */
                    res->buf_ptr = res->buf_start;
                    res->buf_remain = res->buf_len;
                }
                else {
                    fwrite(res->buf_ptr + i, 1, res->chunked_size, res->store_file);
                    i += res->chunked_size - 1;
                    res->chunked_size = 0;
                    res->body_state = BODY_STATE_CHUNK_DATA_END;
                }
                break;
            case BODY_STATE_CHUNK_DATA_END:
                if (ch == '
    ') {
                    res->body_state = BODY_STATE_CHUNK_DATA_DONE;
                }
                else {
                    fprintf(stderr, "invalid chunk-body data found
    ");
                    exit(EXIT_FAILURE);
                }
                break;
            case BODY_STATE_CHUNK_DATA_DONE:
                if (ch == '
    ') {
                    res->body_state = BODY_STATE_CHUNK_LINE_START;
                }
                else {
                    fprintf(stderr, "invalid chunk-body data found
    ");
                    exit(EXIT_FAILURE);
                }
                break;
            }
        }
    
        /* dont break the size line */
        if ((res->body_state == BODY_STATE_CHUNK_LINE_DATA) || 
            (res->body_state == BODY_STATE_CHUNK_LINE_DONE)) {
            res->buf_ptr += len;
            res->buf_remain -= len;
        }
    }
    
    
    
    /*
     * Receive the HTTP response from fd and store its body in store_path.
     *
     * Allocates a 4 KiB receive buffer and feeds each received fragment to the
     * active parser callback: the header parser first, which installs the body
     * parser once the header is complete. The loop ends when a callback
     * reports the response complete (returns 1), when the server closes the
     * connection, or on a receive error.
     *
     * Exits with EXIT_FAILURE when the file cannot be opened, memory cannot be
     * allocated, the buffer is exhausted or recv() fails.
     */
    void recv_res_from_server(int fd, char* store_path)
    {
        ssize_t ret = 1;
        srv_res_t response;
        
        memset(&response, 0x00, sizeof(response));
    
        response.store_file = fopen(store_path, "wb");
        if (NULL == response.store_file) {
            perror("fopen() failed");
            exit(EXIT_FAILURE);
        }
    
        response.buf_len = 0x1000; /* 4k */
        response.buf_start = calloc(1, response.buf_len); /* alloc 4k memory */
        if (NULL == response.buf_start) {
            perror("calloc() failed");
            exit(EXIT_FAILURE);
        }
        response.buf_ptr = response.buf_start;
        response.buf_remain = response.buf_len;
        response.res_header_proc = proc_res_header;
        response.res_body_proc = proc_res_body;
        response.data_proc = response.res_header_proc;
    
        while (ret > 0) {
            /* a zero-length recv() returns 0, which would be mistaken for the
               server closing the connection */
            if (response.buf_remain == 0) {
                fprintf(stderr, "receive buffer exhausted\n");
                exit(EXIT_FAILURE);
            }
            ret = recv(fd, response.buf_ptr, response.buf_remain, 0);
            if (ret > 0) {
                if (response.data_proc(&response, ret) == 1) { /* get all response */
                    break;
                }
            }
        }
    
        if (ret == 0) {
            printf("server shutdown the connection\n");
        }
        else if (ret < 0) {
            perror("recv() failed");
            exit(EXIT_FAILURE);
        }
    
        /* cleanup */
        if (fclose(response.store_file) != 0) { /* buffered data may fail to flush */
            perror("fclose() failed");
        }
        free(response.buf_start);
    
    }
  • 相关阅读:
    有哪些学习openCV的网站或书籍?
    开发者自述:我是这样学习 GAN 的
    FSGAN Subject Agnostic Face Swapping and Reenactment翻译+重点标注
    01-GAN公式简明原理之铁甲小宝篇
    CentOS 7 之 hostapd.conf 配置说明
    dlib库实现人脸68个特征点的定标操作的原理有哪些?
    生成对抗网络GAN---生成mnist手写数字图像示例(附代码)
    deepfake 资源总结
    大话CNN经典模型:VGGNet
    欧几里得距离
  • 原文地址:https://www.cnblogs.com/LubinLew/p/http-chunked-encoding.html
Copyright © 2011-2022 走看看