zoukankan      html  css  js  c++  java
  • 标准CSV解析

    很方便的csv解析,可以解析带有分隔符的字段,可以解析成map方便程序使用。

    //
    //  CCSVParse.h
    //  CPPAlgorithm
    //
    //  Created by xujw on 16/2/26.
    //  Copyright © 2016年 xujw. All rights reserved.
    //
    
    #ifndef CCSVParse_h
    #define CCSVParse_h
    
    #include <stdio.h>
    #include <vector>
    #include <string>
    #include <map>
    #include <iostream>
    #include <sstream>
    
    /*
     * Free functions for data type conversion.
     * conToInt / conToFloat / conToDouble read a number from the text held in
     * `source`.
     */
    int conToInt(std::string &source);
    float conToFloat(std::string &source);
    double conToDouble(std::string &source);
    // Convert a number to std::string.
    std::string conToString(int s);
    std::string conToString(float s);
    std::string conToString(double s);
    
    
    /**
     * CSV reader that turns a file into rows of string fields.
     *
     * The rows produced by the latest parseCsvFile() call are kept in
     * _gridData and can be inspected with getRowNum() / printParseData().
     */
    class CCSVParse
    {
    public:
        CCSVParse();
        ~CCSVParse();

        /*
         * Parse the file into nested maps. The first row must hold the column
         * names, the first column must hold the row id, and ids must not repeat.
         * Fetch one row by its id (the row is itself a map), then fetch a value
         * from that row using the column name as the key.
         * Expected layout:
         *   id   name        age
         *   1    Xiaoming    20
         *   2    Xiaohong    19
         *   3    Xiaoguang   18
         */
        std::map<std::string, std::map<std::string, std::string> > parseCsvFileToMap(const std::string &fileName, const std::string &separator = ",");

        // Parse the file into rows and columns. `separator` must be exactly one
        // character (for example , or #).
        std::vector< std::vector<std::string> > parseCsvFile(const std::string &fileName, const std::string &separator = ",");

        // Print the parsed data; intended for testing.
        void printParseData() const;

        // Number of rows currently held in the parsed grid.
        size_t getRowNum() const { return _gridData.size(); }

        // Switch simple mode on or off.
        void useSimpleModel(bool flag) { _useSimpleModel = flag; }

        /*
         * Split a string.
         *   str:       the string / file contents to split
         *   separator: the delimiter
         */
        std::vector<std::string> splitString(const std::string &str, const std::string &separator);

        // Load the file named `fileName` and return its contents as a string.
        std::string loadCsvFile(const std::string &fileName);

    private:
        std::vector< std::vector<std::string> > _gridData;  // raw parsed data
        bool _useSimpleModel;                               // whether simple mode is enabled
    };
    
    #endif /* CCSVParse_h */
    
    //
    //  CCSVParse.cpp
    //  CPPAlgorithm
    //
    //  Created by xujw on 16/2/26.
    //  Copyright © 2016年 xujw. All rights reserved.
    //
    
    #include "CCSVParse.h"
    #include <assert.h>
    #include <time.h>
    
    // Construct a parser with simple mode switched off.
    CCSVParse::CCSVParse()
        : _useSimpleModel(false)
    {
    }

    // No explicit cleanup is performed here.
    CCSVParse::~CCSVParse()
    {
    }
    
    /**
     * Split `str` into the pieces that lie between occurrences of `separator`.
     *
     * Empty pieces are preserved, so leading, trailing and consecutive
     * separators each produce an empty string in the result.
     *
     * @param str        Text to split. An empty string yields an empty vector.
     * @param separator  Delimiter; may be longer than one character. When it is
     *                   empty there is nothing to split on and `str` is returned
     *                   as the single piece.
     * @return The pieces in order of appearance.
     */
    std::vector<std::string> CCSVParse::splitString(const std::string &str, const std::string &separator)
    {
        std::vector<std::string> resVec;
        if (str.empty())
        {
            return resVec;
        }
        if (separator.empty())
        {
            resVec.push_back(str);
            return resVec;
        }

        // Walk the input with a moving start offset instead of repeatedly
        // copying the remaining tail, and step over the whole separator so
        // multi-character delimiters leave no residue in the next piece.
        size_t fieldStart = 0;
        size_t pos = str.find(separator);
        while (pos != std::string::npos)
        {
            resVec.push_back(str.substr(fieldStart, pos - fieldStart));
            fieldStart = pos + separator.size();
            pos = str.find(separator, fieldStart);
        }

        // Whatever follows the last separator (possibly nothing) is the final piece.
        resVec.push_back(str.substr(fieldStart));
        return resVec;
    }
    
    /**
     * Read the whole file `fileName` in binary mode.
     *
     * @param fileName  Path handed to fopen().
     * @return The bytes that were actually read. An empty string is returned
     *         when the file cannot be opened, its size cannot be determined,
     *         or it is empty.
     */
    std::string CCSVParse::loadCsvFile(const std::string &fileName)
    {
        FILE *pFile = fopen(fileName.c_str(), "rb");
        if (0 == pFile)
        {
            return "";
        }

        // Closes the file on every exit path, including an allocation failure below.
        struct FileCloser
        {
            FILE *file;
            ~FileCloser() { fclose(file); }
        } closer = { pFile };

        // Seek to the end to learn the size, then rewind for the actual read.
        if (0 != fseek(pFile, 0, SEEK_END))
        {
            return "";
        }
        const long len = ftell(pFile);
        if (len <= 0 || 0 != fseek(pFile, 0, SEEK_SET))
        {
            // ftell reports failure as -1; a zero length means there is nothing to read.
            return "";
        }

        std::string strRead(static_cast<size_t>(len), '\0');
        const size_t readCount = fread(&strRead[0], 1, strRead.size(), pFile);
        // Keep only what fread delivered so a short read never exposes filler bytes.
        strRead.resize(readCount);
        return strRead;
    }
    
    std::vector<std::vector<std::string>> CCSVParse::parseCsvFile(const std::string &fileName,const std::string &separator)
    {
        clock_t before = clock();
    
        _gridData.clear();
    
        std::string strAllData = loadCsvFile(fileName);
    
        if (strAllData.size() == 0)
        {
            return _gridData;
        }
    
        //分隔符只能是一个字符
        assert(separator.size() == 1);
    
        //简易模式,字段里面不能包含分隔符
        if (_useSimpleModel)
        {
            std::cout<<"使用简易模式解析"<<std::endl;
            //分出行和字段
            std::vector<std::string> ret = splitString(strAllData, "
    ");
            for (size_t i=0; i<ret.size(); i++)
            {
                std::vector<std::string> rowData = splitString(ret.at(i), separator);
                _gridData.push_back(rowData);
            }
            return _gridData;
        }
    
        //标准模式,字段里面可以包含分隔符
        //定义状态
        typedef enum stateType
        {
            kNewFieldStart = 0,  //新字段开始
            kNonQuotesField,     //非引号字段
            kQuotesField,        //引号字段
            kFieldSeparator,     //字段分隔
            kQuoteInQuotesField, //引号字段中的引号
            kRowSeparator,       //行分隔符(回车)
            kError               //语法错误
        }StateType;
    
        //分出行
        std::vector<std::string> vecRows = splitString(strAllData, "
    ");
        for (int i=0; i<vecRows.size(); i++)
        {
            //一行一行处理
            std::string strRowData = vecRows.at(i);
            if (0 == strRowData.size())
            {
                continue;
            }
    
            std::vector< std::string > vecFields;
            std::string strField;
            StateType state = kNewFieldStart;
            for (int j=0; j<strRowData.size(); j++)
            {
                const char &ch = strRowData.at(j);
                switch ( state )
                {
                    case kNewFieldStart:
                    {
                        if (ch == '"')
                        {
                            state = kQuotesField;
                        }
                        else if (ch == separator.at(0))
                        {
                            vecFields.push_back("");
                            state = kFieldSeparator;
                        }
                        else if (ch == '
    ' || ch == '
    ')
                        {
                            state = kRowSeparator;
                        }
                        else
                        {
                            strField.push_back(ch);
                            state = kNonQuotesField;
                        }
                    }
                        break;
    
                    case kNonQuotesField:
                    {
                        if (ch == separator.at(0))
                        {
                            vecFields.push_back(strField);
                            strField.clear();
                            state = kFieldSeparator;
                        }
                        else if (ch == '
    ' || ch == '
    ')
                        {
                            vecFields.push_back(strField);
                            state = kRowSeparator;
                        }
                        else
                        {
                            strField.push_back(ch);
                        }
                    }
                        break;
    
                    case kQuotesField:
                    {
                        if (ch == '"')
                        {
                            state = kQuoteInQuotesField;
                        }
                        else
                        {
                            strField.push_back(ch);
                        }
                    }
                        break;
    
                    case kFieldSeparator:
                    {
                        if (ch == separator.at(0))
                        {
                            vecFields.push_back("");
                        }
                        else if (ch == '"')
                        {
                            strField.clear();
                            state = kQuotesField;
                        }
                        else if (ch == '
    ' || ch == '
    ')
                        {
                            vecFields.push_back("");
                            state = kRowSeparator;
                        }
                        else
                        {
                            strField.push_back(ch);
                            state = kNonQuotesField;
                        }
    
                    }
                        break;
    
                    case kQuoteInQuotesField:
                    {
                        if (ch == separator.at(0))
                        {
                            //引号闭合
                            vecFields.push_back(strField);
                            strField.clear();
                            state = kFieldSeparator;
                        }
                        else if (ch == '
    ' || ch == '
    ')
                        {
                            vecFields.push_back(strField);
                            state = kRowSeparator;
                        }
                        else if (ch == '"')
                        {
                            //转义引号
                            strField.push_back(ch);
                            state = kQuotesField;
                        }
                        else
                        {
                            //引号字段里包含引号时,需要对内引号进行加引号转义
                            std::cout<<"语法错误: 转义字符 " 不能完成转义 或 引号字段结尾引号没有紧贴字段分隔符"<<std::endl;
                            assert(false);
                        }
    
                    }
                        break;
    
                    case kRowSeparator:
                    {
                        _gridData.push_back(vecFields);
                        continue;
                    }
                        break;
    
                    case kError:
                    {
    
                    }
                        break;
    
                    default:
                        break;
                }
    
            }
    
            switch (state)
            {
                case kNonQuotesField:
                {
                    vecFields.push_back(strField);
                    _gridData.push_back(vecFields);
                }
                    break;
                case kQuoteInQuotesField:
                {
                    vecFields.push_back(strField);
                    _gridData.push_back(vecFields);
                }
                    break;
                case kFieldSeparator:
                {
                    vecFields.push_back("");
                    _gridData.push_back(vecFields);
                }
                    break;
                case kRowSeparator:
                {
                    _gridData.push_back(vecFields);
                }
                    break;
                default:
                    break;
            }
        }
    
    
        float used = (float)(clock()-before)/CLOCKS_PER_SEC;
        std::cout<<"解析此csv花费时间:"<<used<<"S"<<std::endl;
    
        return _gridData;
    }
    
    /**
     * Print the parsed grid to standard output (intended for testing).
     *
     * Writes a heading and the row count, then every row with a tab after each
     * field and a blank line after each row.
     */
    void CCSVParse::printParseData() const
    {
        std::cout<<"以下是解析的csv数据:"<<std::endl;
        std::cout<<"row counts:"<<_gridData.size()<<std::endl;

        for (size_t row = 0; row < _gridData.size(); ++row)
        {
            // Bind by reference so printing does not copy each row.
            const std::vector<std::string> &rowData = _gridData.at(row);
            for (size_t col = 0; col < rowData.size(); ++col)
            {
                std::cout<<rowData.at(col)<<"\t";
            }
            std::cout<<"\n"<<std::endl;
        }
    }
    
    /**
     * Parse the CSV file into a map of rows keyed by the value in the first column.
     *
     * The first parsed row supplies the column names. Every later row becomes a
     * map from column name to field value and is stored under that row's first
     * field. If two rows share an id, the later one replaces the earlier one.
     *
     * A row with fewer fields than the header gets empty strings for the
     * missing columns; fields beyond the header width are ignored; a row with
     * no fields at all is skipped because it has no id.
     *
     * @param fileName   Path of the file to load.
     * @param separator  Field delimiter, forwarded to parseCsvFile().
     * @return id -> (column name -> value); empty when the file has no rows.
     */
    std::map<std::string, std::map<std::string, std::string> > CCSVParse::parseCsvFileToMap(const std::string &fileName,const std::string &separator)
    {
        // Get all rows and columns first.
        const std::vector<std::vector<std::string>> allData = parseCsvFile(fileName,separator);

        // Convert to dictionary form.
        std::map<std::string, std::map<std::string, std::string> > mapAllData;
        if (allData.empty())
        {
            return mapAllData;
        }

        // The first row holds the column names used as keys.
        const std::vector<std::string> &keyData = allData.front();

        for (size_t row = 1; row < allData.size(); ++row)
        {
            const std::vector<std::string> &rowData = allData.at(row);
            if (rowData.empty())
            {
                continue;
            }

            std::map<std::string, std::string> mapRow;
            for (size_t col = 0; col < keyData.size(); ++col)
            {
                // Short rows are padded instead of throwing std::out_of_range.
                mapRow[keyData.at(col)] = (col < rowData.size()) ? rowData.at(col) : std::string();
            }

            // The first column of every row is its id.
            mapAllData[rowData.front()] = mapRow;
        }

        return mapAllData;
    }
    
    #pragma mark--全局函数 类型转换
    // Read the leading integer from `source`.
    // Returns 0 when nothing can be read (empty, blank, or non-numeric text).
    int conToInt(std::string &source)
    {
        std::istringstream reader(source);
        // Start from 0: for empty or blank input the extraction never stores a
        // value, so an uninitialized result would be returned as garbage.
        int res = 0;
        reader >> res;

        return res;
    }
    // Read the leading floating-point number from `source` as a float.
    // Returns 0 when nothing can be read (empty, blank, or non-numeric text).
    float conToFloat(std::string &source)
    {
        std::istringstream reader(source);
        // Start from 0: for empty or blank input the extraction never stores a
        // value, so an uninitialized result would be returned as garbage.
        float res = 0.0f;
        reader >> res;

        return res;
    }
    // Read the leading floating-point number from `source` as a double.
    // Returns 0 when nothing can be read (empty, blank, or non-numeric text).
    double conToDouble(std::string &source)
    {
        std::istringstream reader(source);
        // Start from 0: for empty or blank input the extraction never stores a
        // value, so an uninitialized result would be returned as garbage.
        double res = 0.0;
        reader >> res;

        return res;
    }
    
    // Format an int as text by writing it to a stream and reading the token back.
    std::string conToString(int s)
    {
        std::string formatted;
        std::stringstream io;

        io << s;
        io >> formatted;
        return formatted;
    }
    // Format a float as text by writing it to a stream and reading the token back.
    std::string conToString(float s)
    {
        std::string formatted;
        std::stringstream io;

        io << s;
        io >> formatted;
        return formatted;
    }
    // Format a double as text by writing it to a stream and reading the token back.
    std::string conToString(double s)
    {
        std::string formatted;
        std::stringstream io;

        io << s;
        io >> formatted;
        return formatted;
    }

    测试:
    test.csv:
    fid,name,age
    1,小明,20
    2,小刚,20
    3,小红,19

    // The parser lives on the stack, so no new/delete is needed.
    CCSVParse parse;
    // Parse into rows and columns.
    std::vector<std::vector<std::string>> s = parse.parseCsvFile("test.csv");
    parse.printParseData();
    // Parse into a dictionary keyed by id.
    std::map<std::string, std::map<std::string, std::string> > mapData = parse.parseCsvFileToMap("test.csv");
    std::map<std::string, std::string> mapXiaom = mapData.at("1");
    std::string name = mapXiaom.at("name"); // 小明
    int age = conToInt(mapXiaom.at("age")); // 20

    解析结果:
    1 小明 20
    2 小刚 20
    3 小红 19

    下载链接:百度云盘下载

  • 相关阅读:
    wtl 支持托拽文件并在ListBox框中显示文件路径的方法
    提升本程序进程优先级和权限(VC++源代码)
    Win7下运行VC程序UAC权限问题
    Flex中带for的循环
    Flex注释
    Flex建立编译环境
    Flex事件驱动机制
    Flex应用程序的系统开发周期
    Linux下c开发 之 线程通信
    将 Win32 C/C++ 应用程序迁移到 POWER 上的 Linux,第 1 部分: 进程、线程和共享内存服务
  • 原文地址:https://www.cnblogs.com/skyxu123/p/9543806.html
Copyright © 2011-2022 走看看