一个使用方便的 CSV 解析类:可以解析内容中包含分隔符的字段(带引号的字段),也可以把数据解析成 map,方便程序按 id 和列名取值。
//
// CCSVParse.h
// CPPAlgorithm
//
// Created by xujw on 16/2/26.
// Copyright © 2016年 xujw. All rights reserved.
//
#ifndef CCSVParse_h
#define CCSVParse_h
#include <stdio.h>
#include <time.h>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
/*
* Global helper functions that convert a string to a numeric type:
* conToInt
* conToFloat
* conToDouble
*/
int conToInt(std::string &source);
float conToFloat(std::string &source);
double conToDouble(std::string &source);
// Convert a number to its std::string representation
std::string conToString(int s);
std::string conToString(float s);
std::string conToString(double s);
/*
 * CSV file reader.
 * Parses a file either into rows of string fields or into nested maps keyed
 * by the first column of each row. The rows produced by the most recent
 * parseCsvFile() call are kept inside the object.
 */
class CCSVParse
{
public:
    CCSVParse();
    ~CCSVParse();

    /*
     * Parses the file into map form. The first row must contain the column
     * names and the first column must contain the row id; ids must be unique.
     * Fetch a row by its id (the row is itself a map), then fetch a value
     * using the column name as key.
     * Expected layout:
     *   id  name       age
     *   1   Xiaoming   20
     *   2   Xiaohong   19
     *   3   Xiaoguang  18
     */
    std::map<std::string, std::map<std::string, std::string> > parseCsvFileToMap(const std::string &fileName, const std::string &separator = ",");

    // Parses the file into rows and columns. The separator must be a single
    // character (for example , or #).
    std::vector< std::vector<std::string> > parseCsvFile(const std::string &fileName, const std::string &separator = ",");

    // Prints the parsed data; intended for testing.
    void printParseData() const;

    // Number of rows currently stored.
    size_t getRowNum() const { return _gridData.size(); }

    // Turns simple mode on or off.
    void useSimpleModel(bool flag) { _useSimpleModel = flag; }

    /*
     * Splits a string.
     * str:      the string (or whole file content) to split
     * sparator: the separator to split on
     */
    std::vector<std::string> splitString(const std::string &str, const std::string &sparator);

    // Returns the content of the named file as a string.
    std::string loadCsvFile(const std::string &fileName);

private:
    // Raw parsed data: one inner vector of fields per row.
    std::vector< std::vector<std::string> > _gridData;
    // Whether simple mode is used.
    bool _useSimpleModel;
};
#endif /* CCSVParse_h */
//
// CCSVParse.cpp
// CPPAlgorithm
//
// Created by xujw on 16/2/26.
// Copyright © 2016年 xujw. All rights reserved.
//
#include "CCSVParse.h"
#include <assert.h>
// A new parser starts with simple mode switched off.
CCSVParse::CCSVParse()
    : _useSimpleModel(false)
{
}

// No explicit cleanup is performed here.
CCSVParse::~CCSVParse()
{
}
/*
 * Splits str on every occurrence of separator.
 *
 * str:       the text to split
 * separator: the delimiter; may be longer than one character
 *
 * Returns the pieces in order. Empty pieces are kept, so "a,,b" yields
 * {"a", "", "b"} and a trailing separator yields a trailing empty piece.
 * An empty str yields an empty vector. An empty separator cannot split
 * anything, so str is returned as the single piece.
 */
std::vector<std::string> CCSVParse::splitString(const std::string &str, const std::string &separator)
{
    std::vector<std::string> resVec;
    if (str.empty())
    {
        return resVec;
    }
    if (separator.empty())
    {
        // find("") matches at every position; treat the input as one piece
        // instead of looping over it.
        resVec.push_back(str);
        return resVec;
    }

    // Scan by index so the remaining text is never re-copied.
    std::string::size_type fieldStart = 0;
    std::string::size_type found = str.find(separator, fieldStart);
    while (found != std::string::npos)
    {
        resVec.push_back(str.substr(fieldStart, found - fieldStart));
        // Step over the whole separator, not just its first character.
        fieldStart = found + separator.size();
        found = str.find(separator, fieldStart);
    }

    // Whatever follows the last separator (possibly empty) is the last piece.
    resVec.push_back(str.substr(fieldStart));
    return resVec;
}
/*
 * Reads a whole file into a string.
 *
 * fileName: path of the file; it is opened in binary mode so bytes are
 *           returned unchanged.
 *
 * Returns the file content, or an empty string when the file cannot be
 * opened, its size cannot be determined, or it is empty. If fewer bytes
 * than the reported size can be read, only the bytes actually read are
 * returned.
 */
std::string CCSVParse::loadCsvFile(const std::string &fileName)
{
    FILE *pFile = fopen(fileName.c_str(), "rb");
    if (nullptr == pFile)
    {
        return "";
    }

    // Closes the file on every exit path, including a failed allocation.
    struct FileCloser
    {
        explicit FileCloser(FILE *f) : file(f) {}
        ~FileCloser() { fclose(file); }
        FileCloser(const FileCloser &) = delete;
        FileCloser &operator=(const FileCloser &) = delete;
        FILE *file;
    };
    const FileCloser closer(pFile);

    // Seek to the end to learn the size, then back to the start to read.
    if (0 != fseek(pFile, 0, SEEK_END))
    {
        return "";
    }
    const long len = ftell(pFile);
    if (len <= 0 || 0 != fseek(pFile, 0, SEEK_SET))
    {
        // ftell reports failure as -1; a zero length means nothing to read.
        return "";
    }

    std::string content(static_cast<size_t>(len), '\0');
    const size_t bytesRead = fread(&content[0], 1, content.size(), pFile);
    // Drop the unread tail so the result never contains filler bytes.
    content.resize(bytesRead);
    return content;
}
std::vector<std::vector<std::string>> CCSVParse::parseCsvFile(const std::string &fileName,const std::string &separator)
{
clock_t before = clock();
_gridData.clear();
std::string strAllData = loadCsvFile(fileName);
if (strAllData.size() == 0)
{
return _gridData;
}
//分隔符只能是一个字符
assert(separator.size() == 1);
//简易模式,字段里面不能包含分隔符
if (_useSimpleModel)
{
std::cout<<"使用简易模式解析"<<std::endl;
//分出行和字段
std::vector<std::string> ret = splitString(strAllData, "
");
for (size_t i=0; i<ret.size(); i++)
{
std::vector<std::string> rowData = splitString(ret.at(i), separator);
_gridData.push_back(rowData);
}
return _gridData;
}
//标准模式,字段里面可以包含分隔符
//定义状态
typedef enum stateType
{
kNewFieldStart = 0, //新字段开始
kNonQuotesField, //非引号字段
kQuotesField, //引号字段
kFieldSeparator, //字段分隔
kQuoteInQuotesField, //引号字段中的引号
kRowSeparator, //行分隔符(回车)
kError //语法错误
}StateType;
//分出行
std::vector<std::string> vecRows = splitString(strAllData, "
");
for (int i=0; i<vecRows.size(); i++)
{
//一行一行处理
std::string strRowData = vecRows.at(i);
if (0 == strRowData.size())
{
continue;
}
std::vector< std::string > vecFields;
std::string strField;
StateType state = kNewFieldStart;
for (int j=0; j<strRowData.size(); j++)
{
const char &ch = strRowData.at(j);
switch ( state )
{
case kNewFieldStart:
{
if (ch == '"')
{
state = kQuotesField;
}
else if (ch == separator.at(0))
{
vecFields.push_back("");
state = kFieldSeparator;
}
else if (ch == '
' || ch == '
')
{
state = kRowSeparator;
}
else
{
strField.push_back(ch);
state = kNonQuotesField;
}
}
break;
case kNonQuotesField:
{
if (ch == separator.at(0))
{
vecFields.push_back(strField);
strField.clear();
state = kFieldSeparator;
}
else if (ch == '
' || ch == '
')
{
vecFields.push_back(strField);
state = kRowSeparator;
}
else
{
strField.push_back(ch);
}
}
break;
case kQuotesField:
{
if (ch == '"')
{
state = kQuoteInQuotesField;
}
else
{
strField.push_back(ch);
}
}
break;
case kFieldSeparator:
{
if (ch == separator.at(0))
{
vecFields.push_back("");
}
else if (ch == '"')
{
strField.clear();
state = kQuotesField;
}
else if (ch == '
' || ch == '
')
{
vecFields.push_back("");
state = kRowSeparator;
}
else
{
strField.push_back(ch);
state = kNonQuotesField;
}
}
break;
case kQuoteInQuotesField:
{
if (ch == separator.at(0))
{
//引号闭合
vecFields.push_back(strField);
strField.clear();
state = kFieldSeparator;
}
else if (ch == '
' || ch == '
')
{
vecFields.push_back(strField);
state = kRowSeparator;
}
else if (ch == '"')
{
//转义引号
strField.push_back(ch);
state = kQuotesField;
}
else
{
//引号字段里包含引号时,需要对内引号进行加引号转义
std::cout<<"语法错误: 转义字符 " 不能完成转义 或 引号字段结尾引号没有紧贴字段分隔符"<<std::endl;
assert(false);
}
}
break;
case kRowSeparator:
{
_gridData.push_back(vecFields);
continue;
}
break;
case kError:
{
}
break;
default:
break;
}
}
switch (state)
{
case kNonQuotesField:
{
vecFields.push_back(strField);
_gridData.push_back(vecFields);
}
break;
case kQuoteInQuotesField:
{
vecFields.push_back(strField);
_gridData.push_back(vecFields);
}
break;
case kFieldSeparator:
{
vecFields.push_back("");
_gridData.push_back(vecFields);
}
break;
case kRowSeparator:
{
_gridData.push_back(vecFields);
}
break;
default:
break;
}
}
float used = (float)(clock()-before)/CLOCKS_PER_SEC;
std::cout<<"解析此csv花费时间:"<<used<<"S"<<std::endl;
return _gridData;
}
/*
 * Prints the stored rows to std::cout (intended for testing).
 * Output: a heading line, the row count, then every row with each field
 * followed by one space and each row followed by an empty line.
 */
void CCSVParse::printParseData() const
{
    std::cout<<"以下是解析的csv数据:"<<std::endl;
    std::cout<<"row counts:"<<_gridData.size()<<std::endl;
    for (size_t row = 0; row < _gridData.size(); ++row)
    {
        // Bind by reference: the row is only read, never modified.
        const std::vector<std::string> &rowData = _gridData[row];
        for (size_t col = 0; col < rowData.size(); ++col)
        {
            std::cout<<rowData[col]<<" ";
        }
        std::cout<<"\n"<<std::endl;
    }
}
/*
 * Parses a CSV file into nested maps: row id -> (column name -> value).
 *
 * fileName:  path of the file to parse
 * separator: field separator, passed through to parseCsvFile
 *
 * The first parsed row supplies the column names and the first field of
 * every later row is that row's id. A row with fewer fields than the header
 * gets an empty string for each missing column; fields beyond the header are
 * ignored. Rows without any field are skipped. When two rows share an id,
 * the later row replaces the earlier one.
 */
std::map<std::string, std::map<std::string, std::string> > CCSVParse::parseCsvFileToMap(const std::string &fileName,const std::string &separator)
{
    // Get all rows and columns first.
    const std::vector<std::vector<std::string>> allData = parseCsvFile(fileName,separator);

    std::map<std::string, std::map<std::string, std::string> > mapAllData;
    if (allData.empty())
    {
        return mapAllData;
    }

    // The first row holds the column names used as keys.
    const std::vector<std::string> &keyData = allData.front();
    for (size_t row = 1; row < allData.size(); ++row)
    {
        const std::vector<std::string> &rowData = allData[row];
        if (rowData.empty())
        {
            // No first field means no id to store the row under.
            continue;
        }

        std::map<std::string, std::string> mapRow;
        for (size_t col = 0; col < keyData.size(); ++col)
        {
            // Short rows: missing columns map to an empty string.
            mapRow[keyData[col]] = (col < rowData.size()) ? rowData[col] : std::string();
        }

        // The first field of each row is its id.
        mapAllData[rowData.front()] = mapRow;
    }
    return mapAllData;
}
#pragma mark--全局函数 类型转换
/*
 * Converts the leading integer in source to an int.
 *
 * source: text to convert; it is only read, never modified.
 *
 * Returns the parsed value, or 0 when source is empty or does not start
 * with a number (after optional leading whitespace).
 */
int conToInt(std::string &source)
{
    std::istringstream in(source);
    // Start from 0: when the stream is already at its end (empty input)
    // the extraction does not write to the target at all.
    int value = 0;
    in >> value;
    return value;
}
/*
 * Converts the leading number in source to a float.
 *
 * source: text to convert; it is only read, never modified.
 *
 * Returns the parsed value, or 0 when source is empty or does not start
 * with a number (after optional leading whitespace).
 */
float conToFloat(std::string &source)
{
    std::istringstream in(source);
    // Start from 0: when the stream is already at its end (empty input)
    // the extraction does not write to the target at all.
    float value = 0.0f;
    in >> value;
    return value;
}
/*
 * Converts the leading number in source to a double.
 *
 * source: text to convert; it is only read, never modified.
 *
 * Returns the parsed value, or 0 when source is empty or does not start
 * with a number (after optional leading whitespace).
 */
double conToDouble(std::string &source)
{
    std::istringstream in(source);
    // Start from 0: when the stream is already at its end (empty input)
    // the extraction does not write to the target at all.
    double value = 0.0;
    in >> value;
    return value;
}
// Formats an int as text using default stream formatting.
std::string conToString(int s)
{
    std::ostringstream formatted;
    formatted << s;
    return formatted.str();
}
// Formats a float as text using default stream formatting
// (six significant digits).
std::string conToString(float s)
{
    std::ostringstream formatted;
    formatted << s;
    return formatted.str();
}
// Formats a double as text using default stream formatting
// (six significant digits).
std::string conToString(double s)
{
    std::ostringstream formatted;
    formatted << s;
    return formatted.str();
}
测试:
test.csv:
fid,name,age
1,小明,20
2,小刚,20
3,小红,19
CCSVParse *parse = new CCSVParse();
//解析成数组
std::vector<std::vector<std::string>> s = parse->parseCsvFile("test.csv");
parse->printParseData();
//解析成字典
std::map<std::string, std::map<std::string, std::string> > mapData = parse->parseCsvFileToMap("test.csv");
std::map<std::string, std::string> mapXiaom = mapData.at("1");
std::string name = mapXiaom.at("name"); //小明
int age = conToInt(mapXiaom.at("age")); //20
delete parse;
解析结果:
1 小明 20
2 小刚 20
3 小红 19
下载链接:百度云盘下载