zoukankan html css js c++ java

pandas(三)：pandas读取excel

一、代码如下

import pandas as pd

class ProcessData(object):
    """Append ``<label>  <sentence>`` training lines built from labelled corpora.

    Each public method reads one source (a sheet of the Excel workbook at
    ``self.path``, or a plain-text corpus), removes spaces, newlines,
    carriage returns and tabs from every field, and produces one
    ``<label>  <sentence>`` line per record for ``TRAIN_PATH``.
    """

    # Output file the training lines are appended to.
    TRAIN_PATH = "../data/train.txt"
    # Plain-text corpus read by get_fangshi(), one sentence per line.
    FANGSHI_PATH = "../data/new_data/还款方式语料.txt"
    # Fixed label attached to every sentence of the plain-text corpus.
    FANGSHI_LABEL = "支付宝微信号是多少"
    # Translation table deleting spaces, newlines, carriage returns and tabs.
    _STRIP_TABLE = str.maketrans("", "", " \n\r\t")

    def __init__(self, path="../data/new_data/0520新增语义.xlsx"):
        """Remember the Excel workbook the sheet-based methods read.

        Args:
            path: Workbook location. Other workbooks this class has been
                pointed at: "../data/new_data/随机抽听_1.xls" and
                "../data/new_data/无意义核对语料.xlsx".
        """
        self.path = path

    @classmethod
    def _clean(cls, value):
        """Return ``value`` as a string with all stripped characters removed."""
        # str() lets numeric cells through instead of failing on .translate.
        return str(value).translate(cls._STRIP_TABLE)

    def _excel_lines(self, sheet_name, sentence_column, label_column):
        """Build the training lines for one sheet of the workbook.

        Rows where either the sentence or the label cell is missing are
        skipped: pandas reports empty cells as NaN, which has no text to clean.

        Returns:
            A list of ``"<label>  <sentence>\\n"`` strings, in row order.

        Raises:
            KeyError: if a requested column is not present in the sheet.
        """
        frame = pd.read_excel(self.path, sheet_name=sheet_name)
        sentences = frame[sentence_column]
        labels = frame[label_column]
        lines = []
        for sentence, label in zip(sentences, labels):
            if pd.isna(sentence) or pd.isna(label):
                continue
            lines.append(self._clean(label) + "  " + self._clean(sentence) + "\n")
        return lines

    def _append(self, lines):
        """Append ``lines`` to ``TRAIN_PATH``, creating the file if needed."""
        with open(self.TRAIN_PATH, "a", encoding="utf8") as handle:
            handle.writelines(lines)

    def write_suiji(self):
        """Process sheet "sheet1" (columns "语句" / "标注大类") without writing.

        NOTE(review): the write call was commented out in the original code,
        so the rows are read and formatted but nothing is persisted. Replace
        the body with ``self._append(self._excel_lines(...))`` to re-enable it.
        """
        self._excel_lines("sheet1", "语句", "标注大类")
        # Opening in append mode keeps the original side effect of creating
        # the output file when it does not exist yet.
        with open(self.TRAIN_PATH, "a", encoding="utf8"):
            pass

    def get_nomeans(self):
        """Append lines from sheet "Sheet1", columns "客户语句" / "语义小类"."""
        self._append(self._excel_lines("Sheet1", "客户语句", "语义小类"))

    def get_wenti(self):
        """Append lines from sheet "Sheet1", columns "客户话术" / "语义"."""
        self._append(self._excel_lines("Sheet1", "客户话术", "语义"))

    def get_0520(self):
        """Append lines from sheet "Sheet1", columns "客户语句" / "语义"."""
        self._append(self._excel_lines("Sheet1", "客户语句", "语义"))

    def get_fangshi(self):
        """Append every line of the plain-text corpus under ``FANGSHI_LABEL``.

        The corpus is read completely before the output file is opened, so a
        missing corpus file raises before anything is written.
        """
        with open(self.FANGSHI_PATH, "r", encoding="utf8") as source:
            sentences = [self._clean(raw_line) for raw_line in source]
        self._append(
            [self.FANGSHI_LABEL + "  " + sentence + "\n" for sentence in sentences]
        )

# Script entry point: append the plain-text repayment-method corpus to the
# training file when this module is executed directly.
if __name__ == "__main__":
    processor = ProcessData()
    processor.get_fangshi()

查看全文

相关阅读:
浏览器的reflow和repaint
javascript正则表达式中参数g的作用
 InkCanvas 自由虚线笔画
 WPF 圆轮菜单的实现
 WPF实现化学式上下标
 install and use zookeeper C client API
install and use boost::thread
explicit instantiations in template class/function
The science of programming
how does vector work?

原文地址：https://www.cnblogs.com/zhangxianrong/p/14858615.html