zoukankan      html  css  js  c++  java
  • pandas(三):pandas读取excel

    一、代码如下

    import pandas as pd
    
    class ProcessData(object):
        """Append labelled sentences from corpus files to the training file.

        Each public method reads one corpus (an Excel sheet at ``self.path`` or
        a plain-text file), removes spaces, newlines, carriage returns and tabs
        from every label and sentence, and appends one line per sample to
        ``../data/train.txt`` in the form ``label + two spaces + sentence``.
        """

        # Training file that every method opens in append ("a+") mode.
        _TRAIN_PATH = "../data/train.txt"
        # Translation table deleting space, newline, carriage return and tab.
        _STRIP_TABLE = str.maketrans("", "", " \n\r\t")

        def __init__(self):
            # Workbook read by the Excel-based methods. Alternatives that were
            # left commented out in the original:
            #   "../data/new_data/随机抽听_1.xls"
            #   "../data/new_data/无意义核对语料.xlsx"
            self.path = "../data/new_data/0520新增语义.xlsx"

        @classmethod
        def _clean(cls, text):
            """Return *text* without spaces, newlines, carriage returns or tabs.

            Raises:
                AttributeError: if *text* is not a string (for example a
                    non-text cell value coming from the workbook).
            """
            return text.translate(cls._STRIP_TABLE)

        @classmethod
        def _format_line(cls, label, sentence):
            """Build one training line: cleaned label, two spaces, cleaned sentence."""
            # Sentence is cleaned first, matching the original statement order.
            cleaned_sentence = cls._clean(sentence)
            cleaned_label = cls._clean(label)
            return cleaned_label + "  " + cleaned_sentence + "\n"

        def _append_sheet(self, sheet_name, sentence_column, label_column, write=True):
            """Read one sheet of ``self.path`` and append its rows to the training file.

            Args:
                sheet_name: name of the worksheet to load.
                sentence_column: column holding the sentence text.
                label_column: column holding the label text.
                write: when False the lines are still built (so bad cells still
                    raise) but nothing is written to the file.
            """
            readbook = pd.read_excel(self.path, sheet_name=sheet_name)
            # Look both columns up before opening the output file so a missing
            # column fails without touching the training file.
            sentences = readbook[sentence_column]
            labels = readbook[label_column]
            with open(self._TRAIN_PATH, "a+", encoding="utf8") as f:
                for sentence, label in zip(sentences, labels):
                    line = self._format_line(label, sentence)
                    if write:
                        f.write(line)

        def write_suiji(self):
            """Process sheet "sheet1" (columns "语句" / "标注大类") without writing.

            NOTE(review): the ``f.write`` call was commented out in the original,
            so this method builds each line but does not persist it. That
            behaviour is kept here; confirm whether writing should be re-enabled.
            """
            self._append_sheet("sheet1", "语句", "标注大类", write=False)

        def get_nomeans(self):
            """Append rows of sheet "Sheet1" using columns "客户语句" / "语义小类"."""
            self._append_sheet("Sheet1", "客户语句", "语义小类")

        def get_wenti(self):
            """Append rows of sheet "Sheet1" using columns "客户话术" / "语义"."""
            self._append_sheet("Sheet1", "客户话术", "语义")

        def get_0520(self):
            """Append rows of sheet "Sheet1" using columns "客户语句" / "语义"."""
            self._append_sheet("Sheet1", "客户语句", "语义")

        def get_fangshi(self):
            """Append every line of the repayment-method text corpus under one fixed label.

            All lines of "../data/new_data/还款方式语料.txt" are read and cleaned
            first; only then is the training file opened, so a missing input file
            leaves the training file untouched. Lines that are empty after
            cleaning are still written, as in the original.
            """
            with open("../data/new_data/还款方式语料.txt", "r", encoding="utf8") as f:
                dataLine = [self._clean(line) for line in f]
            l = "支付宝微信号是多少"
            with open(self._TRAIN_PATH, "a+", encoding="utf8") as f:
                for s in dataLine:
                    f.write(l + "  " + s + "\n")
    
    if __name__ == '__main__':
        # Script entry point: build the processor and append the
        # repayment-method corpus to the training file.
        processor = ProcessData()
        processor.get_fangshi()
  • 相关阅读:
    持续集成(Continuous Integration),
    python的几个有趣点
    C++语言发展历史 & 基础知识
    [C++] Windows下的socket编程(这是一个简单的TCP/IP例子)
    office app 代码简析之 task pane app
    佳言玩具
    数据的图形可视化[R语言结果GML引发出来的调查]
    收藏的一系列教程帖子,很适合有一定基础,想要进阶的同学
    各种排序算法总结
    三层架构+存储过程实现分页
  • 原文地址:https://www.cnblogs.com/zhangxianrong/p/14858615.html
Copyright © 2011-2022 走看看