zoukankan      html  css  js  c++  java
  • pandas(三):pandas读取excel

    一、代码如下

    import pandas as pd
    
    class ProcessData(object):
        """Append ``label  sentence`` training samples to a text file.

        Each public method reads one specific source (an Excel sheet with fixed
        column names, or a plain-text corpus), removes spaces, tabs, line feeds
        and carriage returns from every field, and appends one line per sample
        to ``out_path``.
        """

        # Deletes the characters the original chained ``replace`` calls removed.
        # NOTE(review): the two newline-like literals were garbled in the
        # source listing; they are taken to be "\n" and "\r" -- confirm.
        _STRIP_TABLE = str.maketrans("", "", " \n\r\t")

        # Fixed label attached to every sentence read by ``get_fangshi``.
        _FANGSHI_LABEL = "支付宝微信号是多少"

        def __init__(self, path="../data/new_data/0520新增语义.xlsx",
                     out_path="../data/train.txt"):
            """Store the workbook to read and the file to append samples to.

            Args:
                path: Excel workbook read by the sheet-based methods.
                out_path: Text file every method appends samples to.
            """
            # Alternative workbooks that were left commented out here:
            #   "../data/new_data/随机抽听_1.xls"
            #   "../data/new_data/无意义核对语料.xlsx"
            self.path = path
            self.out_path = out_path

        @classmethod
        def _clean(cls, value):
            """Return *value* as text with spaces, tabs, LF and CR removed."""
            return str(value).translate(cls._STRIP_TABLE)

        def _append_samples(self, pairs):
            """Append one ``label  sentence`` line per usable pair.

            Args:
                pairs: Iterable of ``(sentence, label)`` raw values.

            Pairs with a missing value (NaN/None, e.g. an empty Excel cell) or
            with a field that is empty after cleaning are skipped instead of
            raising or producing a half-empty training line.
            """
            with open(self.out_path, "a", encoding="utf8") as f:
                for sentence, label in pairs:
                    if pd.isna(sentence) or pd.isna(label):
                        continue
                    sentence = self._clean(sentence)
                    label = self._clean(label)
                    if sentence and label:
                        f.write(label + "  " + sentence + "\n")

        def _append_sheet(self, sheet_name, sentence_col, label_col):
            """Read two columns of one sheet of ``self.path`` and append them."""
            book = pd.read_excel(self.path, sheet_name=sheet_name)
            self._append_samples(zip(book[sentence_col], book[label_col]))

        def write_suiji(self):
            """Append samples from sheet ``sheet1`` (columns 语句 / 标注大类).

            NOTE(review): the write call in this method had been commented
            out, so it opened the output file and wrote nothing. It is active
            here; confirm that is the intended behavior.
            """
            self._append_sheet("sheet1", "语句", "标注大类")

        def get_nomeans(self):
            """Append samples from sheet ``Sheet1`` (columns 客户语句 / 语义小类)."""
            self._append_sheet("Sheet1", "客户语句", "语义小类")

        def get_wenti(self):
            """Append samples from sheet ``Sheet1`` (columns 客户话术 / 语义)."""
            self._append_sheet("Sheet1", "客户话术", "语义")

        def get_0520(self):
            """Append samples from sheet ``Sheet1`` (columns 客户语句 / 语义)."""
            self._append_sheet("Sheet1", "客户语句", "语义")

        def get_fangshi(self, corpus_path="../data/new_data/还款方式语料.txt"):
            """Append every non-blank line of a text corpus under one fixed label.

            Args:
                corpus_path: UTF-8 text file with one sentence per line.
            """
            # Read the whole corpus before the output file is opened, as the
            # original did, so reading and writing never overlap.
            with open(corpus_path, "r", encoding="utf8") as f:
                sentences = list(f)
            self._append_samples(
                (sentence, self._FANGSHI_LABEL) for sentence in sentences
            )
    
    # Script entry point: append the repayment-method corpus to the training file.
    if __name__ == "__main__":
        processor = ProcessData()
        processor.get_fangshi()
  • 相关阅读:
    nyoj 21三个水杯(BFS + 栈)
    hdu 4493 Tutor
    树的判断(poj nyoj hduoj)
    nyoj 228 士兵杀敌(五)
    poj 3468 A Simple Problem with Integers(线段树)
    hdu 2565 放大的X
    nyoj 528 找球号(三)(哈希)
    nyoj 138 找球号(二)(哈希)
    算法之搜索篇
    每日命令:(11)nl
  • 原文地址:https://www.cnblogs.com/zhangxianrong/p/14858615.html
Copyright © 2011-2022 走看看