zoukankan      html  css  js  c++  java
  • pandas(三):pandas读取excel

    一、代码如下

    import pandas as pd
    
    class ProcessData(object):
        """Append "label  sentence" training lines to a text file.

        Each public method reads one labelled corpus (an Excel sheet or a
        plain-text file), strips spaces, tabs, carriage returns and newlines
        from every field, and appends one line per sample to the training
        file in the form ``<label><two spaces><sentence>\\n``.

        Args:
            path: Excel workbook read by the Excel-based methods.
            train_path: Training file that lines are appended to.
            fangshi_path: Plain-text corpus read by ``get_fangshi``.
        """

        # Deletion table for the characters removed from every field.
        _STRIP_TABLE = str.maketrans("", "", " \n\r\t")
        # Separator written between the label and the sentence.
        _FIELD_SEP = "  "

        def __init__(self,
                     path="../data/new_data/0520新增语义.xlsx",
                     train_path="../data/train.txt",
                     fangshi_path="../data/new_data/还款方式语料.txt"):
            # Workbooks previously used as `path`:
            #   "../data/new_data/随机抽听_1.xls"
            #   "../data/new_data/无意义核对语料.xlsx"
            self.path = path
            self.train_path = train_path
            self.fangshi_path = fangshi_path

        @classmethod
        def _clean(cls, value):
            """Return ``value`` as a string without spaces, tabs, CR or LF."""
            # str() lets numeric cells through instead of raising
            # AttributeError on a missing .replace/.translate method.
            return str(value).translate(cls._STRIP_TABLE)

        @classmethod
        def _format_line(cls, label, sentence):
            """Build one training line from a raw label and a raw sentence."""
            return cls._clean(label) + cls._FIELD_SEP + cls._clean(sentence) + "\n"

        def _excel_lines(self, sheet_name, text_column, label_column):
            """Read two columns of ``self.path`` and return formatted lines.

            Rows where either cell is missing (NaN/None) are skipped, because
            they cannot form a usable sample.
            """
            frame = pd.read_excel(self.path, sheet_name=sheet_name)
            lines = []
            for sentence, label in zip(frame[text_column], frame[label_column]):
                if pd.isna(sentence) or pd.isna(label):
                    continue
                lines.append(self._format_line(label, sentence))
            return lines

        def _append_lines(self, lines):
            """Append ``lines`` to the training file, creating it if needed."""
            with open(self.train_path, "a+", encoding="utf8") as out:
                out.writelines(lines)

        def write_suiji(self):
            """Process sheet "sheet1" (columns "语句" / "标注大类").

            NOTE(review): in the original code the ``f.write(...)`` call of
            this method was commented out, so nothing was ever persisted.
            That behaviour is kept here; confirm whether the write should be
            re-enabled (replace the ``with`` block below with
            ``self._append_lines(lines)``).
            """
            lines = self._excel_lines("sheet1", "语句", "标注大类")  # noqa: F841
            # Keep the original side effect: "a+" creates the file if missing.
            with open(self.train_path, "a+", encoding="utf8"):
                pass

        def get_nomeans(self):
            """Append samples from sheet "Sheet1" (columns "客户语句" / "语义小类")."""
            self._append_lines(self._excel_lines("Sheet1", "客户语句", "语义小类"))

        def get_wenti(self):
            """Append samples from sheet "Sheet1" (columns "客户话术" / "语义")."""
            self._append_lines(self._excel_lines("Sheet1", "客户话术", "语义"))

        def get_0520(self):
            """Append samples from sheet "Sheet1" (columns "客户语句" / "语义")."""
            self._append_lines(self._excel_lines("Sheet1", "客户语句", "语义"))

        def get_fangshi(self):
            """Append every line of the plain-text corpus under one fixed label.

            Each line of ``self.fangshi_path`` is cleaned and written with the
            label "支付宝微信号是多少".
            """
            label = "支付宝微信号是多少"
            # Read the whole corpus first so the source file is closed before
            # the training file is opened, as in the original flow.
            with open(self.fangshi_path, "r", encoding="utf8") as src:
                lines = [self._format_line(label, raw) for raw in src]
            self._append_lines(lines)
    
    # Script entry point: append the repayment-method corpus to the training file.
    if __name__ == "__main__":
        processor = ProcessData()
        processor.get_fangshi()
  • 相关阅读:
    冒泡排序
    Windows 10家庭版升级专业版
    VRRP + MSTP实验
    MSTP多生成树协议
    解决office 2016提示“你的许可证不是正版,并且你可能是盗版软件的受害者。使用正版Office,避免干扰并保护你的文件安全”
    路由器开启ssh实现远程管理
    CentOS 7安装Telnet服务进行远程管理
    CentOS 7开启ssh服务进行远程管理
    华为特有接口Hybrid
    Vlan Mapping
  • 原文地址:https://www.cnblogs.com/zhangxianrong/p/14858615.html
Copyright © 2011-2022 走看看