zoukankan      html  css  js  c++  java
  • pandas(六):pandas对excel进行读写

    一、代码如下

    import pandas as pd
    import numpy as np
    
    class Process():
        """Convert annotated Excel workbooks into cleaned TSV/Excel files.

        Every method reads one or more sheets with pandas, keeps only the rows
        in which all selected columns are filled in, strips spaces, tabs and
        line breaks from the kept cells, and either accumulates the result on
        the instance (``read_path_*``) or writes it to disk (all other methods).

        Rows with a missing value in any selected column are skipped, and cells
        are converted with ``str()`` before cleaning, so numeric cells do not
        raise.
        """

        # Translation table that deletes space, newline, carriage return and tab.
        _WHITESPACE = str.maketrans("", "", " \n\r\t")
        # Sheets of ``path_2`` consumed by ``read_path_2``, in processing order.
        _PATH_2_SHEETS = ("0526", "0525", "0524")

        def __init__(self):
            """Set the default input workbooks and the empty accumulators."""
            self.path_1 = "3000条无意义.xlsx"
            self.path_2 = "录音跟听0526.xlsx"
            self.s = []  # cleaned sentences gathered by read_path_1 / read_path_2
            self.l = []  # cleaned labels, index-aligned with self.s

        # ------------------------------------------------------------------
        # Private helpers
        # ------------------------------------------------------------------
        @classmethod
        def _clean(cls, value):
            """Return ``value`` as text with spaces, tabs and line breaks removed."""
            return str(value).translate(cls._WHITESPACE)

        @classmethod
        def _clean_columns(cls, frame, columns):
            """Return a new frame holding the cleaned ``columns`` of ``frame``.

            Rows with a missing value in any of ``columns`` are dropped; the
            remaining cells go through ``_clean``. The result has a fresh
            0..n-1 index and its columns appear in the order given.

            Raises:
                KeyError: if one of ``columns`` is not present in ``frame``.
            """
            rows = frame[list(columns)].dropna()
            return pd.DataFrame(
                {name: [cls._clean(cell) for cell in rows[name]] for name in columns}
            )

        @staticmethod
        def _write_tsv(frame, path):
            """Write ``frame`` to ``path`` as tab-separated UTF-8 without the index."""
            frame.to_csv(path, index=False, sep="\t", encoding="utf8")

        def _collect(self, frame, sentence_col, label_col):
            """Append the cleaned sentence/label pairs of ``frame`` to self.s / self.l."""
            cleaned = self._clean_columns(frame, [sentence_col, label_col])
            self.s.extend(cleaned[sentence_col].tolist())
            self.l.extend(cleaned[label_col].tolist())

        def _export_no_semantic(self, path_in, path_out, sheet_name,
                                sentence_col, label_col):
            """Export one workbook as sentence / predict / label TSV.

            The ``predict`` column is filled with the constant "无意义" for
            every kept row.
            """
            source = pd.read_excel(path_in, sheet_name=sheet_name)
            cleaned = self._clean_columns(source, [sentence_col, label_col])
            result = pd.DataFrame({
                "sentence": cleaned[sentence_col].tolist(),
                "predict": ["无意义"] * len(cleaned),
                "label": cleaned[label_col].tolist(),
            })
            self._write_tsv(result, path_out)

        def _export_labeled(self, path_in, sheet_name, sentence_col, predict_col,
                            label_col, csv_out, xlsx_out):
            """Export sentence / predict / label rows to both a TSV and an xlsx file."""
            source = pd.read_excel(path_in, sheet_name=sheet_name)
            cleaned = self._clean_columns(
                source, [sentence_col, predict_col, label_col]
            )
            result = pd.DataFrame({
                "sentence": cleaned[sentence_col].tolist(),
                "predict": cleaned[predict_col].tolist(),
                "label": cleaned[label_col].tolist(),
            })
            self._write_tsv(result, csv_out)
            # No ``encoding`` argument: pandas 2.0 removed it from ``to_excel``.
            result.to_excel(xlsx_out, sheet_name="data", index=False)

        # ------------------------------------------------------------------
        # Accumulating readers
        # ------------------------------------------------------------------
        def read_path_1(self):
            """Append the "语句"/"语义" pairs of ``path_1`` (Sheet1) to self.s / self.l."""
            book = pd.read_excel(self.path_1, sheet_name="Sheet1")
            self._collect(book, "语句", "语义")

        def read_path_2(self):
            """Append the "内容"/"正确标签" pairs of three ``path_2`` sheets.

            All sheets are loaded in a single call before anything is appended,
            so a missing sheet fails before the accumulators are touched.
            """
            sheets = pd.read_excel(self.path_2, sheet_name=list(self._PATH_2_SHEETS))
            for sheet_name in self._PATH_2_SHEETS:
                self._collect(sheets[sheet_name], "内容", "正确标签")

        # ------------------------------------------------------------------
        # "No semantic" exports (predict column is the constant "无意义")
        # ------------------------------------------------------------------
        def noSemantic_1(self):
            """Export 3000_1.xlsx (Sheet1: "话术"/"语义") to new_data/3000_1.csv."""
            self._export_no_semantic(
                "3000_1.xlsx", "new_data/3000_1.csv", "Sheet1", "话术", "语义"
            )

        def noSemantic_2(self):
            """Export 3000_2.xlsx ("对话文本": "客户语句"/"语义小类") to new_data/3000_2.csv."""
            self._export_no_semantic(
                "3000_2.xlsx", "new_data/3000_2.csv", "对话文本", "客户语句", "语义小类"
            )

        def noSemantic_3(self):
            """Export 3000_3.xlsx (Sheet1: "语句"/"语义") to new_data/3000_3.csv."""
            self._export_no_semantic(
                "3000_3.xlsx", "new_data/3000_3.csv", "Sheet1", "语句", "语义"
            )

        # ------------------------------------------------------------------
        # Exports that carry a predicted label from the workbook
        # ------------------------------------------------------------------
        def luyin_1(self):
            """Export 录音跟听_1.xlsx to new_data/record_1.csv and record_1.xlsx."""
            self._export_labeled(
                "录音跟听_1.xlsx", "Sheet1", "内容", "预测标签", "正确标签",
                "new_data/record_1.csv", "new_data/record_1.xlsx",
            )

        def luyin_2(self):
            """Export 录音跟听_2.xlsx to new_data/record_2.csv and record_2.xlsx."""
            self._export_labeled(
                "录音跟听_2.xlsx", "Sheet1", "内容", "预测标签", "正确标签",
                "new_data/record_2.csv", "new_data/record_2.xlsx",
            )

        def luyin_3(self):
            """Export 录音跟听_3.xlsx to new_data/record_3.csv and record_3.xlsx."""
            self._export_labeled(
                "录音跟听_3.xlsx", "Sheet1", "内容", "预测标签", "正确标签",
                "new_data/record_3.csv", "new_data/record_3.xlsx",
            )

        def liucheng(self):
            """Export 流程跟听.xlsx to new_data/procedure.csv and procedure.xlsx."""
            self._export_labeled(
                "流程跟听.xlsx", "Sheet1", "语句", "预测标签", "语义",
                "new_data/procedure.csv", "new_data/procedure.xlsx",
            )

        def main(self):
            """Read both default workbooks and write all_0607.csv (columns l, s)."""
            self.read_path_1()
            self.read_path_2()
            combined = pd.DataFrame({'l': self.l, 's': self.s})
            self._write_tsv(combined, "all_0607.csv")

        # Disabled legacy merge step, kept for reference only:
        # def update_1(self):
        #     path = "excel/3000_拒识语料.csv"
        #     data = pd.read_csv(path, sep="\t")
        #     sentence1 = data["sentence"].tolist()
        #     label1 = data["label"].tolist()
        #
        #     t2 = pd.read_excel("excel/语义优化_0608_1.xlsx", sheet_name="data")
        #     sentence2 = t2["sentence"].tolist()
        #     label2 = t2["label"].tolist()
        #
        #     t3 = pd.read_excel("excel/语义优化_0608_2.xlsx", sheet_name="data")
        #     sentence3 = t3["sentence"].tolist()
        #     label3 = t3["label"].tolist()
        #
        #     s = sentence1 + sentence2 + sentence3
        #     l = label1 + label2 + label3
        #     with open("all.txt", "a+", encoding="utf8") as f:
        #         for _l,_s in zip(l, s):
        #             _l = _l.replace(" ", "").replace("\n", "").replace("\r", "").replace("\t", "")
        #             _s = _s.replace(" ", "").replace("\n", "").replace("\r", "").replace("\t", "")
        #             line = _l + "  " + _s
        #             f.write(line + "\n")

        def no_semantic(self):
            """Concatenate the three new_data/3000_*.csv files into excel/nosemantic.xlsx."""
            parts = [
                pd.read_csv(csv_path, sep="\t")
                for csv_path in (
                    "new_data/3000_1.csv",
                    "new_data/3000_2.csv",
                    "new_data/3000_3.csv",
                )
            ]
            merged = pd.concat(parts, ignore_index=True)
            # No ``encoding`` argument: pandas 2.0 removed it from ``to_excel``.
            merged.to_excel('excel/nosemantic.xlsx', sheet_name='data', index=False)
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    # Script entry point: when the file is executed directly, build a Process
    # and run only its no_semantic() step.
    if __name__ == '__main__':
        Process().no_semantic()
  • 相关阅读:
    loj#6074. 「2017 山东一轮集训 Day6」子序列(矩阵乘法 dp)
    loj#6073. 「2017 山东一轮集训 Day5」距离(费用流)
    洛谷P5108 仰望半月的夜空(后缀数组)
    二次剩余Cipolla算法学习笔记
    BZOJ5118: Fib数列2(二次剩余)
    BZOJ3122: [Sdoi2013]随机数生成器(BSGS)
    loj#2531. 「CQOI2018」破解 D-H 协议(BSGS)
    noi.ac #289. 电梯(单调队列)
    51nod“省选”模测第二场 C 小朋友的笑话(线段树 set)
    HDU 4770 Lights Against DudelyLights
  • 原文地址:https://www.cnblogs.com/zhangxianrong/p/14889024.html
Copyright © 2011-2022 走看看