zoukankan      html  css  js  c++  java
  • 语音切割

    # 设置分句的标志符号;可以根据实际需要进行修改
    # cutlist = "。!?".decode('utf-8')
    
    # Characters that end a sentence. The newline and tab entries are written as
    # escape sequences so the list literal stays on one physical line (a raw line
    # break inside a quoted string is a syntax error).
    # The multi-character entries ('...' and '、、、') can never equal a single
    # character, so they have no effect when the text is scanned one character
    # at a time.
    # NOTE(review): ';' and '?' each appear twice; presumably one of each pair was
    # a full-width variant that was lost in transcription -- confirm.
    cutlist = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']
    
    
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']
    
    
    # 检查某字符是否分句标志符号的函数;如果是,返回True,否则返回False
    def FindToken(cutlist, char):
        """Tell whether ``char`` is a sentence delimiter.

        Args:
            cutlist: Container of delimiter strings to test against.
            char: The candidate string (normally a single character).

        Returns:
            True when ``char`` is contained in ``cutlist``, otherwise False.
        """
        return char in cutlist
    
    
    # 进行分句的核心函数
    def Cut(cutlist, lines):
        """Split a sequence of characters into sentences.

        Each sentence ends with (and includes) the delimiter that closed it.
        Consecutive delimiters each produce their own one-character sentence.

        Args:
            cutlist: Container of delimiter characters.
            lines: The text to split, as a string or a list of characters.

        Returns:
            A list of sentence strings in their original order. Text that follows
            the last delimiter is returned as a final sentence instead of being
            discarded.
        """
        sentences = []  # completed sentences
        pending = []    # characters collected since the last delimiter

        for ch in lines:
            pending.append(ch)
            if ch in cutlist:
                sentences.append(''.join(pending))
                pending = []

        # Keep the trailing fragment (e.g. a last line that has no closing
        # punctuation or newline) rather than silently dropping it.
        if pending:
            sentences.append(''.join(pending))
        return sentences
    
    
    # Sentence list used by an earlier variant of this script, which ran
    # Cut(list(cutlist), list(lines)) on every input line and appended each
    # non-blank, stripped sentence here. The code below does not fill it.
    r_s = []

    # Read the input text and concatenate it into a single string for synthesis.
    # This variant keeps every line's newline character in the result.
    kept_lines = []
    with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
        for lines in fr:
            # Skips only a line made up purely of spaces; a blank line still
            # carries its '\n' and is therefore kept.
            if not lines.replace(' ', ''):
                continue
            kept_lines.append(lines)

    # Join once instead of re-formatting the growing string for every line.
    str_ = ''.join(kept_lines)
    
    
    from aip import AipSpeech
    
    # NOTE(review): service credentials are hard-coded in the source; consider
    # loading them from the environment or a config file kept out of version control.
    bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
    APP_ID, API_KEY, SECRET_KEY = bd_k_l


    import math
    import os

    # Number of characters sent per synthesis request.
    # NOTE(review): the service limit may be counted in encoded bytes rather than
    # characters -- confirm against the API documentation.
    bd_str_per_limit = 1024
    rep_times = math.ceil(len(str_) / bd_str_per_limit)

    # Backslashes are escaped: an unescaped '\U' or a trailing '\' before the
    # closing quote makes the literal invalid.
    mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\result_liukeyun\\'
    uid = 'liukeyuanCAKE_whole_para'

    # One client is enough for all requests.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    for i in range(rep_times):
        cut_str = str_[i * bd_str_per_limit:(i + 1) * bd_str_per_limit]

        # Send only the current chunk, not the whole text.
        result = client.synthesis(cut_str, 'zh', 1, {
            'vol': 5,
        })

        # On success the call returns the audio as binary data; on failure it
        # returns a dict describing the error.
        if isinstance(result, dict):
            print('synthesis failed for chunk', i, result, flush=True)
            continue

        f_w = '{}{}{}{}{}{}{}'.format(mp3_dir, 'g3db', uid, 'g3uid', 'bd_str_per_limit', i, '.mp3')
        with open(f_w, 'wb') as f:
            f.write(result)

    # Ends the process immediately with status 2, so nothing after this point runs.
    # NOTE(review): a non-zero status usually signals failure -- confirm it is intended.
    os._exit(2)
    

      

    换行符的影响:下面的版本在拼接文本时先去掉每一行中的换行符。

    # 设置分句的标志符号;可以根据实际需要进行修改
    # cutlist = "。!?".decode('utf-8')
    
    # Characters that end a sentence. The newline and tab entries are written as
    # escape sequences so the list literal stays on one physical line (a raw line
    # break inside a quoted string is a syntax error).
    # The multi-character entries ('...' and '、、、') can never equal a single
    # character, so they have no effect when the text is scanned one character
    # at a time.
    # NOTE(review): ';' and '?' each appear twice; presumably one of each pair was
    # a full-width variant that was lost in transcription -- confirm.
    cutlist = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']
    
    
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']
    
    
    # 检查某字符是否分句标志符号的函数;如果是,返回True,否则返回False
    def FindToken(cutlist, char):
        """Check ``char`` against the delimiter collection.

        Args:
            cutlist: Container of delimiter strings.
            char: The string to look up (normally one character).

        Returns:
            True if ``char`` is found in ``cutlist``; False if it is not.
        """
        found = char in cutlist
        return found
    
    
    # 进行分句的核心函数
    def Cut(cutlist, lines):
        """Split a sequence of characters into sentences.

        Each sentence ends with (and includes) the delimiter that closed it.
        Consecutive delimiters each produce their own one-character sentence.

        Args:
            cutlist: Container of delimiter characters.
            lines: The text to split, as a string or a list of characters.

        Returns:
            A list of sentence strings in their original order. Text that follows
            the last delimiter is returned as a final sentence instead of being
            discarded.
        """
        sentences = []  # completed sentences
        pending = []    # characters collected since the last delimiter

        for ch in lines:
            pending.append(ch)
            if ch in cutlist:
                sentences.append(''.join(pending))
                pending = []

        # Keep the trailing fragment (e.g. a last line that has no closing
        # punctuation or newline) rather than silently dropping it.
        if pending:
            sentences.append(''.join(pending))
        return sentences
    
    
    # Sentence list used by an earlier variant of this script, which ran
    # Cut(list(cutlist), list(lines)) on every input line and appended each
    # non-blank, stripped sentence here. The code below does not fill it.
    r_s = []

    # Read the input text and concatenate it into a single string for synthesis.
    # This variant removes the newline characters, so the lines are joined
    # back to back.
    kept_lines = []
    with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
        for lines in fr:
            # Skips only a line made up purely of spaces; a blank line still
            # carries its '\n' here and passes through (adding nothing once
            # the newline is removed below).
            if not lines.replace(' ', ''):
                continue
            # '\n' is written as an escape sequence; a raw line break inside the
            # quotes is a syntax error.
            kept_lines.append(lines.replace('\n', ''))

    # Join once instead of re-formatting the growing string for every line.
    str_ = ''.join(kept_lines)
    
    
    from aip import AipSpeech
    
    # NOTE(review): service credentials are hard-coded in the source; consider
    # loading them from the environment or a config file kept out of version control.
    bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
    APP_ID, API_KEY, SECRET_KEY = bd_k_l


    import math
    import os

    # Number of characters sent per synthesis request.
    # NOTE(review): the service limit may be counted in encoded bytes rather than
    # characters -- confirm against the API documentation.
    bd_str_per_limit = 1024
    rep_times = math.ceil(len(str_) / bd_str_per_limit)

    # Backslashes are escaped: an unescaped '\U' or a trailing '\' before the
    # closing quote makes the literal invalid.
    mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\result_liukeyun\\'
    uid = 'liukeyuanCAKE_whole_para'

    # One client is enough for all requests.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    for i in range(rep_times):
        cut_str = str_[i * bd_str_per_limit:(i + 1) * bd_str_per_limit]
        # Show the chunk about to be synthesized, followed by a separator line.
        print(cut_str)
        print('----------------------------------')

        result = client.synthesis(cut_str, 'zh', 1, {
            'vol': 5,
        })

        # On success the call returns the audio as binary data; on failure it
        # returns a dict describing the error.
        if isinstance(result, dict):
            print('synthesis failed for chunk', i, result, flush=True)
            continue

        f_w = '{}{}{}{}{}{}{}{}'.format(mp3_dir, 'g3db', uid, 'g3uid', 'bd_str_per_limit', '_NO_trN_', i, '.mp3')
        with open(f_w, 'wb') as f:
            f.write(result)

    # Ends the process immediately with status 2, so nothing after this point runs.
    # NOTE(review): a non-zero status usually signals failure -- confirm it is intended.
    os._exit(2)
    

      

    # 设置分句的标志符号;可以根据实际需要进行修改
    # cutlist = "。!?".decode('utf-8')
    
    # Characters that end a sentence. The newline and tab entries are written as
    # escape sequences so the list literal stays on one physical line (a raw line
    # break inside a quoted string is a syntax error).
    # The multi-character entries ('...' and '、、、') can never equal a single
    # character, so they have no effect when the text is scanned one character
    # at a time.
    # NOTE(review): ';' and '?' each appear twice; presumably one of each pair was
    # a full-width variant that was lost in transcription -- confirm.
    cutlist = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']
    
    
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
    # cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']
    
    
    # 检查某字符是否分句标志符号的函数;如果是,返回True,否则返回False
    def FindToken(cutlist, char):
        """Membership test for sentence delimiters.

        Args:
            cutlist: Container holding the delimiter strings.
            char: The string being examined (normally one character).

        Returns:
            False when ``char`` is absent from ``cutlist``, True when present.
        """
        if char not in cutlist:
            return False
        return True
    
    
    # 进行分句的核心函数
    def Cut(cutlist, lines):
        """Split a sequence of characters into sentences.

        Each sentence ends with (and includes) the delimiter that closed it.
        Consecutive delimiters each produce their own one-character sentence.

        Args:
            cutlist: Container of delimiter characters.
            lines: The text to split, as a string or a list of characters.

        Returns:
            A list of sentence strings in their original order. Text that follows
            the last delimiter is returned as a final sentence instead of being
            discarded.
        """
        sentences = []  # completed sentences
        pending = []    # characters collected since the last delimiter

        for ch in lines:
            pending.append(ch)
            if ch in cutlist:
                sentences.append(''.join(pending))
                pending = []

        # Keep the trailing fragment (e.g. a last line that has no closing
        # punctuation or newline) rather than silently dropping it.
        if pending:
            sentences.append(''.join(pending))
        return sentences
    
    
    # Sentence list used by an earlier variant of this script, which ran
    # Cut(list(cutlist), list(lines)) on every input line and appended each
    # non-blank, stripped sentence here. The code below does not fill it.
    r_s = []

    # Read the input text and concatenate it into a single string for synthesis.
    # This variant drops lines that contain nothing but spaces and a newline,
    # and removes the newline characters from the lines it keeps.
    kept_lines = []
    with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
        for lines in fr:
            # '\n' is written as an escape sequence; a raw line break inside the
            # quotes is a syntax error.
            if not lines.replace(' ', '').replace('\n', ''):
                continue
            kept_lines.append(lines.replace('\n', ''))

    # Join once instead of re-formatting the growing string for every line.
    str_ = ''.join(kept_lines)
    
    
    from aip import AipSpeech
    
    # NOTE(review): service credentials are hard-coded in the source; consider
    # loading them from the environment or a config file kept out of version control.
    bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
    APP_ID, API_KEY, SECRET_KEY = bd_k_l


    import math
    import os

    # Number of characters sent per synthesis request (an earlier run used 1024).
    # NOTE(review): the service limit may be counted in encoded bytes rather than
    # characters -- confirm against the API documentation.
    bd_str_per_limit = 300
    rep_times = math.ceil(len(str_) / bd_str_per_limit)

    # Backslashes are escaped: an unescaped '\U' or a trailing '\' before the
    # closing quote makes the literal invalid.
    mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\result_liukeyun\\'
    uid = 'CAKE'

    # One client is enough for all requests.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    for i in range(rep_times):
        cut_str = str_[i * bd_str_per_limit:(i + 1) * bd_str_per_limit]
        # Show the chunk about to be synthesized, followed by a separator line.
        print(cut_str)
        print('----------------------------------')

        result = client.synthesis(cut_str, 'zh', 1, {
            'vol': 5,
        })

        # On success the call returns the audio as binary data; on failure it
        # returns a dict describing the error.
        if isinstance(result, dict):
            print('synthesis failed for chunk', i, result, flush=True)
            continue

        f_w = '{}{}{}{}{}{}{}{}'.format(mp3_dir, 'g3db', uid, 'g3uid', 'noBRBlankLine', '', i, '.mp3')
        with open(f_w, 'wb') as f:
            f.write(result)

    # Ends the process immediately with status 2, so nothing after this point runs.
    # NOTE(review): a non-zero status usually signals failure -- confirm it is intended.
    os._exit(2)
    

      

  • 相关阅读:
    Python学习札记(十五) 高级特性1 切片
    LeetCode Longest Substring Without Repeating Characters
    Python学习札记(十四) Function4 递归函数 & Hanoi Tower
    single number和变体
    tusen 刷题
    实验室网站
    leetcode 76. Minimum Window Substring
    leetcode 4. Median of Two Sorted Arrays
    leetcode 200. Number of Islands 、694 Number of Distinct Islands 、695. Max Area of Island 、130. Surrounded Regions 、434. Number of Islands II(lintcode) 并查集 、178. Graph Valid Tree(lintcode)
    刷题注意事项
  • 原文地址:https://www.cnblogs.com/rsapaper/p/8795540.html
Copyright © 2011-2022 走看看