zoukankan      html  css  js  c++  java
  • _io.TextIOWrapper

    '''
    SELECT * FROM Info_Roles WHERE Flag=1 LIMIT 2;
    
    
     select   top   y   *   from   <table>   where   <primary_key>   not   in(select   top   (x-1)*y   <primary_key>   from   <table>)
    
    
    
      If the table has no primary key, use a temporary table with an added identity column. The x and y here can be variables.
    
      select   id=identity(int,1,1),*     into   #tb   from   <table>
      select   *   from   #tb   where   id   between   (x-1)*y   and   x*y-1
    
    
    
    
     select   top   1000   Info_ID   from   Info_Roles
     select   top   2000   Info_ID,',xiaole20180410SPLIT,',content   from   Info_Content   where   Info_ID   not   in( select   top   1000   Info_ID   from   Info_Roles   )   ;
     select   top   399   Info_ID,',xiaole20180410SPLIT,',UPPER(content)   from   Info_Content      ;
     select   top   399   CHARINDEX('IMG',UPPER(content))   from   Info_Content      ;
     select   top   15   Info_ID,',xiaole20180410SPLIT,',content   from   Info_Content   where  CHARINDEX('IMG',UPPER(content))>0;
     select   top   15   Info_ID,',xiaole20180410SPLIT,',content   from   Info_Content   where
     Info_ID      in( select   top   1000   Info_ID   from   Info_Roles   )  and
      CHARINDEX('IMG',UPPER(content))>0
     ;
    
    
    
    SELECT
    	TOP 15 Info_ID,
    	',xiaole20180410SPLIT,',
    	content
    FROM
    	Info_Content
    WHERE
    	Info_ID IN (
    		SELECT
    			TOP 1000 Info_ID
    		FROM
    			Info_Roles
    		WHERE
    			Flag = 1
    	)
    AND CHARINDEX('IMG', UPPER(content)) > 0;
    
    
    
    
    
    SELECT
    	TOP 200 Info_ID,
    	',xiaole20180410SPLIT,',
    	content
    FROM
    	Info_Content
    WHERE
    	Info_ID IN (
    		SELECT
    			TOP 90000 Info_ID
    		FROM
    			Info_Roles
    	)
    AND CHARINDEX('<IMG', UPPER(content)) > 0;
    
    
    
    '''
    
    from bs4 import BeautifulSoup
    from selenium import webdriver
    
    # Marker that separates the leading field from the HTML payload on the
    # first line of each exported record.
    xlsplit_str = ',xiaole20180410SPLIT,'
    # Input dump to read. An earlier assignment pointed at 'db.uid.para.txt'
    # and was immediately overwritten by this one, so only this value is kept.
    f_db_txt, uid_d = 'db.uid.para.byhand.txt', {}
    uid_ = 0  # running record counter used to build the 'byhand<N>' keys
    uid = None  # key of the record currently being assembled (None until the first marker line)
    with open(f_db_txt, 'r', encoding='utf-8') as fr:
        for i in fr:
            # Strip tabs and the line terminator so a record's lines can be
            # concatenated into one HTML string.
            i = i.replace('\t', '').replace('\n', '')
            if xlsplit_str in i:
                # A line containing the marker starts a new record; the text
                # after the marker is the beginning of its HTML.
                l = i.split(xlsplit_str)
                uid_ += 1
                uid = '{}{}'.format('byhand', uid_)
                uid_d[uid] = {}
                uid_d[uid]['html'] = l[1]
            elif uid is not None:
                # Continuation line: append to the record opened most recently.
                uid_d[uid]['html'] = '{}{}'.format(uid_d[uid]['html'], i)
            # Lines that appear before the first marker belong to no record
            # and are skipped (they previously raised NameError on `uid`).
    
    r_d = dict()

    # Chinese sentence segmentation: tokens treated as sentence terminators.
    cutlist = [
        '。', ';', '?', '.', ';', '?',
        '...', '、、、',
        ':', ':', ',', ',',
    ]
    
    
    def FindToken(cutlist, char):
        """Report whether ``char`` is one of the sentence-terminator tokens.

        Args:
            cutlist: Container of terminator tokens; tested with ``in``.
            char: The character (or token) to look up.

        Returns:
            True when ``char`` is found in ``cutlist``, otherwise False.
        """
        return char in cutlist
    
    
    def Cut(cutlist, lines):
        """Split text into sentences, keeping each terminator with its sentence.

        Args:
            cutlist: Collection of terminator tokens.
            lines: The text to split. It is iterated element by element, so a
                str (or a list of single characters) is scanned one character
                at a time.

        Returns:
            A list of str. Every element except possibly the last ends with
            the terminator that closed it. Text that follows the final
            terminator is returned as the last element; it used to be
            silently discarded.

        Note:
            Because matching is done per element, multi-character entries in
            ``cutlist`` (such as '...') can only match when ``lines`` itself
            yields that whole token as one element.
        """
        sentences = []  # completed sentences, in order
        current = []  # characters of the sentence being accumulated

        for ch in lines:
            current.append(ch)
            if ch in cutlist:
                # Terminator reached: emit the sentence and start a new one.
                sentences.append(''.join(current))
                current = []

        # Keep the unterminated tail instead of losing it.
        if current:
            sentences.append(''.join(current))
        return sentences
    
    
    '''
    
    '''
    
    
    def paragraph_to_sentence(paragraph, sentence_l):
        """Split ``paragraph`` on every separator and append the pieces to ``sentence_l``.

        Spaces are removed from the paragraph first. The text is then split at
        each occurrence of any separator listed below; separators themselves
        are not kept and empty pieces are skipped.

        This replaces an earlier loop that stopped at the first separator
        missing from the text, ignored everything after the second piece, and
        re-appended unsplit text after recursing. The newline/tab separator
        literals are also written as escapes here.

        Args:
            paragraph: Text to split.
            sentence_l: List that receives the pieces; it is extended in place.

        Returns:
            The same ``sentence_l`` object, after the pieces were appended.
        """
        # NOTE(review): the repeated ';', '?' and ',' entries may originally
        # have been full-width variants lost in transcription -- confirm.
        sentence_split_l = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ',', ',']
        text = paragraph.replace(' ', '')
        # Rewrite every separator as '\n' so a single split handles them all.
        for sep in sentence_split_l:
            text = text.replace(sep, '\n')
        sentence_l.extend(piece for piece in text.split('\n') if piece)
        return sentence_l
    
    
    def paragraph_to_sentence_no_recursion(paragraph, sentence_l):
        """Iteratively split ``paragraph`` on every separator into ``sentence_l``.

        Spaces are removed from the paragraph first. The text is then split at
        each occurrence of any separator listed below; separators themselves
        are not kept and empty pieces are skipped. No recursion and no other
        function of this module is involved (the earlier body delegated to a
        recursive helper and stopped at the first separator missing from the
        text).

        Args:
            paragraph: Text to split.
            sentence_l: List that receives the pieces; it is extended in place.

        Returns:
            The same ``sentence_l`` object, after the pieces were appended.
        """
        # NOTE(review): the repeated ';', '?' and ',' entries may originally
        # have been full-width variants lost in transcription -- confirm.
        sentence_split_l = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ',', ',']
        text = paragraph.replace(' ', '')
        # Rewrite every separator as '\n' so a single split handles them all.
        for sep in sentence_split_l:
            text = text.replace(sep, '\n')
        for piece in text.split('\n'):
            if piece:
                sentence_l.append(piece)
        return sentence_l
    
    
    # Module-level scratch run of the splitting loop on an empty paragraph.
    # With an empty string the first split yields [''], so the loop appends
    # '' once and stops; `sentence_l` is therefore [''] afterwards.
    paragraph = ''
    sentence_l = []
    paragraph = paragraph.replace(' ', '')
    # The newline and tab separators are written as escape sequences; raw
    # line breaks inside the quotes left the literal unterminated.
    sentence_split_l = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ',', ',']
    for i in sentence_split_l:
        ll = paragraph.split(i)
        sentence_l.append(ll[0])
        if len(ll) > 1:
            paragraph_to_sentence(ll[1], sentence_l)
        else:
            break
    
    
    def sentence_l_to_sentence_l_l(sentence_l):
        """Split every sentence further on all separators and flatten the result.

        Each input string is split at every occurrence of any separator
        listed below. The resulting non-empty pieces are collected, in order,
        into one flat list. A string that contains no separator is kept once,
        unchanged (the earlier loop appended it once per separator and never
        combined splits across different separators).

        Args:
            sentence_l: Iterable of strings to split.

        Returns:
            A new flat list of the non-empty pieces.
        """
        # NOTE(review): the repeated ';', '?' and ',' entries may originally
        # have been full-width variants lost in transcription -- confirm.
        sentence_split_l = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ',', ',']
        sentence_l_l = []
        for sentence in sentence_l:
            # Rewrite every separator as '\n' so a single split handles them all.
            for sep in sentence_split_l:
                sentence = sentence.replace(sep, '\n')
            sentence_l_l.extend(piece for piece in sentence.split('\n') if piece)
        return sentence_l_l
    
    
    import requests, time, threading
    
    # Image directory. Backslashes are doubled: a single '\U' is an invalid
    # escape in a Python 3 string and a trailing '\' would escape the closing
    # quote. The second assignment overrides the first.
    img_dir = 'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\'
    img_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\mypng\\'
    import random
    
    import os, time, glob
    
    os_sep = os.sep
    this_file_abspath = os.path.abspath(__file__)
    # Directory and bare file name of this script.
    this_file_dirname, this_file_name = os.path.split(this_file_abspath)
    # Multi-line output file: '<script name>.txt'.
    fw_f = '{}{}'.format(this_file_name, '.txt')
    # One-row-per-record output file. It gets its own suffix because the
    # earlier expression (strip '.txt', then add '.txt' again) produced the
    # very same path as `fw_f`, so both writers targeted a single file.
    fw_f_onerow = '{}{}'.format(this_file_name, '.onerow.txt')
    
    
    
    
    # Export loop: for every loaded record, extract the page text, cut it into
    # sentences, filter, and write the result to the two output files.
    with open(fw_f, 'w', encoding='utf-8') as fw_txt:
        with open(fw_f_onerow, 'w', encoding='utf-8') as fw_txt_onerow:
            for uid in uid_d:
                str_ = uid_d[uid]['html']
                # Dump the HTML to a uniquely named temp file (timestamp plus
                # random number), then parse it back from disk.
                fhtml = 'qqzong.vedio.allinone.tmp.html'
                fhtml = '{}{}{}{}'.format('D:\\myv\\myhtml\\', int(time.time()), random.randint(1234, 6789), fhtml)
                with open(fhtml, 'w', encoding='utf-8') as fw:
                    fw.write(str_)
                with open(fhtml, 'r', encoding='utf-8') as fo:
                    soup = BeautifulSoup(fo, 'html.parser')
                    sentence_l = Cut(list(cutlist), list(soup.text))

                # Filter: skip records with fewer than `sen_num` sentences or
                # with any sentence longer than 64 characters.
                sen_num = 32
                if len(sentence_l) < sen_num:
                    continue
                if any(len(sen) > 64 for sen in sentence_l):
                    continue

                # Record header goes to both files.
                s = '{}{}{}'.format('-----------------------', uid, '----------------------------------------\n')
                fw_txt.write(s)
                # File objects are not callable; the text must go through .write().
                fw_txt_onerow.write(s)

                # Multi-line file: the first 31 sentences, one per line.
                for sen in sentence_l[:31]:
                    s = '{}{}'.format(sen, '\n')
                    print(s)
                    fw_txt.write(s)

                # One-row file: the same 31 sentences joined without separators.
                fw_txt_onerow.write(''.join(sentence_l[0:31]))

                # Closing contact line: fixed label followed by a random
                # 11-digit number, written to both files.
                s = '{}{}{}'.format('联系方式:王经理', random.randint(13200000000, 15812341234), '\n')
                fw_txt.write(s)
                fw_txt_onerow.write(s)

    dd = 9
    

      

  • 相关阅读:
    LeetCode 623. Add One Row to Tree
    LeetCode 894. All Possible Full Binary Trees
    LeetCode 988. Smallest String Starting From Leaf
    LeetCode 979. Distribute Coins in Binary Tree
    LeetCode 814. Binary Tree Pruning
    LeetCode 951. Flip Equivalent Binary Trees
    LeetCode 426. Convert Binary Search Tree to Sorted Doubly Linked List
    LeetCode 889. Construct Binary Tree from Preorder and Postorder Traversal
    LeetCode 687. Longest Univalue Path
    LeetCode 428. Serialize and Deserialize N-ary Tree
  • 原文地址:https://www.cnblogs.com/rsapaper/p/8865182.html
Copyright © 2011-2022 走看看