zoukankan      html  css  js  c++  java
  • [Synthetic-data-with-text-and-image]


    0 引言

    本文是之前为了解决如何将文字贴到图片上而编写的代码,默认是如发票一类的,所以并未考虑透视变换等。且采用的是pygame粘贴方式,之前也尝试过opencv的seamlessClone粘贴。

    值得注意的是,通过修改参数、增加各种干扰操作(羽化、噪音等等),生成的数据集看似丰富,但其内在的数据分布仍然十分单一。也就是说,用该数据集作为ocr模型的训练集,得到的模型仍然无法在现实生活场景中使用。因为在现实世界中,光照角度、拍摄角度、打印机用墨等等都是变量,这些变量使现实世界中票据上文字的内在数据分布十分丰富;而通过简单代码生成的数据分布并不能覆盖它,或者说只能与其中一部分重叠。故而,通过代码生成数据集的方式无法解决ocr现实数据集不够的问题。

    所需要的操作:
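    1 - 下载colors_new.cp(https://github.com/JarveeLee/SynthText_Chinese_version/tree/master/data/models/colors_new.cp),并放到运行目录下;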
    2 - 将下面两份代码存成对应的get_color.py 和pygame_main.py;
    3 - python 运行pygame_main.py即可。

    # get_color.py
    import cv2
    import pickle as cp
    import numpy as np
    
    class ColorSample(object):
        """Pick a text colour that suits a background, using a colour table.

        The table is read from ``colors_new.cp``. Each row is used as
        ``[mean1(3), std1(3), mean2(3), std2(3)]`` (see ``sample_from_data``).
        """

        def __init__(self):
            """Load the colour table from ``colors_new.cp`` in the working directory.

            colors_new.cp comes from
            https://github.com/JarveeLee/SynthText_Chinese_version/tree/master/data/models/colors_new.cp
            """
            # NOTE: pickle executes arbitrary code on load; only use a trusted
            # colors_new.cp file.
            with open('colors_new.cp','rb') as f:
                self.colorsRGB = cp.load(f,encoding='latin-1')
            self.ncol = self.colorsRGB.shape[0]  # 4941 per the original author

            # convert color-means from RGB to LAB for better nearest neighbour
            # computations: the first ncol rows hold mean1, the next ncol rows
            # hold mean2.
            self.colorsLAB = np.r_[self.colorsRGB[:,0:3], self.colorsRGB[:,6:9]].astype('uint8')
            self.colorsLAB = np.squeeze(cv2.cvtColor(self.colorsLAB[None,:,:],cv2.COLOR_RGB2Lab))

        def sample_normal(self, col_mean, col_std):
            """
            sample from a normal distribution centered around COL_MEAN
            with standard deviation = COL_STD.

            A single random deviate is shared by all channels; the result is
            clipped to [0, 255] and returned as uint8.
            """
            col_sample = col_mean + col_std * np.random.randn()
            return np.clip(col_sample, 0, 255).astype('uint8')

        def sample_from_data(self,bg_mat):
            """
            bg_mat : this is a nxmx3 RGB image.

            returns a tuple : (RGB_foreground, RGB_background)
            each of these is a 3-vector.
            """
            bg_mat = cv2.cvtColor(bg_mat, cv2.COLOR_RGB2Lab)
            bg_mat = np.reshape(bg_mat, (np.prod(bg_mat.shape[:2]),3))
            bg_mean = np.mean(bg_mat,axis=0)

            # nearest table colour (in LAB) to the mean background colour
            norms = np.linalg.norm(self.colorsLAB-bg_mean[None,:], axis=1)
            nn = np.argmin(norms)

            # colorsLAB stacks two colour sets, so fold the index back onto
            # the table rows.
            data_col = self.colorsRGB[np.mod(nn,self.ncol),:]

            col1 = self.sample_normal(data_col[:3],data_col[3:6])
            col2 = self.sample_normal(data_col[6:9],data_col[9:12])

            if nn < self.ncol:
                # the match was against colour 1, so colour 1 is the background
                return (col2, col1)
            # need to swap to make the second color close to the input
            # background color
            return (col1, col2)
    if __name__ =='__main__':
        # Manual smoke test: python get_color.py <background_image>
        # The original called a bare sample_from_data() on an undefined `bgi`,
        # which raised NameError; sample_from_data is a ColorSample method and
        # needs an RGB image.
        import sys
        if len(sys.argv) < 2:
            raise SystemExit('usage: python get_color.py <background_image>')
        bgi = cv2.imread(sys.argv[1])
        if bgi is None:
            raise SystemExit('cannot read image: {}'.format(sys.argv[1]))
        # cv2 loads BGR; sample_from_data documents an RGB input.
        fg_col,bg_col = ColorSample().sample_from_data(cv2.cvtColor(bgi, cv2.COLOR_BGR2RGB))
        print('fg_col:',fg_col,'bg_col:',bg_col)
    
    # -*- coding: utf-8 -*-
    #pygame replace Image
    
    import os
    import cv2
    import glob
    import math
    import random
    import numpy as np
    import os.path as osp
    from xml.dom.minidom import Document
    import multiprocessing as mp
    import logging
    from PIL import Image,ImageDraw,ImageFont
    import secrets
    import pygame
    from pygame.locals import *
    from pygame import freetype
    
    import get_color
    
    # Input / output locations.
    resultImgsDir = '/home/result_imgs'  # where the generated images are stored
    resultXmlDir = '/home/result_xmls'  # where the generated xml files are stored
    bgiDir = '/home/background_images'  # directory of background images
    gTtf = '/home/ttfs'  # directory of font files
    # Text to paste: one sentence (or one word) per line.
    totalFile = '/home/zzc/data/synth_recepit_text/result_200.txt'

    # Log lines carry the timestamp and the name of the emitting process.
    FORMAT = '%(asctime)-15s [%(processName)s] %(message)s'
    logging.basicConfig(format=FORMAT)

    gBlockSize = 20  # number of sentences handled per task
    ttfSize = [28, 30, 35, 40, 45, 50, 55, 60, 65]  # candidate font sizes
    
    #====test
    #charset = [line.strip().split('	')[1] for line in open('text/chars_gb2312').readlines()[:-1]]
    def _addSaltNoise(block,level = 10):
        '''添加椒盐噪声 '''
        ran = np.random.randint(0,level,block.shape)
        salt = ran == 0
        pepper = ran == level
        block[salt]= 0
        block[pepper] = 255
        return block
    
    def _addNoise(block,below=4,high =20):
        ''' 添加噪声'''
        randValue = np.random.randn(*block.shape)*np.random.randint(below,high)
        block = block+randValue
        block[block<0] = 0.0
        block[block>255] = 255.0
        block = block.astype('uint8')
        return block
    
    def _feather(block, height):
        """Feather (Gaussian-blur) ``block``; taller text gets a stronger blur.

        ``height`` selects the kernel size (1, 3 or 5) and the blur std,
        which is randomised for heights above 30.
        """
        if height <= 30:
            kernel, sigma = 1, 0.25
        elif height < 50:
            kernel, sigma = 3, max(0.30, 0.5 + 0.1*np.random.randn())
        else:
            kernel, sigma = 5, max(0.5, 1.5 + 0.5*np.random.randn())
        return cv2.GaussianBlur(block,(kernel,kernel),sigma)
    
    def _seamlessClone(obj,dst,center):
        """Blend ``obj`` into ``dst`` around ``center`` with cv2.seamlessClone.

        The whole of ``obj`` is cloned (all-255 mask) in MIXED_CLONE mode.
        On failure the shapes and the centre are printed and the exception
        is re-raised.
        """
        whole = np.ones(obj.shape, obj.dtype) * 255
        try:
            return cv2.seamlessClone(obj, dst, whole, center, cv2.MIXED_CLONE)
        except Exception:
            print('exception:',obj.shape,dst.shape,whole.shape,center)
            raise
    
    def _rander(bgiGame,string,rowStart,font,get_color):
        """Render ``string`` onto the surface and degrade the rendered region.

        Args:
            bgiGame: pygame surface used as the background.
            string: text to draw.
            rowStart: y coordinate of the top of the text.
            font: pygame freetype font; its ``oblique`` and ``rotation``
                attributes are overwritten with random values.
            get_color: object whose ``sample_from_data(rgb_array)`` returns
                ``(foreground, background)`` colours.

        Returns:
            ``(surface, rowStart, colStart, txtwidth, txtheight)``. The last
            three are 0 when nothing was drawn (text does not fit, or the
            render call failed).
        """
        width, height = bgiGame.get_size()
        failed = (bgiGame,rowStart,0,0,0)

        # sample the text colour from the current background
        fg_col,_bg_col = get_color.sample_from_data(pygame.surfarray.array3d(bgiGame))
        fg_col = fg_col.squeeze()

        # randomise the font appearance
        font.oblique = secrets.choice([False,True])
        font.rotation = secrets.choice(range(-5,5))

        # measure the text with the current font settings
        txtwidth,txtheight = font.render(string)[1].size
        if txtwidth > width or rowStart+txtheight > height:
            return failed
        colStart = secrets.randbelow(max(1, width-txtwidth))

        # render the text
        try:
            font.render_to(bgiGame,(colStart,rowStart), string, fg_col)
        except Exception as err:
            # The original used a bare except and carried on, reporting a
            # bounding box for text that was never drawn. Report it with the
            # same "nothing drawn" result used when the text does not fit.
            print('fg_col',fg_col,err)
            return failed

        # surface -> numpy (rows first, BGR)
        bgiNp = pygame.surfarray.array3d(bgiGame)
        bgiNp = cv2.cvtColor(bgiNp.transpose([1,0,2]),cv2.COLOR_RGB2BGR)

        # add noise and blur to the text region only
        rowEnd,colEnd = rowStart+txtheight,colStart+txtwidth
        block = bgiNp[rowStart:rowEnd,colStart:colEnd,:]
        block = _addNoise(block,4,20)
        if secrets.choice(range(4))==0:  block = _addSaltNoise(block,np.random.randint(70,80))
        block = _feather(block,txtheight)
        block = _addNoise(block,2,20)
        if secrets.choice(range(4))==0:  block = _addSaltNoise(block,np.random.randint(70,80))
        bgiNp[rowStart:rowEnd,colStart:colEnd,:] = block

        # numpy -> surface
        bgiNp = cv2.cvtColor(bgiNp.transpose([1,0,2]),cv2.COLOR_BGR2RGB)
        bgiGame = pygame.surfarray.make_surface(bgiNp)

        return bgiGame,rowStart,colStart,txtwidth,txtheight
    
    def _paste(bgiGame,ttf,size,rowStart,curText,cols,get_color):
        """Choose the string for one row and render it with ``_rander``.

        With probability 1/10 the text is replaced by 10 random digits, and
        that digit string occasionally has one digit replaced by a dot.
        Only strings of exactly 10 characters are rendered.

        Args:
            bgiGame: pygame surface to draw on.
            ttf: path of the font file.
            size: font size.
            rowStart: y coordinate of the top of the text.
            curText: candidate text line.
            cols: unused; kept for interface compatibility.
            get_color: colour sampler forwarded to ``_rander``.

        Returns:
            ``(surface, string, rowStart, colStart, txtwidth, txtheight)``.
            ``string`` is '' when the text was skipped.
        """
        curText = curText.strip()

        # random the digit
        numberLength = 10
        digits = ['0','1','2','3','4','5','6','7','8','9']
        if secrets.randbelow(numberLength) == 0:
            curText = ''.join([secrets.choice(digits) for _ in range(numberLength)] )
            string = curText
            # random the dot: replace one inner digit, keeping the length
            if secrets.randbelow(numberLength-2) == 0:
                dotInd = random.randint(1,numberLength-2)
                string = curText[:dotInd]+'.'+curText[dotInd+1:]
        else:
            string = curText

        # Skip any text that is not exactly numberLength characters long.
        # The original returned four Nones here although the caller unpacks
        # six values; return a full tuple with an empty string instead.
        if len(string) != numberLength:
            return bgiGame,'',rowStart,0,0,0

        # Load the font only once we know the text will be rendered.
        ttfont = freetype.Font(ttf,size)
        bgiGame,rowStart,colStart,txtwidth,txtheight = _rander(bgiGame,string,rowStart,ttfont,get_color)

        return bgiGame,string,rowStart,colStart,txtwidth,txtheight
    
    def _xml(doc,anno,string,xminT,yminT,xmaxT,ymaxT):
        ''' 生成对应的xml'''
        if not string: return
        body = doc.createElement('object')
        anno.appendChild(body)
    
        name = doc.createElement('name')
        nameText = doc.createTextNode('text')
        name.appendChild(nameText)
        body.appendChild(name)
    
        content = doc.createElement('textContent')
        contentText = doc.createTextNode(string)
        content.appendChild(contentText)
        body.appendChild(content)
    
        bndbox = doc.createElement('bndbox')
    
        xmin = doc.createElement('xmin')
        ymin = doc.createElement('ymin')
        xmax = doc.createElement('xmax')
        ymax = doc.createElement('ymax')
    
        xminText = doc.createTextNode(str(xminT))
        yminText = doc.createTextNode(str(yminT))
        xmaxText = doc.createTextNode(str(xmaxT))
        ymaxText = doc.createTextNode(str(ymaxT))
    
        xmin.appendChild(xminText)
        ymin.appendChild(yminText)
        xmax.appendChild(xmaxText)
        ymax.appendChild(ymaxText)
    
        bndbox.appendChild(xmin)
        bndbox.appendChild(ymin)
        bndbox.appendChild(xmax)
        bndbox.appendChild(ymax)
        body.appendChild(bndbox)
    
    def paste(imgname,bgi,text,ttf,get_color):
        """Paste text rows onto a background image and build its annotation.

        Rows are laid out top to bottom with a random gap and a random font
        size per row, until the next row would no longer fit.

        Args:
            imgname: image name recorded in the xml.
            bgi: path of the background image.
            text: sequence of candidate text lines.
            ttf: path of the font file.
            get_color: colour sampler forwarded to ``_paste``.

        Returns:
            ``(image, doc, numTextDone)``: the image as a BGR numpy array,
            the minidom document and the number of annotated strings.
        """
        pygame.init()
        bgiGame = pygame.image.load(bgi)
        width,height = bgiGame.get_size()
        depth = bgiGame.get_bitsize()//8  # bytes per pixel

        # gap before the first row
        curRowInter = random.randint(3,7)
        curRow = curRowInter

        # random font size for the first row
        curTtfSize = random.choice(ttfSize)

        # xml header: image name and image size
        doc = Document()
        anno = doc.createElement('Annotations')
        doc.appendChild(anno)
        imgNameNode = doc.createElement('imgName')
        imgNameNode.appendChild(doc.createTextNode(imgname))
        anno.appendChild(imgNameNode)

        sizeNode = doc.createElement('size')
        for tag, value in (('width', width), ('height', height), ('depth', depth)):
            node = doc.createElement(tag)
            node.appendChild(doc.createTextNode(str(value)))
            sizeNode.appendChild(node)
        anno.appendChild(sizeNode)

        # Paste the text row by row. The loop bound was truncated in the
        # original listing ("<= "); the image height is the bound, matching
        # the PIL variant of this script ("<=rows").
        numTextDone = 0
        while curRow+curTtfSize <= height:
            curText = secrets.choice(text)

            bgiGame,string,curRow,colStart,txtwidth,txtheight = _paste(bgiGame,ttf,curTtfSize,curRow,curText,width,get_color)
            if not string: continue  # text skipped: retry this row with another line

            # A zero-sized box means the text did not fit and was not drawn,
            # so do not annotate it.
            if txtwidth > 0 and txtheight > 0:
                numTextDone += 1
                _xml(doc,anno,string,xminT = colStart,yminT = curRow,xmaxT = colStart+txtwidth,ymaxT = curRow+txtheight)

            curRow += txtheight
            curRow += curRowInter
            # gap and font size for the next row
            curRowInter = random.randint(3,6)
            curTtfSize = random.choice(ttfSize)

        bgi = pygame.surfarray.array3d(bgiGame).transpose([1,0,2])
        bgi = cv2.cvtColor(bgi,cv2.COLOR_RGB2BGR)
        return bgi, doc, numTextDone
    
    def handle(indTexts):
        """Generate one synthetic image and its xml file for a chunk of text.

        Args:
            indTexts: ``(index, texts)`` pair, as produced by ``enumerate``.

        A random background from ``bgiDir`` and a random font from ``gTtf``
        are used; the image goes to ``resultImgsDir`` and the xml to
        ``resultXmlDir``.
        """
        ind, texts = indTexts
        # process id, used to keep file names unique across workers
        pid = os.getpid()
        # colour sampler
        getcolor = get_color.ColorSample()
        # pick one background image at random
        bgis = glob.glob( osp.join(bgiDir, '*.jpg') )
        bgipath = random.choice(bgis)

        # pick one font at random; `ttfs` was never defined in the original
        # and is built here the same way the PIL variant of this script does
        ttfs = glob.glob(osp.join(gTtf,'*.ttf'))
        ttf = random.choice(ttfs)

        # paste the text
        imgname = 'bgi{}_ind{}_pid{}_ttf{}.jpg'.format(osp.basename(bgipath),ind,pid,osp.basename(ttf))
        bgiNp,doc,numTextDone =  paste(imgname,bgipath,texts,ttf,getcolor)

        imgnamep = 'bgi{}_ind{}_{}Of{}_ttf{}.jpg'.format(osp.basename(bgipath),ind,numTextDone,len(texts),osp.basename(ttf))
        logging.warning(imgnamep)  # logging.warn is a deprecated alias

        # write the image and the xml
        cv2.imwrite(osp.join(resultImgsDir,imgname),bgiNp)
        xmlFileName = osp.join(resultXmlDir,'{}.xml'.format(imgname[:-4]))
        # toprettyxml(encoding=...) returns bytes declared as utf-8, so the
        # file must be written as utf-8 whatever the locale default is. The
        # newl argument was a literal line break in the original listing.
        xmlBytes = doc.toprettyxml(indent = "    ", newl = "\n", encoding = "utf-8")
        with open(xmlFileName, "w", encoding = 'utf-8') as fxml:
            fxml.write(xmlBytes.decode('utf-8'))

        pygame.quit()
        return
    
    if __name__ == '__main__':

        # 1 - read the text lines, then split them into chunks of roughly
        #     gBlockSize lines (one chunk per generated image)
        with open(totalFile) as ftxt:  # closed deterministically
            total = [line.strip() for line in ftxt]
        numP = 30  # number of worker processes
        inter = math.ceil(len(total)/gBlockSize)
        totalSP = [total[i::inter] for i in range(inter)]

        # 2 - process the chunks with a pool of worker processes
        print('begin',len(totalSP))
        with mp.Pool(numP) as p:  # workers are released once map() returns
            p.map(handle, enumerate(totalSP))
    

    结果如图:

    bgi39.jpg_ind0_pid8387_ttf锐字工房云字库小标宋GBK.ttf.jpg


    bgi39.jpg_ind0_pid8387_ttf锐字工房云字库小标宋GBK.ttf.xml

    下面是采用PIL加opencv的seamlessClone的粘贴方式。不过PIL这个包在粘贴文字时不支持文字旋转;而且对于简单的文字粘贴,pygame的结果和seamlessClone的效果似乎差不多。

    # -*- coding: utf-8 -*-
    
    import os
    import cv2
    import glob
    import math
    import random
    import numpy as np
    import os.path as osp
    from xml.dom.minidom import Document
    import multiprocessing as mp
    import logging
    from PIL import Image,ImageDraw,ImageFont
    import pygame
    from pygame.locals import *
    from pygame import freetype
    
    import get_color
    
    # Input / output locations (relative to the working directory).
    resultImgsDir = 'crnn_result_imgs1'  # generated images
    resultXmlDir = 'crnn_result_xmls1'  # generated xml files
    bgiDir = 'bgi'  # background images
    gTtf = 'ttfs'  # font files
    totalFile = 'texts.txt'  # text lines to paste

    # Log lines carry the timestamp and the name of the emitting process.
    FORMAT = '%(asctime)-15s [%(processName)s] %(message)s'
    logging.basicConfig(format=FORMAT)

    gBlockSize = 20  # num of each process's sentences
    ttfSize = [28, 30, 35, 40, 45, 50, 55, 60, 65]  # candidate font sizes
    
    def _addSaltNoise(block,level = 10):
        ran = np.random.randint(0,level,block.shape)
        salt = ran == 0
        pepper = ran == level
        block[salt]= 0
        block[pepper] = 255
        return block
    
    def _addNoise(block):
        randValue = np.random.randn(*block.shape)*np.random.randint(2,20)
        block = block+randValue
        block[block<0] = 0.0
        block[block>255] = 255.0
        block = block.astype('uint8')
        return block
    
    def _feather(block, height):
        """Gaussian-blur ``block``; taller text gets a stronger blur.

        ``height`` selects the kernel size (1, 3 or 5) and the blur std,
        which is randomised for heights above 30.
        """
        if height <= 30:
            kernel, sigma = 1, 0.25
        elif height < 50:
            kernel, sigma = 3, max(0.30, 0.5 + 0.1*np.random.randn())
        else:
            kernel, sigma = 5, max(0.5, 1.5 + 0.5*np.random.randn())
        return cv2.GaussianBlur(block,(kernel,kernel),sigma)
    
    def _seamlessClone(obj,dst,center):
        """Blend ``obj`` into ``dst`` around ``center`` with cv2.seamlessClone.

        The whole of ``obj`` is cloned (all-255 mask) in MIXED_CLONE mode.
        On failure the shapes and the centre are printed and the exception
        is re-raised.
        """
        whole = np.ones(obj.shape, obj.dtype) * 255
        try:
            return cv2.seamlessClone(obj, dst, whole, center, cv2.MIXED_CLONE)
        except Exception:
            print('exception:',obj.shape,dst.shape,whole.shape,center)
            raise
    
    def _rander(rawbgi,string,bgr,point,font,get_color):
        """Draw ``string`` with PIL, degrade it and blend it into the image.

        Args:
            rawbgi: background image as a numpy array.
            string: text to draw.
            bgr: unused; kept for interface compatibility.
            point: ``(curCol, curRow)`` top-left corner of the text.
            font: PIL ImageFont used for drawing and measuring.
            get_color: object whose ``sample_from_data(image)`` returns
                ``(foreground, background)`` colours.

        Returns:
            The image returned by ``_seamlessClone``.
        """
        bgi = Image.fromarray(rawbgi)
        draw = ImageDraw.Draw(bgi)
        curCol,curRow = point

        # NOTE(review): sample_from_data documents an RGB input, while the
        # caller appears to pass a cv2-loaded (BGR) image - confirm.
        fg_col,bg_col = get_color.sample_from_data(rawbgi)
        # Jitter the colour slightly. uint8 + int jitter can fall outside
        # 0-255, so clamp and hand PIL plain Python ints.
        fg_col = np.clip(fg_col +  np.random.randint(-3,3,[1,3]), 0, 255)
        fill = tuple(int(c) for c in fg_col.squeeze())
        draw.text((curCol,curRow),string, fill, font=font)

        # NOTE(review): ImageFont.getsize was removed in Pillow 10 - confirm
        # the Pillow version this script is run with.
        width,height = font.getsize(string)

        # degrade only the text region, then blend it back into the image
        bgi = np.array(bgi)
        block = bgi[curRow:curRow+height,curCol:curCol+width,:]
        block = _addNoise(block)
        block = _feather(block,height)
        block = _addNoise(block)
        block = _addSaltNoise(block,50)

        center = (curCol+curCol+width)//2,(curRow+curRow+height)//2
        return _seamlessClone(block,bgi,center)
    
    
    def _paste(bgi,ttf,size,curRow,curCol,curText,cols,get_color):
        """Choose the string for one row and render it with ``_rander``.

        With probability 1/10 (and room for at least 4 characters) the text
        is replaced by 10 random digits, and that digit string occasionally
        has one digit replaced by a dot. Text is rendered only when it is
        exactly 10 characters long and at least 10 characters are estimated
        to fit to the right of ``curCol``.

        Args:
            bgi: background image as a numpy array.
            ttf: path of the font file.
            size: font size.
            curRow, curCol: top-left corner of the text.
            curText: candidate text line.
            cols: image width, used to estimate how many characters fit.
            get_color: colour sampler forwarded to ``_rander``.

        Returns:
            ``(image, string, width, height)``. ``string`` is '' when nothing
            was rendered; width/height come from ``ttfont.getsize(string)``.
        """
        ttfont = ImageFont.truetype(ttf,size)
        # rough capacity: one character is assumed to be `size` pixels wide
        maxNumText = math.floor((cols-curCol)/size)
        curText = curText.strip()

        # random the digit
        shouldMaxNumTxt = 10
        if random.randint(0,9)==9 and maxNumText >= 4:
            curText = ''.join([str(random.randint(0,9)) for _ in range(shouldMaxNumTxt)])
            string = curText
            # random the dot: replace one inner digit, keeping the length
            if random.randint(0,7)==7:
                dotInd = random.randint(1,shouldMaxNumTxt-2)
                string = curText[:dotInd]+'.'+curText[dotInd+1:]
        else:
            # The original computed a random 10-character window here and
            # then immediately overwrote it with the full text; that dead
            # computation is dropped and the effective behaviour kept.
            string = curText

        # `string` always has the same length as `curText`, so one length
        # test covers both of the original checks.
        if maxNumText >= 10 and len(string) == 10:
            bgr = [random.randint(100,254) for i in range(3)]
            bgi = _rander(bgi,string,bgr,(curCol,curRow),ttfont,get_color)
        else:
            string = ''

        # get printed width height
        # NOTE(review): ImageFont.getsize was removed in Pillow 10 - confirm
        # the Pillow version this script is run with.
        width,height = ttfont.getsize(string)
        return bgi,string,width,height
    
    
    def _xml(doc,anno,string,xminT,yminT,xmaxT,ymaxT):
    
        if not string: return
        body = doc.createElement('object')
        anno.appendChild(body)
    
        name = doc.createElement('name')
        nameText = doc.createTextNode('text')
        name.appendChild(nameText)
        body.appendChild(name)
    
        content = doc.createElement('textContent')
        contentText = doc.createTextNode(string)
        content.appendChild(contentText)
        body.appendChild(content)
    
        bndbox = doc.createElement('bndbox')
    
        xmin = doc.createElement('xmin')
        ymin = doc.createElement('ymin')
        xmax = doc.createElement('xmax')
        ymax = doc.createElement('ymax')
    
        xminText = doc.createTextNode(str(xminT))
        yminText = doc.createTextNode(str(yminT))
        xmaxText = doc.createTextNode(str(xmaxT))
        ymaxText = doc.createTextNode(str(ymaxT))
    
        xmin.appendChild(xminText)
        ymin.appendChild(yminText)
        xmax.appendChild(xmaxText)
        ymax.appendChild(ymaxText)
    
        bndbox.appendChild(xmin)
        bndbox.appendChild(ymin)
        bndbox.appendChild(xmax)
        bndbox.appendChild(ymax)
        body.appendChild(bndbox)
    
    def paste(imgname,bgi,text,ttf,ttfRandom,get_color):
        """Paste text rows onto a background image and build its annotation.

        Args:
            imgname: image name recorded in the xml.
            bgi: path of the background image (read with cv2).
            text: sequence of candidate text lines.
            ttf: path of the font file.
            ttfRandom: list of font sizes to choose from.
            get_color: colour sampler forwarded to ``_paste``.

        Returns:
            ``(image, doc)``: the resulting image as a numpy array and the
            minidom document describing the pasted strings.
        """
        bgi = cv2.imread(bgi)
        rows,cols,depth = bgi.shape

        # gap before the first row and index of its font size
        rowGap = random.randint(3,7)
        curRow = rowGap
        sizeIdx = random.randint(0,len(ttfRandom)-1)

        # create the xml head: image name and image size
        doc = Document()
        anno = doc.createElement('Annotations')
        doc.appendChild(anno)
        nameNode = doc.createElement('imgName')
        nameNode.appendChild(doc.createTextNode(imgname))
        anno.appendChild(nameNode)

        sizeNode = doc.createElement('size')
        for tag, value in (('width', cols), ('height', rows), ('depth', depth)):
            child = doc.createElement(tag)
            child.appendChild(doc.createTextNode(str(value)))
            sizeNode.appendChild(child)
        anno.appendChild(sizeNode)

        while curRow + ttfRandom[sizeIdx] <= rows:
            # random start column for this attempt
            curCol = random.randint(0,cols-1)

            # do not paste when the start column is in the right-most 10% of
            # the image (the row bound is already ensured by the loop test)
            if curCol < cols*0.9:
                curText = text[random.randint(0,len(text)-1)]

                bgi,string,txtW,txtH = _paste(bgi,ttf,ttfRandom[sizeIdx],curRow,curCol,curText,cols,get_color)
                if not string: continue  # nothing pasted: retry the same row
                _xml(doc,anno,string,xminT = curCol,yminT = curRow,xmaxT = curCol+txtW,ymaxT = curRow+txtH)
                curRow += rowGap
                curRow += ttfRandom[sizeIdx]

            # gap and font size for the next attempt
            rowGap = random.randint(3,7)
            sizeIdx = random.randint(0,len(ttfRandom)-1)
        return np.array(bgi), doc
    
    def handle(text):
        """Generate one synthetic image and its xml file for a chunk of text.

        Args:
            text: ``(index, lines)`` pair, as produced by ``enumerate``.

        A random background from ``bgiDir``, a random font from ``gTtf`` and
        a random subset of ``ttfSize`` are used; the image goes to
        ``resultImgsDir`` and the xml to ``resultXmlDir``.
        """
        ind, text = text
        # pid, used to keep file names unique across workers
        pid = os.getpid()
        getcolor = get_color.ColorSample()

        # select one background image
        bgis = glob.glob( osp.join(bgiDir,'*.jpg') )
        bgi = bgis[random.randint(0,len(bgis)-1)]

        # select one font
        ttfs = glob.glob(osp.join(gTtf,'*.ttf'))
        curTtf = random.randint(0,len(ttfs)-1)
        ttf = ttfs[curTtf]

        # font sizes: always keep the first one, keep each of the others
        # with probability 1/2
        ttfRandom = [ttfSize[0]] + [size for size in ttfSize[1:] if random.randint(0,1)]

        imgname = '{}_{}_{}.jpg'.format(ind,pid,curTtf)
        bgi,doc =  paste(imgname,bgi,text,ttf,ttfRandom,getcolor)
        cv2.imwrite(osp.join(resultImgsDir,imgname),bgi)
        xmlFileName = osp.join(resultXmlDir,'{}.xml'.format(imgname[:-4]))
        # toprettyxml(encoding=...) returns bytes declared as utf-8, so the
        # file must be written as utf-8 whatever the locale default is. The
        # newl argument was a literal line break in the original listing.
        xmlBytes = doc.toprettyxml(indent = "    ", newl = "\n", encoding = "utf-8")
        with open(xmlFileName, "w", encoding = 'utf-8') as fxml:
            fxml.write(xmlBytes.decode('utf-8'))
        logging.warning('{}'.format(ind))  # logging.warn is a deprecated alias
        return
    
    if __name__ == '__main__':

        # read the text lines and split them into chunks of roughly
        # gBlockSize lines (one chunk per generated image)
        with open(totalFile) as ftxt:  # closed deterministically
            total = [line.strip() for line in ftxt]
        numP = 30  # number of worker processes
        inter = math.ceil(len(total)/gBlockSize)
        totalSP = [total[i::inter] for i in range(inter)]

        print('begin')
        # only the first 1000 chunks are processed
        with mp.Pool(numP) as p:  # workers are released once map() returns
            p.map(handle, enumerate(totalSP[:1000]))
    
    
  • 相关阅读:
    HDU1029 Ignatius and the Princess IV
    UVA11039 Building designing【排序】
    UVA11039 Building designing【排序】
    POJ3278 HDU2717 Catch That Cow
    POJ3278 HDU2717 Catch That Cow
    POJ1338 Ugly Numbers(解法二)
    POJ1338 Ugly Numbers(解法二)
    UVA532 Dungeon Master
    UVA532 Dungeon Master
    POJ1915 Knight Moves
  • 原文地址:https://www.cnblogs.com/shouhuxianjian/p/9962016.html
Copyright © 2011-2022 走看看