from mysql_tool import *
import copy

# Bulk-load helpers for the `qqzone` tables.
#
# Two input layouts are handled:
#   * "key value|key value|..." lines (userFeature.data) -> myindb_user_feature
#   * comma-separated lines with a header row (*.csv)     -> myindb
#
# Rows are accumulated and sent as multi-row INSERT statements through
# mysql_write, which is expected to come from the `mysql_tool` star import.
#
# NOTE(review): values are wrapped in double quotes without any escaping, so a
# value containing `"` or `\` will break (or alter) the statement. Confirm the
# input files cannot contain such characters, or switch to a parameterised
# API if mysql_tool offers one.


def _write_batch(insert_prefix, rows):
    """Send *rows* as one multi-row INSERT; return True when the write succeeds.

    insert_prefix -- statement text up to and including ``VALUES``.
    rows          -- non-empty list of rows, each a list of strings.

    Any exception raised by mysql_write is caught and printed together with
    the failing statement, and False is returned.
    """
    values_sql = ','.join('("{}")'.format('","'.join(row)) for row in rows)
    statement = '{}{};'.format(insert_prefix, values_sql)
    try:
        mysql_write(statement)
    except Exception as e:
        # str(e) is required here: joining the exception object itself raises
        # TypeError, which would abort the whole import on the first DB error.
        print('||'.join([str(e), statement]))
        return False
    return True


def myindb_user_feature(f, indb_fields_s, val_d, indb_step=2000):
    """Load a "key value|key value|..." file into the table named by indb_fields_s.

    f             -- path of the input file, one record per line.
    indb_fields_s -- INSERT prefix ending in ``VALUES``.
    val_d         -- mapping column name -> default value; its key order is the
                     column order, and a column absent from a line keeps its
                     default.
    indb_step     -- number of rows per INSERT statement.

    Keys that are not in val_d and items without a value are reported with
    print() and skipped. A batch whose write fails is logged and dropped.
    """
    columns = list(val_d)
    batch = []
    with open(f, 'r') as fr:
        for line in fr:
            if not line.strip():
                continue
            row = copy.copy(val_d)
            # Strip only the line terminator: the space inside each item is
            # the key/value separator and must survive.
            for item in line.rstrip('\n').split('|'):
                key, sep, value = item.partition(' ')
                if not sep:
                    print('no value in item: {!r}'.format(item))
                    continue
                if key not in row:
                    # An extra key would add a value with no matching column.
                    print('unknown field: {!r}'.format(key))
                    continue
                row[key] = value
            # NOTE(review): a missing field is sent as the quoted string
            # "NULL", not SQL NULL - confirm this is what the table expects.
            batch.append([row[name] for name in columns])
            if len(batch) >= indb_step:
                _write_batch(indb_fields_s, batch)
                batch = []
    if batch:
        _write_batch(indb_fields_s, batch)


def myindb(f, indb_fields_s, indb_step=2000):
    """Load a comma-separated file into the table named by indb_fields_s.

    f             -- path of the input file.
    indb_fields_s -- INSERT prefix ending in ``VALUES``; its column list must
                     match the order of the fields in each line.
    indb_step     -- number of rows per INSERT statement.

    Lines containing ``aid`` (the header row) and blank lines are skipped.
    Each field is stripped of surrounding whitespace, including the line
    terminator. When a batch write fails it is logged and the rows are kept,
    so they are sent again together with the next batch.
    """
    sql_l = []
    with open(f, 'r') as fr:
        for line in fr:
            if 'aid' in line:
                continue
            if not line.strip():
                continue
            sql_l.append([field.strip() for field in line.split(',')])
            if len(sql_l) % indb_step == 0:
                if _write_batch(indb_fields_s, sql_l):
                    sql_l = []
    # Flush the trailing partial batch.
    if len(sql_l) % indb_step != 0:
        _write_batch(indb_fields_s, sql_l)


# Template INSERT for `qqzone`.`myu`. Only the part before VALUES (table name
# and column list) is reused; spaces are kept so the prefix stays valid SQL.
s = (
    ' INSERT INTO `qqzone`.`myu` (`id`, `uid`, `age`, `gender`, `marriageStatus`,'
    ' `education`, `consumptionAbility`, `LBS`,'
    ' `interest1`, `interest2`, `interest3`, `interest4`, `interest5`,'
    ' `kw1`, `kw2`, `kw3`, `kw4`, `kw5`,'
    ' `topic1`, `topic2`, `topic3`, `topic4`, `topic5`,'
    ' `ct`, `appIdAction`, `appIdInstall`, `os`, `carrier`, `house`)'
    " VALUES ('33', NULL, NULL, NULL, NULL, NULL, NULL, NULL,"
    ' NULL, NULL, NULL, NULL, NULL, NULL, NULL,'
    ' NULL, NULL, NULL, NULL, NULL, NULL, NULL,'
    ' NULL, NULL, NULL, NULL, NULL, NULL, NULL); '
)
# INSERT prefix without the `id` column.
indb_fields_s = '{}{}'.format(s.split('VALUES')[0], ' VALUES ').replace('`id`,', '')
# Bare column names, in template order.
fields_l = [name.strip().strip('`') for name in s.split('(')[1].split(')')[0].split(',')]
# Per-row defaults: every column except `id` starts as 'NULL'.
val_d = {name: 'NULL' for name in fields_l}
del val_d['id']

indb_step, indb_step_s = 2000, ''
f = 'userFeature.data'
# This import was disabled in the original script by a `break` placed as the
# first statement of the read loop; the call stays commented out to keep it off.
#myindb_user_feature(f, indb_fields_s, val_d, indb_step)

f = 'train.csv'
indb_step, indb_step_s = 10000, ''
indb_fields_s = 'INSERT INTO `qqzone`.`myt` ( `aid`, `uid`, `label`) VALUES '
#myindb(f, indb_fields_s, indb_step)

#f = 'test1.csv'
indb_step, indb_step_s = 10000, ''
indb_fields_s = 'INSERT INTO `qqzone`.`myr` ( `aid`, `uid`) VALUES '
#myindb(f, indb_fields_s, indb_step)

f = 'adFeature.csv'
indb_step, indb_step_s = 200, ''
indb_fields_s = 'INSERT INTO `qqzone`.`myadf` (`aid`, `advertiserId`, `campaignId`, `creativeId`, `creativeSize`,`adCategoryId`, `productId`, `productType`) VALUES '
#myindb(f, indb_fields_s, indb_step)
# The total number of fields and the field names are fixed, but any given row
# of data may be missing some of those fields.