zoukankan      html  css  js  c++  java
  • 爬取数据的程序

      1 # -*- coding: utf-8 -*-
      2 import pymysql
      3 import os
      4 import time
      5 import re
      6 serveraddr="localhost"
      7 user="root"
      8 password="123456"
      9 databaseName="test"
     10 filename="./data/UNCL.csv"
     11 #读取文件里面的内容插入数据库
     12 def InsertDataFromFile(absolutePath,file_name):
     13     file_name = file_name.split('.')[0]
     14     file_list_name = absolutePath.split(sep)
     15     if(len(file_list_name)>2):#设置父目录名字
     16         parent_name = file_list_name[len(file_list_name)-2]
     17     else:
     18         parent_name = "default_root"#缺省的根目录名字
     19     file_object = open(absolutePath)
     20     try:
     21         all_the_text = file_object.read()
     22     finally:
     23         file_object.close()
     24     all_the_text = all_the_text.strip(',')
     25     #all_the_text = all_the_text.replace('[','(');
     26     #splitList = all_the_text.replace(']',')');
     27     all_the_text = all_the_text.lstrip('[')
     28     all_the_text = all_the_text.rstrip(']') 
     29     splitList = all_the_text.split('],[')
     30     totalList = []
     31     for item in  splitList:
     32         arr = item.split(',')#将一组数据变成数据
     33         if config.GaomuTrue == 1:
     34             arr.insert(0,'0')#插入id
     35             arr.insert(3,parent_name+'_'+file_name.split(config.keyWord)[0])#插入文件名字
     36             arr.append(arr[4])
     37             arr.remove(arr[4])
     38             #text = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(arr[2]))
     39             #print text
     40             #arr[2] = text
     41             #arr.remove(arr[2])
     42             #arr.insert(2,text);
     43             #print time.localtime(int(arr[2]))
     44             #print time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(int(arr[2])))
     45             #print int(arr[2])
     46         else:
     47             arr.insert(0,'0')#插入id
     48             arr.insert(1,file_name)#插入文件名字
     49         #totalList.insert(len(totalList),arr);
     50         totalList.append(arr)
     51     try:  
     52         #sql_string = 'insert into '+tableName+ ' values (0,'+ file_name +',%s,%s,%s,%s,%s) '
     53         sql_string = 'insert into '+tableName+ ' values (%s,%s,%s,%s,%s,%s,%s) '
     54         # 执行sql语句
     55         cur.executemany(sql_string,totalList)
     56         # 提交到数据库执行
     57         conn.commit()
     58     except MySQLdb.Error,msg:  
     59         print file_name+" insert data error"
     60         # 发生错误时回滚
     61         conn.rollback()
     62         return 0
     63     #cur.close()
     64     #conn.close()
     65     return len(totalList)#返回文件插入的条数
     66 
     67 amountOfInsert = 0#统计本次插入的总数
     68 def create_table():
     69     # sep=os.sep
     70     # tableName=raw_input('please input the table name which will be created:')
     71     # classpath=raw_input('请输入需要遍历的路径:')
     72 
     73     db=pymysql.connect(serveraddr,user,password,databaseName)
     74     cursor=db.cursor()
     75     cursor.execute("drop table if exists `tncl`")
     76     sql="""    create table `tncl`(
     77             `tncl_id` varchar(25) not null,
     78             `tncl_tag` varchar(25) not null,
     79             `tncl_desc` varchar(255) not null,
     80             `tncl_note` varchar(1200) not null,
     81             primary key(`tncl_id`)
     82             ) engine=InnoDB default charset=utf8;"""
     83             
     84     cursor.execute(sql)
     85     db.close()
     86 def test():
     87     p1=r"^s{13}w.+|
    $"
     88     pattern=re.compile(p1)
     89     fr=open(filename)
     90     w2=open('./data/e1.csv','a')
     91     for line in fr.readlines():
     92         print(line)
     93         matcher=re.findall(pattern,line)
     94         print(matcher)
     95         # print(type(matcher)) list
     96         for i in matcher:
     97             # print(i) 
     98             # print(type(i)) str
     99             for k in i:
    100                 # print(k)
    101                 # print(type(k)) str
    102                 w2.write(k)
    103             # w2.write("
    ")
    104         print("-----------")
    105     fr.close()
    106     w2.close()
    107 
    108     
    109 if __name__=='__main__':
    110     test()
  • 相关阅读:
    String、StringBuffer与StringBuilder的区别
    案例2:用一条SQL查询出数学语文成绩都大于80分的学生姓名?
    案例1:写一个压缩字符串的方法,例如aaaabbcxxx,则输出a4b2c1x3。
    jsp的九大内置对象及作用
    SQL语句总结2018-11-7
    kafka-spark streaming (一)
    python while嵌套循环
    docker-compose.yml样例(mysql主从+mycat读写分离)
    docker-compose管理daocker
    docker搭建私有registry
  • 原文地址:https://www.cnblogs.com/smuxiaolei/p/7427678.html
Copyright © 2011-2022 走看看