zoukankan      html  css  js  c++  java
  • 下载生物信息

    http://www.catalogueoflife.org/col/browse/classification

    这是一个国外的生物信息网站

    今天的代码可以抓取指定分类的信息(id,学名)

    没有把多线程写进去,略失败...

    运用:WebClient、Regex、System.IO

    项目在>>>开源中国

     1 using System;
     2 using System.Collections.Generic;
     3 using System.ComponentModel;
     4 using System.Data;
     5 using System.Drawing;
     6 using System.Linq;
     7 using System.Text;
     8 using System.Threading.Tasks;
     9 using System.Windows.Forms;
    10 using System.Net;
    11 using System.Text.RegularExpressions;
    12 using System.Threading;
    13 using System.IO;
    namespace cateoflife
    {
        /// <summary>
        /// Main window of the scraper. Downloads a range of numbered pages from a base URL,
        /// extracts every fragment matching a user-supplied regular expression and appends
        /// one "id, name" record per fragment to a text file named after the start page.
        /// </summary>
        public partial class Form1 : Form
        {
            // Digits that directly follow a '/' inside a matched fragment (used as the record id).
            private static readonly Regex IdPattern = new Regex(@"(?<=/)\d+");

            // Word characters (optionally split by whitespace) that directly follow a '>'
            // inside a matched fragment (used as the record name).
            private static readonly Regex NamePattern = new Regex(@"(?<=>)\w+\s*\w+");

            WebClient wc = new WebClient();
            int start;
            int end;
            string url;
            string reg;
            string msg;
            int now = 1;

            /// <summary>
            /// Creates the form and builds its designer-generated controls.
            /// </summary>
            public Form1()
            {
                InitializeComponent();
            }

            /// <summary>
            /// Runs the scrape: reads the start page (textBox2), end page (textBox3, where 0
            /// means 99999), base URL (textBox1) and match pattern (textBox4), then downloads
            /// url + i for every i in [start, end] and appends the extracted records to
            /// "&lt;start&gt;.txt".
            /// </summary>
            /// <param name="sender">The control that raised the event.</param>
            /// <param name="e">Event data (unused).</param>
            private void button1_Click(object sender, EventArgs e)
            {
                // Validate everything before touching the file system so that bad input
                // cannot leave a half-opened output file behind.
                int first;
                int last;
                if (!int.TryParse(textBox2.Text, out first) || !int.TryParse(textBox3.Text, out last))
                {
                    MessageBox.Show("The start and end page numbers must be whole numbers.");
                    return;
                }

                Regex pattern;
                try
                {
                    // Built once instead of being re-parsed for every downloaded page.
                    pattern = new Regex(textBox4.Text);
                }
                catch (ArgumentException ex)
                {
                    MessageBox.Show("The match pattern is not a valid regular expression: " + ex.Message);
                    return;
                }

                start = first;
                end = (last == 0) ? 99999 : last;
                url = textBox1.Text;
                reg = textBox4.Text;
                wc.Encoding = Encoding.UTF8;

                // Append mode keeps records from earlier runs; 'using' guarantees the file
                // is flushed and closed even when the loop is left through an exception.
                using (StreamWriter w = new StreamWriter(start + ".txt", true))
                {
                    for (int i = start; i <= end; i++)
                    {
                        string htm;
                        try
                        {
                            htm = DownloadPage(url + i);
                        }
                        catch (WebException ex)
                        {
                            MessageBox.Show("Download of " + url + i + " failed: " + ex.Message);
                            break;
                        }

                        foreach (Match m in pattern.Matches(htm))
                        {
                            gettxt(m.Value);
                            w.Write(msg);
                        }

                        // Flush once per page so finished pages survive a later failure.
                        w.Flush();
                    }
                }
            }

            /// <summary>
            /// Downloads a page as text, retrying exactly once after a network failure.
            /// </summary>
            /// <param name="address">Absolute address of the page to download.</param>
            /// <returns>The page body decoded with the client's configured encoding.</returns>
            /// <exception cref="WebException">Both attempts failed.</exception>
            private string DownloadPage(string address)
            {
                try
                {
                    return wc.DownloadString(address);
                }
                catch (WebException)
                {
                    // Only the download is repeated; nothing has been written for this page
                    // yet, so a retry cannot duplicate records.
                    return wc.DownloadString(address);
                }
            }

            /// <summary>
            /// Builds one output record from a matched HTML fragment and stores it in
            /// <c>msg</c>: the id, a tab, the name, and a line terminator.
            /// </summary>
            /// <param name="html">The HTML fragment that matched the user-supplied pattern.</param>
            void gettxt(string html)
            {
                // A failed match yields an empty string, so a missing id or name leaves
                // that column blank rather than throwing.
                msg = IdPattern.Match(html).Value + "\t" + NamePattern.Match(html).Value + Environment.NewLine;
            }
        }
    }
  • 相关阅读:
    Ubuntu20安装docker
    ubuntu 下mysql 大小写问题
    Tensorflow-常见报错解决方案
    迁移学习(Transfer Learning)
    c#的托管代码和非托管代码的理解
    .net面试题升级版
    ADO.NET知识点
    支持“WeShopDb”上下文的模型已在数据库创建后发生更改。请考虑使用 Code First 迁移更新数据库
    .net 面试题
    6、zookeeper应用场景-分布式唯一ID
  • 原文地址:https://www.cnblogs.com/Fadinglemon/p/3737058.html
Copyright © 2011-2022 走看看