zoukankan      html  css  js  c++  java
  • vb.net 多线程爬虫抓取免费代理IP

    ''' <summary>
    ''' Console crawler that downloads proxy listing pages from xicidaili.com on
    ''' thread-pool threads and collects the entries whose "speed" percentage is
    ''' above 90 into <see cref="masterPorxyList"/>.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Proxies collected so far. Worker threads append to this list while
        ''' holding a private lock; List(Of T) itself is not thread-safe, so code
        ''' reading it while the crawl is still running may observe it mid-update.
        ''' </summary>
        Public Shared masterPorxyList As List(Of proxy) = New List(Of proxy)()

        ' Serializes Add/Count on masterPorxyList across the worker threads.
        Private Shared ReadOnly proxyListLock As Object = New Object()

        ' Listing pages 1..pageCount are crawled, one thread-pool work item each.
        Private Const pageCount As Integer = 15

        ''' <summary>
        ''' One scraped proxy entry: address, port and the parsed speed percentage.
        ''' </summary>
        Public Class proxy
            Public ip As String
            Public port As String
            Public speed As Integer

            ''' <summary>Creates an entry from already-extracted values.</summary>
            ''' <param name="pip">IP address text taken from the table cell.</param>
            ''' <param name="pport">Port text taken from the table cell.</param>
            ''' <param name="pspeed">Speed percentage parsed from the table cell.</param>
            Public Sub New(ByVal pip As String, ByVal pport As String, ByVal pspeed As Integer)
                Me.ip = pip
                Me.port = pport
                Me.speed = pspeed
            End Sub
        End Class

        ''' <summary>
        ''' Thread-pool callback: downloads one listing page, walks the rows of the
        ''' first table and stores every proxy whose speed percentage exceeds 90.
        ''' Pages that cannot be downloaded or do not have the expected layout are
        ''' skipped instead of throwing, because an unhandled exception on a
        ''' thread-pool thread would terminate the whole process.
        ''' </summary>
        ''' <param name="pageIndex">Page number; its ToString() value is appended to the listing URL.</param>
        Private Shared Sub getProxyList(ByVal pageIndex As Object)
            Dim urlCombin As String = "http://www.xicidaili.com/wt/" & pageIndex.ToString()
            Dim catchHtml As String = catchProxIpMethord(urlCombin, "UTF8")

            ' catchProxIpMethord returns "" when the download failed.
            If String.IsNullOrEmpty(catchHtml) Then
                Return
            End If

            Dim doc As HtmlAgilityPack.HtmlDocument = New HtmlAgilityPack.HtmlDocument()
            doc.LoadHtml(catchHtml)

            ' SelectSingleNode / SelectNodes yield Nothing when the XPath matches nothing.
            Dim table As HtmlNode = doc.DocumentNode.SelectSingleNode("//div[@id='wrapper']//div[@id='body']/table[1]")
            If table Is Nothing Then
                Return
            End If

            Dim collectiontrs As HtmlNodeCollection = table.SelectNodes("./tr")
            If collectiontrs Is Nothing Then
                Return
            End If

            ' Row 0 is skipped (presumably the table header - confirm against the page markup).
            For i As Integer = 1 To collectiontrs.Count - 1
                Dim collectiontds As HtmlNodeCollection = collectiontrs(i).ChildNodes

                ' Child indexes 3, 5 and 13 are read below; ignore rows that are too short.
                ' NOTE(review): the odd indexes suggest whitespace text nodes sit between
                ' the <td> elements - verify if the site layout changes.
                If collectiontds.Count <= 13 Then
                    Continue For
                End If

                Dim ip As String = collectiontds(3).InnerText.Trim()
                Dim port As String = collectiontds(5).InnerText.Trim()

                Dim subSpeed As Integer
                If Not tryParseSpeedPercent(collectiontds(13).InnerHtml, subSpeed) Then
                    Continue For
                End If

                If subSpeed > 90 Then
                    Dim total As Integer

                    ' Add and Count must happen atomically with respect to other workers.
                    SyncLock proxyListLock
                        masterPorxyList.Add(New proxy(ip, port, subSpeed))
                        total = masterPorxyList.Count
                    End SyncLock

                    Console.WriteLine("当前是第:" & total.ToString() & "个代理IP")
                End If
            Next
        End Sub

        ''' <summary>
        ''' Extracts the integer located between the first ":" and the first "%"
        ''' of a cell's inner HTML.
        ''' </summary>
        ''' <param name="cellHtml">Inner HTML of the speed cell.</param>
        ''' <param name="percent">Receives the parsed value, or 0 on failure.</param>
        ''' <returns>True when both markers were found in order and the text between them is an integer.</returns>
        Private Shared Function tryParseSpeedPercent(ByVal cellHtml As String, ByRef percent As Integer) As Boolean
            percent = 0

            If String.IsNullOrEmpty(cellHtml) Then
                Return False
            End If

            Dim beginIndex As Integer = cellHtml.IndexOf(":"c)
            Dim endIndex As Integer = cellHtml.IndexOf("%"c)

            If beginIndex < 0 OrElse endIndex <= beginIndex Then
                Return False
            End If

            Dim digits As String = cellHtml.Substring(beginIndex + 1, endIndex - beginIndex - 1)
            Return Integer.TryParse(digits,
                                    System.Globalization.NumberStyles.Integer,
                                    System.Globalization.CultureInfo.InvariantCulture,
                                    percent)
        End Function

        ''' <summary>
        ''' Downloads <paramref name="url"/> and returns the response body as text.
        ''' This is best-effort: any failure is reported on the error stream and an
        ''' empty string is returned.
        ''' </summary>
        ''' <param name="url">Address to fetch; Nothing or "" yields "".</param>
        ''' <param name="encoding">"UTF8" decodes the body as UTF-8; any other value uses the system default encoding.</param>
        ''' <returns>The decoded response body, or "" on failure.</returns>
        Private Shared Function catchProxIpMethord(ByVal url As String, ByVal encoding As String) As String
            If String.IsNullOrEmpty(url) Then
                Return ""
            End If

            ' The type is fully qualified on purpose: VB is case-insensitive, so inside
            ' this method the bare name "Encoding" in an expression refers to the String
            ' parameter "encoding", not to System.Text.Encoding.
            Dim ec As System.Text.Encoding = System.Text.Encoding.[Default]
            If encoding = "UTF8" Then
                ec = System.Text.Encoding.UTF8
            End If

            Try
                Dim request As WebRequest = WebRequest.Create(url)

                ' Using blocks release the response, stream and reader even when reading throws.
                Using response As WebResponse = request.GetResponse()
                    Using datastream As Stream = response.GetResponseStream()
                        Using reader As StreamReader = New StreamReader(datastream, ec)
                            Return reader.ReadToEnd()
                        End Using
                    End Using
                End Using
            Catch ex As Exception
                ' Keep the original best-effort contract, but do not hide the failure.
                Console.Error.WriteLine("Failed to download " & url & ": " & ex.Message)
                Return ""
            End Try
        End Function

        ''' <summary>
        ''' Entry point: queues one crawl work item per listing page, then blocks on
        ''' console input so the process stays alive while the workers run.
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            For i As Integer = 1 To pageCount
                ThreadPool.QueueUserWorkItem(AddressOf getProxyList, i)
            Next

            Console.Read()
        End Sub
    End Class

  • 相关阅读:
    搭上末班车去了京东,终于可以做东哥兄弟...
    面试官问我会不会Elasticsearch,我语塞了...
    Elasticsearch到底哪点好?
    资本寒冬,应届生被裁,亲身经历从被裁到上岸,我们该如何自渡?如何保持核心竞争力?
    面试官求你了,别再问我TCP的三次握手和四次挥手
    如何保证网络传输的可靠性?
    龙叔拿了20几个offer,原因竟有些泪目...
    面试百度的机器学习算法,也不过如此
    《数据结构与算法》—— O(3N)=O(N) ?
    我以为我学懂了数据结构,看到这张导图,我才发现我错了
  • 原文地址:https://www.cnblogs.com/rj888/p/11720268.html
Copyright © 2011-2022 走看看