zoukankan      html  css  js  c++  java
  • F# Grab Web page

    open System
    open System.Diagnostics
    open System.Net
    open System.Xml
    open System.IO
    //open HtmlAgilityPack   
    /// Downloads the resource at `newUrl` and saves its text to a file under `D:\`.
    ///
    /// The file name is the URL with every character that cannot appear in a
    /// file name replaced by '0', followed by ".xml". The response is decoded
    /// with StreamReader's default encoding and written back out line by line
    /// as GB2312.
    ///
    /// Despite the name, the call blocks: the async workflow is run
    /// synchronously and the path of the written file is returned.
    let asyncGrapUrl(newUrl : string) =
        async{
            // '.', '/' and ':' are mapped to '0'; any other character the file
            // system rejects (for example '?' or '*' from a query string) gets
            // the same treatment so that creating the file does not throw.
            let invalidChars = Path.GetInvalidFileNameChars()
            let safeName =
                newUrl.Replace('.','0').Replace('/','0').Replace(':','0')
                |> String.map (fun c -> if Array.exists (fun bad -> bad = c) invalidChars then '0' else c)
            let fileName = @"D:\" + safeName + ".xml"

            let httpRequest = HttpWebRequest.Create(newUrl) :?> HttpWebRequest
            // `use` bindings release the connection and file handles even when
            // reading or writing fails part-way through.
            use! httpRespon = Async.AwaitTask(httpRequest.GetResponseAsync())
            use responStream = httpRespon.GetResponseStream()
            use strd = new StreamReader(responStream)

            // FileMode.Create truncates an existing file; OpenOrCreate would leave
            // stale bytes behind whenever the new download is shorter than the old one.
            use fileStream = new System.IO.FileStream(fileName,FileMode.Create,FileAccess.Write)
            use streamWr = new StreamWriter(fileStream,Text.Encoding.GetEncoding("GB2312"))

            // No per-line flush: disposing the writer flushes everything once.
            while not strd.EndOfStream do
                streamWr.WriteLine(strd.ReadLine())

            return fileName
        } |> Async.RunSynchronously
         
    /// Interactive feed reader.
    ///
    /// Downloads the BBC science/technology RSS feed through `asyncGrapUrl`,
    /// prints the numbered item titles, reads an item number from the console,
    /// opens that item's link with the shell's default handler and downloads
    /// the linked page to disk as well.
    ///
    /// Invalid input, an out-of-range number, or a link that is not an absolute
    /// http/https URL prints a message instead of throwing or launching anything.
    let main() =
        let url = @"http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/sci/tech/rss.xml"
        let filename = asyncGrapUrl(url)

        // Parse inside a helper so the file handles are released as soon as the
        // document is in memory rather than staying open for the whole session.
        let loadFeed (path : string) =
            use fileInfo = new System.IO.FileStream(path,FileMode.Open,FileAccess.Read)
            use fileStr = new StreamReader(fileInfo,Text.Encoding.GetEncoding("GB2312"))
            let doc = new XmlDocument()
            doc.Load(fileStr)
            doc
        let xml = loadFeed filename

        let nodes = xml.SelectNodes("/rss/channel/item/title")

        for i in 0..(nodes.Count - 1) do
            printfn "%d : %s" (i + 1) nodes.[i].InnerText

        // SelectSingleNode yields null when no item has the requested position.
        let tryLink (item : int) =
            match xml.SelectSingleNode(sprintf "/rss/channel/item[%i]/link" item) with
            | null -> None
            | node -> Some (node.InnerText.Trim())

        // TryParse also covers end-of-input, where ReadLine returns null.
        let chosenLink =
            match Int32.TryParse(Console.ReadLine()) with
            | true, item -> tryLink item
            | _ -> None

        match chosenLink with
        | None -> printfn "No feed item matches that selection."
        | Some newUrl ->
            // The link comes from downloaded content and is handed to the shell,
            // so only absolute http/https URLs are allowed through.
            match Uri.TryCreate(newUrl, UriKind.Absolute) with
            | true, uri when uri.Scheme = Uri.UriSchemeHttp || uri.Scheme = Uri.UriSchemeHttps ->
                let proStart = new ProcessStartInfo(UseShellExecute=true,FileName=newUrl)
                use proc = new Process()
                proc.StartInfo <- proStart
                proc.Start() |> ignore
                asyncGrapUrl(newUrl) |> ignore
            | _ -> printfn "Refusing to open a link that is not an http(s) URL: %s" newUrl
    
    // Script entry point: run the feed reader when this file is evaluated.
    main()

    目前还没有完善,中文乱码。在英文系统下,没有实现抓取正文,只获取全部源代码。

  • 相关阅读:
    Ehcache缓存配置
    spring3使用task:annotation-driven开始定时
    Constructor >> @Autowired >> @PostConstruct
    面试转载
    阿里面试:MYSQL的引擎区别
    Redis的主从复制的原理介绍
    微服务的调用链
    java的零拷贝机制
    存储过程与触发器面试
    ABA问题
  • 原文地址:https://www.cnblogs.com/FsharpZack/p/2844647.html
Copyright © 2011-2022 走看看