zoukankan      html  css  js  c++  java
  • C# 采集网页用的几个函数(有解释)

    /// <summary>
    /// Requests <paramref name="Url"/> and returns the response body decoded as text.
    /// </summary>
    /// <param name="Url">Address to request; it must resolve to an HTTP(S) request.</param>
    /// <param name="charset">
    /// Name of the encoding used to decode the response body.
    /// Falls back to "gb2312" when null or empty.
    /// </param>
    /// <returns>
    /// The decoded response body, or an empty string when the request, the
    /// encoding lookup or the read fails for any reason.
    /// </returns>
    public string GetHtmlSource(string Url, string charset)
    {
        if (string.IsNullOrEmpty(charset))
        {
            charset = "gb2312";
        }

        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

            // The using blocks release the connection, the stream and the reader
            // even when decoding or reading throws part-way through.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding(charset)))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // Best-effort contract: callers get "" on any failure instead of an
            // exception. The failure is traced so it is no longer completely silent.
            System.Diagnostics.Trace.TraceWarning("GetHtmlSource failed for {0}: {1}", Url, ex.Message);
            return "";
        }
    }

    //获得页面HTML代码中开始标记和结束标记中间的数据:测试可用

    //参    数:HTML源代码 ,开始标记,结束标记

     public string SniffwebCode(string code, string wordsBegin, string wordsEnd)
            {
                string NewsTitle = "";
                Regex regex1 = new Regex("" + wordsBegin + @"(?<title>[\s\S]+?)" + wordsEnd + "", RegexOptions.Compiled | RegexOptions.IgnoreCase);
                for (Match match1 = regex1.Match(code); match1.Success; match1 = match1.NextMatch())
                {
                    NewsTitle = match1.Groups["title"].ToString();
                }
                return NewsTitle;

            }

    /// <summary>
    /// Collects every piece of text found between a begin marker and an end
    /// marker in <paramref name="code"/>.
    /// </summary>
    /// <param name="code">Text to search (typically HTML source).</param>
    /// <param name="wordsBegin">Begin marker; inserted into the regular expression as-is, so regex metacharacters are interpreted.</param>
    /// <param name="wordsEnd">End marker; inserted into the regular expression as-is, so regex metacharacters are interpreted.</param>
    /// <returns>
    /// An <see cref="ArrayList"/> of strings, one per match in document order
    /// (case-insensitive, shortest span of one or more characters including
    /// newlines); empty when nothing matches.
    /// </returns>
    public ArrayList SniffwebCodeReturnList(string code, string wordsBegin, string wordsEnd)
    {
        Regex extractor = new Regex(
            wordsBegin + @"(?<title>[\s\S]+?)" + wordsEnd,
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        ArrayList captures = new ArrayList();
        foreach (Match hit in extractor.Matches(code))
        {
            captures.Add(hit.Groups["title"].ToString());
        }

        return captures;
    }

  • 相关阅读:
    MySQL启动和关闭命令总结
    MySQL数据库5.6版本首次安装Root密码问题
    tomcat 9性能调优注意事项
    扫除减脂之路上的几个小障碍
    MySQL常见面试题
    关于邮箱发送邮件二之附件及图片
    关于邮箱发送邮件
    关于算法
    python中常见的数据类型
    C++实现复数类的输入输出流以及+-*/的重载
  • 原文地址:https://www.cnblogs.com/hfzsjz/p/1656716.html
Copyright © 2011-2022 走看看