zoukankan      html  css  js  c++  java
  • 各种获取页面的方法

    下面这些方法是程序中时不时会用到的,目前还没有系统整理,等回头再整理一份。

      /// <summary>
            /// URL-encodes a string with <c>HttpUtility.UrlEncode</c>, upper-casing every
            /// percent escape it produces (for example <c>%2F</c> rather than <c>%2f</c>).
            /// </summary>
            /// <param name="str">Text to encode. Null or empty input yields an empty string.</param>
            /// <returns>
            /// The encoded text. A character whose encoded form is a single character is copied
            /// through unchanged, so a space stays a space instead of becoming <c>+</c>.
            /// </returns>
            public static string UrlEncode(string str)
            {
                if (string.IsNullOrEmpty(str))
                {
                    return string.Empty;
                }

                StringBuilder sb = new StringBuilder(str.Length);
                for (int i = 0; i < str.Length; i++)
                {
                    string unit;
                    // A surrogate pair must be encoded as one unit; encoding each half on its
                    // own turns the character into two replacement-character escapes.
                    if (char.IsHighSurrogate(str[i]) && i + 1 < str.Length && char.IsLowSurrogate(str[i + 1]))
                    {
                        unit = str.Substring(i, 2);
                        i++;
                    }
                    else
                    {
                        unit = str[i].ToString();
                    }

                    // Encode once and reuse the result for both the length test and the output.
                    string encoded = HttpUtility.UrlEncode(unit);
                    if (encoded.Length > 1)
                    {
                        sb.Append(encoded.ToUpperInvariant());
                    }
                    else
                    {
                        sb.Append(unit);
                    }
                }
                return sb.ToString();
            }
    

      

    使用代理账号和密码获取页面的方法:

     /// <summary>
            /// Downloads a page with an HTTP GET, optionally through an authenticated proxy,
            /// and returns the body decoded as UTF-8 with all CR/LF characters removed.
            /// </summary>
            /// <param name="strReferUrl">Referer URL; not sent when null or empty.</param>
            /// <param name="bEnableProxy">Whether to route the request through the proxy.</param>
            /// <param name="strProxyHost">Proxy host name or address.</param>
            /// <param name="iProxyPort">Proxy port.</param>
            /// <param name="strName">Proxy account name.</param>
            /// <param name="strPwd">Proxy account password.</param>
            /// <param name="url">URL to download.</param>
            /// <param name="SaveCookies">When true, the cookie container used by this request is stored in the shared <c>cookies</c> field.</param>
            /// <param name="time">Any value other than 0 (including null) pauses the calling thread for a fixed 5 seconds after the download.</param>
            /// <param name="isCook">Any value other than 0 (including null) forces a fresh cookie container instead of the shared one.</param>
            /// <returns>The page text, or an empty string when any exception occurs.</returns>
            public string GetProxyHtml(string strReferUrl, bool bEnableProxy, string strProxyHost, int iProxyPort, string strName, string strPwd, string url, bool SaveCookies, int? time = 0, int? isCook = 0)
            {
                try
                {
                    HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(url);
                    if (bEnableProxy)
                    {
                        myHttpWebRequest.Proxy = new WebProxy(strProxyHost, iProxyPort)
                        {
                            Credentials = new NetworkCredential(strName, strPwd)
                        };
                        // Build the pre-emptive proxy credential from the arguments instead of
                        // shipping a fixed account embedded in the source.
                        if (!string.IsNullOrEmpty(strName))
                        {
                            string basicToken = Convert.ToBase64String(Encoding.UTF8.GetBytes(strName + ":" + strPwd));
                            myHttpWebRequest.Headers.Add("Proxy-Authorization", "Basic " + basicToken);
                        }
                        myHttpWebRequest.UseDefaultCredentials = true;
                    }
                    myHttpWebRequest.ServicePoint.Expect100Continue = true;
                    myHttpWebRequest.MaximumAutomaticRedirections = 50;
                    myHttpWebRequest.Method = "GET";
                    myHttpWebRequest.Headers.Add("Accept-Language", "zh-Hans-CN,zh-Hans;q=0.5");
                    myHttpWebRequest.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
                    myHttpWebRequest.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
                    myHttpWebRequest.KeepAlive = true;
                    myHttpWebRequest.ContentType = "text/html; charset=utf-8";
                    myHttpWebRequest.AllowAutoRedirect = true;
                    if (!string.IsNullOrEmpty(strReferUrl))
                    {
                        myHttpWebRequest.Referer = strReferUrl;
                    }

                    // Reuse the shared container only when the caller did not ask for a fresh
                    // one and a shared container actually exists.
                    if (isCook != 0 || cookies == null)
                    {
                        myHttpWebRequest.CookieContainer = new CookieContainer();
                    }
                    else
                    {
                        myHttpWebRequest.CookieContainer = cookies;
                    }

                    myHttpWebRequest.Timeout = 32000;
                    myHttpWebRequest.ReadWriteTimeout = 32000;

                    string html;
                    using (HttpWebResponse myHttpWebResponse = (HttpWebResponse)myHttpWebRequest.GetResponse())
                    {
                        // Read cookies from the container attached to this request; the shared
                        // field may still be null at this point.
                        myHttpWebResponse.Cookies = myHttpWebRequest.CookieContainer.GetCookies(myHttpWebRequest.RequestUri);
                        if (SaveCookies)
                        {
                            cookies = myHttpWebRequest.CookieContainer;
                        }
                        using (Stream myStream = myHttpWebResponse.GetResponseStream())
                        using (StreamReader myStreamReader = new StreamReader(myStream, Encoding.GetEncoding("utf-8")))
                        {
                            html = myStreamReader.ReadToEnd();
                        }
                    }

                    // Throttle after the connection has been released.
                    if (time != 0)
                    {
                        Thread.Sleep(5000);
                    }
                    return html.Replace("\r", "").Replace("\n", "");
                }
                catch (Exception e)
                {
                    // Best effort by contract: callers receive "" on failure. Record the
                    // reason so the failure is at least diagnosable.
                    System.Diagnostics.Trace.WriteLine("GetProxyHtml failed: " + e.Message);
                    return "";
                }
            }
    

     简单的页面

     /// <summary>
            /// Downloads <paramref name="url"/> with a <c>WebClient</c> and returns the body decoded as UTF-8.
            /// </summary>
            /// <param name="url">Address to download.</param>
            /// <returns>The response body as text.</returns>
            static string GetWebClient(string url)
            {
                // Every disposable is released even when the download or the read throws.
                using (WebClient myWebClient = new WebClient())
                using (Stream myStream = myWebClient.OpenRead(url))
                using (StreamReader sr = new StreamReader(myStream, System.Text.Encoding.GetEncoding("utf-8")))
                {
                    return sr.ReadToEnd();
                }
            }
    
    /// <summary>
            /// Downloads <paramref name="url"/> with a plain <c>WebRequest</c> and returns the body decoded as UTF-8.
            /// </summary>
            /// <param name="url">Absolute address to download.</param>
            /// <returns>The response body as text.</returns>
            static string GetWebRequest(string url)
            {
                Uri uri = new Uri(url);
                WebRequest myReq = WebRequest.Create(uri);

                // using blocks guarantee the response, stream and reader are closed on failure too.
                using (WebResponse result = myReq.GetResponse())
                using (Stream receviceStream = result.GetResponseStream())
                using (StreamReader readerOfStream = new StreamReader(receviceStream, System.Text.Encoding.GetEncoding("utf-8")))
                {
                    return readerOfStream.ReadToEnd();
                }
            }
    
    /// <summary>
            /// Downloads <paramref name="url"/> with browser-like headers and returns the body decoded as UTF-8.
            /// </summary>
            /// <param name="url">Absolute address to download.</param>
            /// <param name="host">
            /// Value for the Host header. Defaults to "tandfonline.com"; pass null or an empty
            /// string to leave the Host header derived from <paramref name="url"/>.
            /// </param>
            /// <returns>The response body as text.</returns>
            static string GetHttpWebRequest(string url, string host = "tandfonline.com")
            {
                Uri uri = new Uri(url);
                HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(uri);
                if (!string.IsNullOrEmpty(host))
                {
                    myReq.Host = host;
                }
                myReq.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0";
                myReq.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
                myReq.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
                myReq.KeepAlive = true;

                // using blocks guarantee the response, stream and reader are closed on failure too.
                using (HttpWebResponse result = (HttpWebResponse)myReq.GetResponse())
                using (Stream receviceStream = result.GetResponseStream())
                using (StreamReader readerOfStream = new StreamReader(receviceStream, System.Text.Encoding.GetEncoding("utf-8")))
                {
                    return readerOfStream.ReadToEnd();
                }
            }
    
    /// <summary>
            /// Downloads <paramref name="URL"/> and returns the body decoded as UTF-8.
            /// </summary>
            /// <param name="URL">Absolute address to download.</param>
            /// <returns>
            /// The response body, or the exception message when anything fails. Callers cannot
            /// tell the two apart from the return value alone.
            /// </returns>
            static string GetContentFromUrl(string URL)
            {
                try
                {
                    HttpWebRequest httpReq = (HttpWebRequest)WebRequest.Create(new Uri(URL));

                    // ReadToEnd replaces the 256-char chunk loop, whose repeated string
                    // concatenation was quadratic in the size of the page.
                    using (HttpWebResponse httpResp = (HttpWebResponse)httpReq.GetResponse())
                    using (Stream respStream = httpResp.GetResponseStream())
                    using (StreamReader respStreamReader = new StreamReader(respStream, System.Text.Encoding.UTF8))
                    {
                        return respStreamReader.ReadToEnd();
                    }
                }
                catch (Exception ex)
                {
                    return ex.Message;
                }
            }
    
    /// <summary>
            /// Downloads <paramref name="url"/> with a <c>WebClient</c> that uses the default
            /// credentials and returns the bytes decoded as UTF-8.
            /// </summary>
            /// <param name="url">Address to download.</param>
            /// <returns>The page text, or the <c>WebException</c> message when the download fails.</returns>
            static string GetContentFromUrl1(string url)
            {
                try
                {
                    // WebClient is disposable; release it deterministically.
                    using (WebClient client = new WebClient())
                    {
                        client.Credentials = CredentialCache.DefaultCredentials;
                        Byte[] pageData = client.DownloadData(url);
                        return System.Text.Encoding.UTF8.GetString(pageData);
                    }
                }
                catch (WebException ex)
                {
                    return ex.Message;
                }
            }
    
    /// <summary>
            /// Downloads a page as UTF-8 text, retrying a bounded number of times and calling
            /// <c>ReDial()</c> whenever the connection itself could not be established.
            /// </summary>
            /// <param name="Url">
            /// Address to download; "http://" is prepended when no http/https scheme is present.
            /// </param>
            /// <returns>
            /// The page text (empty when the status is not 200 OK), or null for null/empty input,
            /// for "about:blank", and when every attempt fails with a <c>WebException</c>.
            /// </returns>
            static string GetStringByUrl(string Url)
            {
                if (string.IsNullOrEmpty(Url) || Url.Equals("about:blank"))
                {
                    return null;
                }
                if (!Url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
                    !Url.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
                {
                    Url = "http://" + Url;
                }

                // Every failed attempt counts toward the limit. Counting only connect failures
                // lets any other WebException (timeout, 404, ...) retry without end.
                const int maxAttempts = 5;
                for (int attempt = 1; attempt <= maxAttempts; attempt++)
                {
                    try
                    {
                        HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(Url);
                        // The property takes the header value only, so no "User-Agent: " prefix.
                        httpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)";
                        httpWebRequest.Accept = "*/*";
                        httpWebRequest.KeepAlive = true;
                        httpWebRequest.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

                        using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                        {
                            if (httpWebResponse.StatusCode != HttpStatusCode.OK)
                            {
                                return string.Empty;
                            }
                            using (StreamReader sreader = new StreamReader(httpWebResponse.GetResponseStream(), System.Text.Encoding.GetEncoding("utf-8")))
                            {
                                return sreader.ReadToEnd();
                            }
                        }
                    }
                    catch (WebException e)
                    {
                        if (e.Status == WebExceptionStatus.ConnectFailure)
                        {
                            // Re-establish the connection before the next attempt.
                            ReDial();
                        }
                    }
                }
                return null;
            }
    

    下面的方法调用 WebBrowser,是为了等页面加载完成后再取数据。提示:据说此代码运行没有前面的方法快,所以不到需要的时候不要用。

        /// <summary>
            /// Entry point that fetches one page through <c>GetPageStringbyWebBrowser</c>.
            /// Marked <c>[STAThread]</c>, which the WinForms WebBrowser control requires.
            /// </summary>
            [STAThread]
            static void Main(string[] args)
            {
                // NOTE(review): `url` is not declared in this snippet - presumably a field or
                // constant defined elsewhere; confirm before building.
                var html = GetPageStringbyWebBrowser(url);
            }
    
     /// <summary>
            /// Loads a page in a WinForms <c>WebBrowser</c>, waits until its ReadyState is
            /// Complete, and returns the document text decoded with the document's own encoding.
            /// </summary>
            /// <param name="url">
            /// Address to load; "http://" is prepended when no http/https scheme is present.
            /// </param>
            /// <returns>The document text, or "" for "about:blank" or when the document cannot be read.</returns>
            private static string GetPageStringbyWebBrowser(string url)
            {
                // Return before navigating; otherwise the scheme check below would rewrite
                // the address to "http://about:blank".
                if (url.Equals("about:blank"))
                {
                    return "";
                }
                if (!url.StartsWith("http://") && !url.StartsWith("https://"))
                {
                    url = "http://" + url;
                }

                using (WebBrowser myWB = new WebBrowser())
                {
                    myWB.ScrollBarsEnabled = false;
                    myWB.Navigate(url);

                    // Pump the message loop until loading finishes. There is no timeout, so a
                    // page that never reaches Complete keeps this loop spinning.
                    while (myWB.ReadyState != WebBrowserReadyState.Complete)
                    {
                        System.Windows.Forms.Application.DoEvents();
                    }

                    try
                    {
                        using (System.IO.StreamReader getReader = new System.IO.StreamReader(myWB.DocumentStream, System.Text.Encoding.GetEncoding(myWB.Document.Encoding)))
                        {
                            return getReader.ReadToEnd();
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort by contract: an unreadable document yields "".
                        return "";
                    }
                }
            }
    

    保存文件的两种方法,第一个是常用的。

    如果第一个方法报错,比如:

    请求被中止: 未能创建 SSL/TLS 安全通道

    就改用第二个方法。

      /// <summary>
            /// Downloads <paramref name="url"/> and hands the response stream to <c>SaveFile</c>.
            /// </summary>
            /// <param name="url">Address of the file to download.</param>
            /// <param name="path">Destination path passed to <c>SaveFile</c>.</param>
            /// <returns>True when a response stream was obtained and passed to <c>SaveFile</c>; otherwise false.</returns>
            static bool GetAndSavePdf(string url, string path)
            {
                HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(url);
                myHttpWebRequest.Timeout = 20 * 1000; // connect timeout in milliseconds
                myHttpWebRequest.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
                myHttpWebRequest.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";

                // Dispose the response and stream here so the connection is released even when
                // SaveFile returns early or throws.
                using (HttpWebResponse myHttpWebResponse = (HttpWebResponse)myHttpWebRequest.GetResponse())
                using (Stream stream = myHttpWebResponse.GetResponseStream())
                {
                    if (stream == null)
                    {
                        return false;
                    }
                    SaveFile(path, stream);
                    return true;
                }
            }
    
     /// <summary>
            /// Copies <paramref name="stream"/> into a new file at <paramref name="path"/>,
            /// creating the target directory when needed. An existing file is left untouched.
            /// </summary>
            /// <param name="path">Destination file path.</param>
            /// <param name="stream">
            /// Source stream. It is closed before this method returns, whether the copy ran,
            /// was skipped, or failed.
            /// </param>
            public static void SaveFile(string path, Stream stream)
            {
                const int BufSize = 102400;
                try
                {
                    if (System.IO.File.Exists(path))
                    {
                        return;
                    }

                    FileInfo file = new FileInfo(path);
                    if (!System.IO.Directory.Exists(file.DirectoryName))
                    {
                        System.IO.Directory.CreateDirectory(file.DirectoryName);
                    }

                    bool completed = false;
                    try
                    {
                        using (FileStream fs = new FileStream(path, FileMode.Create))
                        {
                            byte[] buf = new byte[BufSize];
                            int size;
                            while ((size = stream.Read(buf, 0, BufSize)) > 0)
                            {
                                fs.Write(buf, 0, size);
                            }
                        }
                        completed = true;
                    }
                    finally
                    {
                        // A truncated file would make every later call return at the Exists
                        // check above, so remove it when the copy did not finish.
                        if (!completed && System.IO.File.Exists(path))
                        {
                            try
                            {
                                System.IO.File.Delete(path);
                            }
                            catch (IOException)
                            {
                                // Cleanup is best effort; the original failure still propagates.
                            }
                        }
                    }
                }
                finally
                {
                    if (stream != null)
                    {
                        stream.Close();
                    }
                }
            }
    

    第二个 保存文件

    /// <summary>
    /// P/Invoke declaration for <c>URLDownloadToFile</c> exported by urlmon.dll.
    /// </summary>
    /// <param name="callerPointer">Marshalled as IUnknown; the caller in this file passes null.</param>
    /// <param name="url">Source URL, marshalled as a wide string.</param>
    /// <param name="filePathWithName">Destination file path including the file name, marshalled as a wide string.</param>
    /// <param name="reserved">The caller in this file passes 0.</param>
    /// <param name="callBack">The caller in this file passes <c>IntPtr.Zero</c>.</param>
    /// <returns>The native function's 32-bit result (presumably an HRESULT - confirm against the Win32 documentation).</returns>
    [DllImport("urlmon.dll", CharSet = CharSet.Auto, SetLastError = true)]
            static extern Int32 URLDownloadToFile(
               [MarshalAs(UnmanagedType.IUnknown)] object callerPointer,
               [MarshalAs(UnmanagedType.LPWStr)] string url,
               [MarshalAs(UnmanagedType.LPWStr)] string filePathWithName,
               Int32 reserved,
               IntPtr callBack);
    
    
            /// <summary>
            /// Downloads <paramref name="url"/> to <paramref name="destinationFullPathWithName"/>
            /// through the native <c>URLDownloadToFile</c> call.
            /// </summary>
            /// <param name="url">Source URL.</param>
            /// <param name="destinationFullPathWithName">Destination path including the file name.</param>
            /// <returns>
            /// A <c>FileInfo</c> for the destination path. The native call's result is not
            /// inspected, so callers should check <c>Exists</c> before using the file.
            /// </returns>
            public static FileInfo DownloadFile(string url, string destinationFullPathWithName)
            {
                object noCaller = null;
                IntPtr noCallback = IntPtr.Zero;

                URLDownloadToFile(noCaller, url, destinationFullPathWithName, 0, noCallback);

                FileInfo downloaded = new FileInfo(destinationFullPathWithName);
                return downloaded;
            }
    

    POST

    /// <summary>
            /// POSTs a JSON payload to <paramref name="_url"/> and returns the response body decoded as UTF-8.
            /// </summary>
            /// <param name="_url">Target address.</param>
            /// <param name="jsonParam">JSON text sent as the request body, encoded as UTF-8.</param>
            /// <returns>The response body as text.</returns>
            public static string geta3(string _url,string jsonParam)
            {
                var request = (HttpWebRequest)WebRequest.Create(_url);
                request.Method = "POST";
                request.ContentType = "application/json;charset=UTF-8";

                byte[] byteData = Encoding.UTF8.GetBytes(jsonParam);
                request.ContentLength = byteData.Length;

                // using blocks release the request stream, response and reader on failure too.
                using (Stream writer = request.GetRequestStream())
                {
                    writer.Write(byteData, 0, byteData.Length);
                }

                using (var response = (HttpWebResponse)request.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (var reader = new StreamReader(body, Encoding.GetEncoding("utf-8")))
                {
                    return reader.ReadToEnd();
                }
            }
    

      

    /// <summary>
            /// POSTs form-encoded data and returns the response body decoded as UTF-8,
            /// sharing cookies with other requests through the <c>cookies</c> field.
            /// </summary>
            /// <param name="strReferer">Referer to send; when null or empty, <paramref name="strPostUrl"/> is used instead.</param>
            /// <param name="strPostUrl">Address to post to.</param>
            /// <param name="strPostData">Already-encoded form body; null is treated as an empty body.</param>
            /// <returns>The response text, or an empty string when every attempt fails.</returns>
            public static string GetHtmlByPostUrl(string strReferer, string strPostUrl, string strPostData)
            {
                // Retries are bounded so a persistently failing endpoint cannot recurse or
                // loop without limit.
                const int maxAttempts = 3;

                // The request body is the posted string encoded as UTF-8 bytes.
                byte[] byteData = Encoding.GetEncoding("utf-8").GetBytes(strPostData ?? string.Empty);

                for (int attempt = 1; attempt <= maxAttempts; attempt++)
                {
                    try
                    {
                        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strPostUrl);

                        // Host is intentionally not set: it defaults to the host of
                        // strPostUrl, and an empty value is rejected by the setter.
                        request.Method = "POST";
                        request.UserAgent = " Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0";
                        request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
                        request.Headers.Add("Accept-Language", "zh-cn");
                        request.ContentType = "application/x-www-form-urlencoded";
                        request.ContentLength = byteData.Length;
                        request.Headers.Add("Upgrade-Insecure-Requests", "1");
                        request.Headers.Add("Pragma", "no-cache");
                        request.Headers.Add("Cache-Control", "no-cache");
                        request.Referer = string.IsNullOrEmpty(strReferer) ? strPostUrl : strReferer;
                        request.CookieContainer = cookies ?? new CookieContainer();

                        // Timeouts in milliseconds.
                        request.Timeout = 100000;
                        request.ReadWriteTimeout = 100000;

                        using (Stream reqStream = request.GetRequestStream())
                        {
                            reqStream.Write(byteData, 0, byteData.Length);
                        }

                        using (HttpWebResponse objResponse = (HttpWebResponse)request.GetResponse())
                        using (Stream streamResponse = objResponse.GetResponseStream())
                        using (StreamReader streamReader = new StreamReader(streamResponse, Encoding.GetEncoding("utf-8")))
                        {
                            // Keep the cookies received on this exchange for later requests.
                            cookies = request.CookieContainer;
                            return streamReader.ReadToEnd();
                        }
                    }
                    catch (Exception)
                    {
                        Console.WriteLine("11111----html取不出来");
                    }
                }
                return string.Empty;
            }
    

      POST  gzip

      /// <summary>
            /// POSTs form-encoded data as an AJAX-style request that accepts gzip/deflate, and
            /// returns the response body decoded with the charset announced in Content-Type
            /// (UTF-8 when none is announced or the name is unknown).
            /// </summary>
            /// <param name="RefereruRL">Referer to send.</param>
            /// <param name="strPostUrl">Address to post to.</param>
            /// <param name="PostData">Already-encoded form body.</param>
            /// <returns>
            /// The response text. On any exception the request is retried through
            /// <c>GetHtmlByPostUrl</c> and that method's result is returned.
            /// </returns>
            public static string GetHtmlByPostUrl2(string RefereruRL, string strPostUrl, string PostData)
            {
                try
                {
                    // The request body is the posted string encoded as UTF-8 bytes.
                    byte[] byteData = Encoding.GetEncoding("utf-8").GetBytes(PostData);

                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strPostUrl);
                    request.Method = "POST";
                    request.Accept = "text/html, */*; q=0.01";
                    request.Headers.Add("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2");
                    request.Headers.Add("Cache-Control", "no-cache");
                    request.Headers.Add("X-MicrosoftAjax", "Delta=true");
                    request.Headers.Add("X-Requested-With", "XMLHttpRequest");

                    // Let the framework send Accept-Encoding and decompress the body, so
                    // gzip, deflate and uncompressed responses are all read the same way.
                    request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

                    // Host is intentionally not set: it defaults to the host of strPostUrl,
                    // and an empty value is rejected by the setter.
                    request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0";
                    request.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
                    request.ContentLength = byteData.Length;
                    request.Referer = RefereruRL;

                    // Timeouts in milliseconds.
                    // NOTE(review): 30,000,000 ms is over 8 hours - confirm this is intended.
                    request.Timeout = 30000000;
                    request.ReadWriteTimeout = 30000000;

                    using (Stream reqStream = request.GetRequestStream())
                    {
                        reqStream.Write(byteData, 0, byteData.Length);
                    }

                    using (HttpWebResponse objResponse = (HttpWebResponse)request.GetResponse())
                    {
                        Encoding bodyEncoding = Encoding.UTF8;
                        string contentype = objResponse.Headers["Content-Type"];
                        if (!string.IsNullOrEmpty(contentype))
                        {
                            // Verbatim literal: \s, \W and \w are regex escapes, not C# escapes.
                            Match charset = Regex.Match(contentype, @"charset\s*=\s*[\W]?\s*([\w-]+)", RegexOptions.IgnoreCase);
                            if (charset.Success)
                            {
                                try
                                {
                                    bodyEncoding = Encoding.GetEncoding(charset.Groups[1].Value.Trim());
                                }
                                catch (ArgumentException)
                                {
                                    // Unknown charset name: keep the UTF-8 default.
                                }
                            }
                        }

                        using (Stream streamReceive = objResponse.GetResponseStream())
                        using (StreamReader sr = new StreamReader(streamReceive, bodyEncoding))
                        {
                            return sr.ReadToEnd();
                        }
                    }
                }
                catch
                {
                    Console.WriteLine("重新获取一下");
                    return GetHtmlByPostUrl(RefereruRL, strPostUrl, PostData);
                }
            }
    

      

    /// <summary>
    /// Sets a header directly on the collection's non-public <c>InnerCollection</c>,
    /// obtained through reflection, so the assignment goes around the public API.
    /// </summary>
    /// <param name="header">Header collection to modify.</param>
    /// <param name="name">Header name.</param>
    /// <param name="value">Header value.</param>
    /// <exception cref="ArgumentNullException"><paramref name="header"/> is null.</exception>
    /// <remarks>
    /// Does nothing when the runtime's <c>WebHeaderCollection</c> has no
    /// <c>InnerCollection</c> property or it is not a <c>NameValueCollection</c>.
    /// </remarks>
    public static void SetHeaderValue(WebHeaderCollection header, string name, string value)
    {
        if (header == null)
        {
            throw new ArgumentNullException("header");
        }

        var property = typeof(WebHeaderCollection).GetProperty("InnerCollection",
            System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic);
        if (property == null)
        {
            return;
        }

        // The `as` cast yields null when the property has an unexpected type; check it
        // before dereferencing.
        var collection = property.GetValue(header, null) as NameValueCollection;
        if (collection != null)
        {
            collection[name] = value;
        }
    }
    
    
    // Usage example: set the "Host" and "Connection" headers through SetHeaderValue.
    // NOTE(review): `request` is not declared in this snippet - presumably an
    // HttpWebRequest created by the caller; confirm. The empty Host value looks like
    // a placeholder to fill in.
    SetHeaderValue(request.Headers, "Host", "");
    SetHeaderValue(request.Headers, "Connection", "keep-alive");
     
    

      

  • 相关阅读:
    websphere安装及部署
    ant的安装及使用
    JAVA多线程线程阻塞与唤醒
    Win2008 404 找不到文件或目录。
    cmd命令大全和IIS服务命令
    phpcgi.exe上传大量数据
    phpcgi.exe多个进程 ,cpu跑满
    php shopex显示乱码
    ie中td空值不显示边框解决办法
    Win2003服务器主机下无法/不能播放FLV视频的设置方法!
  • 原文地址:https://www.cnblogs.com/xuanlanbinfen/p/8309265.html
Copyright © 2011-2022 走看看