zoukankan      html  css  js  c++  java
  • c# 爬虫(三) 文件上传

    在上一篇中,我们说了模拟登录

    下面我们说说附件上传。

    据说,最早的 HTTP 协议是不支持附件上传的,后来添加了基于表单的文件上传规范(RFC 1867,即 multipart/form-data 格式,其 MIME 结构源自 RFC 2045),才支持附件上传。关于附件上传,请参见

    http://www.cnblogs.com/greenerycn/archive/2010/05/15/csharp_http_post.html

    好了,其实用C#模拟上传附件,主要的难点就在于如何构建此协议的格式以及编码。

    作为备忘,我这里只放一段成品代码了。

    参数:URL,上传控件的id,附件的本地全路径,传输的Key-value数据

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    /// <summary>
    /// Uploads one local file together with ordinary form fields to <paramref name="url"/>
    /// as a <c>multipart/form-data</c> POST and returns the response body as text.
    /// </summary>
    /// <param name="url">Address the POST request is sent to.</param>
    /// <param name="fileControlID">Form field name (the upload control's name) used for the file part.</param>
    /// <param name="fileFullPath">Full local path of the file to upload; only the last path segment is sent as the file name.</param>
    /// <param name="dic">Additional key/value form fields, each sent as its own part. <c>null</c> is treated as "no extra fields".</param>
    /// <returns>The response body, decoded with <see cref="StreamReader"/>'s default encoding.</returns>
    /// <remarks>
    /// Automatic redirects are disabled. The instance's <c>UserAgent</c> and <c>Cookie</c> values
    /// are applied to the request only when they are non-empty.
    /// </remarks>
    public string HttpWebUpload(string url, string fileControlID, string fileFullPath, Dictionary<string, string> dic)
            {
                const string fileContentType = "application/octet-stream";
                // multipart/form-data requires CRLF line breaks regardless of the host OS,
                // so Environment.NewLine (what AppendLine emits) must not be used here.
                const string crlf = "\r\n";

                // Keep only the file name; accept both Windows and Unix style separators.
                int index = fileFullPath.LastIndexOfAny(new[] { '\\', '/' });
                string filename = fileFullPath.Substring(index + 1);

                var boundary = DateTime.Now.Ticks.ToString("x");
                var beginBoundary = "--" + boundary;
                var reqContentType = "multipart/form-data; boundary=" + boundary;

                // Reads the whole file and closes the handle; a single Stream.Read call is
                // not guaranteed to fill the buffer.
                byte[] btfileValue = File.ReadAllBytes(fileFullPath);

                // File part header: boundary, disposition, content type, blank line.
                StringBuilder fileHeader = new StringBuilder();
                fileHeader.Append(beginBoundary).Append(crlf);
                fileHeader.Append(string.Format("Content-Disposition: form-data; name=\"{0}\"; filename=\"{1}\"", fileControlID, filename)).Append(crlf);
                fileHeader.Append(string.Format("Content-Type: {0}", fileContentType)).Append(crlf);
                fileHeader.Append(crlf);
                var btfileKey = Encoding.UTF8.GetBytes(fileHeader.ToString());

                // Everything that follows the raw file bytes: the CRLF terminating the file
                // content, one part per form field, then the closing boundary.
                StringBuilder dicData = new StringBuilder();
                dicData.Append(crlf);
                if (dic != null)
                {
                    foreach (var item in dic)
                    {
                        dicData.Append(beginBoundary).Append(crlf);
                        dicData.Append(string.Format("Content-Disposition: form-data; name=\"{0}\"", item.Key)).Append(crlf);
                        dicData.Append(crlf);
                        dicData.Append(item.Value).Append(crlf);
                    }
                }

                dicData.Append(beginBoundary).Append("--").Append(crlf);
                var btDic = Encoding.UTF8.GetBytes(dicData.ToString());

                HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
                req.Method = "POST";
                req.AllowAutoRedirect = false;
                req.ContentType = reqContentType;
                if (!string.IsNullOrEmpty(UserAgent))
                {
                    req.UserAgent = this.UserAgent;
                }

                if (!string.IsNullOrEmpty(Cookie))
                {
                    req.CookieContainer = new CookieContainer();
                    req.CookieContainer.SetCookies(req.RequestUri, this.Cookie);
                }

                req.ContentLength = (long)btfileKey.Length + btfileValue.Length + btDic.Length;
                using (Stream postDataStream = req.GetRequestStream())
                {
                    postDataStream.Write(btfileKey, 0, btfileKey.Length);
                    postDataStream.Write(btfileValue, 0, btfileValue.Length);
                    postDataStream.Write(btDic, 0, btDic.Length);
                }

                // Dispose the response and reader so the underlying connection is released.
                using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
                using (StreamReader reader = new StreamReader(resp.GetResponseStream()))
                {
                    return reader.ReadToEnd();
                }
            }

      注意,这里如果需要cookie,则添加对应的cookie。

    调用方法

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    // Example call: post a local PDF through the page's upload control.
    string controlid = "fupCert";
    // Verbatim string: in a regular literal "\3" is an unrecognized escape sequence and does not compile.
    string filefullPath = @"f:\3.pdf";
    string url = "http://10.35.3.240/NT_HandbookExtend/FrmCertificate.aspx?type=N&zdtype=J&DELEGATENO=SBWJ16100001&ID=SB20161018000001%2c3206960757&Action=Modify&manualRecordID=SL20161018000001&tm=0.17032586549994377";
     
    // Fetch the __VIEWSTATE and __EVENTVALIDATION hidden-field values from the page.
    var hiddenVal = web.GetDoNetHiddenValuesByUrl(url);
     
    // Form fields posted alongside the file.
    // NOTE(review): the `Vievstate` spelling is kept from the original; confirm it matches the declared member.
    Dictionary<string, string> dic = new Dictionary<string, string>
    {
        { "__EVENTARGUMENT", "" },
        { "__EVENTTARGET", "btnUpload" },
        { "__EVENTVALIDATION", hiddenVal.Eventvalidation },
        { "__PREVIOUSPAGE", "EncayjCF95BJXxMazWGgd9UdPYeLp64GjuJlZ-rvnQ5n34-y7KQYllE35nAdVFvk0" },
        { "__VIEWSTATE", hiddenVal.Vievstate },
        { "__VIEWSTATEENCRYPTED", "" },
        { "grdNavigatorPRE_EMS3_CUS_IMG$ctl06", "" },
        { "grdNavigatorPRE_EMS3_CUS_IMG$ctl07", "15" },
        { "grdNavigatorPRE_EMS3_CUS_IMG$labelRowCount", "8" },
        { "hfERP", "http://www.nteport.gov.cn/cerp/platform/erp/documentManagement/documentDocking.jsp" },
        { "hidFileType", "333" },
        { "hidID", "" },
    };
    // Optional: set a browser-like user agent before uploading, e.g.
    // web.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
    string html = web.HttpWebUpload(url, controlid, filefullPath, dic);

      

    其他注意事项:

    1.如果不限制文件类型,请使用 ContentType = "application/octet-stream";这个我也没怎么懂,尴尬。。。

    2. 编码很重要:由于所有参数都是以字节流的形式传输,文件内容按原始字节直接写入(不做任何转码),而其余按 multipart/form-data(RFC 2045 MIME)协议格式构建的文本部分(分隔符、头部以及键值数据)则使用 UTF-8 编码。

  • 相关阅读:
    经典排序算法
    浅谈C++继承
    进程间通信
    我在其它博客写文章
    git 版本回退
    vmware machine 虚拟机无法启动 Credential Guard 或 Device Guard
    我的头像制作过程
    vs安装qt5后打开qt4创建的 .pro 文件提示找不到头文件的解决方法
    [转]手动安装 Eclipse 插件 Viplugin
    装好 JDK 配置环境变量
  • 原文地址:https://www.cnblogs.com/cuihongyu3503319/p/14773130.html
Copyright © 2011-2022 走看看