zoukankan      html  css  js  c++  java
  • HTML网页保存为PDF文件

    开发中遇到的问题,需要把网站的页面保存为PDF文件

    所以自己研究,总结如下:

    一、Pechkin:html->pdf

    1.WinForm中转换为PDF

      a.在项目中添加引用:引用 -> 管理NuGet程序包,搜索并安装 Pechkin 和 Pechkin.Synchronized

    b.在导出PDF按钮中添加方法

     // Build the converter: left/right margins of 10, top/bottom margins of 0, A4 page.
     GlobalConfig pdfConfig = new GlobalConfig()
         .SetMargins(new Margins() { Left = 10, Right = 10, Top = 0, Bottom = 0 })
         // NOTE(review): Pechkin's parameter is named "landscape", so false should
         // select portrait (the original comment claimed landscape) - confirm.
         .SetPaperOrientation(false)
         // 210 mm x 297 mm is A4; the values are converted to hundredths of an inch.
         .SetPaperSize(ConvertToHundredthsInch(210), ConvertToHundredthsInch(297));
     SynchronizedPechkin sc = new SynchronizedPechkin(pdfConfig);

     // Render the downloaded page to PDF bytes.
     byte[] buf = sc.Convert(new ObjectConfig(), getWebContent());

     if (buf == null)
     {
         MessageBox.Show("Error converting!");
         return;
     }

     try
     {
         // Keep a fixed copy in the root of drive D. The original literal had no
         // path separator ("d:google-news123.pdf"), which is a drive-relative path
         // resolved against the current directory of drive D instead of its root.
         File.WriteAllBytes(@"d:\google-news123.pdf", buf);

         // Path.GetTempFileName() creates an empty .tmp file that was never removed;
         // build a unique .pdf name inside the temp directory instead.
         string fn = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".pdf");

         // File.WriteAllBytes closes the file even when the write fails.
         File.WriteAllBytes(fn, buf);

         // Open the PDF with the associated viewer and release the process handle.
         Process viewer = Process.Start(fn);
         if (viewer != null)
         {
             viewer.Dispose();
         }
     }
     catch (Exception ex)
     {
         // The original swallowed every failure silently; report it to the user.
         MessageBox.Show("Error saving PDF: " + ex.Message);
     }
    View Code

     相关方法

     /// <summary>
     /// Converts a length in millimeters to hundredths of an inch.
     /// </summary>
     /// <param name="millimeter">Length in millimeters.</param>
     /// <returns>The length in hundredths of an inch, truncated toward zero.</returns>
     private int ConvertToHundredthsInch(int millimeter)
     {
         // 1 inch = 2.54 cm, so mm * 10 / 2.54 equals (mm / 25.4) * 100.
         double hundredthsOfInch = millimeter * 10.0 / 2.54;

         // The cast drops the fractional part instead of rounding.
         return (int)hundredthsOfInch;
     }
     5 
     /// <summary>
     /// Downloads the response body of the hard-coded page URL and decodes it to text.
     /// </summary>
     /// <returns>
     /// The decoded page text. When the download fails with a
     /// <see cref="WebException"/>, the exception message is returned instead.
     /// </returns>
     public string getWebContent()
     {
         try
         {
             byte[] pageData;

             // WebClient is IDisposable; the original never released it.
             using (WebClient client = new WebClient())
             {
                 // Authenticate the request with the current user's default credentials.
                 client.Credentials = CredentialCache.DefaultCredentials;
                 pageData = client.DownloadData("http://a4.keyue.com.cn/out/fwd/2fenhd/yuludan_new.asp?nstr=jwmlYCBYPDcHJlX2VudHJ5X2lkPTIyMjkyMDE1MDc5MTk1MjcyOSZ0b2lwPTExNA==");
             }

             // Try UTF-8 first. If that decode produced replacement characters, the
             // page is not UTF-8, so decode again with the system default encoding.
             // NOTE(review): the original comment expects GB2312 pages here, which
             // only matches Encoding.Default on a Chinese-locale machine - confirm.
             string pageHtml = Encoding.UTF8.GetString(pageData);
             if (isMessyCode(pageHtml))
             {
                 pageHtml = Encoding.Default.GetString(pageData);
             }

             return pageHtml;
         }
         catch (WebException webEx)
         {
             // Kept for backward compatibility: callers receive the error text as
             // if it were the page content.
             Console.WriteLine(webEx.Message);
             return webEx.Message;
         }
     }
    41 
    /// <summary>
    /// Reports whether the text contains the Unicode replacement character
    /// (U+FFFD, UTF-8 bytes 239 191 189), which indicates a failed decode.
    /// </summary>
    /// <param name="txt">Decoded text to inspect; null or empty yields false.</param>
    /// <returns>true when the replacement byte sequence is present; otherwise false.</returns>
    public bool isMessyCode(string txt)
    {
        if (string.IsNullOrEmpty(txt))
        {
            return false;
        }

        byte[] bytes = Encoding.UTF8.GetBytes(txt);

        // A three-byte match may start at index Length - 3. The original bound
        // (i < Length - 3) stopped one position early and missed a replacement
        // character at the very end of the text.
        for (int i = 0; i + 2 < bytes.Length; i++)
        {
            if (bytes[i] == 239 && bytes[i + 1] == 191 && bytes[i + 2] == 189)
            {
                return true;
            }
        }

        return false;
    }
    相关方法

    优缺点

      1.只能保存到指定的目录中,并且直接打开文件

      2.网页中的图片导不出来

      3.可能会出现乱码

      4.生成项目的时候需要把相应的DLL拷贝进去,不然不能生成

    这是另外一种方法:http://www.cnblogs.com/lsgsanxiao/p/4878077.html

    2.WEB网站中转换为PDF

      项目Demo http://pan.baidu.com/s/1gfhRR8n

      a.项目相关引用与上面相同

      b.网站中采用JS调用一般处理程序的方式

    /**
     * Opens the PDF-generating handler (CreatePdf.ashx) in a new window.
     *
     * @param {string} [name] Value sent as the "html" query parameter, which the
     *     handler uses as the download file name. Defaults to the sample id used
     *     by the original snippet.
     */
    function createPdf(name) {
        if (name === undefined || name === null) {
            name = "222222222222233324243";
        }
        // Encode the value so characters such as "&" or spaces cannot break the query string.
        window.open("CreatePdf.ashx?html=" + encodeURIComponent(name));
    }
    View Code
      1 using System;
      2 using System.Drawing.Printing;
      3 using System.IO;
      4 using System.Net;
      5 using System.Text;
      6 using System.Web;
      7 using Pechkin;
      8 using Pechkin.Synchronized;
      9 
     10 namespace WebApplication3
     11 {
     /// <summary>
     /// HTTP handler that downloads a fixed web page, converts it to PDF with
     /// Pechkin and sends the result to the browser as a file download.
     /// </summary>
     public class CreatePdf : IHttpHandler
     {
         /// <summary>
         /// Handles one request: renders the page to PDF and writes it to the response.
         /// </summary>
         /// <param name="context">
         /// Current HTTP context. The "html" request value is used only as the
         /// download file name.
         /// </param>
         public void ProcessRequest(HttpContext context)
         {
             string downloadName = BuildDownloadName(context.Request["html"]);

             byte[] buf;
             try
             {
                 string html = getWebContent();

                 SynchronizedPechkin sc = new SynchronizedPechkin(new GlobalConfig()
                     .SetMargins(new Margins() { Left = 0, Right = 0, Top = 0, Bottom = 0 })
                     // NOTE(review): Pechkin's parameter is named "landscape", so false
                     // should select portrait (the original comment claimed landscape) - confirm.
                     .SetPaperOrientation(false)
                     // 210 mm x 297 mm is A4; the values are converted to hundredths of an inch.
                     .SetPaperSize(ConvertToHundredthsInch(210), ConvertToHundredthsInch(297)));

                 buf = sc.Convert(new ObjectConfig(), html);
             }
             catch (Exception e)
             {
                 WriteError(context, e.Message);
                 return;
             }

             if (buf == null)
             {
                 // The original wrote this message but then fell through and
                 // called BinaryWrite(null); stop here instead.
                 WriteError(context, "Error converting!");
                 return;
             }

             context.Response.Clear();

             // Option 1: ask the browser to download the PDF.
             context.Response.AddHeader("content-disposition", "attachment;filename=" + downloadName + ".pdf");
             context.Response.ContentType = "application/octet-stream";
             context.Response.BinaryWrite(buf);

             // Option 2 (not used): to open the PDF inside the browser, send it with
             // ContentType "application/pdf" and without the content-disposition header.

             // Response.End() aborts the thread; inside the original try/catch that
             // ThreadAbortException was caught and its message written to the response.
             // CompleteRequest ends the pipeline without throwing.
             context.ApplicationInstance.CompleteRequest();
         }

         /// <summary>
         /// Gets a value indicating whether another request can reuse this
         /// handler instance; always false.
         /// </summary>
         public bool IsReusable
         {
             get
             {
                 return false;
             }
         }

         /// <summary>
         /// Reduces the requested name to letters, digits, '-', '_' and '.', so that
         /// request data cannot inject quotes, separators or line breaks into the
         /// content-disposition header.
         /// </summary>
         /// <param name="requested">Raw "html" request value; may be null.</param>
         /// <returns>The sanitized name, or "document" when nothing usable remains.</returns>
         private static string BuildDownloadName(string requested)
         {
             if (string.IsNullOrEmpty(requested))
             {
                 return "document";
             }

             StringBuilder safe = new StringBuilder(requested.Length);
             foreach (char c in requested)
             {
                 if (char.IsLetterOrDigit(c) || c == '-' || c == '_' || c == '.')
                 {
                     safe.Append(c);
                 }
             }

             return safe.Length > 0 ? safe.ToString() : "document";
         }

         /// <summary>
         /// Replaces any pending output with a plain-text error and status code 500.
         /// </summary>
         /// <param name="context">Current HTTP context.</param>
         /// <param name="message">Text written to the response body.</param>
         private static void WriteError(HttpContext context, string message)
         {
             context.Response.Clear();
             context.Response.StatusCode = 500;
             context.Response.ContentType = "text/plain";
             context.Response.Write(message);
         }

         /// <summary>
         /// Converts a length in millimeters to hundredths of an inch.
         /// </summary>
         /// <param name="millimeter">Length in millimeters.</param>
         /// <returns>The length in hundredths of an inch, truncated toward zero.</returns>
         private int ConvertToHundredthsInch(int millimeter)
         {
             // 1 inch = 2.54 cm, so mm * 10 / 2.54 equals (mm / 25.4) * 100.
             return (int)((millimeter * 10.0) / 2.54);
         }

         /// <summary>
         /// Downloads the response body of the hard-coded page URL and decodes it to text.
         /// </summary>
         /// <returns>
         /// The decoded page text. When the download fails with a
         /// <see cref="WebException"/>, the exception message is returned instead.
         /// </returns>
         public string getWebContent()
         {
             try
             {
                 byte[] pageData;

                 // WebClient is IDisposable; the original never released it.
                 using (WebClient client = new WebClient())
                 {
                     // Authenticate the request with the current user's default credentials.
                     client.Credentials = CredentialCache.DefaultCredentials;
                     pageData = client.DownloadData("http://a4.keyue.com.cn/out/fwd/2fenhd/yuludan.asp?nstr=AAfFJb_SVvcHJlX2VudHJ5X2lkPTIyMzEyMDE1MDgxMTY0NDUzOSZ0b2lwPTExNA==");
                 }

                 // Try UTF-8 first. If that decode produced replacement characters, the
                 // page is not UTF-8, so decode again with the system default encoding.
                 // NOTE(review): the original comment expects GB2312 pages here, which
                 // only matches Encoding.Default on a Chinese-locale machine - confirm.
                 string pageHtml = Encoding.UTF8.GetString(pageData);
                 if (isMessyCode(pageHtml))
                 {
                     pageHtml = Encoding.Default.GetString(pageData);
                 }

                 return pageHtml;
             }
             catch (WebException webEx)
             {
                 // Kept for backward compatibility: callers receive the error text as
                 // if it were the page content.
                 Console.WriteLine(webEx.Message);
                 return webEx.Message;
             }
         }

         /// <summary>
         /// Reports whether the text contains the Unicode replacement character
         /// (U+FFFD, UTF-8 bytes 239 191 189), which indicates a failed decode.
         /// </summary>
         /// <param name="txt">Decoded text to inspect; null or empty yields false.</param>
         /// <returns>true when the replacement byte sequence is present; otherwise false.</returns>
         public bool isMessyCode(string txt)
         {
             if (string.IsNullOrEmpty(txt))
             {
                 return false;
             }

             byte[] bytes = Encoding.UTF8.GetBytes(txt);

             // A three-byte match may start at index Length - 3. The original bound
             // (i < Length - 3) stopped one position early and missed a replacement
             // character at the very end of the text.
             for (int i = 0; i + 2 < bytes.Length; i++)
             {
                 if (bytes[i] == 239 && bytes[i + 1] == 191 && bytes[i + 2] == 189)
                 {
                     return true;
                 }
             }

             return false;
         }
     }
    128 }
    一般处理程序
  • 相关阅读:
    java 8
    内存溢出VS内存泄漏
    dubbo zk 分布式服务项目搭建与配置
    转发 VS 重定向
    过滤器
    Synchronized
    java 泛型
    spring 整合 mongo
    泛型
    反虚拟机
  • 原文地址:https://www.cnblogs.com/Johnfx-home/p/5556131.html
Copyright © 2011-2022 走看看