zoukankan      html  css  js  c++  java
  • 博客园博客PDF生成器

          周末写了一个博客园博客PDF生成器,由于博客园文件上传大小的限制,我把源代码放在CSDN上了(相信大家都有帐号哈),如果没有帐号的请留下邮箱,我会尽快发给你,当然如果哪位朋友能帮忙把源代码上传到博客园上更好:博客园博客PDF生成器 

          废话不多说,直接看生成后的PDF效果哈:

    博客中图片效果:

          代码比较简单,这里先简单说一下思路,先通过博客地址取得该博客的RSS信息,这是一个XML文件,把源码存在本地,然后解析这个XML文件,从中取出需要的信息,再用iTextSharp这个DLL来操作PDF,从而生成PDF文档。

          下面只贴出几个主要的类,大家有兴趣可以下载源代码看:

          实体类channel,类属性是从XML文件中取得的:

    实体类:
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;

    namespace BlogsConvert
    {
        
    /// <summary>
    /// Entity holding the blog-level (channel) values of an RSS feed.
    /// </summary>
    public class channel
    {
        /// <summary>Text of the channel's title element.</summary>
        public string Title { get; set; }

        /// <summary>Text of the channel's link element.</summary>
        public string Link { get; set; }

        /// <summary>Text of the channel's description element.</summary>
        public string Description { get; set; }

        /// <summary>Text of the channel's language element.</summary>
        public string Language { get; set; }

        /// <summary>Parsed value of the channel's lastBuildDate element.</summary>
        public DateTime LastBuildDate { get; set; }

        /// <summary>Parsed value of the channel's pubDate element.</summary>
        public DateTime PubDate { get; set; }

        /// <summary>Parsed value of the channel's ttl element.</summary>
        public int Ttl { get; set; }
    }
    }

          实体类item(属性来自XML文件):

    实体类:
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;

    namespace BlogsConvert
    {
        
    /// <summary>
    /// Entity holding the values of one article (item) of an RSS feed.
    /// </summary>
    public class item
    {
        /// <summary>Text of the item's title element.</summary>
        public string Title { get; set; }

        /// <summary>Text of the item's link element.</summary>
        public string Link { get; set; }

        /// <summary>Text of the item's dc:creator element.</summary>
        public string Dc_creator { get; set; }

        /// <summary>Text of the item's author element.</summary>
        public string Author { get; set; }

        /// <summary>Parsed value of the item's pubDate element.</summary>
        public DateTime PubDate { get; set; }

        /// <summary>Text of the item's guid element.</summary>
        public string Guid { get; set; }

        /// <summary>Text of the item's description element (the article body).</summary>
        public string Description { get; set; }
    }
    }

          从XML文件中提取博客信息类:

    代码
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Xml.Linq;
    using System.Xml;

    namespace BlogsConvert
    {
        
    /// <summary>
    /// Extracts blog (channel) and article (item) information from an RSS XML file on disk.
    /// </summary>
    public class BlogsInfo
    {
        /// <summary>
        /// Reads the blog-level information from the XML file.
        /// </summary>
        /// <param name="xmlPath">Path of the XML file.</param>
        /// <returns>
        /// The populated channel, or null when the document has no channel element
        /// or the channel has no non-blank title.
        /// </returns>
        /// <exception cref="FormatException">
        /// A lastBuildDate, pubDate or ttl element holds text that cannot be parsed.
        /// </exception>
        public channel GetChannel(string xmlPath)
        {
            XmlNode channelNode = LoadChannelNode(xmlPath);
            if (channelNode == null)
            {
                return null;
            }

            channel cha = new channel();
            foreach (XmlNode chanode in channelNode.ChildNodes)
            {
                // Stop at the first article; nothing after it is read as channel data.
                if (chanode.Name == "item")
                {
                    break;
                }

                switch (chanode.Name)
                {
                    case "title":
                        cha.Title = chanode.InnerText;
                        break;
                    case "link":
                        cha.Link = chanode.InnerText;
                        break;
                    case "description":
                        cha.Description = chanode.InnerText;
                        break;
                    case "language":
                        cha.Language = chanode.InnerText;
                        break;
                    case "lastBuildDate":
                        cha.LastBuildDate = DateTime.Parse(chanode.InnerText);
                        break;
                    case "pubDate":
                        cha.PubDate = DateTime.Parse(chanode.InnerText);
                        break;
                    case "ttl":
                        cha.Ttl = int.Parse(chanode.InnerText);
                        break;
                }
            }

            // Title stays null when the feed has no title element, so test for null before trimming.
            if (cha.Title == null || cha.Title.Trim() == "")
            {
                return null;
            }

            return cha;
        }

        /// <summary>
        /// Reads every article from the XML file.
        /// </summary>
        /// <param name="xmlPath">Path of the XML file.</param>
        /// <returns>The articles found, or null when there are none.</returns>
        public IList<item> GetItems(string xmlPath)
        {
            return GetItems(xmlPath, "");
        }

        /// <summary>
        /// Reads the articles from the XML file, optionally keeping only those whose
        /// title contains a keyword.
        /// </summary>
        /// <param name="xmlPath">Path of the XML file.</param>
        /// <param name="keyWord">
        /// Keyword that the article title must contain; null or blank disables the filter.
        /// </param>
        /// <returns>The matching articles, or null when there are none.</returns>
        /// <exception cref="FormatException">
        /// A pubDate element of a kept article holds text that cannot be parsed.
        /// </exception>
        public IList<item> GetItems(string xmlPath, string keyWord)
        {
            bool filterByTitle = keyWord != null && keyWord.Trim() != "";

            XmlNode channelNode = LoadChannelNode(xmlPath);
            if (channelNode == null)
            {
                return null;
            }

            IList<item> itemList = new List<item>();
            foreach (XmlNode statusnode in channelNode.ChildNodes)
            {
                if (statusnode.Name != "item")
                {
                    continue;
                }

                // Decide on the title before reading anything else, so the result does not
                // depend on the order of the child elements inside the item.
                XmlElement titleNode = statusnode["title"];
                if (filterByTitle && titleNode != null && !titleNode.InnerText.Contains(keyWord))
                {
                    continue;
                }

                item parsed = ReadItem(statusnode);

                // Articles without a link are not returned.
                if (parsed.Link != null)
                {
                    itemList.Add(parsed);
                }
            }

            // Callers of this class expect null, not an empty list, when nothing was found.
            if (itemList.Count > 0)
            {
                return itemList;
            }

            return null;
        }

        // Loads the XML file and returns the root's first child element named "channel", or null.
        private static XmlNode LoadChannelNode(string xmlPath)
        {
            XmlDocument myXml = new XmlDocument();
            myXml.Load(xmlPath);

            XmlNode root = myXml.DocumentElement;
            if (root == null)
            {
                return null;
            }

            return root["channel"];
        }

        // Copies the child elements of one item node into a new item entity.
        private static item ReadItem(XmlNode itemNode)
        {
            item result = new item();
            foreach (XmlNode field in itemNode.ChildNodes)
            {
                switch (field.Name)
                {
                    case "title":
                        result.Title = field.InnerText;
                        break;
                    case "link":
                        result.Link = field.InnerText;
                        break;
                    case "dc:creator":
                        result.Dc_creator = field.InnerText;
                        break;
                    case "author":
                        result.Author = field.InnerText;
                        break;
                    case "pubDate":
                        result.PubDate = DateTime.Parse(field.InnerText);
                        break;
                    case "guid":
                        result.Guid = field.InnerText;
                        break;
                    case "description":
                        result.Description = field.InnerText;
                        break;
                }
            }

            return result;
        }
    }
    }

            PDF文件生成类,也是本软件中最重要的一个类,其实就是iTextSharp的运用(这个DLL文件在源代码中有):

    代码
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using iTextSharp.text;
    using iTextSharp.text.pdf;
    using System.IO;
    using System.Text.RegularExpressions;

    namespace BlogsConvert
    {
        
    /// <summary>
    /// IConvert implementation that writes a blog (channel plus articles) to a PDF file
    /// using iTextSharp.
    /// </summary>
    public class ToPdf : IConvert
    {
        // Character used to cut an article body into paragraphs and to fence image placeholders.
        // NOTE(review): the published listing lost this character in Split('') and in the
        // <p>/<br /> replacements; it is reconstructed from the image marker strings, which
        // still contain it. Confirm against the original project source.
        private const char PieceSeparator = '卍';

        // Finds image URLs in src/background attributes; the URL is captured in group "img".
        private static readonly Regex ImageUrlRegex =
            new Regex(@"(?is)(?:<img[^>]*?src|\bbackground)=(?:(['""])(?<img>[^'"">]+)\1|(?<img>[^'""\s>]+))");

        #region IConvert members

        /// <summary>
        /// Writes the blog to a PDF file: notice line, blog title and description,
        /// table of contents, every article, and a closing credit line.
        /// </summary>
        /// <param name="commonInfo">Blog-level information.</param>
        /// <param name="itemList">Articles to write.</param>
        /// <param name="path">Path of the PDF file to create (an existing file is overwritten).</param>
        /// <remarks>Does nothing when <paramref name="commonInfo"/> or <paramref name="itemList"/> is null.</remarks>
        public void Convert(channel commonInfo, IList<item> itemList, string path)
        {
            if (commonInfo == null || itemList == null)
            {
                return;
            }

            // Load the font before the output file is created, so a missing font file
            // does not leave an empty PDF behind. One BaseFont serves both sizes.
            BaseFont baseFont = BaseFont.CreateFont(@"Fonts\SIMHEI.TTF", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
            Font font = new Font(baseFont, 12);
            Font headingFont = new Font(baseFont, 15);

            using (FileStream output = new FileStream(path, FileMode.Create))
            {
                Document document = new Document(PageSize.A4);
                PdfWriter.GetInstance(document, output);
                document.Open();
                try
                {
                    AddHeader(document, commonInfo, font, headingFont);
                    AddTableOfContents(document, itemList, font, headingFont);
                    AddArticles(document, itemList, font, headingFont);
                    AddFooter(document, font);
                }
                finally
                {
                    // Close even when an article fails, so the file handle is released.
                    document.Close();
                }
            }
        }

        #endregion

        /// <summary>
        /// Strips HTML tags from a string and decodes a fixed set of HTML entities.
        /// </summary>
        /// <param name="Htmlstring">String that may contain HTML markup.</param>
        /// <returns>The trimmed plain text; an empty string for null or empty input.</returns>
        public static string NoHTML(string Htmlstring)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return "";
            }

            Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
            // Decode &amp; after the other named entities so that "&amp;lt;" yields the
            // text "&lt;" instead of being decoded a second time into "<".
            Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
            // Any other numeric entity is dropped.
            Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

            // The earlier version ended with three string.Replace calls ("<", ">", "\r\n")
            // whose results were discarded, so they never had any effect. They are left out
            // rather than "fixed": applying them would delete the angle brackets that were
            // just decoded from &lt; and &gt;.
            return Htmlstring.Trim();
        }

        // Name of the in-document link target for the article at the given zero-based position.
        // The position is used because publication dates can repeat or be missing.
        private static string AnchorName(int index)
        {
            return "link" + (index + 1);
        }

        // Adds the generation notice, the blog title and the blog description.
        private static void AddHeader(Document document, channel commonInfo, Font font, Font headingFont)
        {
            // NOTE(review): the closing bracket literal was empty in the published listing;
            // restored to balance the opening bracket. Confirm against the original source.
            Paragraph notice = new Paragraph(new Chunk("本文档由程序整理生成(生成时间:" + DateTime.Now + ")", headingFont));
            // Alignment: 1 = centered, 0 = left, 2 = right.
            notice.Alignment = 1;
            notice.SpacingAfter = 20;
            document.Add(notice);

            Paragraph blogTitle = new Paragraph(new Phrase(commonInfo.Title, headingFont));
            blogTitle.Alignment = 1;
            blogTitle.SpacingAfter = 20;
            document.Add(blogTitle);

            Paragraph blogDescription = new Paragraph(commonInfo.Description, font);
            blogDescription.Alignment = 0;
            // Line spacing as a multiple of the font size.
            blogDescription.MultipliedLeading = 2;
            blogDescription.SpacingAfter = 20;
            document.Add(blogDescription);
        }

        // Adds the "contents" heading and one clickable entry per article.
        private static void AddTableOfContents(Document document, IList<item> itemList, Font font, Font headingFont)
        {
            Paragraph heading = new Paragraph("目      录", headingFont);
            heading.Alignment = 1;
            heading.SpacingBefore = 10;
            document.Add(heading);

            // Blank paragraph used as a line break between entries.
            Paragraph spacer = new Paragraph("    ");
            spacer.MultipliedLeading = 1;

            for (int i = 0; i < itemList.Count; i++)
            {
                // NOTE(review): the leading ordinal prefix literal was empty in the published
                // listing; "第" is reconstructed. Confirm against the original source.
                Anchor entry = new Anchor("第" + (i + 1) + "篇: " + itemList[i].Title, font);
                entry.Reference = "#" + AnchorName(i);
                document.Add(entry);
                document.Add(spacer);
            }

            document.Add(spacer);
            document.Add(spacer);
            document.Add(spacer);
        }

        // Adds every article: link target, title, body and a separator line.
        private static void AddArticles(Document document, IList<item> itemList, Font font, Font headingFont)
        {
            Paragraph hr = new Paragraph("--------------------------------------------------------------------------------------------------------");
            hr.Alignment = 1;
            hr.SpacingAfter = 20;
            hr.SpacingBefore = 20;

            for (int i = 0; i < itemList.Count; i++)
            {
                item article = itemList[i];

                // NOTE(review): "第" reconstructed, see AddTableOfContents.
                Anchor target = new Anchor("第" + (i + 1) + "篇:", font);
                target.Name = AnchorName(i);
                document.Add(target);

                Paragraph articleTitle = new Paragraph(article.Title, headingFont);
                articleTitle.Alignment = 1;
                articleTitle.MultipliedLeading = 1;
                document.Add(articleTitle);

                AddArticleBody(document, article.Description, font);
                document.Add(hr);
            }
        }

        // Converts one article's HTML into text paragraphs and images and adds them in order.
        private static void AddArticleBody(Document document, string description, Font font)
        {
            string separator = PieceSeparator.ToString();
            string content = description ?? "";

            // Turn paragraph and line-break tags into separators before all tags are stripped.
            // NOTE(review): the replacement text is reconstructed, see PieceSeparator.
            content = content.Replace("<p>", separator);
            content = content.Replace("<br />", separator);
            content = content.Replace("<br/ />", separator);

            // Fence every image URL with separators so it survives tag stripping as its own piece.
            List<string> picList = new List<string>();
            foreach (Match m in ImageUrlRegex.Matches(content))
            {
                string url = m.Groups["img"].Value;

                // URLs containing "OutliningIndicators" are not rendered. A URL already
                // handled is skipped too: Replace below rewrites every occurrence at once,
                // and a second pass would inject the marker into itself.
                if (url.Contains("OutliningIndicators") || picList.Contains(url))
                {
                    continue;
                }

                picList.Add(url);
                content = content.Replace(url, "\" />" + separator + "Pic" + url + "ciP" + separator + "<img src=\"");
            }

            content = NoHTML(content);

            foreach (string piece in content.Split(PieceSeparator))
            {
                string imageUrl = FindImageUrl(piece, picList);
                if (imageUrl != null)
                {
                    AddImage(document, imageUrl);
                    continue;
                }

                Paragraph blogContent = new Paragraph(piece, font);
                blogContent.Alignment = 0;
                blogContent.MultipliedLeading = 2;
                blogContent.SpacingAfter = 10;
                document.Add(blogContent);
            }
        }

        // Returns the URL whose placeholder equals the piece, or null when the piece is plain text.
        private static string FindImageUrl(string piece, IList<string> picList)
        {
            foreach (string url in picList)
            {
                if (piece == "Pic" + url + "ciP")
                {
                    return url;
                }
            }

            return null;
        }

        // Loads an image by URL and adds it, shrunk to the page width when it is wider.
        private static void AddImage(Document document, string url)
        {
            Image picture = Image.GetInstance(url);
            float maxWidth = PageSize.A4.Width;
            if (picture.Width > maxWidth)
            {
                // Keep the aspect ratio: the height shrinks by the same factor as the width.
                picture.ScaleAbsolute(maxWidth, picture.Height * maxWidth / picture.Width);
            }

            picture.Alignment = Image.MIDDLE_ALIGN;
            document.Add(picture);
        }

        // Adds the closing credit line.
        private static void AddFooter(Document document, Font font)
        {
            Paragraph credit = new Paragraph(new Chunk("本程序由博客园——天行健(http://home.cnblogs.com/u/durongjian/)制作,如有建议请发邮件至drjchina@163.com", font));
            // Alignment: 1 = centered, 0 = left, 2 = right.
            credit.Alignment = 1;
            credit.SpacingAfter = 20;
            credit.SpacingBefore = 20;
            document.Add(credit);
        }
    }
    }

          最后就是调用类了,先看一下软件界面吧:

          后台代码:

     

    代码
    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    using BlogsConvert;
    using System.Net;
    using System.IO;

    namespace CnBlogsHelper
    {
        
    /// <summary>
    /// Main form: downloads a blog's RSS feed, lists its articles and writes them to a
    /// PDF file on the desktop.
    /// </summary>
    public partial class BlogToPdf : Form
    {
        // Hint text shown in the keyword box; it must not be used as a real keyword.
        private const string KeyWordPlaceholder = "请输入标题中的关键字";

        public channel commonInfo = new channel();
        public IList<item> blogInfos = new List<item>();

        public BlogToPdf()
        {
            InitializeComponent();
        }

        private void BlogToPdf_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Downloads the RSS feed and saves it as Blogs.xml in the application start-up folder.
        /// </summary>
        /// <param name="PageUrl">URL of the RSS feed.</param>
        /// <exception cref="Exception">The downloaded content is blank.</exception>
        public void GetXML(string PageUrl)
        {
            string Content;

            // Send a GET request; the using blocks release the connection even when reading fails.
            // NOTE(review): the response is always decoded as GB2312 and re-saved as UTF-8,
            // whatever encoding the server declares. Confirm this matches the feed.
            WebRequest request = WebRequest.Create(PageUrl);
            using (WebResponse response = request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(resStream, Encoding.GetEncoding("GB2312")))
            {
                Content = sr.ReadToEnd();
            }

            // Creates the file when missing and overwrites it otherwise.
            File.WriteAllText(GetXmlPath(), Content, Encoding.GetEncoding("UTF-8"));

            if (Content.Trim() == "")
            {
                throw new Exception("用户名有误,请检查后重新输入!");
            }
        }

        /// <summary>
        /// Writes the PDF file to the desktop.
        /// </summary>
        /// <param name="saveName">File name of the PDF, without extension.</param>
        /// <param name="cha">Blog-level information.</param>
        /// <param name="itemList">Articles to write.</param>
        public void CreatePDF(string saveName, channel cha, IList<item> itemList)
        {
            IConvert con = new ToPdf();
            string dir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
            con.Convert(cha, itemList, Path.Combine(dir, saveName + ".pdf"));
        }

        // "Create" button: writes the PDF from the articles currently listed.
        private void btnCreate_Click(object sender, EventArgs e)
        {
            if (!CheckForm())
            {
                return;
            }

            try
            {
                // commonInfo is null when the last feed had no usable channel; the converter
                // would then silently write nothing, so treat it like "no articles".
                if (commonInfo == null || blogInfos == null || blogInfos.Count == 0)
                {
                    MessageBox.Show("博客数为0,请先提取博客信息!");
                    return;
                }

                string fileName = txtFileName.Text.Trim();

                Wait f = new Wait();
                f.Show();
                Application.DoEvents();
                try
                {
                    CreatePDF(fileName, commonInfo, blogInfos);
                }
                finally
                {
                    // Close the waiting window on failure as well.
                    f.Close();
                }

                MessageBox.Show("PDF文档“" + fileName + ".pdf”生成成功,文档在桌面!");
            }
            catch (Exception ex)
            {
                MessageBox.Show("异常信息:" + ex.Message);
            }
        }

        // "Find" button: downloads the feed and fills the article list.
        private void btnFind_Click(object sender, EventArgs e)
        {
            if (!CheckForm())
            {
                return;
            }

            // Reset the list box and the backing list together so they cannot get out of step
            // when the download or the parsing fails.
            libBlog.Items.Clear();
            blogInfos = new List<item>();

            string pageUrl = txtBlogUrl.Text.Trim();
            if (!pageUrl.EndsWith("/", StringComparison.Ordinal))
            {
                pageUrl = pageUrl + @"/";
            }
            pageUrl = pageUrl + "rss";

            try
            {
                // Show the waiting window while the feed is fetched and parsed.
                Wait f = new Wait();
                f.Show();
                Application.DoEvents();
                try
                {
                    GetXML(pageUrl);

                    string path = GetXmlPath();
                    BlogsInfo blogInfo = new BlogsInfo();
                    commonInfo = blogInfo.GetChannel(path);

                    string keyWord = txtKeyWord.Text.Trim() == KeyWordPlaceholder ? "" : txtKeyWord.Text.Trim();
                    // GetItems returns null when nothing matches; keep the field non-null.
                    blogInfos = blogInfo.GetItems(path, keyWord) ?? new List<item>();

                    foreach (item o in blogInfos)
                    {
                        libBlog.Items.Add(o.Title);
                    }
                }
                finally
                {
                    // Close the waiting window on failure as well.
                    f.Close();
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show("异常信息:" + ex.Message);
            }
        }

        // "Clear all" button.
        private void btnClearAll_Click(object sender, EventArgs e)
        {
            libBlog.Items.Clear();
            if (blogInfos != null)
            {
                blogInfos.Clear();
            }
        }

        // "Remove selected" button.
        private void btnClearCurrent_Click(object sender, EventArgs e)
        {
            int index = libBlog.SelectedIndex;

            // SelectedIndex is -1 when nothing is selected.
            if (index < 0)
            {
                return;
            }

            // Remove by position: removing by value would delete the first row with the
            // same title instead of the selected one.
            libBlog.Items.RemoveAt(index);
            if (blogInfos != null && index < blogInfos.Count)
            {
                blogInfos.RemoveAt(index);
            }
        }

        // Clears the hint text when the keyword box is clicked.
        private void txtKeyWord_Click(object sender, EventArgs e)
        {
            if (txtKeyWord.Text.Trim() == KeyWordPlaceholder)
            {
                txtKeyWord.Text = "";
            }
        }

        // Validates the blog address and file name; a blank field is reset to its default value.
        private bool CheckForm()
        {
            bool urlMissing = txtBlogUrl.Text.Trim() == "";
            bool nameMissing = txtFileName.Text.Trim() == "";
            if (!urlMissing && !nameMissing)
            {
                return true;
            }

            MessageBox.Show("博客地址和保存文件名不能为空!");

            // Only the blank field is reset, so a value the user already typed is kept.
            if (urlMissing)
            {
                txtBlogUrl.Text = "http://www.cnblogs.com/";
            }
            if (nameMissing)
            {
                txtFileName.Text = "我的博客";
            }

            return false;
        }

        // Location of the downloaded feed.
        private static string GetXmlPath()
        {
            return Application.StartupPath + @"\Blogs.xml";
        }
    }
    }

          其中调用了一个等待窗体Wait,非常简单,这里就不说了,大家可以看源代码。

          博客园中高手如云,本人只能算个菜,只是把自己写的一点小东西拿出来跟大家分享,希望能帮到大家,欢迎各位朋友批评指正,如果使用过程中有错误请留言哦。

          本软件目的是服务博客园的朋友们,源代码完全开源,但转载或二次开发请注明出处。

  • 相关阅读:
    python操作mysql封装成类
    es 数据 导出 到 MySQL
    Elasticsearch的数据导出和导入操作(elasticdump工具),以及删除指定type的数据(delete-by-query插件)
    解决VM虚拟机中的ubuntu不能全屏的问题
    pandas操作,感觉不错,复制过来的
    BTree和B+Tree详解
    ant安装配置
    jmeter默认生成测试报告
    学习网站
    selenium多窗口切换(windows)
  • 原文地址:https://www.cnblogs.com/artwl/p/1860514.html
Copyright © 2011-2022 走看看