博客园博客PDF生成器

zoukankan html css js c++ java

博客园博客PDF生成器

      周末写了一个博客园博客PDF生成器，由于博客园文件上传大小的限制，我把源代码放在CSDN上了（相信大家都有帐号哈），如果没有帐号的请留下邮箱，我会尽快发给你，当然如果哪位朋友能帮忙把源代码上传到博客园上更好：博客园博客PDF生成器

      废话不多说，直接看生成后的PDF效果哈：

博客中图片效果：

      代码比较简单，这里先简单说一下思路，先通过博客地址取得该博客的RSS信息，这是一个XML文件，把源码存在本地，然后解析这个XML文件，从中取出需要的信息，再用iTextSharp这个DLL来操作PDF，从而生成PDF文档。

      下面只贴出几个主要的类，大家有兴趣可以下载源代码看：

      实体类channel，类属性是从XML文件中取得的：

实体类：

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace BlogsConvert
{
    /// <summary>
    /// Feed-level metadata of a blog. Each property mirrors one child element of
    /// the RSS channel node and is filled in by the feed parser.
    /// </summary>
    public class channel
    {
        /// <summary>Blog title (RSS "title").</summary>
        public string Title { get; set; }

        /// <summary>Blog address (RSS "link").</summary>
        public string Link { get; set; }

        /// <summary>Blog sub-title / description (RSS "description").</summary>
        public string Description { get; set; }

        /// <summary>Language code of the feed (RSS "language").</summary>
        public string Language { get; set; }

        /// <summary>Time the feed was last rebuilt (RSS "lastBuildDate").</summary>
        public DateTime LastBuildDate { get; set; }

        /// <summary>Publication time of the feed (RSS "pubDate").</summary>
        public DateTime PubDate { get; set; }

        /// <summary>Value of the RSS "ttl" element.</summary>
        public int Ttl { get; set; }
    }
}

      实体类item（属性来自XML文件）：

实体类：

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace BlogsConvert
{
    /// <summary>
    /// A single blog article. Each property mirrors one child element of an RSS
    /// item node and is filled in by the feed parser.
    /// </summary>
    public class item
    {
        /// <summary>Article title (RSS "title").</summary>
        public string Title { get; set; }

        /// <summary>Article address (RSS "link").</summary>
        public string Link { get; set; }

        /// <summary>Creator as given by the "dc:creator" element.</summary>
        public string Dc_creator { get; set; }

        /// <summary>Author as given by the "author" element.</summary>
        public string Author { get; set; }

        /// <summary>Publication time of the article (RSS "pubDate").</summary>
        public DateTime PubDate { get; set; }

        /// <summary>Value of the RSS "guid" element.</summary>
        public string Guid { get; set; }

        /// <summary>Article body as delivered in the feed (RSS "description").</summary>
        public string Description { get; set; }
    }
}

      从XML文件中提取博客信息类：

代码

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using System.Xml;

namespace BlogsConvert
{
    public class BlogsInfo
    {
        /// <summary>
        /// Reads the feed-level (blog owner) metadata from an RSS XML file.
        /// </summary>
        /// <param name="xmlPath">Path of the RSS XML file to load.</param>
        /// <returns>
        /// The populated <see cref="channel"/>, or <c>null</c> when the document has no
        /// "channel" element or the channel carries no non-blank title.
        /// </returns>
        public channel GetChannel(string xmlPath)
        {
            XmlDocument myXml = new XmlDocument();
            myXml.Load(xmlPath);

            // Look the channel element up by name rather than assuming it is the first
            // child of the root: an empty root or a leading comment previously ended in
            // a NullReferenceException instead of the documented null result.
            XmlNode node = myXml.DocumentElement["channel"];
            if (node == null)
            {
                return null;
            }

            channel cha = new channel();
            foreach (XmlNode chanode in node.ChildNodes)
            {
                // Scanning stops at the first article, exactly as before; channel
                // metadata placed after the first item is therefore not read.
                if (chanode.Name == "item")
                {
                    break;
                }

                switch (chanode.Name)
                {
                    case "title":
                        cha.Title = chanode.InnerText;
                        break;
                    case "link":
                        cha.Link = chanode.InnerText;
                        break;
                    case "description":
                        cha.Description = chanode.InnerText;
                        break;
                    case "language":
                        cha.Language = chanode.InnerText;
                        break;
                    case "lastBuildDate":
                        cha.LastBuildDate = DateTime.Parse(chanode.InnerText);
                        break;
                    case "pubDate":
                        cha.PubDate = DateTime.Parse(chanode.InnerText);
                        break;
                    case "ttl":
                        cha.Ttl = int.Parse(chanode.InnerText);
                        break;
                }
            }

            // Title stays null when the channel had no title element, so test for null
            // before trimming.
            if (string.IsNullOrEmpty(cha.Title) || cha.Title.Trim() == "")
            {
                return null;
            }
            return cha;
        }

        /// <summary>
        /// Reads every article from an RSS XML file without filtering by title.
        /// </summary>
        /// <param name="xmlPath">Path of the RSS XML file to load.</param>
        /// <returns>The result of the keyword overload called with an empty keyword.</returns>
        public IList<item> GetItems(string xmlPath)
        {
            string noKeyWord = string.Empty;
            return GetItems(xmlPath, noKeyWord);
        }

        /// <summary>
        /// Reads the articles from an RSS XML file, optionally keeping only those whose
        /// title contains a keyword.
        /// </summary>
        /// <param name="xmlPath">Path of the RSS XML file to load.</param>
        /// <param name="keyWord">
        /// Text that an article title must contain; null or blank disables filtering.
        /// </param>
        /// <returns>
        /// The matching articles that have a link, or <c>null</c> when there are none
        /// (including when the document has no "channel" element).
        /// </returns>
        public IList<item> GetItems(string xmlPath,string keyWord)
        {
            // A null keyword used to throw on Trim(); treat it like a blank one.
            bool filterByTitle = keyWord != null && keyWord.Trim() != "";

            XmlDocument myXml = new XmlDocument();
            myXml.Load(xmlPath);

            // Find the channel by name; the first child of the root may be a comment,
            // or the root may be empty.
            XmlNode node = myXml.DocumentElement["channel"];
            if (node == null)
            {
                return null;
            }

            IList<item> itemList = new List<item>();
            foreach (XmlNode statusnode in node.ChildNodes)
            {
                if (statusnode.Name != "item")
                {
                    continue;
                }

                item temp = new item();
                bool matches = true;
                foreach (XmlNode o in statusnode.ChildNodes)
                {
                    switch (o.Name)
                    {
                        case "title":
                            temp.Title = o.InnerText;
                            if (filterByTitle && !o.InnerText.Contains(keyWord))
                            {
                                matches = false;
                            }
                            break;
                        case "link":
                            temp.Link = o.InnerText;
                            break;
                        case "dc:creator":
                            temp.Dc_creator = o.InnerText;
                            break;
                        case "author":
                            temp.Author = o.InnerText;
                            break;
                        case "pubDate":
                            temp.PubDate = DateTime.Parse(o.InnerText);
                            break;
                        case "guid":
                            temp.Guid = o.InnerText;
                            break;
                        case "description":
                            temp.Description = o.InnerText;
                            break;
                    }

                    if (!matches)
                    {
                        // Rejected by the filter: the remaining children are irrelevant.
                        break;
                    }
                }

                // The filter verdict is checked explicitly. Previously a rejected item
                // was dropped only because its link had not been read yet, so an item
                // whose link element preceded its title slipped through the filter.
                if (matches && temp.Link != null)
                {
                    itemList.Add(temp);
                }
            }

            if (itemList.Count > 0)
            {
                return itemList;
            }
            return null;
        }
    }
}

        PDF文件生成类，也是本软件中最重要的一个类，其实就是iTextSharp的运用（这个DLL文件在源代码中有）：

代码

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text;
using iTextSharp.text.pdf;
using System.IO;
using System.Text.RegularExpressions;

namespace BlogsConvert
{
    public class ToPdf:IConvert
    {
        #region IConvert 成员

        /// <summary>
        /// Renders a blog as a PDF file: a "generated on" notice, the blog title and
        /// description, a linked table of contents, every article (text paragraphs
        /// and images) and a closing credit line.
        /// Does nothing when <paramref name="commonInfo"/> or
        /// <paramref name="itemList"/> is null.
        /// </summary>
        /// <param name="commonInfo">Blog owner / feed metadata.</param>
        /// <param name="itemList">Articles to render, in output order.</param>
        /// <param name="path">Path of the PDF file to create (overwritten if present).</param>
        public void Convert(channel commonInfo, IList<item> itemList,string path)
        {
            if (commonInfo == null || itemList == null)
            {
                return;
            }

            Rectangle pageSize = PageSize.A4;
            Document document = new Document(pageSize);

            // The using block releases the file handle even when rendering fails
            // part-way (for example when an image cannot be loaded).
            using (FileStream output = new FileStream(path, FileMode.Create))
            {
                PdfWriter.GetInstance(document, output);
                document.Open();

                // One base font is enough; only the point size differs between body
                // text and headings.
                BaseFont baseFont = BaseFont.CreateFont(@"Fonts\SIMHEI.TTF", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
                Font font = new Font(baseFont, 12);
                Font fontBlod = new Font(baseFont, 15);

                // Widest an image may be without running past the page margins.
                float maxImageWidth = pageSize.Width - document.LeftMargin - document.RightMargin;

                // "Generated by program" notice.
                Paragraph pToop = new Paragraph(new Chunk("本文档由程序整理生成（生成时间：" + DateTime.Now + "）", fontBlod));
                pToop.Alignment = Element.ALIGN_CENTER;
                pToop.SpacingAfter = 20;
                document.Add(pToop);

                // Blog title.
                Paragraph pTitle = new Paragraph(new Phrase(commonInfo.Title, fontBlod));
                pTitle.Alignment = Element.ALIGN_CENTER;
                pTitle.SpacingAfter = 20;
                document.Add(pTitle);

                // Blog sub-title, double line spacing.
                Paragraph pDescription = new Paragraph(commonInfo.Description, font);
                pDescription.Alignment = Element.ALIGN_LEFT;
                pDescription.MultipliedLeading = 2;
                pDescription.SpacingAfter = 20;
                document.Add(pDescription);

                // Table-of-contents heading.
                Paragraph tocHeading = new Paragraph("目      录", fontBlod);
                tocHeading.Alignment = Element.ALIGN_CENTER;
                tocHeading.SpacingBefore = 10;
                document.Add(tocHeading);

                // Table-of-contents entries, each followed by a blank spacer line.
                Paragraph spacer = new Paragraph("    ");
                spacer.MultipliedLeading = 1;
                for (int i = 0; i < itemList.Count; i++)
                {
                    Anchor tocEntry = new Anchor("第" + (i + 1) + "篇： " + itemList[i].Title, font);
                    // Link targets are keyed by position. The publication time used
                    // before is not unique (articles without a pubDate all share the
                    // default value), which made several entries jump to one target.
                    tocEntry.Reference = "#link" + i;
                    document.Add(tocEntry);
                    document.Add(spacer);
                }
                document.Add(spacer);
                document.Add(spacer);
                document.Add(spacer);

                // Separator printed after every article.
                Paragraph hr = new Paragraph("--------------------------------------------------------------------------------------------------------");
                hr.Alignment = Element.ALIGN_CENTER;
                hr.SpacingAfter = 20;
                hr.SpacingBefore = 20;

                // Captures the URL of <img src=...> and background=... attributes.
                Regex imageUrlPattern = new Regex(@"(?is)(?:<img[^>]*?src|\bbackground)=(?:(['""])(?<img>[^'"">]+)\1|(?<img>[^'""\s>]+))");

                for (int i = 0; i < itemList.Count; i++)
                {
                    item article = itemList[i];

                    // Named anchor that the matching table-of-contents entry points at.
                    Anchor target = new Anchor("第" + (i + 1) + "篇：", font);
                    target.Name = "link" + i;
                    document.Add(target);

                    Paragraph blogTitle = new Paragraph(article.Title, fontBlod);
                    blogTitle.Alignment = Element.ALIGN_CENTER;
                    blogTitle.MultipliedLeading = 1;
                    document.Add(blogTitle);

                    // '卍' is the paragraph separator: it is inserted wherever the HTML
                    // starts a new line and split on after the markup has been removed.
                    string content = article.Description ?? "";
                    content = content.Replace("<p>", "卍");
                    // Accept every spelling of a line break (<br>, <br/>, <br />, any
                    // case). The former literal "<br/ />" was a typo, so <br/> and <br>
                    // never produced a paragraph break.
                    content = Regex.Replace(content, @"<br[\s/]*>", "卍", RegexOptions.IgnoreCase);

                    // Surround every image URL with "卍Pic…ciP卍" so it survives tag
                    // stripping as a segment of its own. The inserted quote/angle
                    // brackets close the surrounding tag and open a dummy one, both of
                    // which are removed again by NoHTML.
                    IList<string> picList = new List<string>();
                    foreach (Match m in imageUrlPattern.Matches(content))
                    {
                        string url = m.Groups["img"].Value;
                        // Replace() below already rewrites every occurrence of the URL;
                        // processing a repeated URL a second time would wrap the marker
                        // inside itself and corrupt the text.
                        if (url.Contains("OutliningIndicators") || picList.Contains(url))
                        {
                            continue;
                        }
                        picList.Add(url);
                        content = content.Replace(url, "\" />卍Pic" + url + "ciP卍<img src=\"");
                    }

                    content = NoHTML(content);

                    foreach (string segment in content.Split('卍'))
                    {
                        string imageUrl = null;
                        foreach (string url in picList)
                        {
                            if (segment == "Pic" + url + "ciP")
                            {
                                imageUrl = url;
                                break;
                            }
                        }

                        if (imageUrl == null)
                        {
                            Paragraph blogContent = new Paragraph(segment, font);
                            blogContent.Alignment = Element.ALIGN_LEFT;
                            blogContent.MultipliedLeading = 2;
                            blogContent.SpacingAfter = 10;
                            document.Add(blogContent);
                            continue;
                        }

                        Image picture = Image.GetInstance(imageUrl);
                        if (picture.Width > maxImageWidth)
                        {
                            // Shrink proportionally. The previous height formula
                            // (width * height / pageWidth) grew with the image width
                            // and distorted every scaled picture.
                            picture.ScaleAbsolute(maxImageWidth, picture.Height * maxImageWidth / picture.Width);
                        }
                        picture.Alignment = Image.MIDDLE_ALIGN;
                        document.Add(picture);
                    }

                    document.Add(hr);
                }

                // Closing credit line.
                Paragraph drj = new Paragraph(new Chunk("本程序由博客园——天行健(http://home.cnblogs.com/u/durongjian/)制作，如有建议请发邮件至drjchina@163.com", font));
                drj.Alignment = Element.ALIGN_CENTER;
                drj.SpacingAfter = 20;
                drj.SpacingBefore = 20;
                document.Add(drj);

                // Finishes the PDF; this is what flushes the content to the stream.
                document.Close();
            }
        }

        /// <summary>
        /// Removes HTML markup from a fragment and decodes a fixed set of character
        /// entities, returning the trimmed plain text.
        /// </summary>
        /// <param name="Htmlstring">HTML fragment; null is treated as empty.</param>
        /// <returns>The plain text, trimmed; an empty string for null or empty input.</returns>
        public static string NoHTML(string Htmlstring)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return string.Empty;
            }

            // Singleline lets ".*?" cross line breaks, so a script spread over several
            // lines is removed together with its body instead of leaving the code behind.
            Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

            // Decode all entities in a single pass. Chained replacements decoded
            // "&amp;" first and then decoded its output again, so "&amp;lt;" became
            // "<" rather than the literal text "&lt;".
            Htmlstring = Regex.Replace(Htmlstring, @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);", new MatchEvaluator(DecodeEntity), RegexOptions.IgnoreCase);

            // Three trailing Replace("<"/">"/"\r\n") calls used to sit here with their
            // results discarded; they never had any effect and were removed, so angle
            // brackets decoded from &lt; / &gt; are kept in the output as before.
            return Htmlstring.Trim();
        }

        /// <summary>
        /// Maps one matched entity name or number to its replacement text; numeric
        /// entities outside the supported set are dropped.
        /// </summary>
        private static string DecodeEntity(Match entity)
        {
            switch (entity.Groups[1].Value.ToLowerInvariant())
            {
                case "quot":
                case "#34":
                    return "\"";
                case "amp":
                case "#38":
                    return "&";
                case "lt":
                case "#60":
                    return "<";
                case "gt":
                case "#62":
                    return ">";
                case "nbsp":
                case "#160":
                    return " ";
                case "iexcl":
                case "#161":
                    return "\u00a1";
                case "cent":
                case "#162":
                    return "\u00a2";
                case "pound":
                case "#163":
                    return "\u00a3";
                case "copy":
                case "#169":
                    return "\u00a9";
                default:
                    // Any other numeric entity is removed.
                    return "";
            }
        }

        #endregion
    }
}

      最后就是调用类了，先看一下软件界面吧：

      后台代码：

代码

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using BlogsConvert;
using System.Net;
using System.IO;

namespace CnBlogsHelper
{
    public partial class BlogToPdf : Form
    {
        // Feed metadata of the most recently fetched blog; reassigned by btnFind_Click.
        public channel commonInfo=new channel();
        // Articles of the most recently fetched blog; rows are added to and removed
        // from the list box at the same positions as in this list.
        public IList<item> blogInfos=new List<item>();
        /// <summary>
        /// Creates the form and calls InitializeComponent, which is defined in the
        /// other part of this partial class.
        /// </summary>
        public BlogToPdf()
        {
            InitializeComponent();
        }

        // Currently empty handler; presumably wired to the form's Load event in the
        // designer part of the class — confirm before removing.
        private void BlogToPdf_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Downloads the RSS source and stores it as Blogs.xml in the application's
        /// start-up directory, overwriting any previous copy.
        /// </summary>
        /// <param name="PageUrl">URL of the RSS feed to download.</param>
        /// <exception cref="Exception">The response body was empty or blank.</exception>
        public void GetXML(string PageUrl)
        {
            string Content;

            // NOTE(review): the body is always decoded as GB2312, whatever charset the
            // server declares, and then re-saved as UTF-8 — confirm this matches the feed.
            WebRequest request = WebRequest.Create(PageUrl);
            // The using blocks release the connection and streams even when reading fails.
            using (WebResponse response = request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.GetEncoding("GB2312")))
            {
                Content = sr.ReadToEnd();
            }

            // StreamWriter with append = false creates or truncates the file itself, so
            // no separate "create if missing" step is needed.
            string xmlPath = Path.Combine(Application.StartupPath, "Blogs.xml");
            using (StreamWriter writer = new StreamWriter(xmlPath, false, System.Text.Encoding.GetEncoding("UTF-8")))
            {
                writer.Write(Content);
            }

            // As before, the (empty) file has already been written when this is raised.
            if (Content.Trim() == "")
            {
                throw new Exception("用户名有误，请检查后重新输入!");
            }
        }

        /// <summary>
        /// Writes the given blog to a PDF file on the current user's desktop.
        /// </summary>
        /// <param name="saveName">File name of the PDF, without the ".pdf" extension.</param>
        /// <param name="cha">Blog owner / feed metadata.</param>
        /// <param name="itemList">Articles to include.</param>
        public void CreatePDF(string saveName,channel cha,IList<item> itemList)
        {
            // The BlogsInfo instance that used to be created here was never used.
            string dir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
            string pdfPath = Path.Combine(dir, saveName + ".pdf");

            IConvert con = new ToPdf();
            con.Convert(cha, itemList, pdfPath);
        }

        /// <summary>
        /// "Create" button handler: writes the currently loaded articles to a PDF on
        /// the desktop and reports the outcome in a message box.
        /// </summary>
        private void btnCreate_Click(object sender, EventArgs e)
        {
            if (!CheckForm())
            {
                return;
            }

            // Both fields may be null after a search that found no channel or no
            // articles. Convert writes nothing when either one is null, so treat that
            // like "nothing loaded" instead of crashing or reporting a false success.
            if (commonInfo == null || blogInfos == null || blogInfos.Count == 0)
            {
                MessageBox.Show("博客数为0，请先提取博客信息！");
                return;
            }

            string fileName = txtFileName.Text.Trim();
            string error = null;

            Wait f = new Wait();
            try
            {
                f.Show();
                Application.DoEvents();

                CreatePDF(fileName, commonInfo, blogInfos);
            }
            catch (Exception ex)
            {
                error = ex.Message;
            }
            finally
            {
                // Always dismiss the wait window; it used to stay open after a failure.
                f.Close();
            }

            if (error == null)
            {
                MessageBox.Show("PDF文档“" + fileName + ".pdf”生成成功,文档在桌面!");
            }
            else
            {
                MessageBox.Show("异常信息:" + error);
            }
        }

        /// <summary>
        /// "Find" button handler: downloads the blog's RSS feed, parses it and lists
        /// the titles of the (optionally keyword-filtered) articles.
        /// </summary>
        private void btnFind_Click(object sender, EventArgs e)
        {
            if (!CheckForm())
            {
                return;
            }

            // Reset the list box and its backing list together, so a failed search
            // cannot leave the previous articles behind an empty list box.
            libBlog.Items.Clear();
            blogInfos = new List<item>();

            string pageUrl = txtBlogUrl.Text.Trim();
            if (!pageUrl.EndsWith("/", StringComparison.Ordinal))
            {
                pageUrl = pageUrl + "/";
            }
            pageUrl = pageUrl + "rss";

            // The placeholder text in the keyword box means "no keyword".
            string keyWord = txtKeyWord.Text.Trim();
            if (keyWord == "请输入标题中的关键字")
            {
                keyWord = "";
            }

            string error = null;

            // Wait window shown while the feed is downloaded and parsed.
            Wait f = new Wait();
            try
            {
                f.Show();
                Application.DoEvents();

                GetXML(pageUrl);
                string path = Application.StartupPath + @"\Blogs.xml";
                BlogsInfo blogInfo = new BlogsInfo();
                commonInfo = blogInfo.GetChannel(path);

                // GetItems returns null when nothing matches; keep the empty list then.
                IList<item> found = blogInfo.GetItems(path, keyWord);
                if (found != null)
                {
                    blogInfos = found;
                }

                foreach (item o in blogInfos)
                {
                    // The list box rejects null entries; add an empty row instead so
                    // row positions keep matching blogInfos.
                    libBlog.Items.Add(o.Title ?? "");
                }
            }
            catch (Exception ex)
            {
                error = ex.Message;
            }
            finally
            {
                // Always dismiss the wait window; it used to stay open after a failure.
                f.Close();
            }

            if (error != null)
            {
                MessageBox.Show("异常信息:" + error);
            }
            else if (blogInfos.Count == 0)
            {
                MessageBox.Show("没有找到符合条件的博客！");
            }
        }

        /// <summary>
        /// "Clear all" button handler: empties the list box and the loaded articles.
        /// </summary>
        private void btnClearAll_Click(object sender, EventArgs e)
        {
            libBlog.Items.Clear();

            // blogInfos is a public field and is set to null by a search that found
            // nothing, so guard before clearing.
            if (blogInfos != null)
            {
                blogInfos.Clear();
            }
        }

        /// <summary>
        /// "Remove selected" button handler: removes the selected row from the list
        /// box and the article at the same position from the loaded articles.
        /// </summary>
        private void btnClearCurrent_Click(object sender, EventArgs e)
        {
            int index = libBlog.SelectedIndex;
            if (index < 0)
            {
                // Nothing selected; indexing with -1 used to throw.
                return;
            }

            // Remove by position: removing by value deletes the first row with an equal
            // title, which is the wrong row when two articles share a title.
            libBlog.Items.RemoveAt(index);

            if (blogInfos != null && index < blogInfos.Count)
            {
                blogInfos.RemoveAt(index);
            }
        }

        /// <summary>
        /// Click handler of the keyword box: blanks the box when it still shows the
        /// placeholder text, otherwise leaves the text as it is.
        /// </summary>
        private void txtKeyWord_Click(object sender, EventArgs e)
        {
            string current = txtKeyWord.Text;
            if (current.Trim() == "请输入标题中的关键字")
            {
                current = "";
            }
            txtKeyWord.Text = current;
        }

        /// <summary>
        /// Checks that both the blog address and the output file name are filled in.
        /// When one is blank, shows a warning and puts a default value into the blank
        /// field.
        /// </summary>
        /// <returns><c>true</c> when both fields are non-blank; otherwise <c>false</c>.</returns>
        private bool CheckForm()
        {
            bool urlMissing = txtBlogUrl.Text.Trim() == "";
            bool nameMissing = txtFileName.Text.Trim() == "";
            if (!urlMissing && !nameMissing)
            {
                return true;
            }

            MessageBox.Show("博客地址和保存文件名不能为空!");

            // Only fill in what is actually missing; previously a valid address was
            // overwritten with the default whenever just the file name was blank
            // (and vice versa).
            if (urlMissing)
            {
                txtBlogUrl.Text = "http://www.cnblogs.com/";
            }
            if (nameMissing)
            {
                txtFileName.Text = "我的博客";
            }
            return false;
        }
    }
}

      其中调用了一个等待窗体Wait，非常简单，这里就不说了，大家可以看源代码。

      博客园中高手如云，本人只能算个菜，只是把自己写的一点小东西拿出来跟大家分享，希望能帮到大家，欢迎各位朋友批评指正，如果使用过程中有错误请留言哦。

      本软件目的是服务博客园的朋友们，源代码完全开源，但转载或二次开发请注明出处。

作者：Artwl

出处：http://artwl.cnblogs.com

本文首发博客园，版权归作者跟博客园共有。转载必须保留本段声明，并在页面显著位置给出本文链接，否则保留追究法律责任的权利。

查看全文

相关阅读:
python操作mysql封装成类
 es 数据导出到 MySQL
Elasticsearch的数据导出和导入操作（elasticdump工具），以及删除指定type的数据（delete-by-query插件）
解决VM虚拟机中的ubuntu不能全屏的问题
 pandas操作，感觉不错，复制过来的
 BTree和B+Tree详解
 ant安装配置
 jmeter默认生成测试报告
 学习网站
 selenium多窗口切换（windows)

原文地址：https://www.cnblogs.com/artwl/p/1860514.html