zoukankan      html  css  js  c++  java
  • 转:C#读取PDF、TXT内容

    //读取PDF内容
    /// <summary>
    /// Click handler: extracts the text of the sample PDF via <c>OnCreated</c>
    /// and displays the result in <c>label3</c>.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        // Verbatim literal: in a regular string "\a" is the alert (bell) escape,
        // which would silently corrupt the path.
        label3.Text = OnCreated(@"D:\aa.pdf");
    }
     
            /// <summary>
            /// Extracts the text of every page of a PDF file using iTextSharp.
            /// </summary>
            /// <param name="filepath">Path of the PDF file to read.</param>
            /// <returns>
            /// The concatenated text of all pages, or <c>null</c> when any exception
            /// occurs (the exception is appended to <c>mylog.log</c> in the
            /// application base directory).
            /// </returns>
            private string OnCreated(string filepath)
            {
                try
                {
                    PdfReader pdfReader = new PdfReader(filepath);
                    try
                    {
                        System.Text.StringBuilder text = new System.Text.StringBuilder();
                        int numberOfPages = pdfReader.NumberOfPages;

                        // The loop starts at 1 and runs through NumberOfPages inclusive.
                        for (int i = 1; i <= numberOfPages; ++i)
                        {
                            // A fresh strategy instance is created for each page, as in the original code.
                            iTextSharp.text.pdf.parser.ITextExtractionStrategy strategy =
                                new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();
                            text.Append(iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(pdfReader, i, strategy));
                        }

                        return text.ToString();
                    }
                    finally
                    {
                        // Close the reader even when extraction throws.
                        pdfReader.Close();
                    }
                }
                catch (Exception ex)
                {
                    // Path.Combine avoids the invalid "\m" escape of the original "\mylog.log" literal.
                    string logPath = Path.Combine(
                        System.AppDomain.CurrentDomain.SetupInformation.ApplicationBase, "mylog.log");
                    using (StreamWriter wlog = File.AppendText(logPath))
                    {
                        // Include the failing file name, which the original message omitted.
                        wlog.WriteLine("出错文件:" + filepath + " 原因:" + ex.ToString());
                    }
                    return null;
                }
            }
     
     
    //读取TXT
    // Read the whole text file into memory; path is the file's path.
    string text = System.IO.File.ReadAllText(path);
    // Remove every carriage return and line feed from the text.
    // NOTE(review): the page extraction replaced the original escape sequences with
    // raw line breaks; "\r" and "\n" are the presumed originals - confirm.
    text = text.Replace("\r", string.Empty).Replace("\n", string.Empty);
     
    实例:
    
    //1. 生成一个PDF,将文本和图片添加到PDF里面。
            //2. 从PDF文档中提取所有图片。
            //3. 从PDF文档中提取所有文本。
     
           //生成一个PDF文件 里面包含文本和图片
            /// <summary>
            /// Click handler: creates <c>sample.pdf</c> with Spire.PDF, containing one
            /// line of text and one image loaded from <c>ff.jpg</c>.
            /// </summary>
            private void button2_Click(object sender, EventArgs e)
            {
                Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
                try
                {
                    PdfPageBase page = doc.Pages.Add();

                    // Add the text. Per the original author's note, Chinese characters are
                    // not rendered correctly with this font, while English letters are.
                    page.Canvas.DrawString("Hello!Welcome to my house!",
                        new Spire.Pdf.Graphics.PdfFont(PdfFontFamily.Helvetica, 20f),
                        new PdfSolidBrush(Color.Black), 10, 10);

                    // Add the image, scaled to 75% and centred horizontally.
                    // NOTE(review): 0.75 presumably converts pixels to points (72/96) - confirm.
                    Spire.Pdf.Graphics.PdfImage image = Spire.Pdf.Graphics.PdfImage.FromFile("ff.jpg");
                    float width = image.Width * 0.75f;
                    float height = image.Height * 0.75f;
                    float x = (page.Canvas.ClientSize.Width - width) / 2;
                    page.Canvas.DrawImage(image, x, 60, width, height);

                    // Further images can be added the same way: load them with
                    // PdfImage.FromFile and draw them with page.Canvas.DrawImage.

                    doc.SaveToFile("sample.pdf");
                }
                finally
                {
                    // Release the document, as the other handlers do after using it.
                    doc.Close();
                }
            }
     
            //读取图片 获取图片个数 并把图片保存到本地
            /// <summary>
            /// Click handler: extracts every image from <c>sample.pdf</c>, shows the
            /// image count in <c>label3</c>, and saves each image to the working
            /// directory as <c>Image-N.png</c> (N starting at 0).
            /// </summary>
            private void button1_Click(object sender, EventArgs e)
            {
                IList<Image> images = new List<Image>();

                Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
                try
                {
                    doc.LoadFromFile("sample.pdf");
                    foreach (PdfPageBase page in doc.Pages)
                    {
                        // Extract once per page; the original called ExtractImages() twice,
                        // doing the extraction work two times.
                        Image[] extracted = page.ExtractImages();
                        if (extracted == null)
                        {
                            continue;
                        }

                        foreach (Image image in extracted)
                        {
                            images.Add(image);
                        }
                    }
                }
                finally
                {
                    // Close the document even when loading or extraction fails.
                    doc.Close();
                }

                label3.Text = images.Count.ToString();

                int index = 0;
                foreach (Image image in images)
                {
                    try
                    {
                        String imageFileName = String.Format("Image-{0}.png", index++);
                        image.Save(imageFileName, ImageFormat.Png);
                    }
                    finally
                    {
                        // Images hold GDI+ resources; release them once written to disk.
                        image.Dispose();
                    }
                }
            }
     
            //读取文本
            /// <summary>
            /// Click handler: extracts the text of every page of <c>sample.pdf</c>
            /// and displays it in <c>label1</c>.
            /// </summary>
            private void button3_Click(object sender, EventArgs e)
            {
                StringBuilder buffer = new StringBuilder();

                Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
                try
                {
                    doc.LoadFromFile("sample.pdf");
                    foreach (PdfPageBase page in doc.Pages)
                    {
                        buffer.Append(page.ExtractText());
                    }
                }
                finally
                {
                    // Close the document even when loading or extraction fails.
                    doc.Close();
                }

                // Show the extracted text in the UI.
                label1.Text = buffer.ToString();

                // To persist the text instead, write it to a file, e.g.
                // File.WriteAllText("TextInPdf.txt", buffer.ToString());
            }

    原文:https://blog.csdn.net/wk125570/article/details/73794257?utm_source=copy 

    参考:http://www.cnblogs.com/Yesi/p/4203686.html

  • 相关阅读:
    mysql的复制
    web页面请求历程
    django工作原理简介
    http协议
    路由器和交换机的区别
    OSI七层模型
    TCP/IP协议总结
    IO复用
    僵尸进程和孤儿进程
    java源代码如何打成jar包
  • 原文地址:https://www.cnblogs.com/chuhj/p/9776278.html
Copyright © 2011-2022 走看看