zoukankan      html  css  js  c++  java
  • C#Word文件转换为html

      1 using Microsoft.Office.Interop.Word;
      2 using System;
      3 using System.Collections.Generic;
      4 using System.IO;
      5 using System.Linq;
      6 using System.Text;
      7 using System.Web;
      8 using System.Web.UI;
      9 using System.Web.UI.WebControls;
     10 
     namespace Admin
     {
         /// <summary>
         /// Demo Web Forms page that converts a Word document into filtered HTML by
         /// automating Microsoft Word through COM interop, then rewrites the generated
         /// file so that its declared charset and its actual encoding agree.
         /// </summary>
         public partial class TestDemo : System.Web.UI.Page
         {
             // Virtual folder under which the dated (yyyyMMdd) output sub-folders are created.
             private const string OutputVirtualFolder = "~/uploads/TutorCV/";

             // Charset announced in the rewritten <meta> tag; the file is saved with this same encoding.
             private const string OutputCharset = "gb2312";

             /// <summary>
             /// Page load handler. The conversion is intentionally not triggered here.
             /// </summary>
             /// <param name="sender">Event source.</param>
             /// <param name="e">Event data.</param>
             protected void Page_Load(object sender, EventArgs e)
             {
                 // Example usage (left disabled, as in the original demo):
                 // string htmlPath = GetPathByDocToHTML(Server.MapPath("~/uploads/TutorCV/111.docx"));
             }

             /// <summary>
             /// Converts the Word document at <paramref name="strFile"/> to a filtered HTML file
             /// stored in a dated sub-folder of the upload folder.
             /// </summary>
             /// <param name="strFile">Physical path of the Word document to convert.</param>
             /// <returns>
             /// The virtual path ("~/uploads/TutorCV/yyyyMMdd/name.html") of the generated file,
             /// or "0" when <paramref name="strFile"/> is null or empty.
             /// </returns>
             private string GetPathByDocToHTML(string strFile)
             {
                 if (string.IsNullOrEmpty(strFile))
                 {
                     return "0"; // no file supplied
                 }

                 // Read the clock once and use fixed-width fields: concatenating unpadded
                 // year/month/day/... parts produces ambiguous names that can collide.
                 DateTime now = DateTime.Now;
                 System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
                 string subFolder = now.ToString("yyyyMMdd", invariant);
                 string fileStem = now.ToString("yyyyMMddHHmmssfff", invariant);

                 string virtualFolder = OutputVirtualFolder + subFolder + "/";
                 string virtualPath = virtualFolder + fileStem + ".html";
                 string physicalPath = Server.MapPath(virtualPath);

                 // CreateDirectory does nothing when the folder already exists.
                 Directory.CreateDirectory(Server.MapPath(virtualFolder));

                 ConvertWordToFilteredHtml(strFile, physicalPath);

                 // Normalise the charset declaration / encoding of the generated page.
                 TransHTMLEncoding(physicalPath);

                 return virtualPath;
             }

             /// <summary>
             /// Opens <paramref name="sourcePath"/> in Word and saves it as filtered HTML to
             /// <paramref name="targetPath"/>. Word is always shut down and the COM references
             /// released, even when the conversion fails.
             /// </summary>
             /// <param name="sourcePath">Physical path of the Word document.</param>
             /// <param name="targetPath">Physical path of the HTML file to write.</param>
             private static void ConvertWordToFilteredHtml(string sourcePath, string targetPath)
             {
                 // The document methods are invoked late-bound, as in the original code.
                 // NOTE(review): presumably kept to tolerate different Word object library versions - confirm.
                 const System.Reflection.BindingFlags invoke = System.Reflection.BindingFlags.InvokeMethod;
                 object doNotSave = Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges;

                 Microsoft.Office.Interop.Word._Application word = null;
                 try
                 {
                     word = new Microsoft.Office.Interop.Word.Application();
                     // A modal dialog would block a non-interactive server process forever.
                     word.DisplayAlerts = Microsoft.Office.Interop.Word.WdAlertLevel.wdAlertsNone;

                     Microsoft.Office.Interop.Word.Documents docs = null;
                     Microsoft.Office.Interop.Word.Document doc = null;
                     try
                     {
                         docs = word.Documents;

                         // Arguments: FileName, ConfirmConversions, ReadOnly.
                         // ConfirmConversions is false so Word never shows the "Convert File" dialog.
                         doc = (Microsoft.Office.Interop.Word.Document)docs.GetType().InvokeMember(
                             "Open", invoke, null, docs, new object[] { sourcePath, false, true });

                         // Arguments: FileName, FileFormat.
                         doc.GetType().InvokeMember(
                             "SaveAs", invoke, null, doc,
                             new object[] { targetPath, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
                     }
                     finally
                     {
                         if (doc != null)
                         {
                             // Close without saving so Word does not prompt.
                             doc.GetType().InvokeMember("Close", invoke, null, doc, new object[] { doNotSave });
                         }

                         ReleaseCom(doc);
                         ReleaseCom(docs);
                     }
                 }
                 finally
                 {
                     if (word != null)
                     {
                         // Quit Word; otherwise a WINWORD.EXE process is left behind per request.
                         word.GetType().InvokeMember("Quit", invoke, null, word, new object[] { doNotSave });
                     }

                     ReleaseCom(word);
                 }
             }

             /// <summary>
             /// Releases a COM runtime callable wrapper; ignores null and non-COM objects.
             /// </summary>
             /// <param name="comObject">The object to release.</param>
             private static void ReleaseCom(object comObject)
             {
                 if (comObject != null && System.Runtime.InteropServices.Marshal.IsComObject(comObject))
                 {
                     System.Runtime.InteropServices.Marshal.ReleaseComObject(comObject);
                 }
             }

             /// <summary>
             /// Replaces every &lt;meta&gt; tag of the HTML file with a Content-Type declaration
             /// for the output charset and rewrites the file in that same encoding.
             /// Failures are reported to the browser through a startup script alert.
             /// </summary>
             /// <param name="strFilePath">Physical path of the HTML file to rewrite in place.</param>
             private void TransHTMLEncoding(string strFilePath)
             {
                 try
                 {
                     string html;

                     // Code page 0 selects the system default encoding.
                     // NOTE(review): assumes Word saved the page in that encoding - confirm.
                     using (StreamReader reader = new StreamReader(strFilePath, Encoding.GetEncoding(0)))
                     {
                         html = reader.ReadToEnd();
                     }

                     html = System.Text.RegularExpressions.Regex.Replace(
                         html,
                         @"<meta[^>]*>",
                         "<meta http-equiv=Content-Type content='text/html; charset=" + OutputCharset + "'>",
                         System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                     // Write with the encoding that the <meta> tag declares; using the system
                     // default here makes the declaration wrong on non-Chinese systems.
                     using (StreamWriter writer = new StreamWriter(strFilePath, false, Encoding.GetEncoding(OutputCharset)))
                     {
                         writer.Write(html);
                     }
                 }
                 catch (Exception ex)
                 {
                     // IO messages contain quotes and backslashes (file paths); encode the text
                     // so it cannot break out of the JavaScript string literal.
                     string script = "<script>alert('" + HttpUtility.JavaScriptStringEncode(ex.Message) + "')</script>";
                     ClientScript.RegisterStartupScript(GetType(), "alt", script);
                 }
             }
         }
     }
  • 相关阅读:
    oracle客户端服务端字符集-解决乱码
    ORA-04089: 无法对 SYS 拥有的对象创建触发器
    ORA-01109:数据库未打开(解决)
    系统重装
    mybatis实现多表联合查询
    hibernate实现多表联合查询
    GitHub搭建博客过程
    mybatis&Hibernate区别
    IDEA搭建ssm框架测试衍生出的问题The APR based Apache Tomcat Native library which allows optimal performance in production environments was not found on the java.library.path: D:\Develop\jdk7\jdk1.7.0_79\bin;
    IDEA创建maven项目jar更新缓慢问题
  • 原文地址:https://www.cnblogs.com/Cein/p/7281360.html
Copyright © 2011-2022 走看看