.NET实现Word或Excel文件转为HTML文件
Word文件转html,返回相对路径
/// <summary>
/// Converts a Word document to filtered HTML, saves it under "../html/yyyyMMdd/" and
/// returns the relative path of the generated file.
/// </summary>
/// <param name="strFile">Full path of the Word document to convert.</param>
/// <returns>
/// The relative path of the generated HTML file
/// (for example "../html/20240131/20240131093005123.html"),
/// or "0" when <paramref name="strFile"/> is null or empty.
/// </returns>
private string GetPathByDocToHTML(string strFile)
{
    if (string.IsNullOrEmpty(strFile))
    {
        return "0"; // no file supplied
    }

    // Take one timestamp so the folder name and the file name cannot disagree
    // when the call straddles a second/day boundary.
    DateTime now = DateTime.Now;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Sub-folder is named yyyyMMdd; the file name uses zero-padded parts so two
    // different instants (e.g. 1:11:01 and 11:01:01) can never yield the same name.
    string strFilePath = "../html/" + now.ToString("yyyyMMdd", invariant) + "/";
    string relativeFile = strFilePath + now.ToString("yyyyMMddHHmmssfff", invariant) + ".html";

    // CreateDirectory is a no-op when the folder already exists.
    Directory.CreateDirectory(Server.MapPath(strFilePath));

    // Physical location of the converted HTML document.
    string ConfigPath = Server.MapPath(relativeFile);

    Microsoft.Office.Interop.Word.ApplicationClass word = null;
    Microsoft.Office.Interop.Word.Documents docs = null;
    Microsoft.Office.Interop.Word.Document doc = null;
    try
    {
        word = new Microsoft.Office.Interop.Word.ApplicationClass();
        docs = word.Documents;

        // Late-bound calls keep this independent of the exact Word object-library
        // version (the Open/SaveAs signatures differ between versions).
        // Arguments after the file name are passed positionally as in the original call.
        // NOTE(review): the second argument is presumably ConfirmConversions; if so, "true"
        // may show a dialog when run unattended - confirm against the installed Word version.
        object fileName = strFile;
        doc = (Microsoft.Office.Interop.Word.Document)docs.GetType().InvokeMember(
            "Open",
            System.Reflection.BindingFlags.InvokeMethod,
            null,
            docs,
            new Object[] { fileName, true, true });

        // Save as filtered HTML. Other WdSaveFormat values (wdFormatHTML, wdFormatRTF,
        // wdFormatText, ...) can be substituted here for other target formats.
        object saveFileName = ConfigPath;
        doc.GetType().InvokeMember(
            "SaveAs",
            System.Reflection.BindingFlags.InvokeMethod,
            null,
            doc,
            new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
    }
    finally
    {
        // Always close the document and quit Word, even when Open/SaveAs failed;
        // otherwise a WINWORD process is left running for every failed request.
        try
        {
            if (doc != null)
            {
                doc.GetType().InvokeMember(
                    "Close",
                    System.Reflection.BindingFlags.InvokeMethod,
                    null,
                    doc,
                    new object[] { null, null, null });
            }
        }
        finally
        {
            try
            {
                if (word != null)
                {
                    word.GetType().InvokeMember(
                        "Quit",
                        System.Reflection.BindingFlags.InvokeMethod,
                        null,
                        word,
                        null);
                }
            }
            finally
            {
                // Release the COM wrappers so the Word process can actually exit.
                if (doc != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
                }
                if (docs != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(docs);
                }
                if (word != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(word);
                }
            }
        }
    }

    // Normalise the encoding of the generated page; done after Word has closed the file.
    TransHTMLEncoding(ConfigPath);

    return relativeFile;
}
Excel文件转HTML,返回相对路径
/// <summary>
/// Converts an Excel workbook to HTML, saves it under "../html/yyyyMMdd/" and
/// returns the relative path of the generated file.
/// </summary>
/// <param name="strFile">Full path of the Excel workbook to convert.</param>
/// <returns>
/// The relative path of the generated HTML file
/// (for example "../html/20240131/20240131093005123.html"),
/// or "0" when <paramref name="strFile"/> is null or empty.
/// </returns>
private string GetPathByXlsToHTML(string strFile)
{
    if (string.IsNullOrEmpty(strFile))
    {
        return "0"; // no file supplied
    }

    // Take one timestamp so the folder name and the file name cannot disagree.
    DateTime now = DateTime.Now;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Sub-folder is named yyyyMMdd; the file name uses zero-padded parts so two
    // different instants can never yield the same name.
    string strFilePath = "../html/" + now.ToString("yyyyMMdd", invariant) + "/";
    string relativeFile = strFilePath + now.ToString("yyyyMMddHHmmssfff", invariant) + ".html";

    // CreateDirectory is a no-op when the folder already exists.
    Directory.CreateDirectory(Server.MapPath(strFilePath));
    string ConfigPath = Server.MapPath(relativeFile);

    // Remember which EXCEL processes existed before we launched ours, so that the
    // last-resort kill below only ever targets the instance started by this call.
    System.Collections.Generic.List<int> idsBeforeLaunch = GetExcelProcessIds();
    System.Collections.Generic.List<int> spawnedIds = new System.Collections.Generic.List<int>();

    Microsoft.Office.Interop.Excel.Application repExcel = null;
    Microsoft.Office.Interop.Excel.Workbooks workbooks = null;
    Microsoft.Office.Interop.Excel.Workbook workbook = null;
    try
    {
        repExcel = new Microsoft.Office.Interop.Excel.Application();

        foreach (int id in GetExcelProcessIds())
        {
            if (!idsBeforeLaunch.Contains(id))
            {
                spawnedIds.Add(id);
            }
        }

        // Keep the Workbooks collection in a local: every "a.b.c" hop creates a COM
        // wrapper, and a wrapper that is never stored can never be released.
        workbooks = repExcel.Workbooks;
        workbook = workbooks.Open(strFile, Type.Missing, Type.Missing, Type.Missing, Type.Missing,
            Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing, Type.Missing,
            Type.Missing, Type.Missing, Type.Missing, Type.Missing);

        // Save the workbook as HTML.
        object savefilename = ConfigPath;
        object ofmt = Microsoft.Office.Interop.Excel.XlFileFormat.xlHtml;
        workbook.SaveAs(savefilename, ofmt, Type.Missing, Type.Missing, Type.Missing, Type.Missing,
            Microsoft.Office.Interop.Excel.XlSaveAsAccessMode.xlNoChange, Type.Missing, Type.Missing,
            Type.Missing, Type.Missing, Type.Missing);
    }
    finally
    {
        // Close, quit and release in every case so a failed conversion does not
        // leave an EXCEL process behind.
        try
        {
            if (workbook != null)
            {
                object osave = false; // discard changes, never prompt to save
                workbook.Close(osave, Type.Missing, Type.Missing);
            }
        }
        finally
        {
            try
            {
                if (repExcel != null)
                {
                    repExcel.Quit();
                }
            }
            finally
            {
                if (workbook != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
                }
                if (workbooks != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(workbooks);
                }
                if (repExcel != null)
                {
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(repExcel);
                }

                // Last resort: terminate our own Excel instance if it is still alive.
                KillExcelProcesses(spawnedIds);
            }
        }
    }

    return relativeFile;
}

/// <summary>Returns the ids of all processes currently named "EXCEL".</summary>
private static System.Collections.Generic.List<int> GetExcelProcessIds()
{
    System.Collections.Generic.List<int> ids = new System.Collections.Generic.List<int>();
    foreach (System.Diagnostics.Process p in System.Diagnostics.Process.GetProcessesByName("EXCEL"))
    {
        ids.Add(p.Id);
        p.Dispose();
    }
    return ids;
}

/// <summary>Best-effort termination of the given EXCEL processes that are still running.</summary>
private static void KillExcelProcesses(System.Collections.Generic.List<int> processIds)
{
    foreach (int id in processIds)
    {
        try
        {
            using (System.Diagnostics.Process p = System.Diagnostics.Process.GetProcessById(id))
            {
                // Re-check the name in case the id was recycled by another program.
                if (string.Equals(p.ProcessName, "EXCEL", StringComparison.OrdinalIgnoreCase) && !p.HasExited)
                {
                    p.Kill();
                }
            }
        }
        catch (ArgumentException)
        {
            // The process has already exited - nothing to do.
        }
        catch (InvalidOperationException)
        {
            // The process exited between the check and the kill - nothing to do.
        }
        catch (System.ComponentModel.Win32Exception)
        {
            // The process is terminating or cannot be accessed; this is best-effort only.
        }
    }
}
这里可能会遇到一个问题:转换生成的HTML文件所使用的编码可能导致浏览器无法正确解析(页面出现乱码),
因此需要对生成的文件进行转码,转换代码如下:
/// <summary>
/// Rewrites a generated HTML file so that its meta tags declare charset gb2312 and the
/// file content is actually stored in that encoding.
/// </summary>
/// <param name="strFilePath">Physical path of the HTML file to rewrite in place.</param>
/// <remarks>
/// Failures are not propagated: the error message is shown to the user through a
/// client-side alert registered on the current page.
/// </remarks>
private void TransHTMLEncoding(string strFilePath)
{
    try
    {
        // Read using the system default ANSI code page (code page 0).
        string html;
        using (System.IO.StreamReader sr = new System.IO.StreamReader(strFilePath, Encoding.GetEncoding(0)))
        {
            html = sr.ReadToEnd();
        }

        // Every <meta ...> tag is replaced by the gb2312 Content-Type declaration.
        html = System.Text.RegularExpressions.Regex.Replace(
            html,
            @"<meta[^>]*>",
            "<meta http-equiv=Content-Type content='text/html; charset=gb2312'>",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);

        // Write with the same encoding the meta tag declares; writing with the machine's
        // default code page would produce a mismatch on servers whose default is not gb2312.
        using (System.IO.StreamWriter sw = new System.IO.StreamWriter(strFilePath, false, Encoding.GetEncoding("gb2312")))
        {
            sw.Write(html);
        }
    }
    catch (Exception ex)
    {
        // Escape the message for use inside a single-quoted JavaScript string literal:
        // quotes, backslashes or line breaks would break the script, and "</" could
        // terminate the surrounding <script> element. Backslashes must be escaped first.
        string message = ex.Message
            .Replace("\\", "\\\\")
            .Replace("'", "\\'")
            .Replace("\r", "\\r")
            .Replace("\n", "\\n")
            .Replace("</", "<\\/");

        Page.RegisterStartupScript("alt", "<script>alert('" + message + "')</script>");
    }
}
原文链接:.NET实现Word或Excel文件转为HTML文件