zoukankan      html  css  js  c++  java
  • 将EXCEL表格文字图片信息转成RTF格式写入数据库BLOB字段

    最近在项目开发过程中,有一个需求是将Excel的文字图片信息导入数据库。

    数据库中已存在的数据格式是RTF富文本的内容转成byte[]写入Oracle数据库的Blob字段的,

    因此在不影响软件和保证已存在数据格式的前提下,需要将Excel的文字图片信息做转换处理。

    流程如下:

    需要导入的Excel文件内容

    软件界面显示效果

    读取EXCEL文件文字、图片信息,并将EXCEL文字、图片转换成RTF格式

    这里我用的NPOI 2.3.0.0

      1 //RTF格式开始的固定字符串
      2 private const string RtfStartTxt = @"{
    tf1deff0{fonttbl{f0 Calibri;}{f1fcharset134 Tahoma;}}{colortbl
    ed0green0lue0 ;
    ed0green0lue255 ;}{*defchp fs22}{*listoverridetable}{stylesheet {qlfs22 Normal;}{*cs1f1fs18 Default Paragraph Font;}{*cs2sbasedon1f1fs18 Line Number;}{*cs3ulfs22cf1 Hyperlink;}{*	s4	srowdfs22ql	rautofit1	scellpaddfl3	scellpaddl108	scellpaddfr3	scellpaddr108	svertaltcltxlrtb Normal Table;}{*	s5	srowdsbasedon4fs22ql	rbrdrtrdrsrdrw10	rbrdrlrdrsrdrw10	rbrdrbrdrsrdrw10	rbrdrrrdrsrdrw10	rbrdrhrdrsrdrw10	rbrdrvrdrsrdrw10	rautofit1	scellpaddfl3	scellpaddl108	scellpaddfr3	scellpaddr108	svertaltcltxlrtb Table Simple 1;}}
    ouicompatsplytwninehtmautspsectdpardplainql";
      3 //RTF代表文本的首字符串
      4 private const string RtfTxtStartTxt = @"{f1fs18cf0 ";
      5 //RTF格式中代表一个回车
      6 private const string RtfEnter = @"f2fs18cf0parpardplainql";
      7 //RTF格式结束的固定字符串
      8 private const string RtfEndTxt = @"f1fs18cf0par}";
      9 //将字符串转换成RTF格式的内容
     10 private static string GetRichText(string str)
     11 {
     12     return str.Select(b => ((int) b).ToString(CultureInfo.InvariantCulture)).Select(tmp1 => @"u" + tmp1 + "?").Aggregate("", (current, tmp2) => current + tmp2);
     13 }
     14 
     15 //将转化成字符串的图片信息按每200个字符加一个回车形成RTF图片信息
     16 private static string GetSplitString(string str)
     17 {
     18     string temp = string.Empty;
     19     for (int i = 0; i < str.Length; i += 200)
     20         temp += (i + 200 < str.Length) ? (str.Substring(i, 200) + "
    ") : str.Substring(i);
     21     return temp;
     22 }
     23 /// <summary>
     24 /// 根据Excel列类型获取列的值
     25 /// </summary>
     26 /// <param name="cell">Excel列</param>
     27 /// <returns></returns>
     28 private static string GetCellValue(ICell cell)
     29 {
     30     if (cell == null)
     31         return string.Empty;
     32     switch (cell.CellType)
     33     {
     34         case CellType.Blank:
     35             return string.Empty;
     36         case CellType.Boolean:
     37             return cell.BooleanCellValue.ToString();
     38         case CellType.Error:
     39             return cell.ErrorCellValue.ToString(CultureInfo.InvariantCulture);
     40         case CellType.Numeric:
     41             short format = cell.CellStyle.DataFormat;
     42             //对时间格式(2015.12.5、2015/12/5、2015-12-5等)的处理
     43             if (format == 14 || format == 31 || format == 57 || format == 58)
     44                 return cell.DateCellValue.ToString(CultureInfo.InvariantCulture);
     45             return cell.NumericCellValue.ToString(CultureInfo.InvariantCulture);
     46         default:
     47             return cell.ToString();
     48         case CellType.String:
     49             return cell.StringCellValue;
     50         case CellType.Formula:
     51             try
     52             {
     53                 var e = new HSSFFormulaEvaluator(cell.Sheet.Workbook);
     54                 e.EvaluateInCell(cell);
     55                 return cell.ToString();
     56             }
     57             catch
     58             {
     59                 return cell.NumericCellValue.ToString(CultureInfo.InvariantCulture);
     60             }
     61     }
     62 }
     63 //替换字符串前面内容
     64 public static string TrimStarString(string s, string searchStr, string replaceStr)
     65 {
     66     var result = s;
     67     try
     68     {
     69         if (string.IsNullOrEmpty(result))
     70             return result;
     71         if (s.Length < searchStr.Length)
     72             return result;
     73         if (s.IndexOf(searchStr, 0, searchStr.Length, StringComparison.Ordinal) > -1)
     74             result = s.Substring(searchStr.Length);
     75         return result;
     76     }
     77     catch (Exception)
     78     {
     79         return result;
     80     }
     81 }
     82 //替换字符串后面内容
     83 public static string TrimEndString(string s, string searchStr, string replaceStr)
     84 {
     85     var result = s;
     86     try
     87     {
     88         if (string.IsNullOrEmpty(result))
     89             return result;
     90         if (s.Length < searchStr.Length)
     91             return result;
     92         if (s.IndexOf(searchStr, s.Length - searchStr.Length, searchStr.Length, StringComparison.Ordinal) > -1)
     93             result = s.Substring(0, s.Length - searchStr.Length);
     94         return result;
     95     }
     96     catch (Exception)
     97     {
     98         return result;
     99     }
    100 }
    101 /// <summary>
    102 /// 将excel导入到datatable
    103 /// </summary>
    104 /// <param name="filePath">excel路径</param>
    105 /// <param name="sheetName"></param>
    106 /// <param name="isColumnName">第一行是否是列名</param>
    107 /// <param name="dataTable"></param>
    108 /// <returns>返回datatable</returns>
    109 public static bool ExcelToDataTable(string filePath, string sheetName, bool isColumnName, out DataTable dataTable)
    110 {
    111     dataTable = null;
    112     FileStream fs = null;
    113     IWorkbook workbook = null;
    114     int startRow = 0;
    115     var pictures = new List<PicturesInfo>();
    116     try
    117     {
    118         using (fs = File.OpenRead(filePath))
    119         {
    120             // 2007版本
    121             if (filePath.ToUpper().EndsWith(".XLSX"))
    122                 workbook = new XSSFWorkbook(fs);
    123             // 2003版本
    124             else if (filePath.ToUpper().EndsWith(".XLS"))
    125                 workbook = new HSSFWorkbook(fs);
    126             if (workbook != null)
    127             {
    128                 ISheet sheet = workbook.GetSheet(sheetName);
    129                 dataTable = new DataTable();
    130                 if (sheet != null)
    131                 {
    132                     pictures = NpoiExtend.GetAllPictureInfos(sheet);//获取图片信息
    133                     int rowCount = sheet.LastRowNum; //总行数
    134                     if (rowCount > 0)
    135                     {
    136                         IRow firstRow = sheet.GetRow(0); //第一行
    137                         int cellCount = firstRow.LastCellNum; //列数
    138                         //构建datatable的列
    139                         ICell cell;
    140                         DataColumn column;
    141                         if (isColumnName)
    142                         {
    143                             startRow = 1; //如果第一行是列名,则从第二行开始读取
    144                             for (int i = firstRow.FirstCellNum; i < cellCount; ++i)
    145                             {
    146                                 cell = firstRow.GetCell(i);
    147                                 if (cell != null)
    148                                 {
    149                                     if (cell.StringCellValue != null)
    150                                     {
    151                                         column = new DataColumn(cell.StringCellValue);
    152                                         dataTable.Columns.Add(column);
    153                                     }
    154                                 }
    155                             }
    156                         }
    157                         else
    158                         {
    159                             for (int i = firstRow.FirstCellNum; i < cellCount; ++i)
    160                             {
    161                                 column = new DataColumn("column" + (i + 1));
    162                                 dataTable.Columns.Add(column);
    163                             }
    164                         }
    165                         //填充行
    166                         for (int i = startRow; i <= rowCount; ++i)
    167                         {
    168                             IRow row = sheet.GetRow(i);
    169                             if (row == null) continue;
    170                             DataRow dataRow = dataTable.NewRow();
    171                             for (int j = row.FirstCellNum; j < cellCount; ++j)
    172                             {
    173                                 cell = row.GetCell(j);
    174                                 if (cell == null)
    175                                     dataRow[j] = "";
    176                                 else
    177                                     dataRow[j] = GetCellValue(cell);
    178                             }
    179                             dataTable.Rows.Add(dataRow);
    180                         }
    181                     }
    182                 }
    183             }
    184         }
    185         if (dataTable != null && dataTable.Columns.Count >= 0 && pictures.Count > 0)
    186         {
    187             var columnIndex = 0;
    188             for (int i = 0; i < dataTable.Columns.Count; i++)
    189             {
    190                 if (dataTable.Columns[i].ColumnName == "说明")//找到说明列
    191                 {
    192                     columnIndex = i;
    193                     break;
    194                 }
    195             }
    196             for (int i = 0; i < dataTable.Rows.Count; i++)
    197             {
    198                 DataRow row = dataTable.Rows[i];
    199                 var str = row[columnIndex].ToString();
    200                 var pics = pictures.Where(x => x.MinCol == columnIndex && x.MinRow == i + 1).ToList();
    201                 if (pics.Count > 0)
    202                 {
    203                     var allPicStr = "";
    204                     foreach (PicturesInfo pic in pics)
    205                     {
    206                         byte[] picdata = pic.PictureData;//Excel 中的图片信息,byte[]格式
    207                         var picstr = "";
    208                         foreach (byte b in picdata)
    209                         {
    210                             string tempStr = Convert.ToString(b, 16);
    211                             if (tempStr.Length == 1)
    212                                 tempStr = "0" + tempStr;//不足一位需要前面补0
    213                             picstr += tempStr;
    214                         }//将byte[]格式图片=>转换成16进制
    215                         picstr = GetSplitString(picstr);//转成RTF格式需要的内容
    216                         //RTF图片格式{*shppict{pictpngblippicw8494pich3175picwgoal4815pichgoal1800picscalex100picscaley100 [图片16进制信息]}}
    217                         picstr = @"{*shppict{pictpngblip" +
    218                                     @"picw" + pic.SizeWidth * 26 +
    219                                     @"pich" + pic.SizeHeight * 26 +
    220                                     @"picwgoal" + pic.SizeWidth * 26 * 72 / 127 +
    221                                     @"pichgoal" + pic.SizeHeight * 26 * 72 / 127 +
    222                                     @"picscalex100picscaley100 " +
    223                                     picstr + @"}}";
    224                         allPicStr += picstr + RtfEnter;
    225                     }
    226                     var allTextStr = RtfTxtStartTxt + GetRichText(str) + "}" + RtfEnter;//Excel单元格文字内容转RTF格式
    227                     var textPic = RtfStartTxt + allTextStr + allPicStr + RtfEndTxt;//将Excel文字和图片拼接到一起
    228                     row[columnIndex] = textPic;
    229                 }
    230             }
    231             var k = dataTable.Rows.Count - 1;
    232             for (int i = k; i > 0; i--)//将多个单元格合并到一起,处理如图Excel格式内容
    233             {
    234                 DataRow row = dataTable.Rows[i];
    235                 if (string.IsNullOrEmpty(row[0].ToString()) && !string.IsNullOrEmpty(row[columnIndex].ToString()))
    236                 {
    237                     var str1 = dataTable.Rows[i - 1][columnIndex].ToString();
    238                     str1 = TrimEndString(str1, RtfEndTxt, "");
    239                     var str2 = dataTable.Rows[i][columnIndex].ToString();
    240                     str2 = TrimEndString(str2, RtfEndTxt, "");
    241                     str2 = TrimStarString(str2, RtfStartTxt, "");
    242                     dataTable.Rows[i - 1][columnIndex] = str1 + RtfEnter + str2 + RtfEndTxt;
    243                     dataTable.Rows.RemoveAt(i);
    244                 }
    245             }
    246         }
    247         return true;
    248     }
    249     catch (Exception ex)
    250     {
    251         if (fs != null)
    252             fs.Close();
    253         return false;
    254     }
    255 }
    View Code

    读取Excel图片我参考的是这篇博文:https://www.cnblogs.com/hanzhaoxin/p/4442369.html

    PicturesInfo.cs

    在原博文的基础上,我加了SizeWidth 和 SizeHeight两个属性,用于后续生成RTF格式图片Size内容。

        public class PicturesInfo
        {
            public int MinRow { get; set; }
            public int MaxRow { get; set; }
            public int MinCol { get; set; }
            public int MaxCol { get; set; }
            public Byte[] PictureData { get; private set; }
            public int SizeWidth { get; set; }
            public int SizeHeight { get; set; }
    
            public PicturesInfo(int minRow, int maxRow, int minCol, int maxCol, Byte[] pictureData)
            {
                this.MinRow = minRow;
                this.MaxRow = maxRow;
                this.MinCol = minCol;
                this.MaxCol = maxCol;
                this.PictureData = pictureData;
                using (var ms = new MemoryStream(pictureData))
                {
                    var image = Image.FromStream(ms);
                    this.SizeWidth = image.Size.Width;
                    this.SizeHeight = image.Size.Height;
                }
            }
        }
    View Code

    NpoiExtend.cs

    public static class NpoiExtend
    {
        public static List<PicturesInfo> GetAllPictureInfos(this ISheet sheet)
        {
            return sheet.GetAllPictureInfos(null,null,null,null);
        }
     
        public static List<PicturesInfo> GetAllPictureInfos(this ISheet sheet,int? minRow,int? maxRow,int? minCol,int? maxCol,bool onlyInternal = true)
        {
            if (sheet is HSSFSheet)
            {
                return GetAllPictureInfos((HSSFSheet)sheet,minRow,maxRow,minCol,maxCol,onlyInternal);
            }
            else if (sheet is XSSFSheet)
            {
                return GetAllPictureInfos((XSSFSheet)sheet, minRow, maxRow, minCol, maxCol, onlyInternal);
            }
            else
            {
                throw new Exception("未处理类型,没有为该类型添加:GetAllPicturesInfos()扩展方法!");
            }
        }
     
        private static List<PicturesInfo> GetAllPictureInfos(HSSFSheet sheet,int? minRow, int? maxRow, int? minCol, int? maxCol, bool onlyInternal)
        {
            List<PicturesInfo> picturesInfoList = new List<PicturesInfo>();
     
            var shapeContainer = sheet.DrawingPatriarch as HSSFShapeContainer;
            if (null != shapeContainer)
            {
                var shapeList = shapeContainer.Children;
                foreach (var shape in shapeList)
                {
                    if (shape is HSSFPicture && shape.Anchor is HSSFClientAnchor)
                    {
                        var picture = (HSSFPicture)shape;
                        var anchor = (HSSFClientAnchor)shape.Anchor;
                        
                        if (IsInternalOrIntersect(minRow, maxRow, minCol, maxCol, anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, onlyInternal))
                        {
                            picturesInfoList.Add(new PicturesInfo(anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, picture.PictureData.Data));
                        }
                    }
                }
            }
     
            return picturesInfoList;
        }
     
        private static List<PicturesInfo> GetAllPictureInfos(XSSFSheet sheet, int? minRow, int? maxRow, int? minCol, int? maxCol, bool onlyInternal)
        {
            List<PicturesInfo> picturesInfoList = new List<PicturesInfo>();
     
            var documentPartList = sheet.GetRelations();
            foreach (var documentPart in documentPartList)
            {
                if (documentPart is XSSFDrawing)
                {
                    var drawing = (XSSFDrawing)documentPart;
                    var shapeList = drawing.GetShapes();
                    foreach (var shape in shapeList)
                    {
                        if (shape is XSSFPicture)
                        {
                            var picture = (XSSFPicture)shape;
                            var anchor = picture.GetPreferredSize();
                            
                            if (IsInternalOrIntersect(minRow, maxRow, minCol, maxCol, anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, onlyInternal))
                            {
                                picturesInfoList.Add(new PicturesInfo(anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, picture.PictureData.Data));
                            }
                        }
                    }
                }
            }
     
            return picturesInfoList;
        }
     
        private static bool IsInternalOrIntersect(int? rangeMinRow, int? rangeMaxRow, int? rangeMinCol, int? rangeMaxCol,
            int pictureMinRow, int pictureMaxRow, int pictureMinCol, int pictureMaxCol, bool onlyInternal)
        {
            int _rangeMinRow = rangeMinRow ?? pictureMinRow;
            int _rangeMaxRow = rangeMaxRow ?? pictureMaxRow;
            int _rangeMinCol = rangeMinCol ?? pictureMinCol;
            int _rangeMaxCol = rangeMaxCol ?? pictureMaxCol;
     
            if (onlyInternal)
            {
                return (_rangeMinRow <= pictureMinRow && _rangeMaxRow >= pictureMaxRow &&
                        _rangeMinCol <= pictureMinCol && _rangeMaxCol >= pictureMaxCol);
            }
            else
            {
                return ((Math.Abs(_rangeMaxRow - _rangeMinRow) + Math.Abs(pictureMaxRow - pictureMinRow) >= Math.Abs(_rangeMaxRow + _rangeMinRow - pictureMaxRow - pictureMinRow)) &&
                (Math.Abs(_rangeMaxCol - _rangeMinCol) + Math.Abs(pictureMaxCol - pictureMinCol) >= Math.Abs(_rangeMaxCol + _rangeMinCol - pictureMaxCol - pictureMinCol)));
            }
        }
    }
    View Code

    将RTF格式转成byte[],转换完成后就可以直接写入数据库了。

     foreach (DataRow row in dataTable.Rows)
     {
         byte[] filedBlob = Encoding.Default.GetBytes(row["说明"].ToString());//转成byte[]
     }
    作者:九年新
    本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接,否则保留追究法律责任的权利。
  • 相关阅读:
    微服务架构总结
    微服务-网关服务
    HttpClient-RestTemplate-Feign
    RPC和REST
    Springmvc的拦截器执行顺序及各方法作用
    秒杀系统优化方案(下)吐血整理
    秒杀系统优化方案(上)吐血整理
    分布式session的管理
    缓存设计——缓存和数据库的数据一致性
    个人理解的javascript作用域链与闭包
  • 原文地址:https://www.cnblogs.com/yellow3gold/p/14153890.html
Copyright © 2011-2022 走看看