zoukankan      html  css  js  c++  java
  • 获取EMF文件内全部文字, 并按照左上到右下的顺序排序

    因为工作要求, 需要对EMF文件文字内容做分析.....SO, 如下代码出现了

    懒得加注释了, 反正对外接口属性就那么几个, 根据英文猜吧, 很容易的

    说明一下:

      这个东西结果会对所有文字内容按照左上到右下的顺序排序(EMF内数据顺序是根据画图顺序来的, 所以不一定是什么顺序, 但是数据分析就要得到行列关系)

    但是图片本身没有行列概念, 所以我简单借鉴了纯横排版的思路: 对于2个文字元素, 只要其中任意一个的显示范围的垂直中点落在对方的显示范围(上下边界)之内, 就认为它们在同一行; 同一行内再按水平位置从左到右排序

    2015-10-19:

      1.修改了几个排序时的BUG, 增加了一个对显示区域的处理, 最大限度地减少对排版的影响

      2.修改了获取SmallTextOut的处理方式

     

    {
    EMF文件分析单元
    读取EMF内文字元素并排版
    
    最后修改时间 2015-10-19
    
    by: 刘志林
    E-Mail: lzl_17948876@hotmail.com
    }
    
    unit Comm.EMFInfo;
    
    interface
    
    uses
      System.Types, System.Generics.Collections,
      Vcl.Graphics;
    
    type
      {One text element read from a metafile.}
      TEMFStrInfo = record
        DisplayRect: TRect; {display area of the text}
        Text: string; {displayed text (stored trimmed)}
        LineKey: string; {line marker: elements judged to be on the same line share one key}
      end;
      {Pointer to a heap-allocated TEMFStrInfo (allocated with New, released with Dispose).}
      PEMFStrInfo = ^TEMFStrInfo;
    
      {
        Collection of the text elements extracted from one or more metafiles.
        Elements are stored in the order produced by Append and can be looked up
        by index, by text, or through regular expressions over a JSON rendering.
      }
      TEMFStrInfoList = Class
      private
        FList: TList<PEMFStrInfo>; {element records; disposed in Clear and Destroy}
        FDic: TDictionary<string, UInt32>; {maps an element's Text to its index in FList}
        FMaxHeight: Integer; {sum of the heights reported by every Append call so far}
        FJSONStrs: string; {used for a loose text search when the positional lookup fails}
    
        function GetItem(Index: UInt32): TEMFStrInfo;
        function GetCount: UInt32;
        function GetJSONStrs: string;
      public
        constructor Create;
        destructor Destroy; override;
    
        {Reads the text elements of AEMF, sorts them and adds them to the list.
         AHeight receives the largest DisplayRect.Bottom found in AEMF.}
        procedure Append(AEMF: TMetafile; var AHeight: Integer);
        {Removes and releases every element and resets MaxHeight and JSONStr.}
        procedure Clear;
        {Number of stored elements.}
        property Count: UInt32 read GetCount;
        {Copy of the element stored at Index.}
        property Items[Index: UInt32]: TEMFStrInfo read GetItem;
        {Looks up an element by its exact text; returns False when it is not present.}
        function TryGetInfo(AInfoName: string; var AInfo: TEMFStrInfo; var AIndex: UInt32): Boolean;
        {Applies the given regular expressions one after another, each to the first
         match of the previous one; returns True when the final match is non-empty.}
        function StrAnalyze(ALeavePattern: array of string; var AResult: string): Boolean;
        {All collected texts wrapped in '[' ... ']'.}
        property JSONStr: string read GetJSONStrs;
        {Accumulated height of all appended metafiles.}
        property MaxHeight: Integer read FMaxHeight;
      end;
    
    implementation
    
    uses
      System.SysUtils, System.Classes, System.Generics.Defaults,
      System.RegularExpressions,
      Winapi.Windows,
      Vcl.Printers,
      QJSON;
    
    {Flag bits that EnumTextProc tests in the fuOptions field of an
     EMR_SMALLTEXTOUT record to choose the matching record layout.}
    const
      // if set use ANSI version else UNICODE
      SMALLTEXT_TYPE_ANSI = $200;
      // if set use EMR_SMALLTEXTOUT else use EMR_SMALLTEXTOUTCLIP
      SMALLTEXT_TYPE_WITHOUT_CLIP = $100;
    
    // Structures
    type
      {Leading fields shared by all small-text-out record layouts in this unit.
       EnumTextProc reads fuOptions through this type before choosing the
       concrete layout.}
      EMR_SMALLTEXTOUT_HEAD = RECORD
        emr: emr;
        ptlReference: TPoint;
        nChars: DWORD;
        fuOptions: DWORD; // this record type
        // == SMALLTEXT_TYPE_WITHOUT_CLIP
        // == SMALLTEXT_TYPE_ANSI
        // also holds fuOptions like in the ExtTextOut function
        iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
        exScale: Single; { X and Y scales from Page units to .01mm units }
        eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
      END;
    
      PEMRSmallTextOutHead = ^EMR_SMALLTEXTOUT_HEAD;
    
      {Layout used by EnumTextProc when fuOptions has SMALLTEXT_TYPE_ANSI set and
       SMALLTEXT_TYPE_WITHOUT_CLIP clear: a clip rectangle followed by ANSI text.}
      EMR_SMALLTEXTOUTCLIPA = RECORD
        emr: emr;
        ptlReference: TPoint; // might be in negative numbers, so take abs
        nChars: DWORD;
        fuOptions: DWORD; // this record type
        // != SMALLTEXT_TYPE_WITHOUT_CLIP
        // == SMALLTEXT_TYPE_ANSI
        // also holds fuOptions like in the ExtTextOut function
        iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
        exScale: Single; { X and Y scales from Page units to .01mm units }
        eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
        rclClip: TRect;
        cString: Array [0 .. 0] of AnsiChar;
        { This is followed by the string array }
      END;
    
      PEMRSmallTextOutClipA = ^EMR_SMALLTEXTOUTCLIPA;
    
      {Layout used by EnumTextProc when fuOptions has neither SMALLTEXT_TYPE_ANSI
       nor SMALLTEXT_TYPE_WITHOUT_CLIP set: a clip rectangle followed by wide-char text.}
      EMR_SMALLTEXTOUTCLIPW = RECORD
        emr: emr;
        ptlReference: TPoint;
        nChars: DWORD;
        fuOptions: DWORD; // this record type
        // != SMALLTEXT_TYPE_WITHOUT_CLIP
        // != SMALLTEXT_TYPE_ANSI
        // also holds fuOptions like in the ExtTextOut function
        iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
        exScale: Single; { X and Y scales from Page units to .01mm units }
        eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
        rclClip: TRect;
        cString: Array [0 .. 0] of WideChar;
        { This is followed by the string array }
      END;
    
      PEMRSmallTextOutClipW = ^EMR_SMALLTEXTOUTCLIPW;
    
      {Layout used by EnumTextProc when fuOptions has both SMALLTEXT_TYPE_ANSI and
       SMALLTEXT_TYPE_WITHOUT_CLIP set: ANSI text directly after the header fields.}
      EMR_SMALLTEXTOUTA = RECORD
        emr: emr;
        ptlReference: TPoint;
        nChars: DWORD;
        fuOptions: DWORD; // this record type
        // == SMALLTEXT_TYPE_WITHOUT_CLIP
        // == SMALLTEXT_TYPE_ANSI
        // also holds fuOptions like in the ExtTextOut function
        iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
        exScale: Single; { X and Y scales from Page units to .01mm units }
        eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
        cString: Array [0 .. 0] of AnsiChar;
        { This is followed by the string array }
      END;
    
      PEMRSmallTextOutA = ^EMR_SMALLTEXTOUTA;
    
      {Layout used by EnumTextProc when fuOptions has SMALLTEXT_TYPE_WITHOUT_CLIP set
       and SMALLTEXT_TYPE_ANSI clear: wide-char text directly after the header fields.}
      EMR_SMALLTEXTOUTW = RECORD
        emr: emr;
        ptlReference: TPoint;
        nChars: DWORD;
        fuOptions: DWORD; // this record type
        // == SMALLTEXT_TYPE_WITHOUT_CLIP
        // != SMALLTEXT_TYPE_ANSI
        // also holds fuOptions like in the ExtTextOut function
        iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
        exScale: Single; { X and Y scales from Page units to .01mm units }
        eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
        cString: Array [0 .. 0] of WideChar;
        { This is followed by the string array }
      END;
    
      PEMRSmallTextOutW = ^EMR_SMALLTEXTOUTW;
    
    var
      {Unit-wide bitmap created in the initialization section; EnumTextProc passes
       its canvas handle to GetTextExtentPoint32 to measure text.}
      FReferenceDC: VCL.Graphics.TBitmap;
    
    {
      EnumEnhMetaFile callback that collects the text elements of a metafile.

      Parameters:
        DC, lpHTable, nObj - supplied by the enumeration, not used here.
        EMFR   - the record currently being enumerated.
        lpData - the TList<PEMFStrInfo> that receives the elements. It is declared
                 as LPARAM (pointer sized) so that the list reference is not
                 truncated on 64-bit targets.

      For every EMR_EXTTEXTOUTA, EMR_EXTTEXTOUTW and EMR_SMALLTEXTOUT record whose
      trimmed text is not empty and whose display rectangle passes
      _IsEffeetiveRect, one TEMFStrInfo is allocated with New and added to the
      list; the owner of the list has to Dispose it.

      Always returns 1 so that the enumeration continues.
    }
    function EnumTextProc(DC: HDC; lpHTable: PHANDLETABLE; EMFR: PENHMETARECORD;
      nObj: Integer; lpData: LPARAM): Integer; stdcall;
    var
      {declared ahead of the nested routines so that they can reach it}
      nEMFElementList: TList<PEMFStrInfo>;

      {True when ARect is not empty, starts right of x = 0 and is larger than
       4 units in both directions.}
      function _IsEffeetiveRect(const ARect: TRect): Boolean;
      begin
        Result := (not ARect.IsEmpty) and (ARect.Right > 0) and (ARect.Left > 0)
          and (ARect.Bottom - ARect.Top > 4) and (ARect.Right - ARect.Left > 4);
      end;

      {Limits ARect to at most ASize, measured from its top-left corner.}
      procedure _ShrinkRect(var ARect: TRect; ASize: TSize);
      var
        v: Integer;
      begin
        v := ARect.Left + ASize.cx;
        if ARect.Right > v then
          ARect.Right := v;
        v := ARect.Top + ASize.cy;
        if ARect.Bottom > v then
          ARect.Bottom := v;
      end;

      {True when AByteCount bytes starting at AData lie completely inside the
       current record (EMFR^.nSize bytes starting at EMFR).}
      function _InRecord(AData: Pointer; AByteCount: UInt64): Boolean;
      begin
        Result := (NativeUInt(AData) >= NativeUInt(EMFR))
          and (UInt64(NativeUInt(AData) - NativeUInt(EMFR)) + AByteCount <= EMFR^.nSize);
      end;

      {Returns the trimmed text of ACount ANSI characters at AChars, or '' when
       the range is empty or not inside the record.}
      function _AnsiText(AChars: PAnsiChar; ACount: DWORD): string;
      var
        nBuf: AnsiString;
      begin
        Result := '';
        if (ACount = 0) or (not _InRecord(AChars, ACount)) then
          Exit;
        SetString(nBuf, AChars, Integer(ACount));
        {the PAnsiChar cast keeps the text up to the first #0 only}
        Result := Trim(string(PAnsiChar(nBuf)));
      end;

      {Returns the trimmed text of ACount wide characters at AChars, or '' when
       the range is empty or not inside the record.}
      function _WideText(AChars: PWideChar; ACount: DWORD): string;
      var
        nBuf: string;
      begin
        Result := '';
        if (ACount = 0)
          or (not _InRecord(AChars, UInt64(ACount) * SizeOf(WideChar))) then
          Exit;
        SetString(nBuf, AChars, Integer(ACount));
        {the PWideChar cast keeps the text up to the first #0 only}
        Result := Trim(string(PWideChar(nBuf)));
      end;

      {Shared tail of every record type: measures AText, normalizes and shrinks
       ARect to the measured size and stores the element when it is usable.}
      procedure _AddElement(const AText: string; ARect: TRect);
      var
        nSize: TSize;
        nOTR: PEMFStrInfo;
      begin
        if AText = '' then
          Exit;

        nSize.cx := 0;
        nSize.cy := 0;
        Winapi.Windows.GetTextExtentPoint32(FReferenceDC.Canvas.Handle,
          AText, Length(AText), nSize);
        ARect.NormalizeRect;
        _ShrinkRect(ARect, nSize);
        if not _IsEffeetiveRect(ARect) then
          Exit;

        New(nOTR);
        nOTR^.Text := AText;
        nOTR^.DisplayRect := ARect;
        nOTR^.LineKey := '';
        nEMFElementList.Add(nOTR);
      end;

    var
      nEMRTO: PEMRExtTextOut;
      nEMRSTOHead: PEMRSmallTextOutHead;
      nIsAnsi, nNoClip: Boolean;
      nText: string;
      nRect: TRect;
    begin
      Result := 1;
      nEMFElementList := Pointer(lpData);

      if (EMFR^.iType = EMR_EXTTEXTOUTA) then
      begin
        nEMRTO := PEMRExtTextOut(EMFR);
        {offString is a byte offset measured from the start of the record}
        _AddElement(
          _AnsiText(PAnsiChar(PByte(EMFR) + nEMRTO^.EMRText.offString),
            nEMRTO^.EMRText.nChars),
          nEMRTO^.rclBounds);
      end
      else if (EMFR^.iType = EMR_EXTTEXTOUTW) then
      begin
        nEMRTO := PEMRExtTextOut(EMFR);
        _AddElement(
          _WideText(PWideChar(PByte(EMFR) + nEMRTO^.EMRText.offString),
            nEMRTO^.EMRText.nChars),
          nEMRTO^.rclBounds);
      end
      else if EMFR^.iType = EMR_SMALLTEXTOUT then
      begin
        nEMRSTOHead := PEMRSmallTextOutHead(EMFR);
        nIsAnsi := (nEMRSTOHead^.fuOptions and SMALLTEXT_TYPE_ANSI) = SMALLTEXT_TYPE_ANSI;
        nNoClip := (nEMRSTOHead^.fuOptions and SMALLTEXT_TYPE_WITHOUT_CLIP) =
          SMALLTEXT_TYPE_WITHOUT_CLIP;

        if nNoClip then
        begin
          {no clip rectangle stored: start at the reference point and let
           _ShrinkRect cut the area down to the measured text size}
          nRect := Rect(nEMRSTOHead^.ptlReference.X, nEMRSTOHead^.ptlReference.Y,
            MAXWORD, MAXWORD);
          if nIsAnsi then
            nText := _AnsiText(@PEMRSmallTextOutA(EMFR)^.cString[0], nEMRSTOHead^.nChars)
          else
            nText := _WideText(@PEMRSmallTextOutW(EMFR)^.cString[0], nEMRSTOHead^.nChars);
        end
        else
        begin
          if nIsAnsi then
          begin
            nRect := PEMRSmallTextOutClipA(EMFR)^.rclClip;
            nText := _AnsiText(@PEMRSmallTextOutClipA(EMFR)^.cString[0], nEMRSTOHead^.nChars);
          end
          else
          begin
            nRect := PEMRSmallTextOutClipW(EMFR)^.rclClip;
            nText := _WideText(@PEMRSmallTextOutClipW(EMFR)^.cString[0], nEMRSTOHead^.nChars);
          end;
          nRect.TopLeft := nEMRSTOHead^.ptlReference;
        end;

        _AddElement(nText, nRect);
      end;
    end;
    
    type
      {Comparer used to sort text elements by position. Its Compare method also
       writes a shared LineKey into elements it judges to be on the same line.}
      TEMFStrInfoCompare = class(TComparer<PEMFStrInfo>)
      public
        function Compare(const Left, Right: PEMFStrInfo): Integer; override;
      end;
    
    { TEMFStrInfoCompare }
    
    {
      Orders two text elements top-to-bottom, then left-to-right.

      Two elements count as being on the same line when the vertical centre of
      either one lies strictly between the top and bottom of the other. In that
      case both receive the same LineKey (an existing key is reused, Left's key
      taking precedence, otherwise a new GUID is created) and they are ordered by
      the X and then the Y of their centre points. Otherwise the result is the
      vertical position of Left's centre relative to Right's rectangle.
    }
    function TEMFStrInfoCompare.Compare(const Left, Right: PEMFStrInfo): Integer;

      {-1 when AY is at or above ARange.Top, 1 when at or below ARange.Bottom, else 0}
      function _VerticalSide(AY: Integer; const ARange: TRect): Int8;
      begin
        Result := 0;
        if AY <= ARange.Top then
          Result := -1
        else if AY >= ARange.Bottom then
          Result := 1;
      end;

      {-1, 0 or 1 depending on how A compares to B}
      function _Order(A, B: Integer): Integer;
      begin
        if A < B then
          Result := -1
        else if A > B then
          Result := 1
        else
          Result := 0;
      end;

    var
      nLeftMid, nRightMid: TPoint;
      nLeftSide, nRightSide: Int8;
    begin
      nLeftMid := Left^.DisplayRect.CenterPoint;
      nRightMid := Right^.DisplayRect.CenterPoint;

      nLeftSide := _VerticalSide(nLeftMid.Y, Right^.DisplayRect);
      nRightSide := _VerticalSide(nRightMid.Y, Left^.DisplayRect);

      {different lines: the vertical relation alone decides}
      if (nLeftSide <> 0) and (nRightSide <> 0) then
      begin
        Result := nLeftSide;
        Exit;
      end;

      {same line: make both elements carry one common line key}
      if (Left^.LineKey = '') and (Right^.LineKey = '') then
        Left^.LineKey := TGUID.NewGuid.ToString;
      if Left^.LineKey <> '' then
        Right^.LineKey := Left^.LineKey
      else
        Left^.LineKey := Right^.LineKey;

      {within a line order by X of the centre, ties broken by Y}
      Result := _Order(nLeftMid.X, nRightMid.X);
      if Result = 0 then
        Result := _Order(nLeftMid.Y, nRightMid.Y);
    end;
    
    { TEMFStrInfoList }
    
    {
      Reads all text elements of AEMF, sorts them top-left to bottom-right and
      appends them to this list.

      Parameters:
        AEMF    - the metafile to read.
        AHeight - receives the largest DisplayRect.Bottom of the elements read
                  from AEMF (0 when there are none).

      Each appended element is moved down by the current MaxHeight, so that the
      elements of successive metafiles are stacked below each other; MaxHeight is
      then increased by AHeight. The texts are also appended to the JSON text
      behind JSONStr, one JSON array per detected line.
    }
    procedure TEMFStrInfoList.Append(AEMF: TMetafile; var AHeight: Integer);
    var
      nList: TList<PEMFStrInfo>;
      i, nMoved, nIndex: Integer;
      nCompare: TEMFStrInfoCompare;
      nPI: PEMFStrInfo;
      nTmpLineKey, nTmpJSONStr: string;
      nJ, nJLine: TQJson;
    begin
      nMoved := 0; {number of records whose ownership has passed to FList}
      nList := TList<PEMFStrInfo>.Create;
      try
        {read the metafile elements into the temporary list}
        EnumEnhMetafile(0, AEMF.Handle, @EnumTextProc, Pointer(nList), Rect(0, 0, 0, 0));

        nCompare := TEMFStrInfoCompare.Create;
        try
          {sort}
          try
            nList.Sort(nCompare);
          finally
            nCompare.Free;
          end;
        except
          {deliberately best-effort: when sorting fails the elements are kept in
           whatever order the list is in at that point}
        end;

        {compute the height and register the element texts in the dictionary}
        AHeight := 0;
        nJ := TQJson.Create;
        try
          nJ.DataType := jdtArray;
          nJLine := nil;
          nTmpLineKey := '';
          for i := 0 to nList.Count - 1 do
          begin
            nPI := nList[i];
            if nPI^.LineKey = '' then
              nPI^.LineKey := TGUID.NewGuid.ToString; {no shared line marker yet: give it its own}
            {a different line marker starts a new line}
            if (nTmpLineKey = '') or (not SameText(nTmpLineKey, nPI^.LineKey)) then
              nJLine := nil;
            {remember the current line marker}
            nTmpLineKey := nPI^.LineKey;

            if nPI^.DisplayRect.Bottom > AHeight then
              AHeight := nPI^.DisplayRect.Bottom;

            OffsetRect(nPI^.DisplayRect, 0, FMaxHeight);
            nIndex := FList.Add(nPI);
            nMoved := i + 1; {from here on FList owns the record}
            FDic.AddOrSetValue(nPI^.Text, nIndex);

            if (nJLine = nil) then
              nJLine := nJ.AddArray('');

            nJLine.Add.AsString := nPI^.Text;
          end;

          {a metafile without text must not add an empty entry, otherwise the
           joined JSON text would end up with a dangling ','}
          if nList.Count > 0 then
          begin
            nTmpJSONStr := nJ.Encode(False);
            {strip the outer '[' and ']' so that the pieces can be joined with ','}
            nTmpJSONStr := Copy(nTmpJSONStr, 2, Length(nTmpJSONStr) - 2);
            if FJSONStrs = '' then
              FJSONStrs := nTmpJSONStr
            else
              FJSONStrs := FJSONStrs + ',' + nTmpJSONStr;
          end;
        finally
          nJ.Free;
        end;
        FMaxHeight := FMaxHeight + AHeight;
      finally
        {after an exception some records may not have reached FList: release them}
        for i := nMoved to nList.Count - 1 do
          Dispose(nList[i]);
        nList.Free;
      end;
    end;
    
    {Releases every stored element and resets the list to its initial, empty state.}
    procedure TEMFStrInfoList.Clear;
    var
      nIdx: Integer;
    begin
      {the records were allocated with New, so each one is disposed explicitly}
      for nIdx := FList.Count - 1 downto 0 do
        Dispose(FList[nIdx]);
      FList.Clear;
      FDic.Clear;

      FJsonStrs := '';
      FMaxHeight := 0;
    end;
    
    {Creates an empty list: no elements, no JSON text, accumulated height 0.}
    constructor TEMFStrInfoList.Create;
    begin
      FJsonStrs := '';
      FMaxHeight := 0;

      {containers for the element records and for the text -> index lookup}
      FList := TList<PEMFStrInfo>.Create;
      FDic := TDictionary<string, UInt32>.Create;
    end;
    
    {Releases every stored element record and the internal containers.}
    destructor TEMFStrInfoList.Destroy;
    var
      i: Integer;
    begin
      {Destroy also runs when the constructor raised before FList was created,
       so the list must be checked before it is walked}
      if FList <> nil then
        for i := 0 to FList.Count - 1 do
          Dispose(FList[i]);
      FList.Free;
      FDic.Free;
      inherited;
    end;
    
    {Getter of Count: the number of elements currently stored.}
    function TEMFStrInfoList.GetCount: UInt32;
    var
      nStored: Integer;
    begin
      nStored := FList.Count;
      Result := nStored;
    end;
    
    {Getter of Items: returns a copy of the element record stored at Index.}
    function TEMFStrInfoList.GetItem(Index: UInt32): TEMFStrInfo;
    var
      nEntry: PEMFStrInfo;
    begin
      nEntry := FList.Items[Index];
      Result := nEntry^;
    end;
    
    {Getter of JSONStr: the collected JSON text wrapped in '[' and ']'.}
    function TEMFStrInfoList.GetJSONStrs: string;
    begin
      Result := Concat('[', FJSONStrs, ']');
    end;
    
    {
      Narrows the collected JSON text down with a chain of regular expressions.

      Parameters:
        ALeavePattern - patterns applied in order; the first pattern runs on the
                        collected JSON text (without the outer brackets), every
                        following pattern on the first match of the previous one.
        AResult       - receives the first match of the last pattern, or '' as
                        soon as one pattern does not match.

      Returns True when AResult is not empty.
      Raises Exception when evaluating a pattern fails.
    }
    function TEMFStrInfoList.StrAnalyze(ALeavePattern: array of string; var AResult: string): Boolean;

      {Returns the first match of APattern in AData, or '' when nothing matches.}
      function _RegExAnalyze(AData, APattern: string): string;
      var
        nMatches: TMatchCollection;
      begin
        {a string Result is not cleared automatically; without this line a failed
         match could hand back a stale value}
        Result := '';
        nMatches := TRegEx.Matches(AData, APattern, [roMultiLine]);
        if nMatches.Count > 0 then
          Result := nMatches.Item[0].Value;
      end;

    var
      i: Integer;
      nTmpData: string;
    begin
      AResult := '';
      try
        nTmpData := FJSONStrs;
        for i := Low(ALeavePattern) to High(ALeavePattern) do
        begin
          nTmpData := _RegExAnalyze(nTmpData, ALeavePattern[i]);
          if nTmpData = '' then
            Break;
        end;
        AResult := nTmpData;
      except
        on E: Exception do
          raise Exception.CreateFmt('正则分析失败[%s]', [E.Message]);
      end;
      Result := AResult <> '';
    end;
    
    {
      Looks up an element by its exact text.
      On success AIndex receives the index registered for AInfoName, AInfo a copy
      of the element at that index, and the result is True. On failure AInfo is
      left untouched and the result is False.
    }
    function TEMFStrInfoList.TryGetInfo(AInfoName: string; var AInfo: TEMFStrInfo; var AIndex: UInt32): Boolean;
    begin
      if not FDic.TryGetValue(AInfoName, AIndex) then
        Exit(False);

      AInfo := FList[AIndex]^;
      Result := True;
    end;
    
    initialization
      {shared 2048 x 2048, 24-bit bitmap whose canvas is used to measure text}
      FReferenceDC := VCL.Graphics.TBitmap.Create;
      FReferenceDC.PixelFormat := pf24bit;
      FReferenceDC.Width := 2048;
      FReferenceDC.Height := 2048;

    finalization
      {release the measuring bitmap when the unit is unloaded}
      FreeAndNil(FReferenceDC);

    end.
  • 相关阅读:
    【WIN10】我的第一個WIN10-UWP應用——古文觀止
    【WIN10】文本圖標
    【WIN10】VisualStateManager使用說明
    【WIN10】Storyboard動畫板
    【WIN10】Bind、Binding與Converter的使用
    struts执行过程
    在jsp中的局部和全局变量
    jsp页面中的:<%@ page contentType="text/html; charset=utf-8" language="java"%>的作用及含义
    jsp中的this
    在Java接口中怎样访问定义的常量呢?
  • 原文地址:https://www.cnblogs.com/lzl_17948876/p/4683484.html
Copyright © 2011-2022 走看看