zoukankan      html  css  js  c++  java
  • 20170601xlVBA正则表达式提取体检数据

    Public Sub GetFirst()
        GetDataFromWord "初检"
    End Sub
    
    Public Sub GetDataFromWord(ByVal SheetName As String)
        AppSettings
        'On Error GoTo ErrHandler
        Dim StartTime, UsedTime As Variant
        StartTime = VBA.Timer
        'Input code here
    
        Dim Wb As Workbook
        Dim Sht As Worksheet
        Dim oSht As Worksheet
        Dim Rng As Range
        Dim Arr As Variant
    
        Dim wdApp As Word.Application
        Dim wdDoc As Word.Document
        Dim wdRng As Word.Range
    
    
        'Const SHEET_NAME As String = "提取信息"
        Set Wb = Application.ThisWorkbook
        Set Sht = Wb.Worksheets(SheetName)
    
        Dim FilePath As String
        With Application.FileDialog(msoFileDialogFilePicker)
            .AllowMultiSelect = False
            .InitialFileName = Wb.Path
            .Title = "提取" & SheetName & "数据"
            .Filters.Clear
            .Filters.Add "Word文档", "*.rtf*"
            If .Show = -1 Then
                FilePath = .SelectedItems(1)
            Else
                MsgBox "您没有选中任何文件夹,本次汇总中断!"
                Exit Sub
            End If
        End With
    
        Debug.Print FilePath
    
    
    
        Set wdApp = New Word.Application
        Set wdDoc = wdApp.Documents.Open(FilePath)
        Application.StatusBar = ">>>>>>>>Positioning & Replacing >>>>>>>>"
        PositioningClear wdDoc, 5    '定位删除英文行 避免正则提取造成干扰
    
    
        Application.StatusBar = ">>>>>>>>Regexpress Getting array >>>>>>>>"
        Arr = RegGetArray(wdDoc.Content.Text)    '正则从全文提取内容 存入数组
        wdDoc.Close False    '关闭doc
        wdApp.Quit    '退出app
        Set wdApp = Nothing
        Set wdDoc = Nothing
    
    
        With Sht
            .Cells.Clear
            .Range("A1:D1").Value = Array("大项", "小项", "D值", "E值")
            Set Rng = .Range("A2").Resize(UBound(Arr, 2), UBound(Arr))
            Rng.Value = Application.WorksheetFunction.Transpose(Arr)
            Sort2003 .UsedRange
        End With
    
    
        UsedTime = VBA.Timer - StartTime
        Debug.Print "UsedTime:" & Format(UsedTime, "0.000 Seconds")
        'MsgBox "UsedTime:" & Format(UsedTime, "0.000 Seconds"), vbOKOnly, "NextSeven  QQ "
    ErrorExit:
        Set Wb = Nothing
        Set Sht = Nothing
        Set Rng = Nothing
        AppSettings False
    
        On Error Resume Next
        wdApp.Quit
    
        Exit Sub
    ErrHandler:
        If Err.Number <> 0 Then
            MsgBox Err.Description & "!", vbCritical, "NextSeven QQ "
            Debug.Print Err.Description
            Err.Clear
            Resume ErrorExit
        End If
    End Sub
    Public Sub AppSettings(Optional IsStart As Boolean = True)
        If IsStart Then
            Application.ScreenUpdating = False
            Application.DisplayAlerts = False
            Application.Calculation = xlCalculationManual
            Application.StatusBar = ">>>>>>>>Macro Is Running>>>>>>>>"
        Else
            Application.ScreenUpdating = True
            Application.DisplayAlerts = True
            Application.Calculation = xlCalculationAutomatic
            Application.StatusBar = False
        End If
    End Sub
    Function RegGetArray(ByVal OrgText As String) As String()
        Dim Reg As Object, Mh As Object, OneMh As Object
        Dim Reg2 As Object
    
        Dim Arr() As String, Index As Long
        Dim Elm As String
        Set Reg = CreateObject("Vbscript.Regexp")
        Set Reg2 = CreateObject("Vbscript.Regexp")
    
        Reg2.Global = True
    
    
        With Reg
            'OrgText = Application.ActiveDocument.Content
            .MultiLine = True
            .Global = True
            .Ignorecase = False
            '可用
            '.Pattern = "(?:s)?(S*?)?s? *" & "(?:[ ])([^ ][^
    v]*?)s+?(D=[d.]+)s+(E=[d.]+)[s]+?"
            .Pattern = "(?:s+?)([一-龥;,,]*?)?s? *" & "(?:[ ])([^ ][^
    v]*?)s+?(D=[d.]+)s+(E=[d.]+)[s]+?"
            Set Mh = .Execute(OrgText)
            Index = 0
            ReDim Arr(1 To 4, 1 To 1)
            For Each OneMh In Mh
                Index = Index + 1
                ReDim Preserve Arr(1 To 4, 1 To Index)
                If OneMh.submatches(0) <> "" Then Elm = OneMh.submatches(0)
    
                Reg2.Pattern = "[;,,]?(左视图|前视图|纵切面)+[;,,]?"
                Arr(1, Index) = Reg2.Replace(Elm, "")
    
    
                Reg2.Pattern = "[s#G]"
                Arr(2, Index) = Reg2.Replace(OneMh.submatches(1), "")
                'Debug.Print OneMh.submatches(2)
                Arr(3, Index) = Split(OneMh.submatches(2), "=")(1)
                'Debug.Print OneMh.submatches(3)
                Arr(4, Index) = Split(OneMh.submatches(3), "=")(1)
            Next OneMh
        End With
        RegGetArray = Arr
        Set Reg = Nothing: Set Mh = Nothing
        Set Reg2 = Nothing
    End Function
    
    Public Sub PositioningClear(ByVal OpenDoc As Word.Document, ByVal Times As Long)
        Dim wdRng As Word.Range
        Dim lngStart As Long
        Dim lngEnd As Long
        Dim lngTime As Long
        For lngTime = 1 To Times
            lngEnd = OpenDoc.Content.End
            With OpenDoc.Content.Find
                .ClearFormatting
                .Replacement.ClearFormatting
                .Text = "ALIMENTARY SYSTEM"
                .Replacement.Text = ""
                If .Execute Then
                    lngStart = .Parent.Start
                    Set wdRng = OpenDoc.Range(lngStart, lngEnd)
                End If
            End With
    
            If Not wdRng Is Nothing Then
                With wdRng.Find
                    .ClearFormatting
                    .Replacement.ClearFormatting
                    .Text = "[^l^13][A-Za-z0-9- ,;:.]@[^l^13]"
                    .MatchWildcards = True
                    .Wrap = wdFindStop
                    .Forward = True
                    .Replacement.Text = "^l"
                    'n = 0
                    .Execute Replace:=wdReplaceAll
                    'Do While .Execute
                    '   n = n + 1
                    '   Debug.Print n; "____________"; .Parent.Text
                    '    If n > 1000 Then Exit Do
                    'Loop
                End With
            End If
            Set wdRng = Nothing
        Next lngTime
    
    End Sub
    
    Sub Sort2003(ByVal RngWithTitle As Range, Optional SortColumnNo As Long = 1)
    'key1代表第一个排序的列的关键字
    'Order1表示第一字段的排序方式,赋值为xlAscending表示升序,改为xlDescending表示降序。
    'Header表示是否包含标题,赋值为xlYes表示标题不参与排序,赋值为xlNo表示标题也参数排序
    'MatchCase表示排序时是否区分大小写,赋值为False表示不区分大小写
    'Orientation表示排序方向,赋值为xlTopToBottom或者xlSortColumns表示按列排序,赋值为xlSortRows 表示排行排序
    'SortMethod用于限制对汉字排序时的排序方式,赋值为xlPinYin表示按拼音排序,赋值为xlStroke表示按笔划排序
        With RngWithTitle
            .Sort Key1:=RngWithTitle.Cells(1, SortColumnNo), Order1:=xlAscending, Header:=xlYes, _
                  MatchCase:=False, Orientation:=xlTopToBottom, SortMethod:=xlPinYin
        End With
    End Sub
    

      

  • 相关阅读:
    java_000.JAVA语言课堂测试试卷01
    2018暑假第八周总结&暑假代码成果总结(8.27-9.2)
    C#_最基础的计算器
    Java_学生信息管理系统——数组版——初次编写
    Java_计算器001,支持加减乘除,括号,小数点,√,^ 运算
    Java_计算器001,支持加减乘除,括号运算
    2018暑假第七周总结(8.20-8.26)
    2018暑假第六周总结(8.13-8.19)
    2018暑假第五周总结(8.6-8.12)
    poj 1984(带权并查集)
  • 原文地址:https://www.cnblogs.com/nextseven/p/7129186.html
Copyright © 2011-2022 走看看