zoukankan      html  css  js  c++  java
  • 词频统计 in office

    ROSTCM6

     1. http://www.writewords.org.uk/word_count.asp
    2. http://darylkinsman.ca/tools/wordfreq.shtml
    3. http://www.wordcounter.com/

    VBA macro of    word

    Sub ChineseCharCounting()
            '统计汉字的字词频,并按降序排序
            '中文词语的判断与Word的词典关联
            Dim a As Byte
            Dim n As Long
            Dim TF As Boolean
            Dim filetext As String
            Dim d
            Dim Wd As Range
            Dim W As Range
            Dim b
            Dim e As Long
            Dim c() As String
            Dim i As Long
            Dim temp As String
            Dim st As Single
           
            a = MsgBox("词频统计请按“是”,字频统计请按“否”", vbYesNo, "中文字词频统计")
            st = Timer
            Application.ScreenUpdating = False
            n = ActiveDocument.Content.ComputeStatistics(wdStatisticFarEastCharacters)
            If ActiveDocument.Content.Text Like "*[【】〖〗《》〈〉〔〕]*" Then TF = True
            With ActiveDocument.Content.Find
                .Text = "[【】〖〗《》〈〉〔〕]"
                .MatchWildcards = True
                .Execute Replace:=wdReplaceAll
            End With
            Set d = CreateObject("Scripting.Dictionary")
            If a = vbYes Then
                For Each Wd In ActiveDocument.Words
                    With Wd
                        If .Start < e Then .Start = e
                        e = .End
                        If .Text Like "*[一-龥]*" And Len(.Text) > 1 Then
                            If .Text Like "*[!一-龥]*" = False And .Words.Count = 1 Then
                                d(.Text) = d(.Text) + 1
                            Else
                                For i = 1 To Len(.Text)
                                    If Mid(.Text, i, 1) Like "[!一-龥]" Then Exit For
                                Next
                                With .Duplicate
                                    .End = .Start + i - 1
                                    For Each W In .Words
                                        With W
                                            If Len(.Text) > 1 Then
                                                If Right(.Text, 1) Like "[!一-龥]" Then .End = .End - 1
                                                If .Text Like "*[!一-龥]*" = False Then d(.Text) = d(.Text) + 1
                                            End If
                                        End With
                                    Next
                                End With
                            End If
                        End If
                    End With
                Next
            Else
                filetext = ActiveDocument.Content.Text
                For i = 1 To Len(filetext)
                    temp = Mid(filetext, i, 1)
                    If temp Like "[一-龥]" Then d(temp) = d(temp) + 1
                Next
            End If
            b = d.keys
            ReDim c(UBound(b))
            For i = 0 To UBound(b)
                c(i) = b(i) & vbTab & d(b(i))
            Next
            If TF = True Then ActiveDocument.Undo 1
           
            With Documents.Add.Content
                .Text = "文档共有" & n & "个中文字符。共提取到" & d.Count _
                    & IIf(a = 6, "个中文词语", "个不同的汉字") & ",其出现次数分别为:" & vbCrLf & Join(c, vbCrLf)
                .Parent.DefaultTabStop = .Characters.First.Font.Size * 6
                .MoveStart wdParagraph
                .Sort , 2, wdSortFieldNumeric, wdSortOrderDescending, 1, , , , , , wdSortSeparateByTabs
            End With
            MsgBox "提取完毕。用时" & Format(Timer - st, "0") & "秒。"
            Application.ScreenUpdating = True
        End Sub

  • 相关阅读:
    概率派VS贝叶斯派
    Numpy-数组array操作
    Numpy基础
    PCA基本原理
    编程语言
    卷积神经网络基础
    IntelliJ IDEA Merge
    Mybatis 问题总结
    Lambda用法
    Map键值对取值, key是在"|"转义, value是在::取值
  • 原文地址:https://www.cnblogs.com/SZLLQ2000/p/5120239.html
Copyright © 2011-2022 走看看