zoukankan      html  css  js  c++  java
  • 中科院分词工具使用

      中科院分词工具(NLPIR)的 Java 配置方法与其安装包自带的说明文档相同,这里不再赘述。下面是通过 JNA 调用该工具进行分词和关键词提取的代码:

    package xieru;
    
    import hello.Hello.CLibrary;
    
    import java.io.BufferedReader;
    import java.io.BufferedWriter;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.UnsupportedEncodingException;
    import java.nio.charset.Charset;
    import java.util.regex.Pattern;
    
    import com.csvreader.CsvReader;
    import com.csvreader.CsvWriter;
    import com.sun.jna.Library;
    import com.sun.jna.Native;
    
    public class WriteSeparatewords {
        public interface CLibrary extends Library {
    
            // 定义并初始化接口的静态变量
            CLibrary Instance = (CLibrary) Native.loadLibrary(
                    "E:\workplace\hello\NLPIR", CLibrary.class);
    
            // printf函数声明
            public boolean NLPIR_Init(byte[] sDataPath, int encoding,
                    byte[] sLicenceCode);
    
            public String NLPIR_ParagraphProcess(String sSrc, int bPOSTagged);
            
            public String NLPIR_GetKeyWords(String sLine,int nMaxKeyLimit,boolean bWeightOut);
            
            
            public void NLPIR_Exit();
        }
    
        public static String transString(String aidString, String ori_encoding,
                String new_encoding) {
            try {
                return new String(aidString.getBytes(ori_encoding), new_encoding);
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
            }
            return null;
        }
        public static void it(String inFile,String outFile)throws IOException {
        
            
            File file=new File(inFile);
            FileInputStream fi=new FileInputStream(file);
              CsvReader cr=new CsvReader(fi,',', Charset.forName("GBK")); 
              cr.readHeaders();
              String[] readerS;
              FileWriter wr=new FileWriter(outFile);
              BufferedWriter bw=new BufferedWriter(wr);
              while(cr.readRecord()){
                  readerS=cr.getValues();
                  if(readerS[10].equals("技术")){
                      for(int i=0;i<readerS.length-1;i++)
                        bw.write("""+readerS[i]+"""+",");
                      bw.write("""+readerS[readerS.length-1]+""");
                      bw.newLine();
                  }
                  
                
              }
              bw.flush();
              bw.close();
            
        }
        public static void fenci(String inFile,String outFile) throws IOException{
            String argu = "";
            
            String system_charset = "UTF-8";
            int charset_type = 1;
            // int charset_type = 0;
            // 调用printf打印信息
            if (!CLibrary.Instance.NLPIR_Init(argu.getBytes(system_charset),
                    charset_type, "0".getBytes(system_charset))) {
                System.err.println("初始化失败!");
            }
            String filePath=inFile;
            File file=new File(filePath);
            FileInputStream fi=new FileInputStream(file);
            CsvReader cr=new CsvReader(fi,',', Charset.forName("GBK"));
            cr.readHeaders();
            String[] readerS;
           FileWriter wr=new FileWriter(outFile);
           BufferedWriter bw = new BufferedWriter(wr);
    
            while(cr.readRecord()){
                
                readerS=cr.getValues();
    
                String nativeBytes=null;
                nativeBytes = CLibrary.Instance.NLPIR_ParagraphProcess(readerS[1], 3);
                bw.write(readerS[0]+","+"""+nativeBytes+"""+",");
                System.out.println("分词结果为: " + nativeBytes);
                String nativeByte = CLibrary.Instance.NLPIR_GetKeyWords(readerS[1],10,true);
                bw.write(nativeByte);
                System.out.println("关键词提取结果是:"+nativeByte);
                bw.newLine();
                System.out.println("-----------------------------------");
                
            }
            bw.flush();
            bw.close();
    
    
                CLibrary.Instance.NLPIR_Exit();
            
        }
        public static void main(String[] args) throws Exception {
    //        WriteSeparatewords.fenci("F:/c/zhiweiyaoqiu.csv", "F:/c/fenci.csv");
                WriteSeparatewords.it("F:/c/zhaopinxinxi.csv", "F:/c/it.csv");
    
        }
    }
  • 相关阅读:
    关于==和equals的区别和联系,面试这么回答就可以
    (附运行结果和截图)关于try{return}finally中都有return 运行结果测试之旅
    [已解决]踩过的坑之mysql连接报“Communications link failure”错误
    JVM虚拟机----运行时数据区-------方法区
    JVM虚拟机------运行时数据区------堆
    JVM虚拟机-----运行时数据区-----本地方法栈
    JVM虚拟机栈------运行时数据区------方法的调用
    JVM虚拟机-----运行时数据区------动态链接
    JVM虚拟机栈------运行时数据区-------栈顶缓存技术
    JVM虚拟机-----运行时数据区-----JVM虚拟机栈-----操作数栈
  • 原文地址:https://www.cnblogs.com/herefree/p/5657621.html
Copyright © 2011-2022 走看看