zoukankan      html  css  js  c++  java
  • Go读取论文并转换为simhash

    package main
    
    import (
    	"database/sql"
    	_ "flag"
    	"fmt"
    	_ "io/ioutil"
    	"log"
    	"os"
    	_ "path"
    	_ "strings"
    	"time"

    	_ "baliance.com/gooxml/document"
    	_ "github.com/go-sql-driver/mysql"
    	"github.com/yanyiwu/gosimhash"
    )
    
    
    func main(){
    
    
    
    	t1 := time.Now()
    
    	Mylog(doc)
    	if err != nil {
    		Mylog(err)
    	}
    
        db, err := sql.Open("mysql", "root:123456@tcp(127.0.0.1:3306)/gzpg_crs_jsj?charset=utf8");
        if err != nil {
            fmt.Println(err);
        }
    	sql :="select s1.paper_id,s2.title_cn,s2.abstract_cn,s2.keyword_cn,s2.title_en,s2.abstract_en,s2.keyword_en,s1.s_content from sf_content s1,sf_paper s2 where  s1.paper_id=s2.paper_id limit 10"
    	rows, err := db.Query(sql)
        if err != nil {
    		fmt.Println(err);
    	}
    	stmt, err := db.Prepare("INSERT  sim_path SET paperid=?,simcode=?")
    	if err != nil {
    		fmt.Println(err);
    	}
    
    	var str string
    	var code string
    	//查询多个
        for rows.Next() {
    		var paper_id int //论文id
    		var title_cn string //中文题目
    		var abstract_cn string //中文摘要
    		var keyword_cn string //中文关键词
    		var title_en string //英文题目
    		var abstract_en string //英文摘要
    		var keyword_en string //英文关键词
    		var s_content string//全文内容
    		
            err = rows.Scan(&paper_id, &title_cn,&abstract_cn,&keyword_cn,&title_en,&abstract_en,&keyword_en,&s_content)
    		str = fmt.Sprintf("%s
     摘要:%s
     关键词:%s
     %s
     Abstract:%s
     Keywords:%s
     %s
    ",title_cn,abstract_cn,keyword_cn,title_en,abstract_en,keyword_en,s_content)
    		code=simhash(str)
    		res, err := stmt.Exec(paper_id, code)
    		if err != nil {
    			fmt.Println(err);
    		}
    		id, err := res.LastInsertId()
    		if err != nil {
    			fmt.Println(err);
    		}
    		fmt.Print("%s成功%s 
    ",id,paper_id);
    	
    	}
    	db.Close()
    	elapsed := time.Since(t1)
    	log.Println("时间花费位:
    " , elapsed)
    
    }
    
    // simhash returns the simhash fingerprint of str as a 64-character string of
    // '0' and '1' characters, most significant bit first.
    //
    // The fingerprint is produced by gosimhash with topN = 1, using the
    // dictionary files under ../dict relative to the working directory.
    //
    // NOTE(review): a new hasher is created and freed on every call; presumably
    // this reloads the dictionary files each time — confirm, and consider
    // sharing one hasher if this becomes a bottleneck.
    func simhash(str string) string {
    	hasher := gosimhash.New("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8")
    	defer hasher.Free()

    	// %064b prints the value in base 2, zero-padded on the left to 64 digits.
    	return fmt.Sprintf("%064b", hasher.MakeSimhash(str, 1))
    }
    
    
    // Mylog appends v as one line to the file 20181105go.log in the working
    // directory, creating the file if it does not exist. Each line carries the
    // TAG prefix followed by the date and the time with microseconds.
    //
    // If the log file cannot be opened, the open error and then v are written
    // through the standard logger instead, so the message is not lost.
    func Mylog(v ...interface{}) {
    	f, err := os.OpenFile("20181105go.log", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
    	if err != nil {
    		// Do not report this through Mylog itself: the open would fail
    		// again and the function would recurse without end.
    		log.Println(err)
    		log.Println(v...)
    		return
    	}
    	defer f.Close()

    	// NOTE(review): TAG is not declared in the visible source; it is expected
    	// to be a package-level string defined elsewhere — confirm.
    	logger := log.New(f, TAG, log.Ldate|log.Ltime|log.Lmicroseconds)
    	logger.Println(v...)
    }
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
  • 相关阅读:
    Linux中zip基本用法
    containerd安装教程
    git拉取远程tag并进行代码crud
    pip环境安装
    Docker资源宿主机监控平台
    Docker部署Kafka单节点
    CRT——新建连接向导关闭了
    Excel——整行上移或下移
    DB2——DB2的字典视图
    Shell——windows上写完放入linux的时候需要注意的问题
  • 原文地址:https://www.cnblogs.com/mengluo/p/9915440.html
Copyright © 2011-2022 走看看