zoukankan      html  css  js  c++  java
  • Go读取论文并转换为simhash

    package main
    
    import (
    	"database/sql"
    	_ "flag"
    	"fmt"
    	_ "io/ioutil"
    	"log"
    	"os"
    	_ "path"
    	_ "strings"
    	"time"

    	_ "baliance.com/gooxml/document"
    	_ "github.com/go-sql-driver/mysql"
    	"github.com/yanyiwu/gosimhash"
    )
    
    
    func main(){
    
    
    
    	t1 := time.Now()
    
    	Mylog(doc)
    	if err != nil {
    		Mylog(err)
    	}
    
        db, err := sql.Open("mysql", "root:123456@tcp(127.0.0.1:3306)/gzpg_crs_jsj?charset=utf8");
        if err != nil {
            fmt.Println(err);
        }
    	sql :="select s1.paper_id,s2.title_cn,s2.abstract_cn,s2.keyword_cn,s2.title_en,s2.abstract_en,s2.keyword_en,s1.s_content from sf_content s1,sf_paper s2 where  s1.paper_id=s2.paper_id limit 10"
    	rows, err := db.Query(sql)
        if err != nil {
    		fmt.Println(err);
    	}
    	stmt, err := db.Prepare("INSERT  sim_path SET paperid=?,simcode=?")
    	if err != nil {
    		fmt.Println(err);
    	}
    
    	var str string
    	var code string
    	//查询多个
        for rows.Next() {
    		var paper_id int //论文id
    		var title_cn string //中文题目
    		var abstract_cn string //中文摘要
    		var keyword_cn string //中文关键词
    		var title_en string //英文题目
    		var abstract_en string //英文摘要
    		var keyword_en string //英文关键词
    		var s_content string//全文内容
    		
            err = rows.Scan(&paper_id, &title_cn,&abstract_cn,&keyword_cn,&title_en,&abstract_en,&keyword_en,&s_content)
    		str = fmt.Sprintf("%s
     摘要:%s
     关键词:%s
     %s
     Abstract:%s
     Keywords:%s
     %s
    ",title_cn,abstract_cn,keyword_cn,title_en,abstract_en,keyword_en,s_content)
    		code=simhash(str)
    		res, err := stmt.Exec(paper_id, code)
    		if err != nil {
    			fmt.Println(err);
    		}
    		id, err := res.LastInsertId()
    		if err != nil {
    			fmt.Println(err);
    		}
    		fmt.Print("%s成功%s 
    ",id,paper_id);
    	
    	}
    	db.Close()
    	elapsed := time.Since(t1)
    	log.Println("时间花费位:
    " , elapsed)
    
    }
    
    // simhash computes the simhash fingerprint of str and returns it as a string
    // of exactly 64 '0'/'1' characters, most significant bit first.
    //
    // A hasher is built from the dictionary files under ../dict on every call
    // and freed before the function returns.
    func simhash(str string) string {
    	h := gosimhash.New("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8")
    	defer h.Free()

    	// NOTE(review): the second argument is presumably the number of
    	// top-weighted keywords fed into the hash; confirm against gosimhash.
    	bits := h.MakeSimhash(str, 1)

    	// Zero-padded binary rendering: 64 digits, highest bit first.
    	return fmt.Sprintf("%064b", uint64(bits))
    }
    
    
    // Mylog appends one line built from v to the file 20181105go.log in the
    // current working directory, creating the file if needed. Each line carries
    // the TAG prefix followed by date and time with microseconds.
    //
    // If the log file cannot be opened, the open error and v are written with
    // the standard logger instead and nothing is written to the file.
    //
    // NOTE(review): TAG is not defined in the part of the file visible here; it
    // must exist at package level for this function to compile.
    func Mylog(v ...interface{}) {
    	f, err := os.OpenFile("20181105go.log", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
    	if err != nil {
    		// Calling Mylog again here would try the same file, fail the same
    		// way and recurse without end, so report through the standard logger.
    		log.Println("Mylog: cannot open log file:", err)
    		log.Println(v...)
    		return
    	}
    	defer f.Close()

    	logger := log.New(f, TAG, log.Ldate|log.Ltime|log.Lmicroseconds)
    	logger.Println(v...)
    }
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
  • 相关阅读:
    关于.NET2.0下的脱机文件App_Offline.htm文件
    VS2005中安装AJAX指南
    Ajax中“Sys未定义”错误的解决方法汇总
    GridView控件模板列中的按钮单击时,在RowDataBound事件中获取该行行号
    用户控件中使用User.Identity
    同一个页面中的不同Button分别验证某一部分输入控件
    hdu Tempter of the Bone
    acm steps chapter1总结
    ORACLE中的TOPN查询(TOPN分析),分页查询
    MySQL中如何实现select top n
  • 原文地址:https://www.cnblogs.com/mengluo/p/9915440.html
Copyright © 2011-2022 走看看