zoukankan      html  css  js  c++  java
  • 基于rust 编写的高性能全文检索服务

    sonic 是基于 Rust 编写的高性能全文检索服务,可以作为 Elasticsearch 的轻量替代方案。它没有提供 HTTP 接口,而是通过 TCP 协议通信;
    目前已经有了多种语言的客户端包装库,使用起来比较方便。

    参考使用

    • 环境准备
     
    # Compose file that starts the two search backends used by the Go example.
    version: "3"
    services:
       # Sonic search server, pinned to the v1.3.0 image.
       sonic:
          image:  valeriansaliou/sonic:v1.3.0
          # Host port 1491 -> container port 1491; the Go example connects to localhost:1491.
          ports: 
          - "1491:1491"
          # Mounts the local ./config.cfg into the container as /etc/sonic.cfg.
          # NOTE(review): config.cfg presumably has to define the password the client
          # passes ("dalong" in the Go example) - confirm against the Sonic config.
          volumes:
          - "./config.cfg:/etc/sonic.cfg"
       # MeiliSearch server. No image tag is given, so the default tag is pulled.
       meilisearch:
          image:  getmeili/meilisearch
          # Host port 7700 -> container port 7700; the Go example uses http://127.0.0.1:7700.
          ports: 
          - "7700:7700"
    • golang 代码
    package main
    import (
       "fmt"
       "os"
       "github.com/expectedsh/go-sonic/sonic"
       "github.com/meilisearch/meilisearch-go"
    )
    func main() {
       ingester, err := sonic.NewIngester("localhost", 1491, "dalong")
       if err != nil {
          panic(err)
       }
       _ = ingester.BulkPush("movies", "general", 3, []sonic.IngestBulkRecord{
          {Object: "id:6ab56b4kk3", Text: "Star wars"},
          {Object: "id:5hg67f8dg5", Text: "Spider man"},
          {Object: "id:1m2n3b4vf6", Text: "Batman"},
          {Object: "id:1111111", Text: "荣锋亮 测试应用 技术测试"},
          {Object: "id:68d96h5h9d0", Text: "This is another movie"},
       })
       _ = ingester.BulkPush("movies", "general", 3, []sonic.IngestBulkRecord{
          {Object: "id:6ab56b4kk3", Text: "Star wars"},
          {Object: "id:5hg67f8dg5", Text: "Spider man"},
          {Object: "id:1m2n3b4vf6", Text: "Batman"},
          {Object: "id:1111112", Text: "111荣锋亮 测试应用 技术测试"},
          {Object: "id:68d96h5h9d0", Text: "This is another movie"},
       })
       _ = ingester.BulkPush("movies", "general", 3, []sonic.IngestBulkRecord{
          {Object: "id:6ab56b4kk3", Text: "Star wars"},
          {Object: "id:5hg67f8dg5", Text: "Spider man"},
          {Object: "id:1m2n3b4vf6", Text: "Batman"},
          {Object: "id:1111113", Text: "111荣锋亮 测试应用 技术测试"},
          {Object: "id:68d96h5h9d0", Text: "This is another movie"},
       })
       search, err := sonic.NewSearch("localhost", 1491, "dalong")
       if err != nil {
          panic(err)
       }
       results, _ := search.Query("movies", "general", "测", 10, 0)
       fmt.Println(results)
       var client = meilisearch.NewClient(meilisearch.Config{
          Host: "http://127.0.0.1:7700",
       })
       // Create an index if your index does not already exist
       if index, _ := client.Indexes().Get("books"); index.UID == "" {
          fmt.Println("create index")
          _, err = client.Indexes().Create(meilisearch.CreateIndexRequest{
             UID: "books",
          })
          if err != nil {
             fmt.Println(err)
             os.Exit(1)
          }
       } else {
          fmt.Println("index is created")
       }
       documents := []map[string]interface{}{
          {"book_id": 123, "title": "Pride and Prejudice"},
          {"book_id": 456, "title": "Le Petit Prince"},
          {"book_id": 1, "title": "Alice In Wonderland"},
          {"book_id": 1344, "title": "The Hobbit"},
          {"book_id": 1345, "title": "荣锋亮 测试 应用"},
          {"book_id": 1346, "title": "荣锋亮 测试 应用"},
          {"book_id": 1347, "title": "荣锋亮 测试 使用应用"},
          {"book_id": 1348, "title": `摘要:johnfercher/maroto 借鉴了bootstrap 的网格模式,使用了gofpdf 生成pdf,是一个很不错的golang pdf 工具 有一个问题是不支持中文(因为配置写的的原因)看到网上有一个中国人fork添加了AddUTF8Font 支持,这样 中文就可以显示了,以下是一个参考的使 `},
          {"book_id": 4, "title": "Harry Potter and the Half-Blood Prince"},
          {"book_id": 42, "title": "The Hitchhiker's Guide to the Galaxy"},
       }
       updateRes, err := client.Documents("books").AddOrUpdate(documents) // => { "updateId": 0 }
       if err != nil {
          fmt.Println(err)
          os.Exit(1)
       }
       fmt.Println(updateRes.UpdateID)
       searchRes, err := client.Search("books").Search(meilisearch.SearchRequest{
          Query: "使用",
          Limit: 10,
       })
       if err != nil {
          fmt.Println(err)
          os.Exit(1)
       }
       fmt.Println(searchRes.Hits)
    }

    说明

    基于 Rust 开发的全文检索引擎比较多,MeiliSearch 就是与 sonic 类似的一个(上面的示例代码也演示了它的用法),项目比较活跃,提供的功能也比较多

    参考资料

    https://github.com/valeriansaliou/sonic
    https://github.com/expectedsh/go-sonic
    https://github.com/tantivy-search/tantivy
    https://github.com/meilisearch/MeiliSearch

  • 相关阅读:
    关于python urlopen 一个类似radio流的timeout方法
    Python nltk English Detection
    Python依赖打包发布详细
    python 怎么和命令行交互
    Python中多维数组flatten的技巧
    Python中的url编码问题
    python数据持久存储:pickle模块的基本使用
    Python控制台输出不换行(进度条等)
    UnicodeEncodeError: 'ascii' codec can't encode character u'xe9' in position 7: ordinal not in range(128) [duplicate]
    json.loads(s) returns error message like this: ValueError: Invalid control character at: line 1 column 33 (char 33)
  • 原文地址:https://www.cnblogs.com/rongfengliang/p/14078710.html
Copyright © 2011-2022 走看看