zoukankan      html  css  js  c++  java
  • python3 操作elasticsearch

    准备篇

    1. 安装依赖

    pip install elasticsearch
    

    2. 建立连接

    from elasticsearch import Elasticsearch
    
    # Client for the node at 192.168.1.84:9200, authenticating with HTTP basic auth.
    es = Elasticsearch(
        ["192.168.1.84"],
        http_auth=("elastic", "elastic"),
        port=9200,
    )
    

    3. 写入数据

    # A single review record to be stored.
    doc = {
        'id': 1,
        'lv_id': 12,
        'sentiment': 0,
        'news_id': 1673578,
        'review': '错字连篇,受不了,还真的看完了[笑着哭]',
        'keyword': '受不了 错字连篇',
        'ner': '',
    }

    # The document's news_id is used as the Elasticsearch document id.
    res = es.index(
        index="match_review",
        doc_type='review_feature',
        id=doc['news_id'],
        body=doc,
    )

    # Print the outcome reported in the response's 'result' field.
    print(res['result'])
    

    4. 批量写入

    from elasticsearch import helpers
    
    # Sample payload; the trailing `...` stands in for further documents of the same shape.
    data = [{'id': 1, 'lv_id': 12, 'sentiment':0, 'news_id': 1673578, 'review': '错字连篇,受不了,还真的看完了[笑着哭]', 'keyword': '受不了 错字连篇', 'ner': ''}, ...]

    # One bulk action per document, keyed by the document's news_id.
    actions = [
        {
            "_index": "match_review",
            "_type": "review_feature",
            "_id": item["news_id"],
            "_source": item,
        }
        for item in data
    ]

    # Send all actions through the bulk helper.
    helpers.bulk(es, actions)
    

    5. 根据id查询

    # Fetch a single document by the id it was indexed under.
    news_id = 1673578
    res = es.get(
        index="match_review",
        doc_type='review_feature',
        id=news_id,
    )
    

    6. 查询全部

    # Open a scroll over every document in the index, 100 hits per page.
    # The scroll context is kept alive for 5 minutes between requests.
    query = es.search(index="match_review", body={"query": {"match_all": {}}}, scroll='5m', size=100)
    res = query['hits']['hits']  # first page of results
    # Total hit count as reported by Elasticsearch. On ES 7+ this is an object
    # ({"value": ..., "relation": ...}) rather than an int, so it is kept for
    # reference only and is not used to decide how many pages to fetch.
    total = query['hits']['total']
    scroll_id = query['_scroll_id']  # cursor used to fetch the following pages

    # Keep scrolling until a page comes back empty.
    while True:
        # The scroll parameter must be passed on every request, otherwise ES reports an error.
        page = es.scroll(scroll_id=scroll_id, scroll='5m')
        # Always continue from the most recently returned scroll id.
        scroll_id = page['_scroll_id']
        hits = page['hits']['hits']
        if not hits:
            break
        res += hits

    # Release the server-side scroll context instead of waiting for it to time out.
    es.clear_scroll(scroll_id=scroll_id)
    

    7. 按条件搜索

    # NOTE: lv_id is not defined in this snippet; it must be set by the surrounding code.
    # Both conditions are required: lv_id equals the given value, and sentiment lies in [-1, 1].
    must_clauses = [
        {"term": {"lv_id": lv_id}},
        {"range": {"sentiment": {"gte": -1, "lte": 1}}},
    ]

    # At least one of these ner matches has to succeed (see minimum_should_match below).
    should_clauses = [{"match": {"ner": name}} for name in ["中国", "美国"]]

    # Random ordering: sort on a script that yields a random number per document.
    random_order = {
        "_script": {
            "script": {"source": "Math.random()", "lang": "painless"},
            "type": "number",
            "order": "asc",
        }
    }

    body = {
        "query": {
            "bool": {
                "must": must_clauses,
                "should": should_clauses,
                "minimum_should_match": 1,
            }
        },
        "sort": [random_order],
    }
    res = es.search(index="match_review", body=body)
    

    附录

    自定义索引语句(指定分词方式)

    {       "settings":{
                "analysis":{
                    "analyzer":{
                        "my_lowercase_analyzer":{
                            "type":"custom",
                            "tokenizer":"whitespace",
                            "filter":[
                                "lowercase"
                            ]
                        }
                    }
                }
            },
            "mappings":{
                "review_feature":{
                    "properties":{
                        "id": {
                        "type": "integer"
                        },
                        "keyword": {
                        "type": "text",
                        "analyzer":"my_lowercase_analyzer"
                        },
                        "lv1_id": {
                        "type": "integer"
                        },
                        "ner": {
                        "type": "text",
                        "analyzer":"my_lowercase_analyzer",
                        "fields": {
                        "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                        }
                        }
                        },
                        "news_id": {
                        "type": "integer"
                        },
                        "review": {
                        "type": "text"
                        },
                        "sentiment": {
                        "type": "integer"
                        }
                    }
                }
            }
        }
    
  • 相关阅读:
    Angular 中使用第三方模块 axios 请求数据
    angular 创建服务
    Promise和RxJS处理异步对比
    ES6中的迭代器(Iterator)和生成器(Generator)
    async await
    Ajax分析
    JSTL
    EL
    Spring-常用依赖及配置
    Spring-AOP的三种方式
  • 原文地址:https://www.cnblogs.com/cooolr/p/12125607.html
Copyright © 2011-2022 走看看