zoukankan      html  css  js  c++  java
  • Elasticsearch之pythonAPI简单使用

    elasticsearch自动补全建议功能

    数据入库操作

    ES mapping 要求
    PUT music
    {
        "mappings": {
            "_doc" : {
                "properties" : {
                    "suggest" : {
                        "type" : "completion"
                    },
                    "title" : {
                        "type": "keyword"
                    }
                }
            }
        }
    }

     DocType类

    # Parentheses let the import list continue onto the following lines; a bare
    # trailing comma followed by a newline is a syntax error.
    from elasticsearch_dsl import (
        DocType, Date, Nested, Boolean,
        analyzer, InnerObjectWrapper, Completion, Keyword, Text, Integer,
    )
    
    from elasticsearch_dsl.analysis import CustomAnalyzer as _CustomAnalyzer
    
    from elasticsearch_dsl.connections import connections
    connections.create_connection(hosts=["localhost"])
    
    class CustomAnalyzer(_CustomAnalyzer):
        """Custom analyzer that reports an empty analysis definition.

        Works around the error raised when ``ik_analyzer`` is passed as a
        field parameter.
        """

        def get_analysis_definition(self):
            # Always hand back a fresh, empty definition.
            empty_definition = dict()
            return empty_definition
    
    
    ik_analyzer = CustomAnalyzer("ik_max_word", filter=["lowercase"])
    
    class ArticleType(DocType):
        # Document type for articles; only the suggestion field is shown in
        # this snippet.
    
        # Completion field, analyzed with the module-level ik_analyzer.
        suggest = Completion(analyzer=ik_analyzer)
    
        # Remaining field declarations are elided in this snippet.
        ... 

     Items类

    from models.es_types import ArticleType
    from elasticsearch_dsl.connections import connections
    es = connections.create_connection(ArticleType._doc_type.using)
    
    
    def gen_suggests(index, info_tuple):
        """Build the suggestion list for a series of weighted strings.

        Each non-empty text is tokenized through the Elasticsearch analyze API
        (``ik_max_word`` analyzer). Tokens of length 1 are discarded, and a
        token already produced by an earlier pair is not repeated for a later
        one.

        Args:
            index: Name of the index passed to the analyze call.
            info_tuple: Iterable of ``(text, weight)`` pairs. Pairs whose
                ``text`` is falsy (``""``, ``None``) are skipped.

        Returns:
            A list of ``{"input": [tokens], "weight": weight}`` dicts, one per
            pair that contributed at least one new token. Empty when nothing
            could be suggested.
        """
        used_words = set()
        suggests = []
        for text, weight in info_tuple:
            if not text:
                continue

            # Ask Elasticsearch's analyze API to tokenize the string.
            words = es.indices.analyze(index=index, analyzer="ik_max_word", params={'filter':["lowercase"]}, body=text)
            analyzed_words = {r["token"] for r in words["tokens"] if len(r["token"]) > 1}

            new_words = analyzed_words - used_words
            if new_words:
                # Record the tokens so later pairs do not suggest them again.
                used_words |= new_words
                suggests.append({"input": list(new_words), "weight": weight})

        return suggests
    
    
    class JobBoleArticleItem(scrapy.Item):
        # Scrapy item; its field declarations are elided in this snippet.
    
        ...
    
        def save_to_es(self):
            # Persist this item to Elasticsearch and bump the Redis counter.
            # NOTE(review): `article` is created in the elided code below
            # (presumably an ArticleType instance) -- confirm against the
            # full source.
            
            ...
    
            # Build the suggestions from the title (weight 10) and the tags
            # (weight 7), using the index configured on ArticleType.
            article.suggest = gen_suggests(ArticleType._doc_type.index, ((article.title,10),(article.tags, 7)))
    
            article.save()
    
            # Increment the "jobbole_count" key in Redis.
            redis_cli.incr("jobbole_count")
    
            return

     ES搜索语法

    POST myindex/_search?pretty
    {
        "suggest": {
            "my-suggest": {
                "text": "linux",
                "completion": {
                    "field": "suggest",
                    "fuzzy": {
                        "fuzziness": 2
                    }
                }
            }
        },
        "_source": ["title"]  
    }
    自动补全建议核心代码
    # django_views中的写法
    
    from search.models import ArticleType
    
    class SearchSuggest(View):
        """Autocomplete endpoint: returns suggested article titles as JSON."""

        def get(self, request):
            """Answer ``?s=<prefix>`` with a JSON array of suggested titles.

            An empty or missing ``s`` parameter yields an empty JSON array
            without querying Elasticsearch.
            """
            key_words = request.GET.get('s','')
            titles = []
            if key_words:
                # Fuzzy completion against the "suggest" field, capped at 10 options.
                completion_options = {
                    "field": "suggest",
                    "fuzzy": {"fuzziness": 2},
                    "size": 10,
                }
                query = ArticleType.search().suggest(
                    'my_suggest', key_words, completion=completion_options
                )
                result = query.execute_suggest()
                titles.extend(
                    option._source["title"]
                    for option in result.my_suggest[0].options
                )
            payload = json.dumps(titles)
            return HttpResponse(payload, content_type="application/json")

    elasticsearch内容搜索功能

    数据入库操作
      和上面一样
     
    搜索核心代码
    # django_views中的写法
    
    from elasticsearch import Elasticsearch
    
    client = Elasticsearch(hosts=["127.0.0.1"])
    
    class SearchView(View):
        """Search results page backed by the ``jobbole`` Elasticsearch index."""

        def get(self, request):
            """Run a paginated multi-field search and render ``result.html``.

            Query parameters:
                q: Search keywords (default ``""``).
                p: 1-based page number; non-numeric or non-positive values
                    fall back to page 1.

            The template context carries ``page``, ``all_hits``, ``key_words``,
            ``total_nums``, ``page_nums`` and ``last_seconds`` (time spent in
            the search call, in seconds).
            """
            page_size = 10

            key_words = request.GET.get("q","")
            try:
                page = int(request.GET.get("p", "1"))
            except (TypeError, ValueError):
                page = 1
            # A zero or negative page would produce a negative "from" offset.
            page = max(page, 1)

            start_time = datetime.now()
            response = client.search(
                index= "jobbole",
                body={
                    "query":{
                        "multi_match":{
                            "query":key_words,
                            "fields":["tags", "title", "content"]
                        }
                    },
                    "from":(page-1)*page_size,
                    "size":page_size,
                    "highlight": {
                        "pre_tags": ['<span class="keyWord">'],
                        "post_tags": ['</span>'],
                        "fields": {
                            "title": {},
                            "content": {},
                        }
                    }
                }
            )
            end_time = datetime.now()
            last_seconds = (end_time-start_time).total_seconds()

            total_nums = response["hits"]["total"]
            if isinstance(total_nums, dict):
                # Newer Elasticsearch versions report the total as an object
                # of the form {"value": N, "relation": ...}.
                total_nums = total_nums["value"]
            # Ceiling division on the hit count (not on the page number).
            page_nums = (total_nums + page_size - 1) // page_size

            hit_list = []
            for hit in response["hits"]["hits"]:
                source = hit["_source"]
                # A hit may carry no "highlight" section at all, e.g. when it
                # matched only on "tags", which is not a highlighted field.
                highlight = hit.get("highlight", {})

                if "title" in highlight:
                    title = "".join(highlight["title"])
                else:
                    title = source["title"]
                if "content" in highlight:
                    content = "".join(highlight["content"])[:500]
                else:
                    content = source["content"][:500]

                hit_list.append({
                    "title": title,
                    "content": content,
                    "create_date": source["create_date"],
                    "url": source["url"],
                    "score": hit["_score"],
                })

            return render(request, "result.html", {"page":page,
                                                "all_hits":hit_list,
                                                "key_words":key_words,
                                                "total_nums":total_nums,
                                                "page_nums":page_nums,
                                                "last_seconds":last_seconds
                                                })

    scrapy框架+django框架组合使用

    github项目参考

     https://github.com/holgerd77/django-dynamic-scraper 

  • 相关阅读:
    async 异步协程进阶
    linux 磁盘100% 清理
    时间戳,日期,string互转
    ioutils
    logging basic
    【BZOJ5323】【JXOI2018】—游戏(组合数学+线性筛)
    【BZOJ5323】【JXOI2018】—游戏(组合数学+线性筛)
    【CodeChef】—Sum of Cubes(斯特林数+容斥+三元环计数)
    【CodeChef】—Sum of Cubes(斯特林数+容斥+三元环计数)
    【TopCoder SRM 686】—CyclesNumber(斯特林数)
  • 原文地址:https://www.cnblogs.com/cq146637/p/9093700.html
Copyright © 2011-2022 走看看