zoukankan      html  css  js  c++  java
  • es--映射修改和数据迁移

    首先,我们看一下官方文档

     用百度翻译了一下文档,大意是:

    除了支持的映射参数外,不能更改现有字段的映射或字段类型。更改现有字段可能会使已编制索引的数据无效。

    如果需要更改字段的映射,请使用正确的映射创建一个新索引,并将数据重新索引到该索引中。

    重命名字段将使已在旧字段名下编制索引的数据无效。如需改名,应添加一个别名(alias)字段来创建备用字段名。

    在es中,不支持更改现有字段的映射或字段类型。如果确实需要更改字段的类型,就只能进行数据迁移:用我们想要的正确映射规则创建新索引,再把旧索引的数据重新索引到新索引中。

    1,查看旧的索引

    GET /customer/_mapping
    {
      "customer" : {
        "mappings" : {
          "properties" : {
            "addr" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "age" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              },
              "fielddata" : true
            },
            "content" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "interests" : {
              "type" : "text",
              "fielddata" : true
            },
            "name" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "properties" : {
              "properties" : {
                "content" : {
                  "properties" : {
                    "analyzer" : {
                      "type" : "text",
                      "fields" : {
                        "keyword" : {
                          "type" : "keyword",
                          "ignore_above" : 256
                        }
                      }
                    },
                    "search_analyzer" : {
                      "type" : "text",
                      "fields" : {
                        "keyword" : {
                          "type" : "keyword",
                          "ignore_above" : 256
                        }
                      }
                    },
                    "type" : {
                      "type" : "text",
                      "fields" : {
                        "keyword" : {
                          "type" : "keyword",
                          "ignore_above" : 256
                        }
                      }
                    }
                  }
                }
              }
            },
            "sex" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            }
          }
        }
      }
    }
    View Code

     2,创建新的索引

    PUT /new_customer
    {
      "mappings": {
        "properties": {
          "addr": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "age": {
            "type": "integer",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "content": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "interests": {
            "type": "text",
            "fielddata": true
          },
          "name": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "properties": {
            "properties": {
              "content": {
                "properties": {
                  "analyzer": {
                    "type": "text",
                    "fields": {
                      "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                      }
                    }
                  },
                  "search_analyzer": {
                    "type": "text",
                    "fields": {
                      "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                      }
                    }
                  },
                  "type": {
                    "type": "text",
                    "fields": {
                      "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                      }
                    }
                  }
                }
              }
            }
          },
          "sex": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      }
    }
    View Code

     3,数据迁移

    POST _reindex
    {
      "source": {
        "index": "customer"
      },
      "dest": {
        "index": "new_customer"
      }
    }

    比如,我们在老索引中进行age聚合:

    GET /customer/_search
    {
      "query": {
        "match_all": {}
      }, 
      "aggs": {
        "ageAgg":{
          "terms": {
            "field": "age",
            "size": 10
          },
          "aggs": {
            "ageave": {
              "avg": {
                "field": "age"
              }
            }
          }
        }
      }
    }
    {
      "error" : {
        "root_cause" : [
          {
            "type" : "illegal_argument_exception",
            "reason" : "Expected numeric type on field [age], but got [text]"
          }
        ],
        "type" : "search_phase_execution_exception",
        "reason" : "all shards failed",
        "phase" : "query",
        "grouped" : true,
        "failed_shards" : [
          {
            "shard" : 0,
            "index" : "customer",
            "node" : "pWQJLIs9RbmQ5CrTldrIiQ",
            "reason" : {
              "type" : "illegal_argument_exception",
              "reason" : "Expected numeric type on field [age], but got [text]"
            }
          }
        ],
        "caused_by" : {
          "type" : "illegal_argument_exception",
          "reason" : "Expected numeric type on field [age], but got [text]",
          "caused_by" : {
            "type" : "illegal_argument_exception",
            "reason" : "Expected numeric type on field [age], but got [text]"
          }
        }
      },
      "status" : 400
    }

    旧索引中age是text类型的,所以对age做数值聚合(avg)会报错;在新索引new_customer中我们把age改成了integer类型,再对新索引执行同样的聚合(GET /new_customer/_search)就不会报错了:

    {
      "took" : 2,
      "timed_out" : false,
      "_shards" : {
        "total" : 1,
        "successful" : 1,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : {
          "value" : 8,
          "relation" : "eq"
        },
        "max_score" : 1.0,
        "hits" : [
          {
            "_index" : "new_customer",
            "_type" : "_doc",
            "_id" : "4",
            "_score" : 1.0,
            "_source" : {
              "name" : "nadia",
              "sex" : "gril",
              "age" : 31
            }
          },
          {
            "_index" : "new_customer",
            "_type" : "_doc",
            "_id" : "5",
            "_score" : 1.0,
            "_source" : {
              "name" : "jxx",
              "sex" : "gril",
              "age" : 45,
              "addr" : "北京"
            }
          },
          {
            "_index" : "new_customer",
            "_type" : "_doc",
            "_id" : "7",
            "_score" : 1.0,
            "_source" : {
              "name" : "abb",
              "sex" : "boy",
              "age" : 35,
              "addr" : "南京"
            }
          },
          {
            "_index" : "new_customer",
            "_type" : "_doc",
            "_id" : "8",
            "_score" : 1.0,
            "_source" : {
              "name" : "abb11",
              "sex" : "boy",
              "age" : 36,
              "addr" : "南京"
            }
          },
          {
            "_index" : "new_customer",
            "_type" : "_doc",
            "_id" : "2",
            "_score" : 1.0,
            "_source" : {
              "name" : "木子小僧888666",
              "sex" : "boy",
              "age" : 25,
              "addr" : "白宫"
            }
          },
          {
            "_index" : "new_customer",
            "_type" : "_doc",
            "_id" : "3",
            "_score" : 1.0,
            "_source" : {
              "name" : "特朗普",
              "sex" : "boy",
              "age" : 88,
              "addr" : "白宫"
            }
          },
          {
            "_index" : "new_customer",
            "_type" : "_doc",
            "_id" : "1",
            "_score" : 1.0,
            "_source" : {
              "name" : "木子小僧666999888",
              "sex" : "boy",
              "age" : 30
            }
          },
          {
            "_index" : "new_customer",
            "_type" : "_doc",
            "_id" : "dwy-YXcB9G0dRFSpm0LY",
            "_score" : 1.0,
            "_source" : {
              "properties" : {
                "content" : {
                  "type" : "text",
                  "analyzer" : "ik_max_word",
                  "search_analyzer" : "ik_max_word"
                }
              }
            }
          }
        ]
      },
      "aggregations" : {
        "ageAgg" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 0,
          "buckets" : [
            {
              "key" : 25,
              "doc_count" : 1,
              "ageave" : {
                "value" : 25.0
              }
            },
            {
              "key" : 30,
              "doc_count" : 1,
              "ageave" : {
                "value" : 30.0
              }
            },
            {
              "key" : 31,
              "doc_count" : 1,
              "ageave" : {
                "value" : 31.0
              }
            },
            {
              "key" : 35,
              "doc_count" : 1,
              "ageave" : {
                "value" : 35.0
              }
            },
            {
              "key" : 36,
              "doc_count" : 1,
              "ageave" : {
                "value" : 36.0
              }
            },
            {
              "key" : 45,
              "doc_count" : 1,
              "ageave" : {
                "value" : 45.0
              }
            },
            {
              "key" : 88,
              "doc_count" : 1,
              "ageave" : {
                "value" : 88.0
              }
            }
          ]
        }
      }
    }
  • 相关阅读:
    中文文本分类 pytorch实现
    常用各类数据集
    20 个大型中文文本数据集
    Transformers 简介(上)
    磐创AI|人工智能开发者中文文档大全-TensorFlow,PyTorch,Keras,skearn,fastai,OpenCV,聊天机器人,智能客服,推荐系统,知识图谱
    JointBert代码解读(五)
    模拟测试20190803
    模拟测试20190802
    模拟测试20190729
    模拟测试20190727
  • 原文地址:https://www.cnblogs.com/invban/p/14416569.html
Copyright © 2011-2022 走看看