zoukankan      html  css  js  c++  java
  • elasticsearch-1.3.0 之索引代码粗略梳理

    elasticsearch-1.3.0

    发送请求
    创建

    [root@centos ~]# curl -XPUT 172.16.136.159:9200/customer?pretty
    {
      "acknowledged" : true
    }
    

    索引

    [root@centos ~]# curl -XPUT 172.16.136.159:9200/customer/external/1?pretty '-d { "name":"JOhn Doe"}' 
    {
      "_index" : "customer",
      "_type" : "external",
      "_id" : "1",
      "_version" : 1,
      "created" : true
    }
    [root@centos ~]# curl -XPUT 172.16.136.159:9200/customer/external/1?pretty '-d { "name":"JOhn Doe"}' 
    {
      "_index" : "customer",
      "_type" : "external",
      "_id" : "1",
      "_version" : 2,
      "created" : false
    }
    

    这里先跟踪下索引的流程,netty的bootstrap暂且不管,从HttpRequestHandler的messageReceived说起

    /**
     * Handles an inbound HTTP message: wraps the Netty request and its channel in the
     * Elasticsearch-side abstractions, dispatches them through {@code serverTransport},
     * and then forwards the event to the superclass handler.
     *
     * @param ctx the handler context passed on to the superclass
     * @param e   the message event; its payload is cast to an HTTP request
     * @throws Exception whatever dispatching or the superclass handler throws
     */
    public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) throws Exception {
        HttpRequest nettyRequest = (HttpRequest) e.getMessage();
        // Netty's HTTP handling always copies the data into a buffer of its own, either
        // inside NioWorker while reading or through a cumulation buffer.
        NettyHttpRequest wrappedRequest = new NettyHttpRequest(nettyRequest, e.getChannel());
        NettyHttpChannel responseChannel = new NettyHttpChannel(serverTransport, e.getChannel(), wrappedRequest);
        serverTransport.dispatchRequest(wrappedRequest, responseChannel);
        super.messageReceived(ctx, e);
    }
    

    这里的serverTransport其实就是NettyHttpServerTransport,它的dispatchRequest实现如下
    NettyHttpServerTransport

    /**
     * Passes the request and its response channel, unchanged, to {@code httpServerAdapter}.
     *
     * @param request the HTTP request to dispatch
     * @param channel the channel associated with the request
     */
    void dispatchRequest(HttpRequest request, HttpChannel channel) {
        httpServerAdapter.dispatchRequest(request, channel);
    }
    

    Dispatcher,static class Dispatcher implements HttpServerAdapter

    /**
     * Adapter callback that delegates straight to {@code server.internalDispatchRequest}.
     *
     * @param request the HTTP request to dispatch
     * @param channel the channel associated with the request
     */
    public void dispatchRequest(HttpRequest request, HttpChannel channel) {
        server.internalDispatchRequest(request, channel);
    }
    

    HttpServer

    /**
     * Routes a request either to the plugin-site filter chain or to the REST controller.
     * Requests whose raw path starts with {@code /_plugin/} go through a filter chain built
     * around {@code pluginSiteFilter}; every other request is dispatched by the controller.
     *
     * @param request the incoming HTTP request
     * @param channel the channel associated with the request
     */
    public void internalDispatchRequest(final HttpRequest request, final HttpChannel channel) {
        final boolean pluginSiteRequest = request.rawPath().startsWith("/_plugin/");
        if (!pluginSiteRequest) {
            restController.dispatchRequest(request, channel);
            return;
        }
        restController.filterChain(pluginSiteFilter).continueProcessing(request, channel);
    }
    

    RestController的dispatchRequest()主要是executeHandler()

    try {
        executeHandler(request, channel);
    } catch (Throwable e) {
    

    executeHandler方法根据请求找到对应的handler来处理,这里的handler是RestIndexAction,继承自BaseRestHandler

    // Look up the handler for this request; for the index request traced in this
    // walkthrough it is RestIndexAction.
    final RestHandler handler = getHandler(request);
    // Only a non-null handler is invoked; what happens when no handler is found is not
    // part of this excerpt.
    if (handler != null) {
        handler.handleRequest(request, channel);
    }
    

    在BaseRestHandler中

    /**
     * Picks the client to use for this request and delegates to the three-argument overload.
     * With no entries in {@code usefulHeaders} the shared {@code client} is used as is;
     * otherwise it is wrapped in a {@code HeadersCopyClient} built from the client, the
     * request and {@code usefulHeaders}.
     *
     * @param request the REST request being handled
     * @param channel the channel associated with the request
     * @throws Exception whatever the concrete handler throws
     */
    public final void handleRequest(RestRequest request, RestChannel channel) throws Exception {
        final Client effectiveClient;
        if (usefulHeaders.length == 0) {
            effectiveClient = client;
        } else {
            effectiveClient = new HeadersCopyClient(client, request, usefulHeaders);
        }
        handleRequest(request, channel, effectiveClient);
    }

    /**
     * Handles the request with the client selected by the two-argument overload.
     * Implemented by concrete handlers.
     *
     * @param request the REST request being handled
     * @param channel the channel associated with the request
     * @param client  the client to run actions through
     * @throws Exception if handling fails
     */
    protected abstract void handleRequest(RestRequest request, RestChannel channel, Client client) throws Exception;
    

    RestIndexAction实现了父类BaseRestHandler中的抽象方法handleRequest,最后调用NodeClient的index方法

    client.index(indexRequest, new RestBuilderListener<IndexResponse>(channel) {
    

    NodeClient的父类AbstractClient中index的实现

    /**
     * Runs an index request by delegating to {@code execute} with {@code IndexAction.INSTANCE}.
     *
     * @param request  the index request to run
     * @param listener the listener handed on to {@code execute} together with the request
     */
    public void index(final IndexRequest request, final ActionListener<IndexResponse> listener) {
        execute(IndexAction.INSTANCE, request, listener);
    }
    

    NodeClient中的execute方法实现

    /**
     * Looks up the transport action registered for the given client action in
     * {@code actions} and executes the request on it.
     *
     * @param action   the client action used as the lookup key
     * @param request  the request to execute
     * @param listener the listener passed through to the transport action
     */
    public <Request extends ActionRequest, Response extends ActionResponse, RequestBuilder extends ActionRequestBuilder<Request, Response, RequestBuilder, Client>> void execute(Action<Request, Response, RequestBuilder, Client> action, Request request, ActionListener<Response> listener) {
        final ClientAction lookupKey = (ClientAction) action;
        final TransportAction<Request, Response> target = actions.get(lookupKey);
        // For the index request traced here the target is TransportIndexAction,
        // which extends TransportShardReplicationOperationAction.
        target.execute(request, listener);
    }
    

    这里的transportAction是TransportIndexAction,
    其中TransportShardReplicationOperationAction是TransportIndexAction的父类,而TransportShardReplicationOperationAction又继承自TransportAction。TransportAction中execute的实现如下

    /**
     * Validates the request and runs {@code doExecute}, reporting every failure through
     * the listener rather than throwing.
     * <p>
     * When the request asks for a threaded listener, the supplied listener is first wrapped
     * in a {@code ThreadedActionListener}; all later callbacks go to that wrapper.
     *
     * @param request  the request to validate and execute
     * @param listener receives the validation error or any throwable raised by {@code doExecute}
     */
    public void execute(Request request, ActionListener<Response> listener) {
        final ActionListener<Response> callback;
        if (request.listenerThreaded()) {
            callback = new ThreadedActionListener<>(threadPool, listener, logger);
        } else {
            callback = listener;
        }

        final ActionRequestValidationException invalid = request.validate();
        if (invalid == null) {
            try {
                doExecute(request, callback);
            } catch (Throwable failure) {
                logger.trace("Error during transport action execution.", failure);
                callback.onFailure(failure);
            }
        } else {
            // Validation failed: notify the listener and do not execute.
            callback.onFailure(invalid);
        }
    }
    

    直接调用TransportIndexAction的doExecute

     /**
      * Executes an index request, auto-creating the target index first when
      * {@code autoCreateIndex} says it should be created.
      * <p>
      * If creation fails because the index already exists, the request is still executed;
      * any other creation failure is reported to the listener.
      *
      * @param request  the index request
      * @param listener receives the index response or the failure
      */
     protected void doExecute(final IndexRequest request, final ActionListener<IndexResponse> listener) {
         // if we don't have a master, we don't have metadata, that's fine, let it find a master using create index API
         if (!autoCreateIndex.shouldAutoCreate(request.index(), clusterService.state())) {
             innerExecute(request, listener);
             return;
         }

         request.beforeLocalFork(); // we fork on another thread...
         createIndexAction.execute(new CreateIndexRequest(request.index()).cause("auto(index api)").masterNodeTimeout(request.timeout()), new ActionListener<CreateIndexResponse>() {
             @Override
             public void onResponse(CreateIndexResponse result) {
                 innerExecute(request, listener);
             }

             @Override
             public void onFailure(Throwable e) {
                 if (!(ExceptionsHelper.unwrapCause(e) instanceof IndexAlreadyExistsException)) {
                     listener.onFailure(e);
                     return;
                 }
                 // we have the index, do it
                 try {
                     innerExecute(request, listener);
                 } catch (Throwable e1) {
                     listener.onFailure(e1);
                 }
             }
         });
     }
    

    这里走 innerExecute(request, listener);

    /**
     * Runs the request through the superclass {@code doExecute}; this is what the
     * index-specific {@code doExecute} calls once it has dealt with index auto-creation.
     *
     * @param request  the index request
     * @param listener the listener passed through to the superclass
     */
    private void innerExecute(final IndexRequest request, final ActionListener<IndexResponse> listener) {
        super.doExecute(request, listener);
    }
    

    这里的super就是TransportShardReplicationOperationAction了,TransportShardReplicationOperationAction中doExecute的实现

    /**
     * Creates a new {@code AsyncShardOperationAction} for the request/listener pair and starts it.
     *
     * @param request  the request to run
     * @param listener the listener given to the async operation
     */
    protected void doExecute(Request request, ActionListener<Response> listener) {
        new AsyncShardOperationAction(request, listener).start();
    }
    

    主要两个方法,一个是获取shard,另一个是shardOperationOnPrimary;
    其中shard后边再说,shardOperationOnPrimary在TransportIndexAction实现

    /**
     * Performs the index operation on the primary shard.
     * <p>
     * Steps: verify that routing is present when the mapping requires it, build the
     * source-to-parse, run either an index or a create operation on the shard, optionally
     * refresh, then write the resulting version and version type back onto the request so
     * they are used for the replicas.
     *
     * @param clusterState the cluster state used to look up the index and mapping metadata
     * @param shardRequest carries the index request and the target shard id
     * @return a primary response wrapping the request, the index response and the engine operation
     * @throws RoutingMissingException if the mapping requires routing and the request has none
     */
    protected PrimaryResponse<IndexResponse, IndexRequest> shardOperationOnPrimary(ClusterState clusterState, PrimaryOperationRequest shardRequest) {
        final IndexRequest request = shardRequest.request;
    
        // validate, if routing is required, that we got routing
        IndexMetaData indexMetaData = clusterState.metaData().index(request.index());
        MappingMetaData mappingMd = indexMetaData.mappingOrDefault(request.type());
        if (mappingMd != null && mappingMd.routing().required()) {
            if (request.routing() == null) {
                throw new RoutingMissingException(request.index(), request.type(), request.id());
            }
        }
    
        IndexService indexService = indicesService.indexServiceSafe(shardRequest.request.index());
        IndexShard indexShard = indexService.shardSafe(shardRequest.shardId);
        // Describe the document to parse: source bytes plus type, id, routing, parent, timestamp and ttl.
        SourceToParse sourceToParse = SourceToParse.source(SourceToParse.Origin.PRIMARY, request.source()).type(request.type()).id(request.id())
                .routing(request.routing()).parent(request.parent()).timestamp(request.timestamp()).ttl(request.ttl());
        long version;
        boolean created;
        Engine.IndexingOperation op;
        if (request.opType() == IndexRequest.OpType.INDEX) {
            // INDEX op type: "created" is whatever the engine operation reports afterwards.
            Engine.Index index = indexShard.prepareIndex(sourceToParse, request.version(), request.versionType(), Engine.Operation.Origin.PRIMARY, request.canHaveDuplicates());
            // Parsing changed the mappings: send the updated mapping to the master before indexing.
            if (index.parsedDoc().mappingsModified()) {
                mappingUpdatedAction.updateMappingOnMaster(request.index(), index.docMapper(), indexService.indexUUID());
            }
            indexShard.index(index);
            version = index.version();
            op = index;
            created = index.created();
        } else {
            // Any other op type is handled as a create; "created" is always true here.
            Engine.Create create = indexShard.prepareCreate(sourceToParse,
                    request.version(), request.versionType(), Engine.Operation.Origin.PRIMARY, request.canHaveDuplicates(), request.autoGeneratedId());
            if (create.parsedDoc().mappingsModified()) {
                mappingUpdatedAction.updateMappingOnMaster(request.index(), create.docMapper(), indexService.indexUUID());
            }
            indexShard.create(create);
            version = create.version();
            op = create;
            created = true;
        }
        if (request.refresh()) {
            // Refresh requested with the operation; a failing refresh is deliberately
            // swallowed so it does not fail the index operation itself.
            try {
                indexShard.refresh(new Engine.Refresh("refresh_flag_index").force(false));
            } catch (Throwable e) {
                // ignore
            }
        }
    
        // update the version on the request, so it will be used for the replicas
        request.version(version);
        request.versionType(request.versionType().versionTypeForReplicationAndRecovery());
    
        assert request.versionType().validateVersionForWrites(request.version());
    
        IndexResponse response = new IndexResponse(request.index(), request.type(), request.id(), version, created);
        return new PrimaryResponse<>(shardRequest.request, response, op);
    }
    

    走request.opType() == IndexRequest.OpType.INDEX分支,主要是indexShard.prepareIndex和indexShard.index(index)。这里的IndexShard是InternalIndexShard,其index实现如下

    /**
     * Indexes a prepared operation through the engine, calling the indexing-service hooks
     * around it, and returns the operation's parsed document.
     *
     * @param index the prepared index operation
     * @return the parsed document of the (possibly replaced) operation
     * @throws ElasticsearchException declared by the signature; runtime failures from the
     *         engine are rethrown after {@code failedIndex} has been called
     */
    public ParsedDocument index(Engine.Index index) throws ElasticsearchException {
        // Checked first, using the operation's origin.
        writeAllowed(index.origin());
        // The pre-index hook returns the operation that is used from here on.
        index = indexingService.preIndex(index);
        try {
            if (logger.isTraceEnabled()) {
                logger.trace("index [{}][{}]{}", index.type(), index.id(), index.docs());
            }
            engine.index(index);
            index.endTime(System.nanoTime());
        } catch (RuntimeException ex) {
            // Only RuntimeExceptions from the try block reach the failure hook; rethrown unchanged.
            indexingService.failedIndex(index);
            throw ex;
        }
        // Runs outside the try block, so a failure here does not trigger failedIndex.
        indexingService.postIndex(index);
        return index.parsedDoc();
    }
    

    indexingService对应ShardIndexingService, engine是InternalEngine,InternalEngine的index()

    public void index(Index index) throws EngineException {
        final IndexWriter writer;
        try (InternalLock _ = readLock.acquire()) {
            writer = currentIndexWriter();
            try (Releasable r = throttle.acquireThrottle()) {
                innerIndex(index, writer);
            }
            dirty = true;
            possibleMergeNeeded = true;
            flushNeeded = true;
        } catch (OutOfMemoryError | IllegalStateException | IOException t) {
            maybeFailEngine(t, "index");
            throw new IndexFailedEngineException(shardId, index, t);
        }
        checkVersionMapRefresh();
    }
    

    最终在InternalEngine的innerIndex方法中调用Lucene的IndexWriter:根据文档当前是否已有版本(currentVersion是否为Versions.NOT_FOUND),通过writer.addDocument(s)或者writer.updateDocument(s)方法添加或者更新索引
    添加add索引

    // Add path: the multi-document call is used when the operation carries more than one
    // document, otherwise the single document is added.
    if (index.docs().size() > 1) {
        writer.addDocuments(index.docs(), index.analyzer());
    } else {
        writer.addDocument(index.docs().get(0), index.analyzer());
    }
    

    更新update索引

    // Update path: same single/multi split as the add path, with the operation's uid
    // passed as the first argument.
    if (index.docs().size() > 1) {
        writer.updateDocuments(index.uid(), index.docs(), index.analyzer());
    } else {
        writer.updateDocument(index.uid(), index.docs().get(0), index.analyzer());
    }
    

    最后Translog

    // Append the operation to the translog and keep the returned location.
    Translog.Location translogLocation = translog.add(new Translog.Index(index));
    

    具体代码

    /**
     * Applies one index operation: resolves the document's current version, checks for a
     * version conflict, writes the document(s) via add or update, appends the operation to
     * the translog and records the new version in the version map.
     * <p>
     * The whole body runs while synchronized on the object returned by {@code dirtyLock}
     * for the operation's uid.
     *
     * @param index  the index operation to apply
     * @param writer the index writer to write through
     * @throws IOException declared by the signature
     * @throws VersionConflictEngineException on a version conflict, unless the operation's
     *         origin is RECOVERY (in which case the method returns without writing)
     */
    private void innerIndex(Index index, IndexWriter writer) throws IOException {
        synchronized (dirtyLock(index.uid())) {
            final long currentVersion;
            // Prefer the version map; fall back to loading the version from the index.
            VersionValue versionValue = versionMap.getUnderLock(index.uid().bytes());
            if (versionValue == null) {
                currentVersion = loadCurrentVersionFromIndex(index.uid());
            } else {
                // A delete entry older than gcDeletesInMillis (with GC of deletes enabled)
                // is treated as if the document were not found.
                if (enableGcDeletes && versionValue.delete() && (threadPool.estimatedTimeInMillis() - versionValue.time()) > gcDeletesInMillis) {
                    currentVersion = Versions.NOT_FOUND; // deleted, and GC
                } else {
                    currentVersion = versionValue.version();
                }
            }
    
            long updatedVersion;
            long expectedVersion = index.version();
            if (index.versionType().isVersionConflictForWrites(currentVersion, expectedVersion)) {
                // Conflicts are skipped silently for RECOVERY-origin operations and
                // raised for every other origin.
                if (index.origin() == Operation.Origin.RECOVERY) {
                    return;
                } else {
                    throw new VersionConflictEngineException(shardId, index.type(), index.id(), currentVersion, expectedVersion);
                }
            }
            updatedVersion = index.versionType().updateVersion(currentVersion, expectedVersion);
    
            index.updateVersion(updatedVersion);
            if (currentVersion == Versions.NOT_FOUND) {
                // document does not exists, we can optimize for create
                index.created(true);
                if (index.docs().size() > 1) {
                    writer.addDocuments(index.docs(), index.analyzer());
                } else {
                    writer.addDocument(index.docs().get(0), index.analyzer());
                }
            } else {
                // "created" is only set when a version-map entry exists: true if that
                // entry is a delete, false otherwise.
                if (versionValue != null) {
                    index.created(versionValue.delete()); // we have a delete which is not GC'ed...
                }
                if (index.docs().size() > 1) {
                    writer.updateDocuments(index.uid(), index.docs(), index.analyzer());
                } else {
                    writer.updateDocument(index.uid(), index.docs().get(0), index.analyzer());
                }
            }
            // Translog append happens after the writer call; its location is stored with the version.
            Translog.Location translogLocation = translog.add(new Translog.Index(index));
    
            versionMap.putUnderLock(index.uid().bytes(), new VersionValue(updatedVersion, translogLocation));
    
            indexingService.postIndexUnderLock(index);
        }
    }
    

    link
    分布式搜索Elasticsearch源码分析之二------索引过程源码概要分析

  • 相关阅读:
    Mybatis入门
    Ajax
    产品经理之产品规划
    产品经理之用户研究(下)
    产品经理之用户研究(上)
    Spring Cloud
    Spring MVC
    synchronized
    Spring Boot入门
    Spring
  • 原文地址:https://www.cnblogs.com/donganwangshi/p/4318045.html
Copyright © 2011-2022 走看看