zoukankan      html  css  js  c++  java
  • Apache Jackrabbit源码研究(二)

    上文分析了Jackrabbit对富文档进行文本抽取的实现源码。我们注意到,上文的文本抽取类LazyTextExtractorField继承自Lucene的AbstractField类,即LazyTextExtractorField是Field类型的,而这正是构建Lucene索引时Document里面的Field对象。那么Jackrabbit是怎样构建Document对象的呢?这里Jackrabbit用到了一个叫NodeIndexer的类,专门用来构建Lucene的Document对象。NodeIndexer类的源码如下:

    /**
     * Creates a lucene <code>Document</code> object from a {@link javax.jcr.Node}.
     */
    public class NodeIndexer {
    
        /**
         * The logger instance for this class.
         */
        private static final Logger log = LoggerFactory.getLogger(NodeIndexer.class);

        /**
         * The default boost for a lucene field: 1.0f.
         */
        protected static final float DEFAULT_BOOST = 1.0f;

        /**
         * The <code>NodeState</code> of the node to index.
         */
        protected final NodeState node;

        /**
         * The persistent item state provider; used to look up the property
         * states of {@link #node}.
         */
        protected final ItemStateManager stateProvider;

        /**
         * Namespace mappings to use for indexing. This is the internal
         * namespace mapping.
         */
        protected final NamespaceMappings mappings;

        /**
         * Name and Path resolver, created from {@link #mappings} in the
         * constructor.
         */
        protected final NamePathResolver resolver;

        /**
         * Background task executor used for full text extraction.
         */
        private final Executor executor;

        /**
         * Parser used for extracting text content from binary properties
         * for full text indexing.
         */
        private final Parser parser;

        /**
         * The indexing configuration or <code>null</code> if none is available.
         */
        protected IndexingConfiguration indexingConfig;

        /**
         * If set to <code>true</code> the fulltext field is stored and a term
         * vector is created with offset information. Defaults to
         * <code>false</code>.
         */
        protected boolean supportHighlighting = false;

        /**
         * Indicates index format for this node indexer. Defaults to
         * {@link IndexFormatVersion#V1}.
         */
        protected IndexFormatVersion indexFormatVersion = IndexFormatVersion.V1;

        /**
         * List of {@link FieldNames#FULLTEXT} fields which should not be used in
         * an excerpt. The list is cleared at the start of {@link #createDoc()}
         * and its content is appended to the document at the very end.
         */
        protected List<Fieldable> doNotUseInExcerpt = new ArrayList<Fieldable>();

        /**
         * The maximum number of characters to extract from binaries. Defaults
         * to {@link Integer#MAX_VALUE}.
         */
        private int maxExtractLength = Integer.MAX_VALUE;
    
        /**
         * Builds an indexer for a single node. Besides storing the given
         * collaborators, a name/path resolver is created from the supplied
         * namespace mappings.
         *
         * @param node          the state of the node that will be indexed.
         * @param stateProvider the persistent item state manager used to look
         *                      up the properties of the node.
         * @param mappings      the internal namespace mappings.
         * @param executor      background task executor for text extraction.
         * @param parser        parser for binary properties.
         */
        public NodeIndexer(
                NodeState node, ItemStateManager stateProvider,
                NamespaceMappings mappings, Executor executor, Parser parser) {
            // plain collaborators first
            this.node = node;
            this.stateProvider = stateProvider;
            this.executor = executor;
            this.parser = parser;
            // the resolver is derived from the namespace mappings
            this.mappings = mappings;
            this.resolver = NamePathResolverImpl.create(this.mappings);
        }
    
        /**
         * Exposes the identifier of the node this indexer works on.
         *
         * @return the <code>NodeId</code> reported by the wrapped node state.
         */
        public NodeId getNodeId() {
            final NodeId id = this.node.getNodeId();
            return id;
        }
    
        /**
         * Switches highlighting support on or off. When enabled, additional
         * information is stored in the index to support highlighting using the
         * rep:excerpt pseudo property.
         *
         * @param b <code>true</code> to enable highlighting support,
         *          <code>false</code> to disable it.
         */
        public void setSupportHighlighting(boolean b) {
            this.supportHighlighting = b;
        }
    
        /**
         * Sets the index format version used by this indexer.
         * <p/>
         * The version decides which optional fields are written: property names
         * are recorded from V2 on (see {@link #createDoc()}), and from V3 on a
         * length field is added per value and property fields are created from
         * a token stream (see <code>addValue</code> and
         * <code>createFieldWithoutNorms</code>).
         *
         * @param indexFormatVersion the index format version
         */
        public void setIndexFormatVersion(IndexFormatVersion indexFormatVersion) {
            this.indexFormatVersion = indexFormatVersion;
        }
    
        /**
         * Sets the indexing configuration for this node indexer. The value is
         * stored as-is in {@link #indexingConfig}; no validation is performed
         * here.
         *
         * @param config the indexing configuration.
         */
        public void setIndexingConfiguration(IndexingConfiguration config) {
            this.indexingConfig = config;
        }
    
        /**
         * Reports the current limit on the number of characters that are
         * extracted from binaries.
         *
         * @return maximum extraction length
         */
        public int getMaxExtractLength() {
            return this.maxExtractLength;
        }
    
        /**
         * Sets the maximum number of characters to extract from binaries.
         * The value is stored without any range check.
         *
         * @param length maximum extraction length
         */
        public void setMaxExtractLength(int length) {
            this.maxExtractLength = length;
        }
    
        /**
         * Builds the lucene Document for the node of this indexer.
         * <p/>
         * The document receives, in this order: the node boost, the id field,
         * the parent/child relation fields (or the root / shareable-node
         * markers), one set of fields per property value, and finally all
         * fulltext fields that were collected as "not to be used in an excerpt".
         *
         * @return the lucene Document with the index layout.
         * @throws RepositoryException if an error occurs while reading property
         *                             values from the <code>ItemStateProvider</code>.
         */
        public Document createDoc() throws RepositoryException {
            // start from a clean slate: excerpt-excluded fields are per document
            doNotUseInExcerpt.clear();

            Document doc = new Document();
            doc.setBoost(getNodeBoost());

            // special fields
            // UUID
            doc.add(new IDField(node.getNodeId()));
            try {
                // parent UUID
                boolean isRoot = node.getParentId() == null;
                if (isRoot) {
                    // root node
                    Field parentField = new Field(FieldNames.PARENT, "",
                            Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
                    doc.add(parentField);
                    addNodeName(doc, "", "");
                } else {
                    boolean shareable = !node.getSharedSet().isEmpty();
                    if (shareable) {
                        // shareable node: one relation per parent in the shared set
                        for (NodeId parent : node.getSharedSet()) {
                            addParentChildRelation(doc, parent);
                        }
                        // mark shareable nodes
                        Field marker = new Field(FieldNames.SHAREABLE_NODE, "",
                                Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
                        doc.add(marker);
                    } else {
                        addParentChildRelation(doc, node.getParentId());
                    }
                }
            } catch (NoSuchItemStateException e) {
                throwRepositoryException(e);
            } catch (ItemStateException e) {
                throwRepositoryException(e);
            } catch (NamespaceException e) {
                // will never happen, because this.mappings will dynamically add
                // unknown uri<->prefix mappings
            }

            Set<Name> propertyNames = node.getPropertyNames();
            for (Name propertyName : propertyNames) {
                PropertyId propertyId = new PropertyId(node.getNodeId(), propertyName);
                try {
                    PropertyState state =
                            (PropertyState) stateProvider.getItemState(propertyId);
                    Name stateName = state.getName();

                    // add each property to the _PROPERTIES_SET for searching
                    // beginning with V2
                    if (indexFormatVersion.getVersion() >= IndexFormatVersion.V2.getVersion()) {
                        addPropertyName(doc, stateName);
                    }

                    InternalValue[] values = state.getValues();
                    for (int i = 0; i < values.length; i++) {
                        addValue(doc, values[i], stateName);
                    }
                    // real multi-valued
                    if (values.length > 1) {
                        addMVPName(doc, stateName);
                    }
                } catch (NoSuchItemStateException e) {
                    throwRepositoryException(e);
                } catch (ItemStateException e) {
                    throwRepositoryException(e);
                }
            }

            // now add fields that are not used in excerpt (must go at the end)
            for (Fieldable excluded : doNotUseInExcerpt) {
                doc.add(excluded);
            }
            return doc;
        }
    
        /**
         * Always throws: wraps <code>e</code> into a
         * <code>RepositoryException</code> whose message names the id and the
         * node type of the node being indexed.
         *
         * @param e the base exception, kept as the cause.
         * @throws RepositoryException always.
         */
        protected void throwRepositoryException(Exception e)
                throws RepositoryException {
            StringBuilder msg = new StringBuilder("Error while indexing node: ");
            msg.append(node.getNodeId());
            msg.append(" of type: ");
            msg.append(node.getNodeTypeName());
            throw new RepositoryException(msg.toString(), e);
        }
    
        /**
         * Records that the property <code>name</code> is multi-valued by adding
         * a {@link FieldNames#MVP} field to <code>doc</code>. The field value is
         * the JCR name resolved with the internal search index namespace
         * mapping. If the name cannot be resolved nothing is added.
         *
         * @param doc  the lucene document.
         * @param name the name of the multi-value property.
         */
        protected void addMVPName(Document doc, Name name) {
            final String jcrName;
            try {
                jcrName = resolver.getJCRName(name);
            } catch (NamespaceException e) {
                // will never happen, prefixes are created dynamically
                return;
            }
            Field mvp = new Field(FieldNames.MVP, jcrName, Field.Store.NO,
                    Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
            doc.add(mvp);
        }
    
        /**
         * Adds a single property value to the lucene Document, dispatching on
         * the JCR property type of <code>value</code> to the matching
         * <code>addXxxValue</code> method.
         * <p/>
         * The field name is the JCR name of the property as resolved by
         * {@link #resolver}; the local name is only the fallback should
         * resolution fail. Every type is guarded by <code>isIndexed(name)</code>.
         * The one exception is NAME values of jcr:primaryType and
         * jcr:mixinTypes, which are always added. From index format V3 on a
         * length field is added for every value, whether or not the value
         * itself was indexed.
         *
         * @param doc   the document.
         * @param value the internal jackrabbit value.
         * @param name  the name of the property.
         * @throws RepositoryException presumably raised by the typed accessors of
         *                             <code>value</code> or by the helper methods
         *                             defined elsewhere in this class; confirm
         *                             against their declarations.
         * @throws IllegalArgumentException if the type of <code>value</code> is
         *                             none of the handled property types.
         */
        protected void addValue(Document doc, InternalValue value, Name name) throws RepositoryException {
            // fallback name, replaced by the resolved JCR name below
            String fieldName = name.getLocalName();
            try {
                fieldName = resolver.getJCRName(name);
            } catch (NamespaceException e) {
                // will never happen
            }
            switch (value.getType()) {
                case PropertyType.BINARY:
                    if (isIndexed(name)) {
                        addBinaryValue(doc, fieldName, value);
                    }
                    break;
                case PropertyType.BOOLEAN:
                    if (isIndexed(name)) {
                        addBooleanValue(doc, fieldName, value.getBoolean());
                    }
                    break;
                case PropertyType.DATE:
                    if (isIndexed(name)) {
                        addCalendarValue(doc, fieldName, value.getDate());
                    }
                    break;
                case PropertyType.DOUBLE:
                    if (isIndexed(name)) {
                        addDoubleValue(doc, fieldName, value.getDouble());
                    }
                    break;
                case PropertyType.LONG:
                    if (isIndexed(name)) {
                        addLongValue(doc, fieldName, value.getLong());
                    }
                    break;
                case PropertyType.REFERENCE:
                    if (isIndexed(name)) {
                        // hard reference: weak flag is false
                        addReferenceValue(doc, fieldName, value.getNodeId(), false);
                    }
                    break;
                case PropertyType.WEAKREFERENCE:
                    if (isIndexed(name)) {
                        // weak reference: weak flag is true
                        addReferenceValue(doc, fieldName, value.getNodeId(), true);
                    }
                    break;
                case PropertyType.PATH:
                    if (isIndexed(name)) {
                        addPathValue(doc, fieldName, value.getPath());
                    }
                    break;
                case PropertyType.URI:
                    if (isIndexed(name)) {
                        addURIValue(doc, fieldName, value.getURI());
                    }
                    break;
                case PropertyType.STRING:
                    if (isIndexed(name)) {
                        // never fulltext index jcr:uuid String
                        if (name.equals(NameConstants.JCR_UUID)) {
                            addStringValue(doc, fieldName, value.getString(),
                                    false, false, DEFAULT_BOOST);
                        } else {
                            // fulltext indexed; node scope inclusion, boost and
                            // excerpt usage are decided per property name
                            addStringValue(doc, fieldName, value.getString(),
                                    true, isIncludedInNodeIndex(name),
                                    getPropertyBoost(name), useInExcerpt(name));
                        }
                    }
                    break;
                case PropertyType.NAME:
                    // jcr:primaryType and jcr:mixinTypes are required for correct
                    // node type resolution in queries
                    if (name.equals(NameConstants.JCR_PRIMARYTYPE)
                            || name.equals(NameConstants.JCR_MIXINTYPES)
                            || isIndexed(name)) {
                        addNameValue(doc, fieldName, value.getName());
                    }
                    break;
                case PropertyType.DECIMAL:
                    if (isIndexed(name)) {
                        addDecimalValue(doc, fieldName, value.getDecimal());
                    }
                    break;


                default:
                    throw new IllegalArgumentException("illegal internal value type: " + value.getType());
            }

            // add length
            // (done for every value, independent of the isIndexed() checks above)
            if (indexFormatVersion.getVersion() >= IndexFormatVersion.V3.getVersion()) {
                addLength(doc, fieldName, value);
            }
        }
    
        /**
         * Registers the property name in the lucene _:PROPERTIES_SET field.
         * The JCR name resolved by {@link #resolver} is used as the field
         * value; the local name is the fallback if resolution fails.
         *
         * @param doc  the document.
         * @param name the name of the property.
         */
        protected void addPropertyName(Document doc, Name name) {
            String jcrName = name.getLocalName();
            try {
                jcrName = resolver.getJCRName(name);
            } catch (NamespaceException e) {
                // will never happen
            }
            Field entry = new Field(FieldNames.PROPERTIES_SET, jcrName,
                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
            doc.add(entry);
        }
    
        /**
         * Adds the binary value to the document as a fulltext field.
         * <p/>
         * Only a property whose field name equals <code>jcr:data</code> (with
         * the prefix taken from {@link #mappings}) is handled, and only when the
         * node also has a <code>jcr:mimeType</code> value. In that case a
         * fulltext field is created from the binary together with the content
         * type and, if present, the <code>jcr:encoding</code> value. Any other
         * binary property is silently skipped.
         * <p/>
         * Every failure, including errors, is logged as a warning and not
         * propagated.
         *
         * @param doc           The document to which to add the field
         * @param fieldName     The name of the field to add
         * @param internalValue The value for the field to add to the document.
         */
        protected void addBinaryValue(Document doc,
                                      String fieldName,
                                      InternalValue internalValue) {
            try {
                // 'check' if node is of type nt:resource
                final String dataField = mappings.getPrefix(Name.NS_JCR_URI) + ":data";
                if (!dataField.equals(fieldName)) {
                    // don't know how to index
                    return;
                }

                final InternalValue mimeType = getValue(NameConstants.JCR_MIMETYPE);
                if (mimeType == null) {
                    // without a content type no extraction is attempted
                    return;
                }

                Metadata metadata = new Metadata();
                metadata.set(Metadata.CONTENT_TYPE, mimeType.getString());

                // jcr:encoding is not mandatory
                final InternalValue charset = getValue(NameConstants.JCR_ENCODING);
                if (charset != null) {
                    metadata.set(Metadata.CONTENT_ENCODING, charset.getString());
                }

                doc.add(createFulltextField(internalValue, metadata));
            } catch (Throwable t) {
                // TODO: How to recover from a transient indexing failure?
                log.warn("Exception while indexing binary property", t);
            }
        }
    
        /**
         * Looks up the named property on the current node and hands back its
         * first value.
         *
         * @param name property name
         * @return the first value of the named property, or <code>null</code>
         *         if the property does not exist or has no values
         * @throws ItemStateException if the property can not be accessed
         */
        protected InternalValue getValue(Name name) throws ItemStateException {
            try {
                PropertyState state = (PropertyState) stateProvider.getItemState(
                        new PropertyId(node.getNodeId(), name));
                InternalValue[] all = state.getValues();
                return (all.length > 0) ? all[0] : null;
            } catch (NoSuchItemStateException e) {
                // a missing property is reported as "no value"
                return null;
            }
        }
    
        /**
         * Indexes a boolean property value. The <code>toString()</code>
         * representation of the value is added to the document as a
         * norm-less field of type BOOLEAN.
         *
         * @param doc           The document to which to add the field
         * @param fieldName     The name of the field to add
         * @param internalValue The value for the field to add to the document.
         */
        protected void addBooleanValue(Document doc, String fieldName, Object internalValue) {
            final String text = internalValue.toString();
            Field field = createFieldWithoutNorms(fieldName, text, PropertyType.BOOLEAN);
            doc.add(field);
        }
    
        /**
         * Creates a {@link FieldNames#PROPERTIES} field holding the named value
         * built from <code>fieldName</code> and <code>internalValue</code>. The
         * created field is indexed without norms.
         * <p/>
         * For index formats before V3 a plain, non-stored, non-analyzed field
         * is returned. From V3 on the field is backed by a
         * <code>SingletonTokenStream</code> that also carries the property type.
         *
         * @param fieldName     The name of the field to add
         * @param internalValue The value for the field to add to the document.
         * @param propertyType  the property type.
         * @return the newly created field.
         */
        protected Field createFieldWithoutNorms(String fieldName,
                                                String internalValue,
                                                int propertyType) {
            boolean legacyFormat = indexFormatVersion.getVersion()
                    < IndexFormatVersion.V3.getVersion();
            if (legacyFormat) {
                String namedValue = FieldNames.createNamedValue(fieldName, internalValue);
                return new Field(FieldNames.PROPERTIES, namedValue,
                        Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
                        Field.TermVector.NO);
            }

            SingletonTokenStream tokens = new SingletonTokenStream(
                    FieldNames.createNamedValue(fieldName, internalValue),
                    propertyType);
            Field field = new Field(FieldNames.PROPERTIES, tokens);
            field.setOmitNorms(true);
            return field;
        }
    
        /**
         * Indexes a date property value. The calendar is reduced to its time in
         * milliseconds and converted to an indexable string using the
         * {@link DateField} class. If an <code>IllegalArgumentException</code>
         * is raised while doing so, a warning is logged and no field is added.
         *
         * @param doc
         *            The document to which to add the field
         * @param fieldName
         *            The name of the field to add
         * @param internalValue
         *            The value for the field to add to the document; must be a
         *            {@link Calendar}.
         */
        protected void addCalendarValue(Document doc, String fieldName, Object internalValue) {
            final Calendar calendar = (Calendar) internalValue;
            final long millis = calendar.getTimeInMillis();
            try {
                String indexed = DateField.timeToString(millis);
                doc.add(createFieldWithoutNorms(fieldName, indexed, PropertyType.DATE));
            } catch (IllegalArgumentException e) {
                Date rejected = new Date(calendar.getTimeInMillis());
                log.warn("'{}' is outside of supported date value range.", rejected);
            }
        }
    
        /**
         * Indexes a double property value. The value is converted to an
         * indexable string using the {@link DoubleField} class and added as a
         * norm-less field of type DOUBLE.
         *
         * @param doc           The document to which to add the field
         * @param fieldName     The name of the field to add
         * @param internalValue The value for the field to add to the document;
         *                      must be a {@link Double}.
         */
        protected void addDoubleValue(Document doc, String fieldName, Object internalValue) {
            final String indexed =
                    DoubleField.doubleToString(((Double) internalValue).doubleValue());
            doc.add(createFieldWithoutNorms(fieldName, indexed, PropertyType.DOUBLE));
        }
    
        /**
         * Indexes a long property value. The value is converted to an indexable
         * string using the {@link LongField} class and added as a norm-less
         * field of type LONG.
         *
         * @param doc           The document to which to add the field
         * @param fieldName     The name of the field to add
         * @param internalValue The value for the field to add to the document;
         *                      must be a {@link Long}.
         */
        protected void addLongValue(Document doc, String fieldName, Object internalValue) {
            final String indexed =
                    LongField.longToString(((Long) internalValue).longValue());
            doc.add(createFieldWithoutNorms(fieldName, indexed, PropertyType.LONG));
        }
    
        /**
         * Indexes a decimal property value. The value is converted to an
         * indexable string using the {@link DecimalField} class and added as a
         * norm-less field of type DECIMAL.
         *
         * @param doc           The document to which to add the field
         * @param fieldName     The name of the field to add
         * @param internalValue The value for the field to add to the document;
         *                      must be a {@link BigDecimal}.
         */
        protected void addDecimalValue(Document doc, String fieldName, Object internalValue) {
            final String indexed =
                    DecimalField.decimalToString((BigDecimal) internalValue);
            doc.add(createFieldWithoutNorms(fieldName, indexed, PropertyType.DECIMAL));
        }
    
        /**
         * Indexes a reference property value. Three things happen:
         * <ul>
         * <li>the string form of the value is indexed as a norm-less field of
         * type REFERENCE or WEAKREFERENCE,</li>
         * <li>the same named value is additionally stored (but not indexed) in
         * the {@link FieldNames#PROPERTIES} field,</li>
         * <li>for weak references only, the string form is also indexed in the
         * {@link FieldNames#WEAK_REFS} field (as of Jackrabbit 2.0).</li>
         * </ul>
         *
         * @param doc           The document to which to add the field
         * @param fieldName     The name of the field to add
         * @param internalValue The value for the field to add to the document.
         * @param weak          Flag indicating whether it's a WEAKREFERENCE (true) or a REFERENCE (false)
         */
        protected void addReferenceValue(Document doc, String fieldName, Object internalValue, boolean weak) {
            final String uuid = internalValue.toString();
            final int type = weak ? PropertyType.WEAKREFERENCE : PropertyType.REFERENCE;

            // searchable form
            doc.add(createFieldWithoutNorms(fieldName, uuid, type));

            // stored-only copy of the reference data
            Field stored = new Field(FieldNames.PROPERTIES,
                    FieldNames.createNamedValue(fieldName, uuid),
                    Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
            doc.add(stored);

            if (!weak) {
                return;
            }
            Field weakRef = new Field(FieldNames.WEAK_REFS, uuid, Field.Store.NO,
                    Field.Index.NOT_ANALYZED_NO_NORMS);
            doc.add(weakRef);
        }
    
        /**
         * Indexes a path property value. The path is converted to its JCR
         * string form by {@link #resolver}; the path's own
         * <code>toString()</code> is the fallback if resolution fails. The
         * result is added as a norm-less field of type PATH.
         *
         * @param doc           The document to which to add the field
         * @param fieldName     The name of the field to add
         * @param internalValue The value for the field to add to the document;
         *                      must be a <code>Path</code>.
         */
        protected void addPathValue(Document doc, String fieldName, Object internalValue) {
            final Path path = (Path) internalValue;
            String jcrPath = path.toString();
            try {
                jcrPath = resolver.getJCRPath(path);
            } catch (NamespaceException e) {
                // will never happen
            }
            Field field = createFieldWithoutNorms(fieldName, jcrPath, PropertyType.PATH);
            doc.add(field);
        }
    
        /**
         * Indexes a URI property value. The string form of the URI is added to
         * the document as a norm-less field of type URI.
         *
         * @param doc           The document to which to add the field
         * @param fieldName     The name of the field to add
         * @param internalValue The value for the field to add to the document;
         *                      must be a {@link URI}.
         */
        protected void addURIValue(Document doc, String fieldName, Object internalValue) {
            final String text = ((URI) internalValue).toString();
            doc.add(createFieldWithoutNorms(fieldName, text, PropertyType.URI));
        }
    
        /**
         * Adds the string value to the document both as the named field and for
         * full text indexing.
         * <p/>
         * Delegates with <code>tokenized</code> and
         * <code>includeInNodeIndex</code> set to <code>true</code> and with
         * {@link #DEFAULT_BOOST}.
         *
         * @param doc           The document to which to add the field
         * @param fieldName     The name of the field to add
         * @param internalValue The value for the field to add to the document.
         * @deprecated Use {@link #addStringValue(Document, String, Object, boolean)
         *             addStringValue(Document, String, Object, boolean)} instead.
         */
        @Deprecated
        protected void addStringValue(Document doc, String fieldName, Object internalValue) {
            addStringValue(doc, fieldName, internalValue, true, true, DEFAULT_BOOST);
        }
    
        /**
         * Adds the string value to the document as the named field and, when
         * <code>tokenized</code> is <code>true</code>, also for full text
         * indexing.
         * <p/>
         * The value is always included in the node scope fulltext index and
         * uses {@link #DEFAULT_BOOST}.
         *
         * @param doc           The document to which to add the field
         * @param fieldName     The name of the field to add
         * @param internalValue The value for the field to add to the document.
         * @param tokenized     If <code>true</code> the string is also tokenized
         *                      and fulltext indexed.
         */
        protected void addStringValue(Document doc, String fieldName,
                                      Object internalValue, boolean tokenized) {
            final boolean includeInNodeIndex = true;
            final float boost = DEFAULT_BOOST;
            addStringValue(doc, fieldName, internalValue, tokenized,
                    includeInNodeIndex, boost);
        }
    
        /**
         * Adds the string value to the document both as the named field and
         * optionally for full text indexing if <code>tokenized</code> is
         * <code>true</code>.
         * <p/>
         * Delegates with <code>useInExcerpt</code> set to <code>true</code>.
         *
         * @param doc                The document to which to add the field
         * @param fieldName          The name of the field to add
         * @param internalValue      The value for the field to add to the
         *                           document.
         * @param tokenized          If <code>true</code> the string is also
         *                           tokenized and fulltext indexed.
         * @param includeInNodeIndex If <code>true</code> the string is also
         *                           tokenized and added to the node scope fulltext
         *                           index.
         * @param boost              the boost value for this string field.
         * @deprecated use {@link #addStringValue(Document, String, Object, boolean, boolean, float, boolean)} instead.
         */
        @Deprecated
        protected void addStringValue(Document doc, String fieldName,
                                      Object internalValue, boolean tokenized,
                                      boolean includeInNodeIndex, float boost) {
            addStringValue(doc, fieldName, internalValue, tokenized, includeInNodeIndex, boost, true);
        }
    
        /**
         * Adds the string value to the document as the named field and,
         * optionally, to the fulltext indexes.
         * <p/>
         * The exact-match property field is always added. When
         * <code>tokenized</code> is <code>true</code> and the string is not
         * empty, an analyzed per-property fulltext field is added as well; its
         * name is the field name with {@link FieldNames#FULLTEXT_PREFIX}
         * inserted after the namespace prefix. When
         * <code>includeInNodeIndex</code> is also <code>true</code>, a node
         * scope fulltext field is created; it is either added right away or,
         * if it must not appear in excerpts, queued in
         * {@link #doNotUseInExcerpt}.
         *
         * @param doc                The document to which to add the field
         * @param fieldName          The name of the field to add
         * @param internalValue      The value for the field to add to the
         *                           document; must be a <code>String</code>.
         * @param tokenized          If <code>true</code> the string is also
         *                           tokenized and fulltext indexed.
         * @param includeInNodeIndex If <code>true</code> the string is also
         *                           tokenized and added to the node scope fulltext
         *                           index.
         * @param boost              the boost value for this string field.
         * @param useInExcerpt       If <code>true</code> the string may show up in
         *                           an excerpt.
         */
        protected void addStringValue(Document doc, String fieldName,
                                      Object internalValue, boolean tokenized,
                                      boolean includeInNodeIndex, float boost,
                                      boolean useInExcerpt) {

            // simple String
            final String text = (String) internalValue;
            doc.add(createFieldWithoutNorms(fieldName, text, PropertyType.STRING));

            // nothing more to do unless a non-empty value is to be tokenized
            if (!tokenized || text.length() == 0) {
                return;
            }

            // create fulltext index on property
            final int colon = fieldName.indexOf(':');
            final String fulltextName = fieldName.substring(0, colon + 1)
                    + FieldNames.FULLTEXT_PREFIX + fieldName.substring(colon + 1);
            Field propertyFulltext = new Field(fulltextName, text,
                    Field.Store.NO,
                    Field.Index.ANALYZED,
                    Field.TermVector.NO);
            propertyFulltext.setBoost(boost);
            doc.add(propertyFulltext);

            if (!includeInNodeIndex) {
                return;
            }

            // also create fulltext index of this value
            Field nodeFulltext = createFulltextField(
                    text, supportHighlighting && useInExcerpt, supportHighlighting);
            if (!useInExcerpt) {
                // appended to the document at the end of createDoc()
                doNotUseInExcerpt.add(nodeFulltext);
            } else {
                doc.add(nodeFulltext);
            }
        }
    
        /**
         * Indexes a name property value. The internal value is treated as a
         * <code>Name</code>; its namespace URI is mapped to a prefix using
         * {@link #mappings} and the resulting <code>prefix:localName</code>
         * string is added as a norm-less field of type NAME. If the namespace
         * cannot be mapped nothing is added.
         *
         * @param doc           The document to which to add the field
         * @param fieldName     The name of the field to add
         * @param internalValue The value for the field to add to the document.
         */
        protected void addNameValue(Document doc, String fieldName, Object internalValue) {
            final Name qualified = (Name) internalValue;
            try {
                String prefix = mappings.getPrefix(qualified.getNamespaceURI());
                String normalized = prefix + ":" + qualified.getLocalName();
                Field field = createFieldWithoutNorms(fieldName, normalized,
                        PropertyType.NAME);
                doc.add(field);
            } catch (NamespaceException e) {
                // will never happen
            }
        }
    
        /**
         * Creates a fulltext field for the string <code>value</code>. The
         * {@link #supportHighlighting} flag is used both for deciding whether the
         * value is stored and whether a term vector with offsets is created.
         *
         * @param value the string value.
         * @return a lucene field.
         * @deprecated use {@link #createFulltextField(String, boolean, boolean)} instead.
         */
        @Deprecated
        protected Field createFulltextField(String value) {
            return createFulltextField(value, supportHighlighting, supportHighlighting);
        }
    
        /**
         * Builds the analyzed {@link FieldNames#FULLTEXT} field for a string
         * <code>value</code>.
         *
         * @param value the string value.
         * @param store whether the value should be stored in the index. Stored
         *              values longer than 0x4000 (16k) characters are stored
         *              compressed.
         * @param withOffsets whether a term vector with offsets should be
         *              created; otherwise no term vector is created.
         * @return a lucene field.
         */
        protected Field createFulltextField(String value,
                                            boolean store,
                                            boolean withOffsets) {
            Field.TermVector termVector = withOffsets
                    ? Field.TermVector.WITH_OFFSETS
                    : Field.TermVector.NO;

            Field.Store storage;
            if (!store) {
                storage = Field.Store.NO;
            } else if (value.length() > 0x4000) {
                // large values are stored compressed
                storage = Field.Store.COMPRESS;
            } else {
                storage = Field.Store.YES;
            }

            return new Field(FieldNames.FULLTEXT, value, storage,
                    Field.Index.ANALYZED, termVector);
        }
    
        /**
         * Creates a fulltext field for a binary <code>value</code>. The returned
         * field is a {@link LazyTextExtractorField} constructed with this
         * indexer's parser and executor, the current highlighting flag and the
         * maximum extract length.
         *
         * @param value the binary value
         * @param metadata document metadata
         * @return a lucene field.
         */
        protected Fieldable createFulltextField(
                InternalValue value, Metadata metadata) {
            final boolean withHighlighting = supportHighlighting;
            return new LazyTextExtractorField(parser, value, metadata, executor,
                    withHighlighting, getMaxExtractLength());
        }
    
        /**
         * Tells whether the property with the given name should be indexed.
         * Without an indexing configuration every property is indexed; otherwise
         * the decision is delegated to the configuration.
         *
         * @param propertyName name of a property.
         * @return <code>true</code> if the property should be indexed;
         *         <code>false</code> otherwise.
         */
        protected boolean isIndexed(Name propertyName) {
            return indexingConfig == null
                    || indexingConfig.isIndexed(node, propertyName);
        }
    
        /**
         * Tells whether the property with the given name should also be added to
         * the node scope index. Without an indexing configuration every property
         * is included; otherwise the decision is delegated to the configuration.
         *
         * @param propertyName the name of a property.
         * @return <code>true</code> if it should be added to the node scope index;
         *         <code>false</code> otherwise.
         */
        protected boolean isIncludedInNodeIndex(Name propertyName) {
            if (indexingConfig == null) {
                // nothing configured: include everything
                return true;
            }
            return indexingConfig.isIncludedInNodeScopeIndex(node, propertyName);
        }
    
        /**
         * Tells whether the content of the property with the given name should
         * be used to create an excerpt. Without an indexing configuration every
         * property is used; otherwise the decision is delegated to the
         * configuration.
         *
         * @param propertyName the name of a property.
         * @return <code>true</code> if it should be used to create an excerpt;
         *         <code>false</code> otherwise.
         */
        protected boolean useInExcerpt(Name propertyName) {
            return indexingConfig != null
                    ? indexingConfig.useInExcerpt(node, propertyName)
                    : true;
        }
    
        /**
         * Looks up the boost value for the given property name. Falls back to
         * {@link #DEFAULT_BOOST} when no indexing configuration is available.
         *
         * @param propertyName the name of a property.
         * @return the boost value for the given property name.
         */
        protected float getPropertyBoost(Name propertyName) {
            return indexingConfig != null
                    ? indexingConfig.getPropertyBoost(node, propertyName)
                    : DEFAULT_BOOST;
        }
    
        /**
         * Looks up the boost value for this {@link #node} state. Falls back to
         * {@link #DEFAULT_BOOST} when no indexing configuration is available.
         *
         * @return the boost value for this {@link #node} state.
         */
        protected float getNodeBoost() {
            if (indexingConfig == null) {
                return DEFAULT_BOOST;
            }
            return indexingConfig.getNodeBoost(node);
        }
    
        /**
         * Adds a {@link FieldNames#PROPERTY_LENGTHS} field to <code>document</code>
         * with a named length value. Nothing is added when the length computed
         * for <code>value</code> is <code>-1</code>.
         *
         * @param doc          the lucene document.
         * @param propertyName the property name.
         * @param value        the internal value.
         */
        protected void addLength(Document doc,
                                 String propertyName,
                                 InternalValue value) {
            final long valueLength = Util.getLength(value);
            if (valueLength == -1) {
                // no length to record for this value
                return;
            }
            String namedLength = FieldNames.createNamedLength(propertyName, valueLength);
            doc.add(new Field(FieldNames.PROPERTY_LENGTHS, namedLength,
                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
        }
    
        /**
         * Adds the node name to the document. A {@link FieldNames#LABEL} field
         * holding <code>prefix:localName</code> is always added. For index format
         * version 3 and later, the namespace URI and the local name are
         * additionally indexed as two separate fields.
         *
         * @param doc the lucene document.
         * @param namespaceURI the namespace URI of the node name.
         * @param localName the local name of the node.
         * @throws NamespaceException if the prefix lookup for the namespace URI
         *                            fails.
         */
        protected void addNodeName(Document doc,
                                   String namespaceURI,
                                   String localName) throws NamespaceException {
            final String prefixedName = mappings.getPrefix(namespaceURI) + ":" + localName;
            doc.add(new Field(FieldNames.LABEL, prefixedName,
                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));

            if (indexFormatVersion.getVersion() < IndexFormatVersion.V3.getVersion()) {
                // older index formats only know the label field
                return;
            }
            // as of version 3, also index combination of namespace URI and local name
            doc.add(new Field(FieldNames.NAMESPACE_URI, namespaceURI,
                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
            doc.add(new Field(FieldNames.LOCAL_NAME, localName,
                    Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
        }
    
        /**
         * Adds a parent child relation to the given <code>doc</code>: first a
         * stored {@link FieldNames#PARENT} field with the parent id, then the name
         * under which the parent knows the current node.
         *
         * @param doc      the document.
         * @param parentId the id of the parent node.
         * @throws ItemStateException  if the parent node cannot be read.
         * @throws RepositoryException if the parent node does not have a child node
         *                             entry for the current node.
         */
        protected void addParentChildRelation(Document doc,
                                              NodeId parentId)
                throws ItemStateException, RepositoryException {
            // the parent field is added before the parent state is looked up
            doc.add(new Field(FieldNames.PARENT, parentId.toString(),
                    Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS,
                    Field.TermVector.NO));

            NodeState parentState = (NodeState) stateProvider.getItemState(parentId);
            ChildNodeEntry entry = parentState.getChildNodeEntry(node.getNodeId());
            if (entry != null) {
                Name childName = entry.getName();
                addNodeName(doc, childName.getNamespaceURI(), childName.getLocalName());
                return;
            }
            // this can only happen when jackrabbit
            // is running in a cluster.
            throw new RepositoryException(
                    "Missing child node entry for node with id: "
                    + node.getNodeId());
        }
    }

    如果我们把一个类当做一个黑箱,那么我们需要了解的就是:向它输入什么信息,以及能够从它得到什么信息

    循着这种思路,首先要了解的是一个类的构造方法参数,其次再是其他功能方法参数,然后才是返回的类型了

     如是,NodeIndexer类的构造方法如下

     /**
         * Creates a new node indexer. All arguments are kept as given; in
         * addition a name and path resolver is created from the namespace
         * mappings.
         *
         * @param node          the node state to index.
         * @param stateProvider the persistent item state manager to retrieve properties.
         * @param mappings      internal namespace mappings.
         * @param executor      background task executor for text extraction
         * @param parser        parser for binary properties
         */
        public NodeIndexer(
                NodeState node,
                ItemStateManager stateProvider,
                NamespaceMappings mappings,
                Executor executor,
                Parser parser) {
            // collaborators used for full text extraction
            this.parser = parser;
            this.executor = executor;
            // namespace handling
            this.mappings = mappings;
            this.resolver = NamePathResolverImpl.create(mappings);
            // the node to index and the source of its item states
            this.stateProvider = stateProvider;
            this.node = node;
        }

    由此可以看到,NodeIndexer类构建lucene的Document对象时,信息来源主要依赖于NodeState node参数

    其次是NodeIndexer类最重要的构建lucene的Document方法

    /**
         * Creates the lucene Document for {@link #node}. The document receives,
         * in this order: the node boost, the id field, the parent and node name
         * fields (one set per parent for shareable nodes), the fields for every
         * property of the node and finally the fields that were collected as
         * not to be used in excerpts.
         *
         * @return the lucene Document with the index layout.
         * @throws RepositoryException if an error occurs while reading property
         *                             values from the <code>ItemStateProvider</code>.
         */
        public Document createDoc() throws RepositoryException {
            doNotUseInExcerpt.clear();

            Document doc = new Document();
            doc.setBoost(getNodeBoost());

            // special fields
            // UUID
            doc.add(new IDField(node.getNodeId()));
            try {
                // parent UUID
                if (node.getParentId() != null) {
                    if (node.getSharedSet().isEmpty()) {
                        // regular node with exactly one parent
                        addParentChildRelation(doc, node.getParentId());
                    } else {
                        // shareable node: one relation per parent in the shared set
                        for (NodeId sharedParentId : node.getSharedSet()) {
                            addParentChildRelation(doc, sharedParentId);
                        }
                        // mark shareable nodes
                        doc.add(new Field(FieldNames.SHAREABLE_NODE, "",
                                Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
                    }
                } else {
                    // root node: empty parent and empty name
                    doc.add(new Field(FieldNames.PARENT, "", Field.Store.YES,
                            Field.Index.NOT_ANALYZED_NO_NORMS));
                    addNodeName(doc, "", "");
                }
            } catch (NoSuchItemStateException e) {
                throwRepositoryException(e);
            } catch (ItemStateException e) {
                throwRepositoryException(e);
            } catch (NamespaceException e) {
                // will never happen, because this.mappings will dynamically add
                // unknown uri<->prefix mappings
            }

            for (Name propName : node.getPropertyNames()) {
                PropertyId propId = new PropertyId(node.getNodeId(), propName);
                try {
                    PropertyState propState =
                            (PropertyState) stateProvider.getItemState(propId);

                    // add each property to the _PROPERTIES_SET for searching
                    // beginning with V2
                    boolean indexPropertyName = indexFormatVersion.getVersion()
                            >= IndexFormatVersion.V2.getVersion();
                    if (indexPropertyName) {
                        addPropertyName(doc, propState.getName());
                    }

                    InternalValue[] values = propState.getValues();
                    for (int i = 0; i < values.length; i++) {
                        addValue(doc, values[i], propState.getName());
                    }

                    // real multi-valued
                    boolean multiValued = values.length > 1;
                    if (multiValued) {
                        addMVPName(doc, propState.getName());
                    }
                } catch (NoSuchItemStateException e) {
                    throwRepositoryException(e);
                } catch (ItemStateException e) {
                    throwRepositoryException(e);
                }
            }

            // now add fields that are not used in excerpt (must go at the end)
            for (Fieldable deferred : doNotUseInExcerpt) {
                doc.add(deferred);
            }
            return doc;
        }

    该方法根据构造方法初始化的成员变量NodeState node的属性集,依次向Document添加不同类型的Field对象

    ---------------------------------------------------------------------------

    本系列Apache Jackrabbit源码研究系本人原创

    转载请注明出处 博客园 刺猬的温驯

    本文链接 http://www.cnblogs.com/chenying99/archive/2013/04/03/3002823.html

  • 相关阅读:
    83. Remove Duplicates from Sorted List
    35. Search Insert Position
    96. Unique Binary Search Trees
    94. Binary Tree Inorder Traversal
    117. Populating Next Right Pointers in Each Node II
    116. Populating Next Right Pointers in Each Node
    111. Minimum Depth of Binary Tree
    169. Majority Element
    171. Excel Sheet Column Number
    190. Reverse Bits
  • 原文地址:https://www.cnblogs.com/chenying99/p/3002823.html
Copyright © 2011-2022 走看看