From a4e215106dc13403f5908922d870c20444661aaf Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 1 Jun 2025 08:01:26 +0000 Subject: [PATCH 1/6] Jules was unable to complete the task in time. Please review the work done so far and provide feedback for Jules to continue. --- lucene/pom.xml | 150 +++++++ .../ArcadeLuceneIndexFactoryHandler.java | 34 ++ .../lucene/ArcadeLuceneLifecycleManager.java | 46 ++ .../analyzer/OLuceneAnalyzerFactory.java | 141 +++++++ .../OLucenePerFieldAnalyzerWrapper.java | 89 ++++ .../lucene/builder/OLuceneIndexType.java | 207 +++++++++ .../engine/OLuceneCrossClassIndexEngine.java | 399 ++++++++++++++++++ .../engine/OLuceneFullTextIndexEngine.java | 304 +++++++++++++ .../lucene/engine/OLuceneIndexEngine.java | 68 +++ .../OLuceneCrossClassFunctionsFactory.java | 25 ++ .../OLuceneCrossClassSearchFunction.java | 181 ++++++++ .../functions/OLuceneFunctionsFactory.java | 27 ++ .../functions/OLuceneFunctionsUtils.java | 60 +++ .../OLuceneSearchFunctionTemplate.java | 90 ++++ .../OLuceneSearchMoreLikeThisFunction.java | 396 +++++++++++++++++ .../OLuceneSearchOnClassFunction.java | 184 ++++++++ .../OLuceneSearchOnFieldsFunction.java | 200 +++++++++ .../OLuceneSearchOnIndexFunction.java | 198 +++++++++ .../index/ArcadeLuceneFullTextIndex.java | 362 ++++++++++++++++ .../lucene/index/OLuceneFullTextIndex.java | 118 ++++++ .../lucene/query/LuceneIndexCursor.java | 113 +++++ .../lucene/query/OLuceneQueryContext.java | 138 ++++++ .../arcadedb/lucene/tx/OLuceneTxChanges.java | 52 +++ .../lucene/tx/OLuceneTxChangesAbstract.java | 74 ++++ .../lucene/tx/OLuceneTxChangesMultiRid.java | 108 +++++ .../lucene/tx/OLuceneTxChangesSingleRid.java | 92 ++++ .../com.arcadedb.database.index.OIndexFactory | 21 + ...database.sql.functions.OSQLFunctionFactory | 21 + ...atabase.sql.operator.OQueryOperatorFactory | 20 + .../com.arcadedb.index.IndexFactoryHandler | 1 + lucene/src/main/resources/plugin.json | 8 + pom.xml | 1 + 32 files changed, 3928 insertions(+) create mode 100644 lucene/pom.xml create mode 100644 lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneLifecycleManager.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/analyzer/OLuceneAnalyzerFactory.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/analyzer/OLucenePerFieldAnalyzerWrapper.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/builder/OLuceneIndexType.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneCrossClassIndexEngine.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneFullTextIndexEngine.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngine.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassFunctionsFactory.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassSearchFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsFactory.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsUtils.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchFunctionTemplate.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchMoreLikeThisFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnClassFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnFieldsFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnIndexFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/query/OLuceneQueryContext.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChanges.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesAbstract.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesMultiRid.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesSingleRid.java create mode 100644 lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory create mode 100644 lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory create mode 100644 lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory create mode 100644 lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler create mode 100644 lucene/src/main/resources/plugin.json diff --git a/lucene/pom.xml b/lucene/pom.xml new file mode 100644 index 0000000000..16d6341c39 --- /dev/null +++ b/lucene/pom.xml @@ -0,0 +1,150 @@ + + + 4.0.0 + + + com.arcadedb + arcadedb-parent + 25.6.1-SNAPSHOT + ../pom.xml + + + arcadedb-lucene + jar + ArcadeDB Lucene + Lucene full-text search engine integration for ArcadeDB. + + + + + + 10.2.1 + 0.8 + 1.20.0 + + + + + + com.arcadedb + arcadedb-engine + ${project.version} + + + + + + org.apache.lucene + lucene-core + ${lucene.version} + + + org.apache.lucene + lucene-analysis-common + ${lucene.version} + + + org.apache.lucene + lucene-queryparser + ${lucene.version} + + + org.apache.lucene + lucene-queries + ${lucene.version} + + + org.apache.lucene + lucene-misc + ${lucene.version} + + + org.apache.lucene + lucene-facet + ${lucene.version} + + + org.apache.lucene + lucene-memory + ${lucene.version} + + + org.apache.lucene + lucene-highlighter + ${lucene.version} + + + org.apache.lucene + lucene-codecs + ${lucene.version} + + + org.apache.lucene + lucene-backward-codecs + ${lucene.version} + + + org.apache.lucene + lucene-spatial-extras + ${lucene.version} + + + + + + org.locationtech.spatial4j + spatial4j + ${spatial4j.version} + + + org.locationtech.jts + jts-core + ${jts-core.version} + + + + + org.slf4j + slf4j-api + 1.7.36 + + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + true + true + + + + + + + + diff --git a/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java new file mode 100644 index 0000000000..3edb15da20 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java @@ -0,0 +1,34 @@ +package com.arcadedb.lucene; + +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.index.IndexFactoryHandler; +import com.arcadedb.index.IndexInternal; +import com.arcadedb.schema.IndexBuilder; +import com.arcadedb.schema.Type; +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; +import java.util.Map; + +public class ArcadeLuceneIndexFactoryHandler implements IndexFactoryHandler { + + @Override + public IndexInternal create(IndexBuilder builder) { + DatabaseInternal database = builder.getDatabase(); + String indexName = builder.getIndexName(); + boolean unique = builder.isUnique(); + // Schema.INDEX_TYPE indexType = builder.getIndexType(); // This is implicitly "FULL_TEXT" for this handler + Type[] keyTypes = builder.getKeyTypes(); + Map properties = builder.getProperties(); + String filePath = builder.getFilePath(); + + + String analyzerClassName = org.apache.lucene.analysis.standard.StandardAnalyzer.class.getName(); + if (properties != null && properties.containsKey("analyzer")) { + analyzerClassName = properties.get("analyzer"); + } + + // The actual ArcadeLuceneFullTextIndex will need to be instantiated here. + // Its constructor will need to be defined to accept these parameters. + // Adding filePath and keyTypes to the constructor call. + return new com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex(database, indexName, unique, analyzerClassName, filePath, keyTypes); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneLifecycleManager.java b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneLifecycleManager.java new file mode 100644 index 0000000000..35e0947279 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneLifecycleManager.java @@ -0,0 +1,46 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.arcadedb.lucene; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +// This class might serve as the main plugin class listed in plugin.json for initialization purposes, +// or handle lifecycle events if ArcadeDB's plugin API expects a specific class for that. +// For now, it's minimal. +public class ArcadeLuceneLifecycleManager { + private static final Logger logger = LoggerFactory.getLogger(ArcadeLuceneLifecycleManager.class); + + // This constant might be better placed in ArcadeLuceneIndexFactoryHandler or a shared constants class. + public static final String LUCENE_ALGORITHM = "LUCENE"; + + public ArcadeLuceneLifecycleManager() { + this(false); + } + + public ArcadeLuceneLifecycleManager(boolean manual) { + if (!manual) { + logger.info("ArcadeLuceneLifecycleManager initialized (manual: {}).", manual); + // Further initialization or listener registration logic specific to ArcadeDB's plugin system + // would go here if this class is the entry point. + } + } + + // Any necessary lifecycle methods (e.g., from a specific ArcadeDB plugin interface) would be here. + // For now, assuming it does not need to implement DatabaseListener directly. + // Drop logic for indexes of this type should be handled by the Index.drop() method. +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLuceneAnalyzerFactory.java b/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLuceneAnalyzerFactory.java new file mode 100644 index 0000000000..3641f43536 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLuceneAnalyzerFactory.java @@ -0,0 +1,141 @@ +package com.arcadedb.lucene.analyzer; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.common.log.OLogManager; +import com.arcadedb.common.log.OLogger; +import com.arcadedb.database.index.OIndexDefinition; +import com.arcadedb.database.index.OIndexException; +import com.arcadedb.database.metadata.schema.OType; +import com.arcadedb.database.record.impl.ODocument; +import java.lang.reflect.Constructor; +import java.util.Collection; +import java.util.Locale; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.standard.StandardAnalyzer; + +/** Created by frank on 30/10/2015. */ +public class OLuceneAnalyzerFactory { + private static final OLogger logger = OLogManager.instance().logger(OLuceneAnalyzerFactory.class); + + public Analyzer createAnalyzer( + final OIndexDefinition index, final AnalyzerKind kind, final ODocument metadata) { + if (index == null) { + throw new IllegalArgumentException("Index must not be null"); + } + if (kind == null) { + throw new IllegalArgumentException("Analyzer kind must not be null"); + } + if (metadata == null) { + throw new IllegalArgumentException("Metadata must not be null"); + } + final String defaultAnalyzerFQN = metadata.field("default"); + final String prefix = index.getClassName() + "."; + + final OLucenePerFieldAnalyzerWrapper analyzer = + geLucenePerFieldPresetAnalyzerWrapperForAllFields(defaultAnalyzerFQN); + setDefaultAnalyzerForRequestedKind(index, kind, metadata, prefix, analyzer); + setSpecializedAnalyzersForEachField(index, kind, metadata, prefix, analyzer); + return analyzer; + } + + private OLucenePerFieldAnalyzerWrapper geLucenePerFieldPresetAnalyzerWrapperForAllFields( + final String defaultAnalyzerFQN) { + if (defaultAnalyzerFQN == null) { + return new OLucenePerFieldAnalyzerWrapper(new StandardAnalyzer()); + } else { + return new OLucenePerFieldAnalyzerWrapper(buildAnalyzer(defaultAnalyzerFQN)); + } + } + + private void setDefaultAnalyzerForRequestedKind( + final OIndexDefinition index, + final AnalyzerKind kind, + final ODocument metadata, + final String prefix, + final OLucenePerFieldAnalyzerWrapper analyzer) { + final String specializedAnalyzerFQN = metadata.field(kind.toString()); + if (specializedAnalyzerFQN != null) { + for (final String field : index.getFields()) { + analyzer.add(field, buildAnalyzer(specializedAnalyzerFQN)); + analyzer.add(prefix + field, buildAnalyzer(specializedAnalyzerFQN)); + } + } + } + + private void setSpecializedAnalyzersForEachField( + final OIndexDefinition index, + final AnalyzerKind kind, + final ODocument metadata, + final String prefix, + final OLucenePerFieldAnalyzerWrapper analyzer) { + for (final String field : index.getFields()) { + final String analyzerName = field + "_" + kind.toString(); + final String analyzerStopwords = analyzerName + "_stopwords"; + + if (metadata.containsField(analyzerName) && metadata.containsField(analyzerStopwords)) { + final Collection stopWords = metadata.field(analyzerStopwords, OType.EMBEDDEDLIST); + analyzer.add(field, buildAnalyzer(metadata.field(analyzerName), stopWords)); + analyzer.add(prefix + field, buildAnalyzer(metadata.field(analyzerName), stopWords)); + } else if (metadata.containsField(analyzerName)) { + analyzer.add(field, buildAnalyzer(metadata.field(analyzerName))); + analyzer.add(prefix + field, buildAnalyzer(metadata.field(analyzerName))); + } + } + } + + private Analyzer buildAnalyzer(final String analyzerFQN) { + try { + final Class classAnalyzer = Class.forName(analyzerFQN); + final Constructor constructor = classAnalyzer.getConstructor(); + return (Analyzer) constructor.newInstance(); + } catch (final ClassNotFoundException e) { + throw OException.wrapException( + new OIndexException("Analyzer: " + analyzerFQN + " not found"), e); + } catch (final NoSuchMethodException e) { + Class classAnalyzer = null; + try { + classAnalyzer = Class.forName(analyzerFQN); + return (Analyzer) classAnalyzer.newInstance(); + } catch (Exception e1) { + logger.error("Exception is suppressed, original exception is ", e); + //noinspection ThrowInsideCatchBlockWhichIgnoresCaughtException + throw OException.wrapException( + new OIndexException("Couldn't instantiate analyzer: public constructor not found"), + e1); + } + } catch (Exception e) { + logger.error( + "Error on getting analyzer for Lucene index (continuing with StandardAnalyzer)", e); + return new StandardAnalyzer(); + } + } + + private Analyzer buildAnalyzer(final String analyzerFQN, final Collection stopwords) { + try { + final Class classAnalyzer = Class.forName(analyzerFQN); + final Constructor constructor = classAnalyzer.getDeclaredConstructor(CharArraySet.class); + return (Analyzer) constructor.newInstance(new CharArraySet(stopwords, true)); + } catch (final ClassNotFoundException e) { + throw OException.wrapException( + new OIndexException("Analyzer: " + analyzerFQN + " not found"), e); + } catch (final NoSuchMethodException e) { + throw OException.wrapException( + new OIndexException("Couldn't instantiate analyzer: public constructor not found"), e); + } catch (final Exception e) { + logger.error( + "Error on getting analyzer for Lucene index (continuing with StandardAnalyzer)", e); + return new StandardAnalyzer(); + } + } + + public enum AnalyzerKind { + INDEX, + QUERY; + + @Override + public String toString() { + return name().toLowerCase(Locale.ENGLISH); + } + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLucenePerFieldAnalyzerWrapper.java b/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLucenePerFieldAnalyzerWrapper.java new file mode 100644 index 0000000000..53237815ff --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLucenePerFieldAnalyzerWrapper.java @@ -0,0 +1,89 @@ +package com.arcadedb.lucene.analyzer; + +import static com.arcadedb.lucene.engine.OLuceneIndexEngineAbstract.RID; + +import com.arcadedb.lucene.builder.OLuceneIndexType; +import java.util.HashMap; +import java.util.Map; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.DelegatingAnalyzerWrapper; +import org.apache.lucene.analysis.core.KeywordAnalyzer; + +/** + * Created by frank on 10/12/15. + * + *

Doesn't allow to wrap components or readers. Thread local resources can be + delegated to the + * delegate analyzer, but not allocated on this analyzer (limit memory consumption). Uses a per + * field reuse strategy. + */ +public class OLucenePerFieldAnalyzerWrapper extends DelegatingAnalyzerWrapper { + private final Analyzer defaultDelegateAnalyzer; + private final Map fieldAnalyzers; + + /** + * Constructs with default analyzer. + * + * @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use + * the one provided here. + */ + public OLucenePerFieldAnalyzerWrapper(final Analyzer defaultAnalyzer) { + this(defaultAnalyzer, new HashMap<>()); + } + + /** + * Constructs with default analyzer and a map of analyzers to use for specific fields. + * + * @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use + * the one provided here. + * @param fieldAnalyzers a Map (String field name to the Analyzer) to be used for those fields + */ + public OLucenePerFieldAnalyzerWrapper( + final Analyzer defaultAnalyzer, final Map fieldAnalyzers) { + super(PER_FIELD_REUSE_STRATEGY); + this.defaultDelegateAnalyzer = defaultAnalyzer; + this.fieldAnalyzers = new HashMap<>(); + + this.fieldAnalyzers.putAll(fieldAnalyzers); + + this.fieldAnalyzers.put(RID, new KeywordAnalyzer()); + this.fieldAnalyzers.put(OLuceneIndexType.RID_HASH, new KeywordAnalyzer()); + this.fieldAnalyzers.put("_CLASS", new KeywordAnalyzer()); + this.fieldAnalyzers.put("_CLUSTER", new KeywordAnalyzer()); + this.fieldAnalyzers.put("_JSON", new KeywordAnalyzer()); + } + + @Override + protected Analyzer getWrappedAnalyzer(final String fieldName) { + final Analyzer analyzer = fieldAnalyzers.get(fieldName); + return (analyzer != null) ? analyzer : defaultDelegateAnalyzer; + } + + @Override + public String toString() { + return "PerFieldAnalyzerWrapper(" + + fieldAnalyzers + + ", default=" + + defaultDelegateAnalyzer + + ")"; + } + + public OLucenePerFieldAnalyzerWrapper add(final String field, final Analyzer analyzer) { + fieldAnalyzers.put(field, analyzer); + return this; + } + + public OLucenePerFieldAnalyzerWrapper add(final OLucenePerFieldAnalyzerWrapper analyzer) { + fieldAnalyzers.putAll(analyzer.getAnalyzers()); + return this; + } + + public OLucenePerFieldAnalyzerWrapper remove(final String field) { + fieldAnalyzers.remove(field); + return this; + } + + protected Map getAnalyzers() { + return fieldAnalyzers; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/builder/OLuceneIndexType.java b/lucene/src/main/java/com/arcadedb/lucene/builder/OLuceneIndexType.java new file mode 100644 index 0000000000..8459173f22 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/builder/OLuceneIndexType.java @@ -0,0 +1,207 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.arcadedb.lucene.builder; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.lucene.engine.OLuceneIndexEngineAbstract; +import com.arcadedb.lucene.exception.OLuceneIndexException; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.index.OCompositeKey; +import com.arcadedb.database.index.OIndexDefinition; +import com.arcadedb.database.record.impl.ODocument; +import java.io.UnsupportedEncodingException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Date; +import java.util.List; +import java.util.Locale; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; + +/** Created by enricorisa on 21/03/14. */ +public class OLuceneIndexType { + public static final String RID_HASH = "_RID_HASH"; + + public static Field createField( + final String fieldName, final Object value, final Field.Store store /*,Field.Index index*/) { + // metadata fields: _CLASS, _CLUSTER + if (fieldName.startsWith("_CLASS") || fieldName.startsWith("_CLUSTER")) { + return new StringField(fieldName, value.toString(), store); + } + return new TextField(fieldName, value.toString(), Field.Store.YES); + } + + public static String extractId(Document doc) { + String value = doc.get(RID_HASH); + if (value != null) { + int pos = value.indexOf("|"); + if (pos > 0) { + return value.substring(0, pos); + } else { + return value; + } + } else { + return null; + } + } + + public static Field createIdField(final OIdentifiable id, final Object key) { + return new StringField(RID_HASH, genValueId(id, key), Field.Store.YES); + } + + public static Field createOldIdField(final OIdentifiable id) { + return new StringField( + OLuceneIndexEngineAbstract.RID, id.getIdentity().toString(), Field.Store.YES); + } + + public static String genValueId(final OIdentifiable id, final Object key) { + String value = id.getIdentity().toString() + "|"; + value += hashKey(key); + return value; + } + + public static List createFields( + String fieldName, Object value, Field.Store store, Boolean sort) { + List fields = new ArrayList<>(); + if (value instanceof Number) { + Number number = (Number) value; + if (value instanceof Long) { + fields.add(new NumericDocValuesField(fieldName, number.longValue())); + fields.add(new LongPoint(fieldName, number.longValue())); + return fields; + } else if (value instanceof Float) { + fields.add(new FloatDocValuesField(fieldName, number.floatValue())); + fields.add(new FloatPoint(fieldName, number.floatValue())); + return fields; + } else if (value instanceof Double) { + fields.add(new DoubleDocValuesField(fieldName, number.doubleValue())); + fields.add(new DoublePoint(fieldName, number.doubleValue())); + return fields; + } + fields.add(new NumericDocValuesField(fieldName, number.longValue())); + fields.add(new IntPoint(fieldName, number.intValue())); + return fields; + } else if (value instanceof Date) { + Date date = (Date) value; + fields.add(new NumericDocValuesField(fieldName, date.getTime())); + fields.add(new LongPoint(fieldName, date.getTime())); + return fields; + } + if (Boolean.TRUE.equals(sort)) { + fields.add(new SortedDocValuesField(fieldName, new BytesRef(value.toString()))); + } + fields.add(new TextField(fieldName, value.toString(), Field.Store.YES)); + return fields; + } + + public static Query createExactQuery(OIndexDefinition index, Object key) { + Query query = null; + if (key instanceof String) { + final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + if (index.getFields().size() > 0) { + for (String idx : index.getFields()) { + queryBuilder.add( + new TermQuery(new Term(idx, key.toString())), BooleanClause.Occur.SHOULD); + } + } else { + queryBuilder.add( + new TermQuery(new Term(OLuceneIndexEngineAbstract.KEY, key.toString())), + BooleanClause.Occur.SHOULD); + } + query = queryBuilder.build(); + } else if (key instanceof OCompositeKey) { + final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + int i = 0; + OCompositeKey keys = (OCompositeKey) key; + for (String idx : index.getFields()) { + String val = (String) keys.getKeys().get(i); + queryBuilder.add(new TermQuery(new Term(idx, val)), BooleanClause.Occur.MUST); + i++; + } + query = queryBuilder.build(); + } + return query; + } + + public static Query createQueryId(OIdentifiable value) { + return new TermQuery(new Term(OLuceneIndexEngineAbstract.RID, value.getIdentity().toString())); + } + + public static Query createQueryId(OIdentifiable value, Object key) { + return new TermQuery(new Term(RID_HASH, genValueId(value, key))); + } + + public static String hashKey(Object key) { + try { + String keyString; + if (key instanceof ODocument) { + keyString = ((ODocument) key).toJSON(); + } else { + keyString = key.toString(); + } + MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); + byte[] bytes = sha256.digest(keyString.getBytes("UTF-8")); + return Base64.getEncoder().encodeToString(bytes); + } catch (NoSuchAlgorithmException e) { + throw OException.wrapException(new OLuceneIndexException("fail to find sha algorithm"), e); + + } catch (UnsupportedEncodingException e) { + throw OException.wrapException(new OLuceneIndexException("fail to find utf-8 encoding"), e); + } + } + + public static Query createDeleteQuery( + OIdentifiable value, List fields, Object key, ODocument metadata) { + + // TODO Implementation of Composite keys with Collection + final BooleanQuery.Builder filter = new BooleanQuery.Builder(); + final BooleanQuery.Builder builder = new BooleanQuery.Builder(); + // TODO: Condition on Id and field key only for backward compatibility + if (value != null) { + builder.add(createQueryId(value), BooleanClause.Occur.MUST); + } + String field = fields.iterator().next(); + builder.add( + new TermQuery(new Term(field, key.toString().toLowerCase(Locale.ENGLISH))), + BooleanClause.Occur.MUST); + + filter.add(builder.build(), BooleanClause.Occur.SHOULD); + if (value != null) { + filter.add(createQueryId(value, key), BooleanClause.Occur.SHOULD); + } + + return filter.build(); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneCrossClassIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneCrossClassIndexEngine.java new file mode 100644 index 0000000000..31a8811ddd --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneCrossClassIndexEngine.java @@ -0,0 +1,399 @@ +package com.arcadedb.lucene.engine; + +import static com.arcadedb.lucene.OLuceneIndexFactory.LUCENE_ALGORITHM; + +import com.arcadedb.common.log.OLogManager; +import com.arcadedb.common.log.OLogger; +import com.arcadedb.common.util.ORawPair; +import com.arcadedb.lucene.analyzer.OLucenePerFieldAnalyzerWrapper; +import com.arcadedb.lucene.collections.OLuceneResultSet; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.lucene.parser.OLuceneMultiFieldQueryParser; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.lucene.query.OLuceneQueryContext; +import com.arcadedb.lucene.tx.OLuceneTxChanges; +import com.arcadedb.database.config.IndexEngineData; +import com.arcadedb.database.ODatabaseRecordThreadLocal; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.OContextualRecordId; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.index.OIndex; +import com.arcadedb.database.index.OIndexDefinition; +import com.arcadedb.database.index.OIndexKeyUpdater; +import com.arcadedb.database.index.OIndexMetadata; +import com.arcadedb.database.index.engine.IndexEngineValidator; +import com.arcadedb.database.index.engine.IndexEngineValuesTransformer; +import com.arcadedb.database.metadata.schema.OClass; +import com.arcadedb.database.metadata.schema.OType; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.storage.OStorage; +import com.arcadedb.database.storage.impl.local.paginated.atomicoperations.OAtomicOperation; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.highlight.TextFragment; + +/** + * Created by frank on 03/11/2016. + */ +public class OLuceneCrossClassIndexEngine implements OLuceneIndexEngine { + private static final OLogger logger = + OLogManager.instance().logger(OLuceneCrossClassIndexEngine.class); + private final OStorage storage; + private final String indexName; + private final int indexId; + + public OLuceneCrossClassIndexEngine(int indexId, OStorage storage, String indexName) { + this.indexId = indexId; + + this.storage = storage; + this.indexName = indexName; + } + + @Override + public void init(OIndexMetadata metadata) {} + + @Override + public void flush() {} + + @Override + public int getId() { + return indexId; + } + + @Override + public void create(OAtomicOperation atomicOperation, IndexEngineData data) throws IOException {} + + @Override + public void delete(OAtomicOperation atomicOperation) {} + + @Override + public void load(IndexEngineData data) {} + + @Override + public boolean remove(OAtomicOperation atomicOperation, Object key) { + return false; + } + + @Override + public void clear(OAtomicOperation atomicOperation) {} + + @Override + public void close() {} + + @Override + public Object get(Object key) { + + final OLuceneKeyAndMetadata keyAndMeta = (OLuceneKeyAndMetadata) key; + final ODocument metadata = keyAndMeta.metadata; + final List excludes = + Optional.ofNullable(metadata.>getProperty("excludes")) + .orElse(Collections.emptyList()); + final List includes = + Optional.ofNullable(metadata.>getProperty("includes")) + .orElse(Collections.emptyList()); + + final Collection indexes = + ODatabaseRecordThreadLocal.instance() + .get() + .getMetadata() + .getIndexManager() + .getIndexes() + .stream() + .filter(i -> !excludes.contains(i.getName())) + .filter(i -> includes.isEmpty() || includes.contains(i.getName())) + .collect(Collectors.toList()); + + final OLucenePerFieldAnalyzerWrapper globalAnalyzer = + new OLucenePerFieldAnalyzerWrapper(new StandardAnalyzer()); + + final List globalFields = new ArrayList(); + + final List globalReaders = new ArrayList(); + final Map types = new HashMap<>(); + + try { + for (OIndex index : indexes) { + + if (index.getAlgorithm().equalsIgnoreCase(LUCENE_ALGORITHM) + && index.getType().equalsIgnoreCase(OClass.INDEX_TYPE.FULLTEXT.toString())) { + + final OIndexDefinition definition = index.getDefinition(); + final String className = definition.getClassName(); + + String[] indexFields = + definition.getFields().toArray(new String[definition.getFields().size()]); + + for (int i = 0; i < indexFields.length; i++) { + String field = indexFields[i]; + + types.put(className + "." + field, definition.getTypes()[i]); + globalFields.add(className + "." + field); + } + + OLuceneFullTextIndex fullTextIndex = (OLuceneFullTextIndex) index.getInternal(); + + globalAnalyzer.add((OLucenePerFieldAnalyzerWrapper) fullTextIndex.queryAnalyzer()); + + globalReaders.add(fullTextIndex.searcher().getIndexReader()); + } + } + + IndexReader indexReader = new MultiReader(globalReaders.toArray(new IndexReader[] {})); + + IndexSearcher searcher = new IndexSearcher(indexReader); + + Map boost = + Optional.ofNullable(metadata.>getProperty("boost")) + .orElse(new HashMap<>()); + + OLuceneMultiFieldQueryParser p = + new OLuceneMultiFieldQueryParser( + types, globalFields.toArray(new String[] {}), globalAnalyzer, boost); + + p.setAllowLeadingWildcard( + Optional.ofNullable(metadata.getProperty("allowLeadingWildcard")).orElse(false)); + + p.setSplitOnWhitespace( + Optional.ofNullable(metadata.getProperty("splitOnWhitespace")).orElse(true)); + + Object params = keyAndMeta.key.getKeys().get(0); + + Query query = p.parse(params.toString()); + + final List fields = OLuceneIndexEngineUtils.buildSortFields(metadata); + + OLuceneQueryContext ctx = new OLuceneQueryContext(null, searcher, query, fields); + return new OLuceneResultSet(this, ctx, metadata); + } catch (IOException e) { + logger.error("unable to create multi-reader", e); + } catch (ParseException e) { + logger.error("unable to parse query", e); + } + + return null; + } + + @Override + public void put(OAtomicOperation atomicOperation, Object key, Object value) {} + + @Override + public void put(OAtomicOperation atomicOperation, Object key, ORID value) {} + + @Override + public boolean remove(OAtomicOperation atomicOperation, Object key, ORID value) { + return false; + } + + @Override + public void update( + OAtomicOperation atomicOperation, Object key, OIndexKeyUpdater updater) {} + + @Override + public boolean validatedPut( + OAtomicOperation atomicOperation, + Object key, + ORID value, + IndexEngineValidator validator) { + return false; + } + + @Override + public Stream> iterateEntriesBetween( + Object rangeFrom, + boolean fromInclusive, + Object rangeTo, + boolean toInclusive, + boolean ascSortOrder, + IndexEngineValuesTransformer transformer) { + return Stream.empty(); + } + + @Override + public Stream> iterateEntriesMajor( + Object fromKey, + boolean isInclusive, + boolean ascSortOrder, + IndexEngineValuesTransformer transformer) { + return Stream.empty(); + } + + @Override + public Stream> iterateEntriesMinor( + Object toKey, + boolean isInclusive, + boolean ascSortOrder, + IndexEngineValuesTransformer transformer) { + return Stream.empty(); + } + + @Override + public Stream> stream(IndexEngineValuesTransformer valuesTransformer) { + return Stream.empty(); + } + + @Override + public Stream> descStream(IndexEngineValuesTransformer valuesTransformer) { + return Stream.empty(); + } + + @Override + public Stream keyStream() { + return Stream.empty(); + } + + @Override + public long size(IndexEngineValuesTransformer transformer) { + return 0; + } + + @Override + public boolean hasRangeQuerySupport() { + return false; + } + + @Override + public String getName() { + return indexName; + } + + @Override + public boolean acquireAtomicExclusiveLock(Object key) { + return false; + } + + @Override + public String getIndexNameByKey(Object key) { + return null; + } + + @Override + public String indexName() { + return indexName; + } + + @Override + public void onRecordAddedToResultSet( + OLuceneQueryContext queryContext, + OContextualRecordId recordId, + Document ret, + final ScoreDoc score) { + + recordId.setContext( + new HashMap() { + { + Map frag = queryContext.getFragments(); + + frag.entrySet().stream() + .forEach( + f -> { + TextFragment[] fragments = f.getValue(); + StringBuilder hlField = new StringBuilder(); + for (int j = 0; j < fragments.length; j++) { + if ((fragments[j] != null) && (fragments[j].getScore() > 0)) { + hlField.append(fragments[j].toString()); + } + } + put("$" + f.getKey() + "_hl", hlField.toString()); + }); + + put("$score", score.score); + } + }); + } + + @Override + public Document buildDocument(Object key, OIdentifiable value) { + return null; + } + + @Override + public Query buildQuery(Object query) { + return null; + } + + @Override + public Analyzer indexAnalyzer() { + return null; + } + + @Override + public Analyzer queryAnalyzer() { + return null; + } + + @Override + public boolean remove(Object key, OIdentifiable value) { + return false; + } + + @Override + public IndexSearcher searcher() { + return null; + } + + @Override + public void release(IndexSearcher searcher) {} + + @Override + public Set getInTx(Object key, OLuceneTxChanges changes) { + return null; + } + + @Override + public long sizeInTx(OLuceneTxChanges changes) { + return 0; + } + + @Override + public OLuceneTxChanges buildTxChanges() throws IOException { + return null; + } + + @Override + public Query deleteQuery(Object key, OIdentifiable value) { + return null; + } + + @Override + public boolean isCollectionIndex() { + return false; + } + + @Override + public void freeze(boolean throwException) {} + + @Override + public void release() {} + + @Override + public void updateUniqueIndexVersion(Object key) {} + + @Override + public int getUniqueIndexVersion(Object key) { + return 0; + } + + @Override + public boolean remove(Object key) { + return false; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneFullTextIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneFullTextIndexEngine.java new file mode 100644 index 0000000000..646ac57992 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneFullTextIndexEngine.java @@ -0,0 +1,304 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.arcadedb.lucene.engine; + +import static com.arcadedb.lucene.builder.OLuceneQueryBuilder.EMPTY_METADATA; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.common.log.OLogManager; +import com.arcadedb.common.log.OLogger; +import com.arcadedb.common.util.ORawPair; +import com.arcadedb.lucene.builder.OLuceneDocumentBuilder; +import com.arcadedb.lucene.builder.OLuceneIndexType; +import com.arcadedb.lucene.builder.OLuceneQueryBuilder; +import com.arcadedb.lucene.collections.LuceneIndexTransformer; +import com.arcadedb.lucene.collections.OLuceneCompositeKey; +import com.arcadedb.lucene.collections.OLuceneResultSet; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.lucene.query.OLuceneQueryContext; +import com.arcadedb.lucene.tx.OLuceneTxChanges; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.OContextualRecordId; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.index.OCompositeKey; +import com.arcadedb.database.index.OIndexEngineException; +import com.arcadedb.database.index.OIndexKeyUpdater; +import com.arcadedb.database.index.OIndexMetadata; +import com.arcadedb.database.index.engine.IndexEngineValidator; +import com.arcadedb.database.index.engine.IndexEngineValuesTransformer; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.parser.ParseException; +import com.arcadedb.database.storage.OStorage; +import com.arcadedb.database.storage.impl.local.paginated.atomicoperations.OAtomicOperation; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Stream; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.highlight.TextFragment; +import org.apache.lucene.store.Directory; + +public class OLuceneFullTextIndexEngine extends OLuceneIndexEngineAbstract { + private static final OLogger logger = + OLogManager.instance().logger(OLuceneFullTextIndexEngine.class); + + private final OLuceneDocumentBuilder builder; + private OLuceneQueryBuilder queryBuilder; + private final AtomicLong bonsayFileId = new AtomicLong(0); + + public OLuceneFullTextIndexEngine(OStorage storage, String idxName, int id) { + super(id, storage, idxName); + builder = new OLuceneDocumentBuilder(); + } + + @Override + public void init(OIndexMetadata im) { + super.init(im); + queryBuilder = new OLuceneQueryBuilder(im.getMetadata()); + } + + @Override + public IndexWriter createIndexWriter(Directory directory) throws IOException { + + OLuceneIndexWriterFactory fc = new OLuceneIndexWriterFactory(); + + logger.debug("Creating Lucene index in '%s'...", directory); + + return fc.createIndexWriter(directory, metadata, indexAnalyzer()); + } + + @Override + public void onRecordAddedToResultSet( + final OLuceneQueryContext queryContext, + final OContextualRecordId recordId, + final Document ret, + final ScoreDoc score) { + HashMap data = new HashMap(); + + final Map frag = queryContext.getFragments(); + frag.forEach( + (key, fragments) -> { + final StringBuilder hlField = new StringBuilder(); + for (final TextFragment fragment : fragments) { + if ((fragment != null) && (fragment.getScore() > 0)) { + hlField.append(fragment.toString()); + } + } + data.put("$" + key + "_hl", hlField.toString()); + }); + data.put("$score", score.score); + + recordId.setContext(data); + } + + @Override + public boolean remove(final OAtomicOperation atomicOperation, final Object key) { + return remove(key); + } + + @Override + public boolean remove(OAtomicOperation atomicOperation, Object key, ORID value) { + return remove(key, value); + } + + @Override + public Object get(final Object key) { + return getInTx(key, null); + } + + @Override + public void update( + final OAtomicOperation atomicOperation, + final Object key, + final OIndexKeyUpdater updater) { + put(atomicOperation, key, updater.update(null, bonsayFileId).getValue()); + } + + @Override + public void put(final OAtomicOperation atomicOperation, final Object key, final Object value) { + updateLastAccess(); + openIfClosed(); + final Document doc = buildDocument(key, (OIdentifiable) value); + addDocument(doc); + } + + @Override + public void put(OAtomicOperation atomicOperation, Object key, ORID value) { + updateLastAccess(); + openIfClosed(); + final Document doc = buildDocument(key, value); + addDocument(doc); + } + + @Override + public boolean validatedPut( + OAtomicOperation atomicOperation, + Object key, + ORID value, + IndexEngineValidator validator) { + throw new UnsupportedOperationException( + "Validated put is not supported by OLuceneFullTextIndexEngine"); + } + + @Override + public Stream> iterateEntriesBetween( + Object rangeFrom, + boolean fromInclusive, + Object rangeTo, + boolean toInclusive, + boolean ascSortOrder, + IndexEngineValuesTransformer transformer) { + return LuceneIndexTransformer.transformToStream((OLuceneResultSet) get(rangeFrom), rangeFrom); + } + + private Set getResults( + final Query query, + final OCommandContext context, + final OLuceneTxChanges changes, + final ODocument metadata) { + // sort + final List fields = OLuceneIndexEngineUtils.buildSortFields(metadata); + final IndexSearcher luceneSearcher = searcher(); + final OLuceneQueryContext queryContext = + new OLuceneQueryContext(context, luceneSearcher, query, fields).withChanges(changes); + return new OLuceneResultSet(this, queryContext, metadata); + } + + @Override + public Stream> iterateEntriesMajor( + Object fromKey, + boolean isInclusive, + boolean ascSortOrder, + IndexEngineValuesTransformer transformer) { + return null; + } + + @Override + public Stream> iterateEntriesMinor( + Object toKey, + boolean isInclusive, + boolean ascSortOrder, + IndexEngineValuesTransformer transformer) { + return null; + } + + @Override + public boolean hasRangeQuerySupport() { + return false; + } + + @Override + public void updateUniqueIndexVersion(Object key) { + // not implemented + } + + @Override + public int getUniqueIndexVersion(Object key) { + return 0; // not implemented + } + + @Override + public Document buildDocument(Object key, OIdentifiable value) { + if (indexDefinition.isAutomatic()) { + // builder.newBuild(index, key, value); + + return builder.build(indexDefinition, key, value, collectionFields, metadata); + } else { + return putInManualindex(key, value); + } + } + + private static Document putInManualindex(Object key, OIdentifiable oIdentifiable) { + Document doc = new Document(); + doc.add(OLuceneIndexType.createOldIdField(oIdentifiable)); + doc.add(OLuceneIndexType.createIdField(oIdentifiable, key)); + + if (key instanceof OCompositeKey) { + + List keys = ((OCompositeKey) key).getKeys(); + + int k = 0; + for (Object o : keys) { + doc.add(OLuceneIndexType.createField("k" + k, o, Field.Store.YES)); + k++; + } + } else if (key instanceof Collection) { + @SuppressWarnings("unchecked") + Collection keys = (Collection) key; + + int k = 0; + for (Object o : keys) { + doc.add(OLuceneIndexType.createField("k" + k, o, Field.Store.YES)); + k++; + } + } else { + doc.add(OLuceneIndexType.createField("k0", key, Field.Store.NO)); + } + return doc; + } + + @Override + public Query buildQuery(final Object maybeQuery) { + try { + if (maybeQuery instanceof String) { + return queryBuilder.query(indexDefinition, maybeQuery, EMPTY_METADATA, queryAnalyzer()); + } else { + OLuceneKeyAndMetadata q = (OLuceneKeyAndMetadata) maybeQuery; + return queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer()); + } + } catch (final ParseException e) { + throw OException.wrapException(new OIndexEngineException("Error parsing query"), e); + } + } + + @Override + public Set getInTx(Object key, OLuceneTxChanges changes) { + updateLastAccess(); + openIfClosed(); + try { + if (key instanceof OLuceneKeyAndMetadata) { + OLuceneKeyAndMetadata q = (OLuceneKeyAndMetadata) key; + Query query = queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer()); + + OCommandContext commandContext = q.key.getContext(); + return getResults(query, commandContext, changes, q.metadata); + + } else { + Query query = queryBuilder.query(indexDefinition, key, EMPTY_METADATA, queryAnalyzer()); + + OCommandContext commandContext = null; + if (key instanceof OLuceneCompositeKey) { + commandContext = ((OLuceneCompositeKey) key).getContext(); + } + return getResults(query, commandContext, changes, EMPTY_METADATA); + } + } catch (ParseException e) { + throw OException.wrapException(new OIndexEngineException("Error parsing lucene query"), e); + } + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngine.java new file mode 100644 index 0000000000..7336b359b9 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngine.java @@ -0,0 +1,68 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.engine; + +import com.arcadedb.lucene.query.OLuceneQueryContext; +import com.arcadedb.lucene.tx.OLuceneTxChanges; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.OContextualRecordId; +import com.arcadedb.database.index.engine.OIndexEngine; +import com.arcadedb.database.storage.impl.local.OFreezableStorageComponent; +import java.io.IOException; +import java.util.Set; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; + +/** Created by Enrico Risa on 04/09/15. */ +public interface OLuceneIndexEngine extends OIndexEngine, OFreezableStorageComponent { + + String indexName(); + + void onRecordAddedToResultSet( + OLuceneQueryContext queryContext, OContextualRecordId recordId, Document ret, ScoreDoc score); + + Document buildDocument(Object key, OIdentifiable value); + + Query buildQuery(Object query); + + Analyzer indexAnalyzer(); + + Analyzer queryAnalyzer(); + + boolean remove(Object key, OIdentifiable value); + + boolean remove(Object key); + + IndexSearcher searcher(); + + void release(IndexSearcher searcher); + + Set getInTx(Object key, OLuceneTxChanges changes); + + long sizeInTx(OLuceneTxChanges changes); + + OLuceneTxChanges buildTxChanges() throws IOException; + + Query deleteQuery(Object key, OIdentifiable value); + + boolean isCollectionIndex(); +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassFunctionsFactory.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassFunctionsFactory.java new file mode 100644 index 0000000000..cd2ac218fb --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassFunctionsFactory.java @@ -0,0 +1,25 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.arcadedb.lucene.functions; + +import com.arcadedb.database.sql.functions.OSQLFunctionFactoryTemplate; + +public class OLuceneCrossClassFunctionsFactory extends OSQLFunctionFactoryTemplate { + + public OLuceneCrossClassFunctionsFactory() { + register(new OLuceneCrossClassSearchFunction()); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassSearchFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassSearchFunction.java new file mode 100644 index 0000000000..484616e332 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassSearchFunction.java @@ -0,0 +1,181 @@ +package com.arcadedb.lucene.functions; + +import static com.arcadedb.lucene.OLuceneCrossClassIndexFactory.LUCENE_CROSS_CLASS; + +import com.arcadedb.common.log.OLogManager; +import com.arcadedb.common.log.OLogger; +import com.arcadedb.lucene.builder.OLuceneQueryBuilder; +import com.arcadedb.lucene.collections.OLuceneCompositeKey; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.ODatabaseDocumentInternal; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.index.OIndex; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.functions.OIndexableSQLFunction; +import com.arcadedb.database.sql.functions.OSQLFunctionAbstract; +import com.arcadedb.database.sql.parser.OBinaryCompareOperator; +import com.arcadedb.database.sql.parser.OExpression; +import com.arcadedb.database.sql.parser.OFromClause; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * This function uses the CrossClassIndex to search documents across all the Lucene indexes defined in a database + *

+ * Created by frank on 19/02/2016. + */ +public class OLuceneCrossClassSearchFunction extends OSQLFunctionAbstract + implements OIndexableSQLFunction { + private static final OLogger logger = + OLogManager.instance().logger(OLuceneCrossClassSearchFunction.class); + + public static final String NAME = "SEARCH_CROSS"; + + public OLuceneCrossClassSearchFunction() { + super(NAME, 1, 2); + } + + @Override + public Iterable searchFromTarget( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + + OLuceneFullTextIndex fullTextIndex = searchForIndex(ctx); + + OExpression expression = args[0]; + String query = (String) expression.execute((OResult) null, ctx); + + if (fullTextIndex != null) { + + ODocument metadata = getMetadata(args); + List luceneResultSet; + try (Stream rids = + fullTextIndex + .getInternal() + .getRids( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata))) { + luceneResultSet = rids.collect(Collectors.toList()); + } + return luceneResultSet; + } + return Collections.emptySet(); + } + + @Override + public long estimate( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return 1L; + } + + @Override + public boolean canExecuteInline( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return false; + } + + @Override + public boolean allowsIndexedExecution( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return true; + } + + @Override + public boolean shouldExecuteAfterSearch( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return false; + } + + protected OLuceneFullTextIndex searchForIndex(OCommandContext ctx) { + + Collection indexes = + ((ODatabaseDocumentInternal) ctx.getDatabase()) + .getMetadata() + .getIndexManager() + .getIndexes(); + for (OIndex index : indexes) { + if (index.getInternal() instanceof OLuceneFullTextIndex) { + if (index.getAlgorithm().equalsIgnoreCase(LUCENE_CROSS_CLASS)) { + return (OLuceneFullTextIndex) index; + } + } + } + return null; + } + + private ODocument getMetadata(OExpression[] args) { + if (args.length == 2) { + return new ODocument().fromJSON(args[1].toString()); + } + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + public Object execute( + Object iThis, + OIdentifiable currentRecord, + Object currentResult, + Object[] params, + OCommandContext ctx) { + + OLuceneFullTextIndex fullTextIndex = searchForIndex(ctx); + + String query = (String) params[0]; + + if (fullTextIndex != null) { + + ODocument metadata = getMetadata(params); + + Collection luceneResultSet = + fullTextIndex.get( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata)); + + return luceneResultSet; + } + return Collections.emptySet(); + } + + private ODocument getMetadata(Object[] params) { + + if (params.length == 2) { + return new ODocument().fromMap((Map) params[1]); + } + + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + public String getSyntax() { + logger.debug("syntax"); + return "SEARCH_CROSS('', {metadata})"; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsFactory.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsFactory.java new file mode 100644 index 0000000000..2251ba9e31 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsFactory.java @@ -0,0 +1,27 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.arcadedb.lucene.functions; + +import com.arcadedb.database.sql.functions.OSQLFunctionFactoryTemplate; + +public class OLuceneFunctionsFactory extends OSQLFunctionFactoryTemplate { + public OLuceneFunctionsFactory() { + register(new OLuceneSearchOnIndexFunction()); + register(new OLuceneSearchOnFieldsFunction()); + register(new OLuceneSearchOnClassFunction()); + register(new OLuceneSearchMoreLikeThisFunction()); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsUtils.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsUtils.java new file mode 100644 index 0000000000..f7d2c33646 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsUtils.java @@ -0,0 +1,60 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.ODatabaseDocumentInternal; +import com.arcadedb.database.metadata.OMetadataInternal; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.parser.OExpression; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 13/02/2017. */ +public class OLuceneFunctionsUtils { + public static final String MEMORY_INDEX = "_memoryIndex"; + + protected static OLuceneFullTextIndex searchForIndex(OExpression[] args, OCommandContext ctx) { + final String indexName = (String) args[0].execute((OResult) null, ctx); + return getLuceneFullTextIndex(ctx, indexName); + } + + protected static OLuceneFullTextIndex getLuceneFullTextIndex( + final OCommandContext ctx, final String indexName) { + final ODatabaseDocumentInternal documentDatabase = + (ODatabaseDocumentInternal) ctx.getDatabase(); + documentDatabase.activateOnCurrentThread(); + final OMetadataInternal metadata = documentDatabase.getMetadata(); + + final OLuceneFullTextIndex index = + (OLuceneFullTextIndex) + metadata.getIndexManagerInternal().getIndex(documentDatabase, indexName); + if (!(index instanceof OLuceneFullTextIndex)) { + throw new IllegalArgumentException("Not a valid Lucene index:: " + indexName); + } + return index; + } + + public static MemoryIndex getOrCreateMemoryIndex(OCommandContext ctx) { + MemoryIndex memoryIndex = (MemoryIndex) ctx.getVariable(MEMORY_INDEX); + if (memoryIndex == null) { + memoryIndex = new MemoryIndex(); + ctx.setVariable(MEMORY_INDEX, memoryIndex); + } + memoryIndex.reset(); + return memoryIndex; + } + + public static String doubleEscape(final String s) { + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.length(); ++i) { + final char c = s.charAt(i); + if (c == 92 || c == 43 || c == 45 || c == 33 || c == 40 || c == 41 || c == 58 || c == 94 + || c == 91 || c == 93 || c == 34 || c == 123 || c == 125 || c == 126 || c == 42 || c == 63 + || c == 124 || c == 38 || c == 47) { + sb.append('\\'); + sb.append('\\'); + } + sb.append(c); + } + return sb.toString(); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchFunctionTemplate.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchFunctionTemplate.java new file mode 100644 index 0000000000..774e252023 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchFunctionTemplate.java @@ -0,0 +1,90 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.lucene.collections.OLuceneResultSet; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.functions.OIndexableSQLFunction; +import com.arcadedb.database.sql.functions.OSQLFunctionAbstract; +import com.arcadedb.database.sql.parser.OBinaryCompareOperator; +import com.arcadedb.database.sql.parser.OExpression; +import com.arcadedb.database.sql.parser.OFromClause; +import java.util.Map; + +/** Created by frank on 25/05/2017. */ +public abstract class OLuceneSearchFunctionTemplate extends OSQLFunctionAbstract + implements OIndexableSQLFunction { + + public OLuceneSearchFunctionTemplate(String iName, int iMinParams, int iMaxParams) { + super(iName, iMinParams, iMaxParams); + } + + @Override + public boolean canExecuteInline( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return allowsIndexedExecution(target, operator, rightValue, ctx, args); + } + + @Override + public boolean allowsIndexedExecution( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + OLuceneFullTextIndex index = searchForIndex(target, ctx, args); + return index != null; + } + + @Override + public boolean shouldExecuteAfterSearch( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return false; + } + + @Override + public long estimate( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + + Iterable a = searchFromTarget(target, operator, rightValue, ctx, args); + if (a instanceof OLuceneResultSet) { + return ((OLuceneResultSet) a).size(); + } + long count = 0; + for (Object o : a) { + count++; + } + + return count; + } + + protected ODocument getMetadata(OExpression metadata, OCommandContext ctx) { + final Object md = metadata.execute((OResult) null, ctx); + if (md instanceof ODocument) { + return (ODocument) md; + } else if (md instanceof Map) { + return new ODocument().fromMap((Map) md); + } else if (md instanceof String) { + return new ODocument().fromJSON((String) md); + } else { + return new ODocument().fromJSON(metadata.toString()); + } + } + + protected abstract OLuceneFullTextIndex searchForIndex( + OFromClause target, OCommandContext ctx, OExpression... args); +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchMoreLikeThisFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchMoreLikeThisFunction.java new file mode 100644 index 0000000000..813ee9f0e1 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchMoreLikeThisFunction.java @@ -0,0 +1,396 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.common.io.OIOException; +import com.arcadedb.common.log.OLogManager; +import com.arcadedb.common.log.OLogger; +import com.arcadedb.lucene.collections.OLuceneCompositeKey; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.ODatabaseSession; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.id.ORecordId; +import com.arcadedb.database.metadata.OMetadataInternal; +import com.arcadedb.database.record.OElement; +import com.arcadedb.database.record.ORecord; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.functions.OIndexableSQLFunction; +import com.arcadedb.database.sql.functions.OSQLFunctionAbstract; +import com.arcadedb.database.sql.parser.OBinaryCompareOperator; +import com.arcadedb.database.sql.parser.OExpression; +import com.arcadedb.database.sql.parser.OFromClause; +import com.arcadedb.database.sql.parser.OFromItem; +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.mlt.MoreLikeThis; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery.Builder; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; + +/** Created by frank on 15/01/2017. */ +public class OLuceneSearchMoreLikeThisFunction extends OSQLFunctionAbstract + implements OIndexableSQLFunction { + + private static final OLogger logger = + OLogManager.instance().logger(OLuceneSearchMoreLikeThisFunction.class); + + public static final String NAME = "search_more"; + + public OLuceneSearchMoreLikeThisFunction() { + super(OLuceneSearchMoreLikeThisFunction.NAME, 1, 2); + } + + @Override + public String getName() { + return OLuceneSearchMoreLikeThisFunction.NAME; + } + + @Override + public Object execute( + Object iThis, + OIdentifiable iCurrentRecord, + Object iCurrentResult, + Object[] params, + OCommandContext ctx) { + + // TODO: slow implementation can be made faster + if (!(iCurrentRecord instanceof ODocument)) { + return false; + } + String className = ((ODocument) iCurrentRecord).getClassName(); + OLuceneFullTextIndex index = this.searchForIndex(ctx, className); + + if (index == null) return Collections.emptySet(); + + IndexSearcher searcher = index.searcher(); + + ODocument metadata = new ODocument((Map) params[1]); + + List ridsAsString = parseRidsObj(ctx, params[0]); + + List others = + ridsAsString.stream() + .map( + rid -> { + ORecordId recordId = new ORecordId(); + + recordId.fromString(rid); + return recordId; + }) + .map(id -> id.getRecord()) + .collect(Collectors.toList()); + + MoreLikeThis mlt = buildMoreLikeThis(index, searcher, metadata); + + Builder queryBuilder = new Builder(); + + excludeOtherFromResults(ridsAsString, queryBuilder); + + ODatabaseSession contest = ctx.getDatabase(); + addLikeQueries(others, mlt, queryBuilder, contest); + + Query mltQuery = queryBuilder.build(); + + Set luceneResultSet; + try (Stream rids = + index + .getInternal() + .getRids( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(mltQuery.toString())).setContext(ctx), + metadata))) { + luceneResultSet = rids.collect(Collectors.toSet()); + } + + return luceneResultSet.contains(iCurrentRecord); + } + + @Override + public String getSyntax() { + return "SEARCH_MORE( [rids], [ metdatada {} ] )"; + } + + @Override + public Iterable searchFromTarget( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + + OLuceneFullTextIndex index = this.searchForIndex(target, ctx); + + if (index == null) return Collections.emptySet(); + + IndexSearcher searcher = index.searcher(); + + OExpression expression = args[0]; + + ODocument metadata = parseMetadata(args); + + List ridsAsString = parseRids(ctx, expression); + + List others = + ridsAsString.stream() + .map( + rid -> { + ORecordId recordId = new ORecordId(); + + recordId.fromString(rid); + return recordId; + }) + .map(id -> id.getRecord()) + .collect(Collectors.toList()); + + MoreLikeThis mlt = buildMoreLikeThis(index, searcher, metadata); + + Builder queryBuilder = new Builder(); + + excludeOtherFromResults(ridsAsString, queryBuilder); + + ODatabaseSession contest = ctx.getDatabase(); + addLikeQueries(others, mlt, queryBuilder, contest); + + Query mltQuery = queryBuilder.build(); + + Set luceneResultSet; + try (Stream rids = + index + .getInternal() + .getRids( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(mltQuery.toString())).setContext(ctx), + metadata))) { + luceneResultSet = rids.collect(Collectors.toSet()); + } + + return luceneResultSet; + } + + private List parseRids(OCommandContext ctx, OExpression expression) { + + Object expResult = expression.execute((OResult) null, ctx); + return parseRidsObj(ctx, expResult); + } + + private List parseRidsObj(OCommandContext ctx, Object expResult) { + // single rind + if (expResult instanceof OIdentifiable) { + return Collections.singletonList(((OIdentifiable) expResult).getIdentity().toString()); + } + + Iterator iter; + if (expResult instanceof Iterable) { + iter = ((Iterable) expResult).iterator(); + } else if (expResult instanceof Iterator) { + iter = (Iterator) expResult; + } else { + return Collections.emptyList(); + } + + List rids = new ArrayList<>(); + while (iter.hasNext()) { + Object item = iter.next(); + if (item instanceof OResult) { + if (((OResult) item).isElement()) { + rids.add(((OResult) item).getIdentity().get().toString()); + } else { + Set properties = ((OResult) item).getPropertyNames(); + if (properties.size() == 1) { + Object val = ((OResult) item).getProperty(properties.iterator().next()); + if (val instanceof OIdentifiable) { + rids.add(((OIdentifiable) val).getIdentity().toString()); + } + } + } + } else if (item instanceof OIdentifiable) { + rids.add(((OIdentifiable) item).getIdentity().toString()); + } + } + return rids; + } + + private ODocument parseMetadata(OExpression[] args) { + ODocument metadata = new ODocument(); + if (args.length == 2) { + metadata.fromJSON(args[1].toString()); + } + return metadata; + } + + private MoreLikeThis buildMoreLikeThis( + OLuceneFullTextIndex index, IndexSearcher searcher, ODocument metadata) { + + try { + MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader()); + + mlt.setAnalyzer(index.queryAnalyzer()); + + mlt.setFieldNames( + Optional.ofNullable(metadata.>getProperty("fieldNames")) + .orElse(index.getDefinition().getFields()) + .toArray(new String[] {})); + + mlt.setMaxQueryTerms( + Optional.ofNullable(metadata.getProperty("maxQueryTerms")) + .orElse(MoreLikeThis.DEFAULT_MAX_QUERY_TERMS)); + + mlt.setMinTermFreq( + Optional.ofNullable(metadata.getProperty("minTermFreq")) + .orElse(MoreLikeThis.DEFAULT_MIN_TERM_FREQ)); + + mlt.setMaxDocFreq( + Optional.ofNullable(metadata.getProperty("maxDocFreq")) + .orElse(MoreLikeThis.DEFAULT_MAX_DOC_FREQ)); + + mlt.setMinDocFreq( + Optional.ofNullable(metadata.getProperty("minDocFreq")) + .orElse(MoreLikeThis.DEFAULT_MAX_DOC_FREQ)); + + mlt.setBoost( + Optional.ofNullable(metadata.getProperty("boost")) + .orElse(MoreLikeThis.DEFAULT_BOOST)); + + mlt.setBoostFactor( + Optional.ofNullable(metadata.getProperty("boostFactor")).orElse(1f)); + + mlt.setMaxWordLen( + Optional.ofNullable(metadata.getProperty("maxWordLen")) + .orElse(MoreLikeThis.DEFAULT_MAX_WORD_LENGTH)); + + mlt.setMinWordLen( + Optional.ofNullable(metadata.getProperty("minWordLen")) + .orElse(MoreLikeThis.DEFAULT_MIN_WORD_LENGTH)); + + mlt.setMaxNumTokensParsed( + Optional.ofNullable(metadata.getProperty("maxNumTokensParsed")) + .orElse(MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED)); + + mlt.setStopWords( + (Set) + Optional.ofNullable(metadata.getProperty("stopWords")) + .orElse(MoreLikeThis.DEFAULT_STOP_WORDS)); + + return mlt; + } catch (IOException e) { + throw OException.wrapException(new OIOException("Lucene IO Exception"), e); + } + } + + private void addLikeQueries( + List others, MoreLikeThis mlt, Builder queryBuilder, ODatabaseSession contest) { + others.stream() + .map(or -> contest.load(or)) + .forEach( + element -> + Arrays.stream(mlt.getFieldNames()) + .forEach( + fieldName -> { + String property = element.getProperty(fieldName); + try { + Query fieldQuery = mlt.like(fieldName, new StringReader(property)); + if (!fieldQuery.toString().isEmpty()) + queryBuilder.add(fieldQuery, Occur.SHOULD); + } catch (IOException e) { + // FIXME handle me! + logger.error("Error during Lucene query generation", e); + } + })); + } + + private void excludeOtherFromResults(List ridsAsString, Builder queryBuilder) { + ridsAsString.stream() + .forEach( + rid -> + queryBuilder.add( + new TermQuery(new Term("RID", QueryParser.escape(rid))), Occur.MUST_NOT)); + } + + private OLuceneFullTextIndex searchForIndex(OFromClause target, OCommandContext ctx) { + OFromItem item = target.getItem(); + + String className = item.getIdentifier().getStringValue(); + + return searchForIndex(ctx, className); + } + + private OLuceneFullTextIndex searchForIndex(OCommandContext ctx, String className) { + OMetadataInternal dbMetadata = + (OMetadataInternal) ctx.getDatabase().activateOnCurrentThread().getMetadata(); + + List indices = + dbMetadata.getImmutableSchemaSnapshot().getClass(className).getIndexes().stream() + .filter(idx -> idx instanceof OLuceneFullTextIndex) + .map(idx -> (OLuceneFullTextIndex) idx) + .collect(Collectors.toList()); + + if (indices.size() > 1) { + throw new IllegalArgumentException("too many full-text indices on given class: " + className); + } + + return indices.size() == 0 ? null : indices.get(0); + } + + @Override + public long estimate( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + OLuceneFullTextIndex index = this.searchForIndex(target, ctx); + + if (index != null) return index.size(); + return 0; + } + + @Override + public boolean canExecuteInline( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return false; + } + + @Override + public boolean allowsIndexedExecution( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + + OLuceneFullTextIndex index = this.searchForIndex(target, ctx); + + return index != null; + } + + @Override + public boolean shouldExecuteAfterSearch( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return false; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnClassFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnClassFunction.java new file mode 100644 index 0000000000..e9e6f21e04 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnClassFunction.java @@ -0,0 +1,184 @@ +package com.arcadedb.lucene.functions; + +import static com.arcadedb.lucene.functions.OLuceneFunctionsUtils.getOrCreateMemoryIndex; + +import com.arcadedb.lucene.builder.OLuceneQueryBuilder; +import com.arcadedb.lucene.collections.OLuceneCompositeKey; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.metadata.OMetadataInternal; +import com.arcadedb.database.record.OElement; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.executor.OResultInternal; +import com.arcadedb.database.sql.parser.OBinaryCompareOperator; +import com.arcadedb.database.sql.parser.OExpression; +import com.arcadedb.database.sql.parser.OFromClause; +import com.arcadedb.database.sql.parser.OFromItem; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 15/01/2017. */ +public class OLuceneSearchOnClassFunction extends OLuceneSearchFunctionTemplate { + + public static final String NAME = "search_class"; + + public OLuceneSearchOnClassFunction() { + super(NAME, 1, 2); + } + + @Override + public String getName() { + return NAME; + } + + @Override + public boolean canExecuteInline( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return true; + } + + @Override + public Object execute( + Object iThis, + OIdentifiable iCurrentRecord, + Object iCurrentResult, + Object[] params, + OCommandContext ctx) { + + OResult result; + if (iThis instanceof OResult) { + result = (OResult) iThis; + } else { + result = new OResultInternal((OIdentifiable) iThis); + } + + if (!result.getElement().isPresent()) return false; + OElement element = result.getElement().get(); + if (!element.getSchemaType().isPresent()) return false; + + String className = element.getSchemaType().get().getName(); + + OLuceneFullTextIndex index = searchForIndex(ctx, className); + + if (index == null) return false; + + String query = (String) params[0]; + + MemoryIndex memoryIndex = getOrCreateMemoryIndex(ctx); + + List key = + index.getDefinition().getFields().stream() + .map(s -> element.getProperty(s)) + .collect(Collectors.toList()); + + for (IndexableField field : index.buildDocument(key, iCurrentRecord).getFields()) { + memoryIndex.addField(field, index.indexAnalyzer()); + } + + ODocument metadata = getMetadata(params); + OLuceneKeyAndMetadata keyAndMetadata = + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); + + return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; + } + + private ODocument getMetadata(Object[] params) { + + if (params.length == 2) { + return new ODocument().fromMap((Map) params[1]); + } + + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + public String getSyntax() { + return "SEARCH_INDEX( indexName, [ metdatada {} ] )"; + } + + @Override + public boolean filterResult() { + return true; + } + + @Override + public Iterable searchFromTarget( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + + OLuceneFullTextIndex index = searchForIndex(target, ctx); + + OExpression expression = args[0]; + String query = (String) expression.execute((OResult) null, ctx); + + if (index != null) { + + ODocument metadata = getMetadata(args, ctx); + + List luceneResultSet; + try (Stream rids = + index + .getInternal() + .getRids( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata))) { + luceneResultSet = rids.collect(Collectors.toList()); + } + + return luceneResultSet; + } + return Collections.emptySet(); + } + + private ODocument getMetadata(OExpression[] args, OCommandContext ctx) { + if (args.length == 2) { + return getMetadata(args[1], ctx); + } + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + protected OLuceneFullTextIndex searchForIndex( + OFromClause target, OCommandContext ctx, OExpression... args) { + OFromItem item = target.getItem(); + + String className = item.getIdentifier().getStringValue(); + + return searchForIndex(ctx, className); + } + + private OLuceneFullTextIndex searchForIndex(OCommandContext ctx, String className) { + OMetadataInternal dbMetadata = + (OMetadataInternal) ctx.getDatabase().activateOnCurrentThread().getMetadata(); + + List indices = + dbMetadata.getImmutableSchemaSnapshot().getClass(className).getIndexes().stream() + .filter(idx -> idx instanceof OLuceneFullTextIndex) + .map(idx -> (OLuceneFullTextIndex) idx) + .collect(Collectors.toList()); + + if (indices.size() > 1) { + throw new IllegalArgumentException("too many full-text indices on given class: " + className); + } + + return indices.size() == 0 ? null : indices.get(0); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnFieldsFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnFieldsFunction.java new file mode 100644 index 0000000000..7ebe6e7b27 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnFieldsFunction.java @@ -0,0 +1,200 @@ +package com.arcadedb.lucene.functions; + +import static com.arcadedb.lucene.functions.OLuceneFunctionsUtils.getOrCreateMemoryIndex; + +import com.arcadedb.lucene.builder.OLuceneQueryBuilder; +import com.arcadedb.lucene.collections.OLuceneCompositeKey; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.metadata.OMetadataInternal; +import com.arcadedb.database.record.OElement; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.executor.OResultInternal; +import com.arcadedb.database.sql.parser.OBinaryCompareOperator; +import com.arcadedb.database.sql.parser.OExpression; +import com.arcadedb.database.sql.parser.OFromClause; +import com.arcadedb.database.sql.parser.OFromItem; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 15/01/2017. */ +public class OLuceneSearchOnFieldsFunction extends OLuceneSearchFunctionTemplate { + + public static final String NAME = "search_fields"; + + public OLuceneSearchOnFieldsFunction() { + super(NAME, 2, 3); + } + + @Override + public String getName() { + return NAME; + } + + @Override + public Object execute( + Object iThis, + OIdentifiable iCurrentRecord, + Object iCurrentResult, + Object[] params, + OCommandContext ctx) { + + if (iThis instanceof ORID) { + iThis = ((ORID) iThis).getRecord(); + } + if (iThis instanceof OIdentifiable) { + iThis = new OResultInternal((OIdentifiable) iThis); + } + OResult result = (OResult) iThis; + + if (!result.getElement().isPresent()) return false; + OElement element = result.getElement().get(); + if (!element.getSchemaType().isPresent()) return false; + String className = element.getSchemaType().get().getName(); + List fieldNames = (List) params[0]; + + OLuceneFullTextIndex index = searchForIndex(className, ctx, fieldNames); + + if (index == null) return false; + + String query; + if (params[1] == null) { + query = null; + } else { + query = (String) params[1].toString(); + } + + MemoryIndex memoryIndex = getOrCreateMemoryIndex(ctx); + + List key = + index.getDefinition().getFields().stream() + .map(s -> element.getProperty(s)) + .collect(Collectors.toList()); + + for (IndexableField field : index.buildDocument(key, iCurrentRecord).getFields()) { + memoryIndex.addField(field, index.indexAnalyzer()); + } + + ODocument metadata = getMetadata(params); + OLuceneKeyAndMetadata keyAndMetadata = + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); + + return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; + } + + private ODocument getMetadata(Object[] params) { + + if (params.length == 3) { + return new ODocument().fromMap((Map) params[2]); + } + + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + public String getSyntax() { + return "SEARCH_INDEX( indexName, [ metdatada {} ] )"; + } + + @Override + public Iterable searchFromTarget( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + + OLuceneFullTextIndex index = searchForIndex(target, ctx, args); + + OExpression expression = args[1]; + Object query = expression.execute((OResult) null, ctx); + if (index != null) { + + ODocument meta = getMetadata(args, ctx); + Set luceneResultSet; + try (Stream rids = + index + .getInternal() + .getRids( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { + luceneResultSet = rids.collect(Collectors.toSet()); + } + + return luceneResultSet; + } + throw new RuntimeException(); + } + + private ODocument getMetadata(OExpression[] args, OCommandContext ctx) { + if (args.length == 3) { + return getMetadata(args[2], ctx); + } + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + protected OLuceneFullTextIndex searchForIndex( + OFromClause target, OCommandContext ctx, OExpression... args) { + List fieldNames = (List) args[0].execute((OResult) null, ctx); + OFromItem item = target.getItem(); + String className = item.getIdentifier().getStringValue(); + + return searchForIndex(className, ctx, fieldNames); + } + + private OLuceneFullTextIndex searchForIndex( + String className, OCommandContext ctx, List fieldNames) { + OMetadataInternal dbMetadata = + (OMetadataInternal) ctx.getDatabase().activateOnCurrentThread().getMetadata(); + + List indices = + dbMetadata.getImmutableSchemaSnapshot().getClass(className).getIndexes().stream() + .filter(idx -> idx instanceof OLuceneFullTextIndex) + .map(idx -> (OLuceneFullTextIndex) idx) + .filter(idx -> intersect(idx.getDefinition().getFields(), fieldNames)) + .collect(Collectors.toList()); + + if (indices.size() > 1) { + throw new IllegalArgumentException( + "too many indices matching given field name: " + String.join(",", fieldNames)); + } + + return indices.size() == 0 ? null : indices.get(0); + } + + public List intersection(List list1, List list2) { + List list = new ArrayList(); + + for (T t : list1) { + if (list2.contains(t)) { + list.add(t); + } + } + + return list; + } + + public boolean intersect(List list1, List list2) { + + for (T t : list1) { + if (list2.contains(t)) { + return true; + } + } + + return false; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnIndexFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnIndexFunction.java new file mode 100644 index 0000000000..c27b3ea8ff --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnIndexFunction.java @@ -0,0 +1,198 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.lucene.builder.OLuceneQueryBuilder; +import com.arcadedb.lucene.collections.OLuceneCompositeKey; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.ODatabaseDocumentInternal; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.index.OIndex; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.executor.OResultInternal; +import com.arcadedb.database.sql.parser.OBinaryCompareOperator; +import com.arcadedb.database.sql.parser.OExpression; +import com.arcadedb.database.sql.parser.OFromClause; +import com.arcadedb.database.sql.parser.OFromItem; +import com.arcadedb.database.sql.parser.OIdentifier; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 15/01/2017. */ +public class OLuceneSearchOnIndexFunction extends OLuceneSearchFunctionTemplate { + + public static final String MEMORY_INDEX = "_memoryIndex"; + + public static final String NAME = "search_index"; + + public OLuceneSearchOnIndexFunction() { + super(NAME, 2, 3); + } + + @Override + public String getName() { + return NAME; + } + + @Override + public Object execute( + Object iThis, + OIdentifiable iCurrentRecord, + Object iCurrentResult, + Object[] params, + OCommandContext ctx) { + if (iThis instanceof ORID) { + iThis = ((ORID) iThis).getRecord(); + } + if (iThis instanceof OIdentifiable) { + iThis = new OResultInternal((OIdentifiable) iThis); + } + OResult result = (OResult) iThis; + + String indexName = (String) params[0]; + + OLuceneFullTextIndex index = searchForIndex(ctx, indexName); + + if (index == null) return false; + + String query = (String) params[1]; + + MemoryIndex memoryIndex = getOrCreateMemoryIndex(ctx); + + List key = + index.getDefinition().getFields().stream() + .map(s -> result.getProperty(s)) + .collect(Collectors.toList()); + + for (IndexableField field : index.buildDocument(key, iCurrentRecord).getFields()) { + memoryIndex.addField(field, index.indexAnalyzer()); + } + + ODocument metadata = getMetadata(params); + OLuceneKeyAndMetadata keyAndMetadata = + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); + + return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; + } + + private ODocument getMetadata(Object[] params) { + + if (params.length == 3) { + return new ODocument().fromMap((Map) params[2]); + } + + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + private MemoryIndex getOrCreateMemoryIndex(OCommandContext ctx) { + MemoryIndex memoryIndex = (MemoryIndex) ctx.getVariable(MEMORY_INDEX); + if (memoryIndex == null) { + memoryIndex = new MemoryIndex(); + ctx.setVariable(MEMORY_INDEX, memoryIndex); + } + + memoryIndex.reset(); + return memoryIndex; + } + + @Override + public String getSyntax() { + return "SEARCH_INDEX( indexName, [ metdatada {} ] )"; + } + + @Override + public boolean filterResult() { + return true; + } + + @Override + public Iterable searchFromTarget( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + + OLuceneFullTextIndex index = searchForIndex(target, ctx, args); + + OExpression expression = args[1]; + String query = (String) expression.execute((OResult) null, ctx); + if (index != null && query != null) { + + ODocument meta = getMetadata(args, ctx); + + List luceneResultSet; + try (Stream rids = + index + .getInternal() + .getRids( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { + luceneResultSet = rids.collect(Collectors.toList()); + } + + return luceneResultSet; + } + return Collections.emptyList(); + } + + private ODocument getMetadata(OExpression[] args, OCommandContext ctx) { + if (args.length == 3) { + return getMetadata(args[2], ctx); + } + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + protected OLuceneFullTextIndex searchForIndex( + OFromClause target, OCommandContext ctx, OExpression... args) { + + OFromItem item = target.getItem(); + OIdentifier identifier = item.getIdentifier(); + return searchForIndex(identifier.getStringValue(), ctx, args); + } + + private OLuceneFullTextIndex searchForIndex( + String className, OCommandContext ctx, OExpression... args) { + + String indexName = (String) args[0].execute((OResult) null, ctx); + + final ODatabaseDocumentInternal database = (ODatabaseDocumentInternal) ctx.getDatabase(); + OIndex index = + database + .getMetadata() + .getIndexManagerInternal() + .getClassIndex(database, className, indexName); + + if (index != null && index.getInternal() instanceof OLuceneFullTextIndex) { + return (OLuceneFullTextIndex) index; + } + + return null; + } + + private OLuceneFullTextIndex searchForIndex(OCommandContext ctx, String indexName) { + final ODatabaseDocumentInternal database = (ODatabaseDocumentInternal) ctx.getDatabase(); + OIndex index = database.getMetadata().getIndexManagerInternal().getIndex(database, indexName); + + if (index != null && index.getInternal() instanceof OLuceneFullTextIndex) { + return (OLuceneFullTextIndex) index; + } + + return null; + } + + @Override + public Object getResult(OCommandContext ctx) { + return super.getResult(ctx); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java new file mode 100644 index 0000000000..520cf75087 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java @@ -0,0 +1,362 @@ +package com.arcadedb.lucene.index; + +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; +import com.arcadedb.index.Index; +import com.arcadedb.index.IndexCursor; +import com.arcadedb.index.IndexException; +import com.arcadedb.index.IndexInternal; +import com.arcadedb.index.RangeIndexCursor; +import com.arcadedb.index.TypeIndex; +import com.arcadedb.index.engine.IndexEngine; +import com.arcadedb.schema.IndexBuilder; // Added for build method +import com.arcadedb.schema.IndexDefinition; +import com.arcadedb.schema.Type; +import com.arcadedb.tx.TransactionContext; + +import java.io.IOException; // Added for compact +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; + +public class ArcadeLuceneFullTextIndex implements IndexInternal { + + private final DatabaseInternal database; + private final String name; + private final boolean unique; + private final String analyzerClassName; + private final String filePath; + private final Type[] keyTypes; + // Other fields like IndexDefinition, IndexEngine, pageSize, nullStrategy, etc. + private IndexDefinition definition; // Will be set by setMetadata or build + + public ArcadeLuceneFullTextIndex(DatabaseInternal database, String name, boolean unique, String analyzerClassName, String filePath, Type[] keyTypes) { + this.database = database; + this.name = name; + this.unique = unique; + this.analyzerClassName = analyzerClassName; + this.filePath = filePath; // Store filePath + this.keyTypes = keyTypes; // Store keyTypes + // Further initialization for Lucene engine would go here. + // This constructor might be called by the handler, then setMetadata/build by schema loading/creation. + } + + // --- IndexInternal Methods --- + + @Override + public String getAssociatedFileName() { + return filePath; // Return stored filePath + } + + @Override + public void build(IndexBuilder builder) { + // This method is typically called when an index is being built from scratch. + // The IndexBuilder contains all necessary information. + // this.definition = builder.getIndexDefinition(); // Or create one + // Initialize/create the Lucene IndexWriter and other resources here. + throw new UnsupportedOperationException("Not yet implemented: build"); + } + + @Override + public void setMetadata(IndexDefinition definition, String filePath, int pageSize, byte nullStrategy) { + this.definition = definition; + // this.filePath = filePath; // Already set in constructor, ensure consistency or update + // this.pageSize = pageSize; + // this.nullStrategy = nullStrategy; + throw new UnsupportedOperationException("Not yet implemented: setMetadata"); + } + + @Override + public STATUS getStatus() { + // Return current status, e.g., from engine + throw new UnsupportedOperationException("Not yet implemented: getStatus"); + } + + + @Override + public void setStatus(STATUS status) { + // Set current status, e.g., on engine + throw new UnsupportedOperationException("Not yet implemented: setStatus"); + } + + @Override + public void close() { + // Release Lucene resources (IndexWriter, IndexSearcher, Directory) + throw new UnsupportedOperationException("Not yet implemented: close"); + } + + @Override + public void drop() { + // Remove Lucene index files from disk. + // Unregister from schema should be handled by Schema.dropIndex() calling this. + throw new UnsupportedOperationException("Not yet implemented: drop"); + } + + @Override + public int getFileId() { + // Lucene might not use file IDs in the same way ArcadeDB's native engine does. + // Return a sentinel or appropriate value. + return -1; + } + + @Override + public T getComponent(String name, Class type) { + // Used for accessing underlying components, might be relevant for engine access. + throw new UnsupportedOperationException("Not yet implemented: getComponent"); + } + + @Override + public Type[] getKeyTypes() { + return keyTypes; // Return stored keyTypes + } + + @Override + public byte[] getBinaryKeyTypes() { + // Convert Type[] to byte[] if necessary for serialization, or return null if not used. + throw new UnsupportedOperationException("Not yet implemented: getBinaryKeyTypes"); + } + + @Override + public void setTypeIndex(TypeIndex typeIndex) { + // Associated with schema type's index list. + throw new UnsupportedOperationException("Not yet implemented: setTypeIndex"); + } + + @Override + public TypeIndex getTypeIndex() { + throw new UnsupportedOperationException("Not yet implemented: getTypeIndex"); + } + + @Override + public void scheduleCompaction() { + // Lucene has its own merging/optimization, might not map directly. + throw new UnsupportedOperationException("Not yet implemented: scheduleCompaction"); + } + + @Override + public String getMostRecentFileName() { + // Relates to WAL, might not be applicable or needs specific handling for Lucene. + throw new UnsupportedOperationException("Not yet implemented: getMostRecentFileName"); + } + + @Override + public Map toJSON() { + // Serialize index configuration/stats to JSON. + throw new UnsupportedOperationException("Not yet implemented: toJSON"); + } + + @Override + public Index getAssociatedIndex() { + // For sub-indexes, typically null for a main index. + return null; + } + + // --- Index Methods --- + + @Override + public String getName() { + return name; + } + + @Override + public String getTypeName() { + // This should return the algorithm name, e.g., "LUCENE" + // return ArcadeLuceneLifecycleManager.LUCENE_ALGORITHM; // If constant is accessible + return "LUCENE"; // Or get from definition if set + } + + @Override + public IndexDefinition getDefinition() { + // Return the stored IndexDefinition + if (this.definition == null) { + throw new UnsupportedOperationException("IndexDefinition not set for index: " + name); + } + return this.definition; + } + + @Override + public boolean isUnique() { + return this.unique; + } + + @Override + public List getPropertyNames() { + // Get from IndexDefinition + if (this.definition == null) throw new UnsupportedOperationException("Definition not set"); + return this.definition.getPropertyNames(); + } + + @Override + public long countEntries() { + // Count documents in Lucene index + throw new UnsupportedOperationException("Not yet implemented: countEntries"); + } + + @Override + public IndexCursor get(Object[] keys) { + // Perform Lucene search + throw new UnsupportedOperationException("Not yet implemented: get"); + } + + @Override + public IndexCursor get(Object[] keys, int limit) { + throw new UnsupportedOperationException("Not yet implemented: get with limit"); + } + + + @Override + public Stream getRidsStream(Object[] keys) { + throw new UnsupportedOperationException("Not yet implemented: getRidsStream"); + } + + @Override + public RangeIndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded) { + throw new UnsupportedOperationException("Not yet implemented: range"); + } + + @Override + public RangeIndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded, int limit) { + throw new UnsupportedOperationException("Not yet implemented: range with limit"); + } + + @Override + public IndexCursor iterator(boolean ascendingOrder) { + // Iterate all documents + throw new UnsupportedOperationException("Not yet implemented: iterator"); + } + + @Override + public IndexCursor iterator(boolean ascendingOrder, Object[] fromKey, boolean fromKeyInclusive) { + throw new UnsupportedOperationException("Not yet implemented: iterator with fromKey"); + } + + @Override + public IndexCursor descendingIterator() { + throw new UnsupportedOperationException("Not yet implemented: descendingIterator"); + } + + @Override + public IndexCursor descendingIterator(Object[] fromKey, boolean fromKeyInclusive) { + throw new UnsupportedOperationException("Not yet implemented: descendingIterator with fromKey"); + } + + @Override + public boolean supportsOrderedIterations() { + return false; // Lucene supports score-based ordering, key-based might not be natural. + } + + @Override + public boolean isAutomatic() { + // Get from IndexDefinition + if (this.definition == null) throw new UnsupportedOperationException("Definition not set"); + return this.definition.isAutomatic(); + } + + @Override + public void setRebuilding(boolean rebuilding) { + // Set a flag if the index is rebuilding + throw new UnsupportedOperationException("Not yet implemented: setRebuilding"); + } + + @Override + public IndexEngine getEngine() { + // Return the LuceneIndexEngine instance associated with this index + throw new UnsupportedOperationException("Not yet implemented: getEngine"); + } + + @Override + public boolean isValid() { + throw new UnsupportedOperationException("Not yet implemented: isValid"); + } + + @Override + public Map getStats() { + // Return Lucene specific stats + throw new UnsupportedOperationException("Not yet implemented: getStats"); + } + + @Override + public void setStats(Map stats) { + // Not typically set from outside + throw new UnsupportedOperationException("Not yet implemented: setStats"); + } + + @Override + public void compact() throws IOException { + // Trigger Lucene merge/optimize if applicable + throw new UnsupportedOperationException("Not yet implemented: compact"); + } + + @Override + public boolean isCompacting() { + // Check if Lucene merge/optimize is running + throw new UnsupportedOperationException("Not yet implemented: isCompacting"); + } + + @Override + public List getFileIds() { + // Lucene manages its own files; this might not map directly. + throw new UnsupportedOperationException("Not yet implemented: getFileIds"); + } + + @Override + public int getPageSize() { + // Lucene doesn't use pages in the same way as ArcadeDB's native engine. + throw new UnsupportedOperationException("Not yet implemented: getPageSize"); + } + + @Override + public void setPageSize(int pageSize) { + throw new UnsupportedOperationException("Not yet implemented: setPageSize"); + } + + @Override + public byte getNullStrategy() { + // Get from IndexDefinition + if (this.definition == null) throw new UnsupportedOperationException("Definition not set"); + return this.definition.getNullStrategy().getValue(); + } + + @Override + public void setNullStrategy(byte nullStrategy) { + // Set in IndexDefinition (usually immutable after creation) + throw new UnsupportedOperationException("Not yet implemented: setNullStrategy"); + } + + @Override + public void set(TransactionContext tx, Object[] keys, RID[] rids) throws IndexException { + // Add entries to Lucene index + throw new UnsupportedOperationException("Not yet implemented: set"); + } + + @Override + public void remove(TransactionContext tx, Object[] keys, Identifiable rid) throws IndexException { + // Remove specific RID associated with keys + throw new UnsupportedOperationException("Not yet implemented: remove with rid"); + } + + @Override + public void remove(TransactionContext tx, Object[] keys) throws IndexException { + // Remove all RIDs associated with keys + throw new UnsupportedOperationException("Not yet implemented: remove"); + } + + @Override + public IndexCursor range(boolean ascendingOrder) { + throw new UnsupportedOperationException("Not yet implemented: range without keys"); + } + + @Override + public IndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded, int limit, int skip) { + throw new UnsupportedOperationException("Not yet implemented: range with limit and skip"); + } + + @Override + public int getAssociatedBucketId() { + // Lucene indexes are not directly associated with a single bucket in the same way. + return -1; // Or derive from schema/type if applicable + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java b/lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java new file mode 100644 index 0000000000..a0b1cc9d48 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java @@ -0,0 +1,118 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.arcadedb.lucene.index; + +import com.arcadedb.lucene.OLuceneCrossClassIndexFactory; +import com.arcadedb.lucene.engine.OLuceneIndexEngine; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.exception.OInvalidIndexEngineIdException; +import com.arcadedb.database.index.OIndexMetadata; +import com.arcadedb.database.storage.OStorage; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.Query; + +public class OLuceneFullTextIndex extends OLuceneIndexNotUnique { + + public OLuceneFullTextIndex(OIndexMetadata im, final OStorage storage) { + super(im, storage); + } + + public Document buildDocument(final Object key, OIdentifiable identifieable) { + + while (true) + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.buildDocument(key, identifieable); + }); + } catch (OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + + public Query buildQuery(final Object query) { + while (true) + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.buildQuery(query); + }); + } catch (OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + + public Analyzer queryAnalyzer() { + while (true) + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.queryAnalyzer(); + }); + } catch (final OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + + public boolean isCollectionIndex() { + while (true) { + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.isCollectionIndex(); + }); + } catch (OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + } + + public Analyzer indexAnalyzer() { + while (true) { + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.indexAnalyzer(); + }); + } catch (OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + } + + @Override + public boolean isAutomatic() { + return super.isAutomatic() + || OLuceneCrossClassIndexFactory.LUCENE_CROSS_CLASS.equals(im.getAlgorithm()); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java new file mode 100644 index 0000000000..0c311f42d5 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java @@ -0,0 +1,113 @@ +package com.arcadedb.lucene.query; + +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; +import com.arcadedb.index.IndexCursor; + +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; + +// import org.apache.lucene.search.ScoreDoc; +// import org.apache.lucene.search.IndexSearcher; +// import org.apache.lucene.document.Document; +// import java.io.IOException; + +public class LuceneIndexCursor implements IndexCursor { + + // private ScoreDoc[] scoreDocs; + // private IndexSearcher searcher; + // private int currentIndex = 0; + // private Document currentDocument; + // private RID currentRID; + + // public LuceneIndexCursor(ScoreDoc[] scoreDocs, IndexSearcher searcher) { + // this.scoreDocs = scoreDocs; + // this.searcher = searcher; + // // Potentially pre-fetch the first one or do it in hasNext/next + // } + + @Override + public Object[] getKeys() { + // This would typically return the terms that matched for the current document, + // which might not be straightforward or always relevant for a Lucene full-text search result. + // Or, if the cursor iterates over specific keys that led to this document. + throw new UnsupportedOperationException("Not yet implemented: getKeys"); + } + + @Override + public Identifiable getRecord() { + // if (currentRID == null && currentDocument != null) { + // // Assuming RID is stored in a field, e.g., "RID" + // String ridString = currentDocument.get("RID"); + // if (ridString != null) { + // currentRID = new RID(null, ridString); // Database instance might be needed + // } + // } + // return currentRID; + throw new UnsupportedOperationException("Not yet implemented: getRecord"); + } + + @Override + public Map getProperties() { + throw new UnsupportedOperationException("Not implemented for LuceneIndexCursor"); + } + + @Override + public int getScore() { + // if (currentIndex > 0 && currentIndex <= scoreDocs.length) { + // return (int) (scoreDocs[currentIndex -1].score * 1000); // Example scaling + // } + return 0; + } + + @Override + public boolean hasNext() { + // return currentIndex < scoreDocs.length; + throw new UnsupportedOperationException("Not yet implemented: hasNext"); + } + + @Override + public Identifiable next() { + // if (!hasNext()) { + // throw new NoSuchElementException(); + // } + // try { + // currentDocument = searcher.doc(scoreDocs[currentIndex].doc); + // currentRID = null; // Reset so getRecord re-fetches it + // currentIndex++; + // return getRecord(); // This might need the database instance to load the actual record + // } catch (IOException e) { + // throw new RuntimeException("Error fetching document from Lucene index", e); + // } + throw new UnsupportedOperationException("Not yet implemented: next"); + } + + @Override + public void close() { + // Release any Lucene resources if necessary, e.g., if the searcher was context-specific. + // scoreDocs = null; + // searcher = null; + } + + @Override + public long size() { + // return scoreDocs != null ? scoreDocs.length : 0; + throw new UnsupportedOperationException("Not yet implemented: size"); + } + + @Override + public void setLimit(int limit) { + throw new UnsupportedOperationException("Not supported after creation."); + } + + @Override + public int getLimit() { + return -1; // Or actual limit if supported + } + + @Override + public boolean isPaginated() { + return true; // Or based on actual implementation + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/OLuceneQueryContext.java b/lucene/src/main/java/com/arcadedb/lucene/query/OLuceneQueryContext.java new file mode 100644 index 0000000000..3bbee4581e --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/query/OLuceneQueryContext.java @@ -0,0 +1,138 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.query; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.lucene.exception.OLuceneIndexException; +import com.arcadedb.lucene.tx.OLuceneTxChanges; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.OIdentifiable; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.highlight.TextFragment; + +/** Created by Enrico Risa on 08/01/15. */ +public class OLuceneQueryContext { + private final OCommandContext context; + private final IndexSearcher searcher; + private final Query query; + private final Sort sort; + private Optional changes; + private HashMap fragments; + + public OLuceneQueryContext( + final OCommandContext context, final IndexSearcher searcher, final Query query) { + this(context, searcher, query, Collections.emptyList()); + } + + public OLuceneQueryContext( + final OCommandContext context, + final IndexSearcher searcher, + final Query query, + final List sortFields) { + this.context = context; + this.searcher = searcher; + this.query = query; + if (sortFields.isEmpty()) { + sort = null; + } else { + sort = new Sort(sortFields.toArray(new SortField[] {})); + } + changes = Optional.empty(); + fragments = new HashMap<>(); + } + + public boolean isInTx() { + return changes.isPresent(); + } + + public OLuceneQueryContext withChanges(final OLuceneTxChanges changes) { + this.changes = Optional.ofNullable(changes); + return this; + } + + public OLuceneQueryContext addHighlightFragment( + final String field, final TextFragment[] fieldFragment) { + fragments.put(field, fieldFragment); + return this; + } + + public OCommandContext getContext() { + return context; + } + + public Query getQuery() { + return query; + } + + public Optional getChanges() { + return changes; + } + + public Sort getSort() { + return sort; + } + + public IndexSearcher getSearcher() { + return changes.map(c -> new IndexSearcher(multiReader(c))).orElse(searcher); + } + + private MultiReader multiReader(final OLuceneTxChanges luceneTxChanges) { + final IndexReader primaryReader = searcher.getIndexReader(); + final IndexReader txReader = luceneTxChanges.searcher().getIndexReader(); + try { + // Transfer ownership to the MultiReader so the index searcher can be released transparently. + // Without this, the primary IndexReader will leak a refcount each time it is wrapped. + MultiReader multiReader = new MultiReader(new IndexReader[] {primaryReader, txReader}, false); + primaryReader.decRef(); + txReader.decRef(); + return multiReader; + } catch (final IOException e) { + throw OException.wrapException( + new OLuceneIndexException("unable to create reader on changes"), e); + } + } + + public long deletedDocs(final Query query) { + return changes.map(c -> c.deletedDocs(query)).orElse(0l); + } + + public boolean isUpdated(final Document doc, final Object key, final OIdentifiable value) { + return changes.map(c -> c.isUpdated(doc, key, value)).orElse(false); + } + + public boolean isDeleted(final Document doc, final Object key, final OIdentifiable value) { + return changes.map(c -> c.isDeleted(doc, key, value)).orElse(false); + } + + public Map getFragments() { + return fragments; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChanges.java b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChanges.java new file mode 100644 index 0000000000..117c4d911f --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChanges.java @@ -0,0 +1,52 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.database.OIdentifiable; +import java.util.Collections; +import java.util.Set; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; + +/** Created by Enrico Risa on 15/09/15. */ +public interface OLuceneTxChanges { + + void put(Object key, OIdentifiable value, Document doc); + + void remove(Object key, OIdentifiable value); + + IndexSearcher searcher(); + + default long numDocs() { + return 0; + } + + default Set getDeletedDocs() { + return Collections.emptySet(); + } + + boolean isDeleted(Document document, Object key, OIdentifiable value); + + boolean isUpdated(Document document, Object key, OIdentifiable value); + + default long deletedDocs(Query query) { + return 0; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesAbstract.java b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesAbstract.java new file mode 100644 index 0000000000..52fd4f629f --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesAbstract.java @@ -0,0 +1,74 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.common.log.OLogManager; +import com.arcadedb.common.log.OLogger; +import com.arcadedb.lucene.engine.OLuceneIndexEngine; +import com.arcadedb.lucene.exception.OLuceneIndexException; +import java.io.IOException; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; + +/** Created by Enrico Risa on 28/09/15. */ +public abstract class OLuceneTxChangesAbstract implements OLuceneTxChanges { + private static final OLogger logger = + OLogManager.instance().logger(OLuceneTxChangesAbstract.class); + public static final String TMP = "_tmp_rid"; + + protected final OLuceneIndexEngine engine; + protected final IndexWriter writer; + protected final IndexWriter deletedIdx; + + public OLuceneTxChangesAbstract( + final OLuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletedIdx) { + this.engine = engine; + this.writer = writer; + this.deletedIdx = deletedIdx; + } + + public IndexSearcher searcher() { + // TODO optimize + try { + return new IndexSearcher(DirectoryReader.open(writer, true, true)); + } catch (IOException e) { + // logger.error("Error during searcher index instantiation on new documents", e); + throw OException.wrapException( + new OLuceneIndexException("Error during searcher index instantiation on new documents"), + e); + } + } + + @Override + public long deletedDocs(Query query) { + try { + final IndexSearcher indexSearcher = + new IndexSearcher(DirectoryReader.open(deletedIdx, true, true)); + final TopDocs search = indexSearcher.search(query, Integer.MAX_VALUE); + return search.totalHits.value; + } catch (IOException e) { + logger.error("Error during searcher index instantiation on deleted documents ", e); + } + return 0; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesMultiRid.java b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesMultiRid.java new file mode 100644 index 0000000000..c3758ba6d6 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesMultiRid.java @@ -0,0 +1,108 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.lucene.engine.OLuceneIndexEngine; +import com.arcadedb.lucene.exception.OLuceneIndexException; +import com.arcadedb.database.OIdentifiable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.search.Query; + +/** Created by Enrico Risa on 15/09/15. */ +public class OLuceneTxChangesMultiRid extends OLuceneTxChangesAbstract { + private final Map> deleted = new HashMap>(); + private final Set deletedDocs = new HashSet(); + + public OLuceneTxChangesMultiRid( + final OLuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletedIdx) { + super(engine, writer, deletedIdx); + } + + public void put(final Object key, final OIdentifiable value, final Document doc) { + try { + writer.addDocument(doc); + } catch (IOException e) { + throw OException.wrapException( + new OLuceneIndexException("unable to add document to changes index"), e); + } + } + + public void remove(final Object key, final OIdentifiable value) { + try { + if (value.getIdentity().isTemporary()) { + writer.deleteDocuments(engine.deleteQuery(key, value)); + } else { + deleted.putIfAbsent(value.getIdentity().toString(), new ArrayList<>()); + deleted.get(value.getIdentity().toString()).add(key.toString()); + + final Document doc = engine.buildDocument(key, value); + deletedDocs.add(doc); + deletedIdx.addDocument(doc); + } + } catch (final IOException e) { + throw OException.wrapException( + new OLuceneIndexException( + "Error while deleting documents in transaction from lucene index"), + e); + } + } + + public long numDocs() { + return searcher().getIndexReader().numDocs() - deletedDocs.size(); + } + + public Set getDeletedDocs() { + return deletedDocs; + } + + public boolean isDeleted(final Document document, final Object key, final OIdentifiable value) { + boolean match = false; + final List strings = deleted.get(value.getIdentity().toString()); + if (strings != null) { + final MemoryIndex memoryIndex = new MemoryIndex(); + for (final String string : strings) { + final Query q = engine.deleteQuery(string, value); + memoryIndex.reset(); + for (final IndexableField field : document.getFields()) { + memoryIndex.addField(field.name(), field.stringValue(), new KeywordAnalyzer()); + } + match = match || (memoryIndex.search(q) > 0.0f); + } + return match; + } + return match; + } + + // TODO is this valid? + public boolean isUpdated(final Document document, final Object key, final OIdentifiable value) { + return false; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesSingleRid.java b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesSingleRid.java new file mode 100644 index 0000000000..dcc87fe84e --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesSingleRid.java @@ -0,0 +1,92 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.lucene.builder.OLuceneIndexType; +import com.arcadedb.lucene.engine.OLuceneIndexEngine; +import com.arcadedb.lucene.exception.OLuceneIndexException; +import com.arcadedb.database.OIdentifiable; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; + +/** Created by Enrico Risa on 15/09/15. */ +public class OLuceneTxChangesSingleRid extends OLuceneTxChangesAbstract { + private final Set deleted = new HashSet(); + private final Set updated = new HashSet(); + private final Set deletedDocs = new HashSet(); + + public OLuceneTxChangesSingleRid( + final OLuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletedIdx) { + super(engine, writer, deletedIdx); + } + + public void put(final Object key, final OIdentifiable value, final Document doc) { + if (deleted.remove(value.getIdentity().toString())) { + doc.add(OLuceneIndexType.createField(TMP, value.getIdentity().toString(), Field.Store.YES)); + updated.add(value.getIdentity().toString()); + } + try { + writer.addDocument(doc); + } catch (IOException e) { + throw OException.wrapException( + new OLuceneIndexException("unable to add document to changes index"), e); + } + } + + public void remove(final Object key, final OIdentifiable value) { + try { + if (value == null) { + writer.deleteDocuments(engine.deleteQuery(key, value)); + } else if (value.getIdentity().isTemporary()) { + writer.deleteDocuments(engine.deleteQuery(key, value)); + } else { + deleted.add(value.getIdentity().toString()); + Document doc = engine.buildDocument(key, value); + deletedDocs.add(doc); + deletedIdx.addDocument(doc); + } + } catch (final IOException e) { + throw OException.wrapException( + new OLuceneIndexException( + "Error while deleting documents in transaction from lucene index"), + e); + } + } + + public long numDocs() { + return searcher().getIndexReader().numDocs() - deleted.size() - updated.size(); + } + + public Set getDeletedDocs() { + return deletedDocs; + } + + public boolean isDeleted(Document document, Object key, OIdentifiable value) { + return deleted.contains(value.getIdentity().toString()); + } + + public boolean isUpdated(Document document, Object key, OIdentifiable value) { + return updated.contains(value.getIdentity().toString()); + } +} diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory new file mode 100644 index 0000000000..2dbcff89d3 --- /dev/null +++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory @@ -0,0 +1,21 @@ +# +# /* +# * Copyright 2014 Orient Technologies. +# * +# * Licensed under the Apache License, Version 2.0 (the "License"); +# * you may not use this file except in compliance with the License. +# * You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ +# + +com.arcadedb.lucene.OLuceneIndexFactory +com.orientechnologies.spatial.OLuceneSpatialIndexFactory +com.arcadedb.lucene.OLuceneCrossClassIndexFactory diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory new file mode 100644 index 0000000000..72a6b3fbab --- /dev/null +++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory @@ -0,0 +1,21 @@ +# +# /* +# * Copyright 2015 Orient Technologies. +# * +# * Licensed under the Apache License, Version 2.0 (the "License"); +# * you may not use this file except in compliance with the License. +# * You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ +# + +com.arcadedb.lucene.functions.OLuceneFunctionsFactory +com.orientechnologies.spatial.functions.OSpatialFunctionsFactory +com.arcadedb.lucene.functions.OLuceneCrossClassFunctionsFactory diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory new file mode 100644 index 0000000000..02b1024bcd --- /dev/null +++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory @@ -0,0 +1,20 @@ +# +# /* +# * Copyright 2014 Orient Technologies. +# * +# * Licensed under the Apache License, Version 2.0 (the "License"); +# * you may not use this file except in compliance with the License. +# * You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ +# + +com.arcadedb.lucene.operator.OLuceneOperatorFactory +com.orientechnologies.spatial.operator.OLuceneSpatialOperatorFactory diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler b/lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler new file mode 100644 index 0000000000..db660fa15e --- /dev/null +++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler @@ -0,0 +1 @@ +com.arcadedb.lucene.ArcadeLuceneIndexFactoryHandler diff --git a/lucene/src/main/resources/plugin.json b/lucene/src/main/resources/plugin.json new file mode 100644 index 0000000000..c0ef9caa33 --- /dev/null +++ b/lucene/src/main/resources/plugin.json @@ -0,0 +1,8 @@ +{ + "name": "lucene-index", + "version": "1.7", + "javaClass": "com.arcadedb.lucene.ArcadeLuceneLifecycleManager", + "parameters": {}, + "description": "This is the Lucene Index integration", + "copyrights": "Orient Technologies LTD" +} diff --git a/pom.xml b/pom.xml index 4dc3484765..a07eaa3c68 100644 --- a/pom.xml +++ b/pom.xml @@ -120,6 +120,7 @@ studio package e2e + lucene From b0eb1e168e93db3b8b8e802bcd0da7c3785f3290 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 1 Jun 2025 09:14:36 +0000 Subject: [PATCH 2/6] Jules was unable to complete the task in time. Please review the work done so far and provide feedback for Jules to continue. --- ....java => ArcadeLuceneAnalyzerFactory.java} | 90 ++- ... ArcadeLucenePerFieldAnalyzerWrapper.java} | 22 +- .../lucene/builder/OLuceneIndexType.java | 207 ------ .../ArcadeLuceneCrossClassIndexEngine.java | 402 +++++++++++ .../ArcadeLuceneFullTextIndexEngine.java | 328 +++++++++ ...ndexEngine.java => LuceneIndexEngine.java} | 31 +- .../engine/OLuceneCrossClassIndexEngine.java | 399 ----------- .../engine/OLuceneFullTextIndexEngine.java | 304 -------- .../engine/OLuceneIndexEngineAbstract.java | 671 ++++++++++++++++++ ...cadeLuceneCrossClassFunctionsFactory.java} | 11 +- .../ArcadeLuceneCrossClassSearchFunction.java | 237 +++++++ .../ArcadeLuceneFunctionsFactory.java | 32 + .../functions/ArcadeLuceneFunctionsUtils.java | 64 ++ .../ArcadeLuceneSearchFunctionTemplate.java | 114 +++ ...rcadeLuceneSearchMoreLikeThisFunction.java | 389 ++++++++++ .../ArcadeLuceneSearchOnClassFunction.java | 242 +++++++ .../ArcadeLuceneSearchOnFieldsFunction.java | 291 ++++++++ .../ArcadeLuceneSearchOnIndexFunction.java | 186 +++++ .../OLuceneCrossClassSearchFunction.java | 181 ----- .../functions/OLuceneFunctionsFactory.java | 27 - .../functions/OLuceneFunctionsUtils.java | 60 -- .../OLuceneSearchFunctionTemplate.java | 90 --- .../OLuceneSearchMoreLikeThisFunction.java | 396 ----------- .../OLuceneSearchOnClassFunction.java | 184 ----- .../OLuceneSearchOnFieldsFunction.java | 200 ------ .../OLuceneSearchOnIndexFunction.java | 198 ------ .../lucene/index/ArcadeLuceneIndexType.java | 220 ++++++ 27 files changed, 3253 insertions(+), 2323 deletions(-) rename lucene/src/main/java/com/arcadedb/lucene/analyzer/{OLuceneAnalyzerFactory.java => ArcadeLuceneAnalyzerFactory.java} (51%) rename lucene/src/main/java/com/arcadedb/lucene/analyzer/{OLucenePerFieldAnalyzerWrapper.java => ArcadeLucenePerFieldAnalyzerWrapper.java} (71%) delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/builder/OLuceneIndexType.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java rename lucene/src/main/java/com/arcadedb/lucene/engine/{OLuceneIndexEngine.java => LuceneIndexEngine.java} (53%) delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneCrossClassIndexEngine.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneFullTextIndexEngine.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngineAbstract.java rename lucene/src/main/java/com/arcadedb/lucene/functions/{OLuceneCrossClassFunctionsFactory.java => ArcadeLuceneCrossClassFunctionsFactory.java} (55%) create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsFactory.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsUtils.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchMoreLikeThisFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnClassFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnFieldsFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassSearchFunction.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsFactory.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsUtils.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchFunctionTemplate.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchMoreLikeThisFunction.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnClassFunction.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnFieldsFunction.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnIndexFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java diff --git a/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLuceneAnalyzerFactory.java b/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLuceneAnalyzerFactory.java similarity index 51% rename from lucene/src/main/java/com/arcadedb/lucene/analyzer/OLuceneAnalyzerFactory.java rename to lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLuceneAnalyzerFactory.java index 3641f43536..2bfcb49424 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLuceneAnalyzerFactory.java +++ b/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLuceneAnalyzerFactory.java @@ -1,25 +1,25 @@ package com.arcadedb.lucene.analyzer; -import com.arcadedb.common.exception.OException; -import com.arcadedb.common.log.OLogManager; -import com.arcadedb.common.log.OLogger; -import com.arcadedb.database.index.OIndexDefinition; -import com.arcadedb.database.index.OIndexException; -import com.arcadedb.database.metadata.schema.OType; -import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.document.Document; +import com.arcadedb.exception.ArcadeDBException; +import com.arcadedb.exception.IndexException; +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.schema.Type; import java.lang.reflect.Constructor; import java.util.Collection; import java.util.Locale; +import java.util.logging.Level; +import java.util.logging.Logger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.standard.StandardAnalyzer; /** Created by frank on 30/10/2015. */ -public class OLuceneAnalyzerFactory { - private static final OLogger logger = OLogManager.instance().logger(OLuceneAnalyzerFactory.class); +public class ArcadeLuceneAnalyzerFactory { + private static final Logger logger = Logger.getLogger(ArcadeLuceneAnalyzerFactory.class.getName()); public Analyzer createAnalyzer( - final OIndexDefinition index, final AnalyzerKind kind, final ODocument metadata) { + final IndexDefinition index, final AnalyzerKind kind, final Document metadata) { if (index == null) { throw new IllegalArgumentException("Index must not be null"); } @@ -29,32 +29,32 @@ public Analyzer createAnalyzer( if (metadata == null) { throw new IllegalArgumentException("Metadata must not be null"); } - final String defaultAnalyzerFQN = metadata.field("default"); - final String prefix = index.getClassName() + "."; + final String defaultAnalyzerFQN = metadata.getString("default"); + final String prefix = index.getTypeName() + "."; - final OLucenePerFieldAnalyzerWrapper analyzer = + final OLucenePerFieldAnalyzerWrapper analyzer = // FIXME: Needs to be ArcadeLucenePerFieldAnalyzerWrapper geLucenePerFieldPresetAnalyzerWrapperForAllFields(defaultAnalyzerFQN); setDefaultAnalyzerForRequestedKind(index, kind, metadata, prefix, analyzer); setSpecializedAnalyzersForEachField(index, kind, metadata, prefix, analyzer); return analyzer; } - private OLucenePerFieldAnalyzerWrapper geLucenePerFieldPresetAnalyzerWrapperForAllFields( + private OLucenePerFieldAnalyzerWrapper geLucenePerFieldPresetAnalyzerWrapperForAllFields( // FIXME: Needs to be ArcadeLucenePerFieldAnalyzerWrapper final String defaultAnalyzerFQN) { if (defaultAnalyzerFQN == null) { - return new OLucenePerFieldAnalyzerWrapper(new StandardAnalyzer()); + return new OLucenePerFieldAnalyzerWrapper(new StandardAnalyzer()); // FIXME: Needs to be ArcadeLucenePerFieldAnalyzerWrapper } else { - return new OLucenePerFieldAnalyzerWrapper(buildAnalyzer(defaultAnalyzerFQN)); + return new OLucenePerFieldAnalyzerWrapper(buildAnalyzer(defaultAnalyzerFQN)); // FIXME: Needs to be ArcadeLucenePerFieldAnalyzerWrapper } } private void setDefaultAnalyzerForRequestedKind( - final OIndexDefinition index, + final IndexDefinition index, final AnalyzerKind kind, - final ODocument metadata, + final Document metadata, final String prefix, - final OLucenePerFieldAnalyzerWrapper analyzer) { - final String specializedAnalyzerFQN = metadata.field(kind.toString()); + final OLucenePerFieldAnalyzerWrapper analyzer) { // FIXME: Needs to be ArcadeLucenePerFieldAnalyzerWrapper + final String specializedAnalyzerFQN = metadata.getString(kind.toString()); if (specializedAnalyzerFQN != null) { for (final String field : index.getFields()) { analyzer.add(field, buildAnalyzer(specializedAnalyzerFQN)); @@ -64,67 +64,63 @@ private void setDefaultAnalyzerForRequestedKind( } private void setSpecializedAnalyzersForEachField( - final OIndexDefinition index, + final IndexDefinition index, final AnalyzerKind kind, - final ODocument metadata, + final Document metadata, final String prefix, - final OLucenePerFieldAnalyzerWrapper analyzer) { + final OLucenePerFieldAnalyzerWrapper analyzer) { // FIXME: Needs to be ArcadeLucenePerFieldAnalyzerWrapper for (final String field : index.getFields()) { final String analyzerName = field + "_" + kind.toString(); final String analyzerStopwords = analyzerName + "_stopwords"; if (metadata.containsField(analyzerName) && metadata.containsField(analyzerStopwords)) { - final Collection stopWords = metadata.field(analyzerStopwords, OType.EMBEDDEDLIST); - analyzer.add(field, buildAnalyzer(metadata.field(analyzerName), stopWords)); - analyzer.add(prefix + field, buildAnalyzer(metadata.field(analyzerName), stopWords)); + final Collection stopWords = metadata.get(analyzerStopwords, Collection.class); + analyzer.add(field, buildAnalyzer(metadata.getString(analyzerName), stopWords)); + analyzer.add(prefix + field, buildAnalyzer(metadata.getString(analyzerName), stopWords)); } else if (metadata.containsField(analyzerName)) { - analyzer.add(field, buildAnalyzer(metadata.field(analyzerName))); - analyzer.add(prefix + field, buildAnalyzer(metadata.field(analyzerName))); + analyzer.add(field, buildAnalyzer(metadata.getString(analyzerName))); + analyzer.add(prefix + field, buildAnalyzer(metadata.getString(analyzerName))); } } } private Analyzer buildAnalyzer(final String analyzerFQN) { try { - final Class classAnalyzer = Class.forName(analyzerFQN); - final Constructor constructor = classAnalyzer.getConstructor(); + final Class classAnalyzer = Class.forName(analyzerFQN); + final Constructor constructor = classAnalyzer.getDeclaredConstructor(); return (Analyzer) constructor.newInstance(); } catch (final ClassNotFoundException e) { - throw OException.wrapException( - new OIndexException("Analyzer: " + analyzerFQN + " not found"), e); + throw new IndexException("Analyzer: " + analyzerFQN + " not found", e); } catch (final NoSuchMethodException e) { - Class classAnalyzer = null; + Class classAnalyzer; try { classAnalyzer = Class.forName(analyzerFQN); + //noinspection deprecation return (Analyzer) classAnalyzer.newInstance(); } catch (Exception e1) { - logger.error("Exception is suppressed, original exception is ", e); + logger.log(Level.SEVERE, "Exception is suppressed, original exception is ", e); //noinspection ThrowInsideCatchBlockWhichIgnoresCaughtException - throw OException.wrapException( - new OIndexException("Couldn't instantiate analyzer: public constructor not found"), - e1); + throw new IndexException("Couldn't instantiate analyzer: public constructor not found", e1); } } catch (Exception e) { - logger.error( - "Error on getting analyzer for Lucene index (continuing with StandardAnalyzer)", e); + logger.log( + Level.SEVERE, "Error on getting analyzer for Lucene index (continuing with StandardAnalyzer)", e); return new StandardAnalyzer(); } } private Analyzer buildAnalyzer(final String analyzerFQN, final Collection stopwords) { try { - final Class classAnalyzer = Class.forName(analyzerFQN); - final Constructor constructor = classAnalyzer.getDeclaredConstructor(CharArraySet.class); + final Class classAnalyzer = Class.forName(analyzerFQN); + final Constructor constructor = classAnalyzer.getDeclaredConstructor(CharArraySet.class); return (Analyzer) constructor.newInstance(new CharArraySet(stopwords, true)); } catch (final ClassNotFoundException e) { - throw OException.wrapException( - new OIndexException("Analyzer: " + analyzerFQN + " not found"), e); + throw new IndexException("Analyzer: " + analyzerFQN + " not found", e); } catch (final NoSuchMethodException e) { - throw OException.wrapException( - new OIndexException("Couldn't instantiate analyzer: public constructor not found"), e); + throw new IndexException("Couldn't instantiate analyzer: public constructor not found", e); } catch (final Exception e) { - logger.error( - "Error on getting analyzer for Lucene index (continuing with StandardAnalyzer)", e); + logger.log( + Level.SEVERE, "Error on getting analyzer for Lucene index (continuing with StandardAnalyzer)", e); return new StandardAnalyzer(); } } diff --git a/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLucenePerFieldAnalyzerWrapper.java b/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLucenePerFieldAnalyzerWrapper.java similarity index 71% rename from lucene/src/main/java/com/arcadedb/lucene/analyzer/OLucenePerFieldAnalyzerWrapper.java rename to lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLucenePerFieldAnalyzerWrapper.java index 53237815ff..ba140f59eb 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLucenePerFieldAnalyzerWrapper.java +++ b/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLucenePerFieldAnalyzerWrapper.java @@ -1,8 +1,8 @@ package com.arcadedb.lucene.analyzer; -import static com.arcadedb.lucene.engine.OLuceneIndexEngineAbstract.RID; +import static com.arcadedb.lucene.engine.OLuceneIndexEngineAbstract.RID; // FIXME: This might need to be ArcadeDB specific constant if RID definition changes -import com.arcadedb.lucene.builder.OLuceneIndexType; +import com.arcadedb.lucene.index.ArcadeLuceneIndexType; // FIXME: Ensure this is the correct refactored class for OLuceneIndexType import java.util.HashMap; import java.util.Map; import org.apache.lucene.analysis.Analyzer; @@ -17,7 +17,7 @@ * delegate analyzer, but not allocated on this analyzer (limit memory consumption). Uses a per * field reuse strategy. */ -public class OLucenePerFieldAnalyzerWrapper extends DelegatingAnalyzerWrapper { +public class ArcadeLucenePerFieldAnalyzerWrapper extends DelegatingAnalyzerWrapper { private final Analyzer defaultDelegateAnalyzer; private final Map fieldAnalyzers; @@ -27,7 +27,7 @@ public class OLucenePerFieldAnalyzerWrapper extends DelegatingAnalyzerWrapper { * @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use * the one provided here. */ - public OLucenePerFieldAnalyzerWrapper(final Analyzer defaultAnalyzer) { + public ArcadeLucenePerFieldAnalyzerWrapper(final Analyzer defaultAnalyzer) { this(defaultAnalyzer, new HashMap<>()); } @@ -38,7 +38,7 @@ public OLucenePerFieldAnalyzerWrapper(final Analyzer defaultAnalyzer) { * the one provided here. * @param fieldAnalyzers a Map (String field name to the Analyzer) to be used for those fields */ - public OLucenePerFieldAnalyzerWrapper( + public ArcadeLucenePerFieldAnalyzerWrapper( final Analyzer defaultAnalyzer, final Map fieldAnalyzers) { super(PER_FIELD_REUSE_STRATEGY); this.defaultDelegateAnalyzer = defaultAnalyzer; @@ -47,7 +47,7 @@ public OLucenePerFieldAnalyzerWrapper( this.fieldAnalyzers.putAll(fieldAnalyzers); this.fieldAnalyzers.put(RID, new KeywordAnalyzer()); - this.fieldAnalyzers.put(OLuceneIndexType.RID_HASH, new KeywordAnalyzer()); + this.fieldAnalyzers.put(ArcadeLuceneIndexType.RID_HASH, new KeywordAnalyzer()); this.fieldAnalyzers.put("_CLASS", new KeywordAnalyzer()); this.fieldAnalyzers.put("_CLUSTER", new KeywordAnalyzer()); this.fieldAnalyzers.put("_JSON", new KeywordAnalyzer()); @@ -61,24 +61,24 @@ protected Analyzer getWrappedAnalyzer(final String fieldName) { @Override public String toString() { - return "PerFieldAnalyzerWrapper(" + return "ArcadeLucenePerFieldAnalyzerWrapper(" // Updated class name in toString + fieldAnalyzers + ", default=" + defaultDelegateAnalyzer + ")"; } - public OLucenePerFieldAnalyzerWrapper add(final String field, final Analyzer analyzer) { + public ArcadeLucenePerFieldAnalyzerWrapper add(final String field, final Analyzer analyzer) { fieldAnalyzers.put(field, analyzer); return this; } - public OLucenePerFieldAnalyzerWrapper add(final OLucenePerFieldAnalyzerWrapper analyzer) { - fieldAnalyzers.putAll(analyzer.getAnalyzers()); + public ArcadeLucenePerFieldAnalyzerWrapper add(final ArcadeLucenePerFieldAnalyzerWrapper wrapper) { // Changed parameter type + fieldAnalyzers.putAll(wrapper.getAnalyzers()); return this; } - public OLucenePerFieldAnalyzerWrapper remove(final String field) { + public ArcadeLucenePerFieldAnalyzerWrapper remove(final String field) { fieldAnalyzers.remove(field); return this; } diff --git a/lucene/src/main/java/com/arcadedb/lucene/builder/OLuceneIndexType.java b/lucene/src/main/java/com/arcadedb/lucene/builder/OLuceneIndexType.java deleted file mode 100644 index 8459173f22..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/builder/OLuceneIndexType.java +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.arcadedb.lucene.builder; - -import com.arcadedb.common.exception.OException; -import com.arcadedb.lucene.engine.OLuceneIndexEngineAbstract; -import com.arcadedb.lucene.exception.OLuceneIndexException; -import com.arcadedb.database.OIdentifiable; -import com.arcadedb.database.index.OCompositeKey; -import com.arcadedb.database.index.OIndexDefinition; -import com.arcadedb.database.record.impl.ODocument; -import java.io.UnsupportedEncodingException; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.Base64; -import java.util.Date; -import java.util.List; -import java.util.Locale; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.DoubleDocValuesField; -import org.apache.lucene.document.DoublePoint; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FloatDocValuesField; -import org.apache.lucene.document.FloatPoint; -import org.apache.lucene.document.IntPoint; -import org.apache.lucene.document.LongPoint; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.SortedDocValuesField; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.util.BytesRef; - -/** Created by enricorisa on 21/03/14. */ -public class OLuceneIndexType { - public static final String RID_HASH = "_RID_HASH"; - - public static Field createField( - final String fieldName, final Object value, final Field.Store store /*,Field.Index index*/) { - // metadata fields: _CLASS, _CLUSTER - if (fieldName.startsWith("_CLASS") || fieldName.startsWith("_CLUSTER")) { - return new StringField(fieldName, value.toString(), store); - } - return new TextField(fieldName, value.toString(), Field.Store.YES); - } - - public static String extractId(Document doc) { - String value = doc.get(RID_HASH); - if (value != null) { - int pos = value.indexOf("|"); - if (pos > 0) { - return value.substring(0, pos); - } else { - return value; - } - } else { - return null; - } - } - - public static Field createIdField(final OIdentifiable id, final Object key) { - return new StringField(RID_HASH, genValueId(id, key), Field.Store.YES); - } - - public static Field createOldIdField(final OIdentifiable id) { - return new StringField( - OLuceneIndexEngineAbstract.RID, id.getIdentity().toString(), Field.Store.YES); - } - - public static String genValueId(final OIdentifiable id, final Object key) { - String value = id.getIdentity().toString() + "|"; - value += hashKey(key); - return value; - } - - public static List createFields( - String fieldName, Object value, Field.Store store, Boolean sort) { - List fields = new ArrayList<>(); - if (value instanceof Number) { - Number number = (Number) value; - if (value instanceof Long) { - fields.add(new NumericDocValuesField(fieldName, number.longValue())); - fields.add(new LongPoint(fieldName, number.longValue())); - return fields; - } else if (value instanceof Float) { - fields.add(new FloatDocValuesField(fieldName, number.floatValue())); - fields.add(new FloatPoint(fieldName, number.floatValue())); - return fields; - } else if (value instanceof Double) { - fields.add(new DoubleDocValuesField(fieldName, number.doubleValue())); - fields.add(new DoublePoint(fieldName, number.doubleValue())); - return fields; - } - fields.add(new NumericDocValuesField(fieldName, number.longValue())); - fields.add(new IntPoint(fieldName, number.intValue())); - return fields; - } else if (value instanceof Date) { - Date date = (Date) value; - fields.add(new NumericDocValuesField(fieldName, date.getTime())); - fields.add(new LongPoint(fieldName, date.getTime())); - return fields; - } - if (Boolean.TRUE.equals(sort)) { - fields.add(new SortedDocValuesField(fieldName, new BytesRef(value.toString()))); - } - fields.add(new TextField(fieldName, value.toString(), Field.Store.YES)); - return fields; - } - - public static Query createExactQuery(OIndexDefinition index, Object key) { - Query query = null; - if (key instanceof String) { - final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); - if (index.getFields().size() > 0) { - for (String idx : index.getFields()) { - queryBuilder.add( - new TermQuery(new Term(idx, key.toString())), BooleanClause.Occur.SHOULD); - } - } else { - queryBuilder.add( - new TermQuery(new Term(OLuceneIndexEngineAbstract.KEY, key.toString())), - BooleanClause.Occur.SHOULD); - } - query = queryBuilder.build(); - } else if (key instanceof OCompositeKey) { - final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); - int i = 0; - OCompositeKey keys = (OCompositeKey) key; - for (String idx : index.getFields()) { - String val = (String) keys.getKeys().get(i); - queryBuilder.add(new TermQuery(new Term(idx, val)), BooleanClause.Occur.MUST); - i++; - } - query = queryBuilder.build(); - } - return query; - } - - public static Query createQueryId(OIdentifiable value) { - return new TermQuery(new Term(OLuceneIndexEngineAbstract.RID, value.getIdentity().toString())); - } - - public static Query createQueryId(OIdentifiable value, Object key) { - return new TermQuery(new Term(RID_HASH, genValueId(value, key))); - } - - public static String hashKey(Object key) { - try { - String keyString; - if (key instanceof ODocument) { - keyString = ((ODocument) key).toJSON(); - } else { - keyString = key.toString(); - } - MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); - byte[] bytes = sha256.digest(keyString.getBytes("UTF-8")); - return Base64.getEncoder().encodeToString(bytes); - } catch (NoSuchAlgorithmException e) { - throw OException.wrapException(new OLuceneIndexException("fail to find sha algorithm"), e); - - } catch (UnsupportedEncodingException e) { - throw OException.wrapException(new OLuceneIndexException("fail to find utf-8 encoding"), e); - } - } - - public static Query createDeleteQuery( - OIdentifiable value, List fields, Object key, ODocument metadata) { - - // TODO Implementation of Composite keys with Collection - final BooleanQuery.Builder filter = new BooleanQuery.Builder(); - final BooleanQuery.Builder builder = new BooleanQuery.Builder(); - // TODO: Condition on Id and field key only for backward compatibility - if (value != null) { - builder.add(createQueryId(value), BooleanClause.Occur.MUST); - } - String field = fields.iterator().next(); - builder.add( - new TermQuery(new Term(field, key.toString().toLowerCase(Locale.ENGLISH))), - BooleanClause.Occur.MUST); - - filter.add(builder.build(), BooleanClause.Occur.SHOULD); - if (value != null) { - filter.add(createQueryId(value, key), BooleanClause.Occur.SHOULD); - } - - return filter.build(); - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java new file mode 100644 index 0000000000..03c4c19a20 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java @@ -0,0 +1,402 @@ +package com.arcadedb.lucene.engine; + +// import static com.arcadedb.lucene.OLuceneIndexFactory.LUCENE_ALGORITHM; // FIXME: Define or import appropriately + +import com.arcadedb.database.DatabaseThreadLocal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; +import com.arcadedb.database.RecordId; +import com.arcadedb.database.TransactionContext; // For AtomicOperation +// import com.arcadedb.database.config.IndexEngineData; // FIXME: Find ArcadeDB equivalent or refactor +import com.arcadedb.document.Document; +import com.arcadedb.engine.Storage; +import com.arcadedb.index.Index; +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.index.IndexKeyUpdater; +import com.arcadedb.index.IndexMetadata; +import com.arcadedb.index.engine.IndexValidator; +import com.arcadedb.index.IndexValuesTransformer; +import com.arcadedb.lucene.analyzer.ArcadeLucenePerFieldAnalyzerWrapper; // Refactored +import com.arcadedb.lucene.collections.LuceneResultSet; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.lucene.parser.ArcadeLuceneMultiFieldQueryParser; // FIXME: Needs refactoring +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.lucene.query.LuceneQueryContext; // FIXME: Needs refactoring +import com.arcadedb.lucene.tx.LuceneTxChanges; // FIXME: Needs refactoring +import com.arcadedb.schema.DocumentType; // Changed from OClass +import com.arcadedb.schema.Type; // Changed from OType +import com.arcadedb.utility.Pair; // Changed from ORawPair +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; // Lucene Document +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.highlight.TextFragment; + +/** + * Created by frank on 03/11/2016. + */ +public class ArcadeLuceneCrossClassIndexEngine implements LuceneIndexEngine { // Changed class name and interface + private static final Logger logger = + Logger.getLogger(ArcadeLuceneCrossClassIndexEngine.class.getName()); // Changed logger + private final Storage storage; // Changed OStorage + private final String indexName; + private final int indexId; + private static final String LUCENE_ALGORITHM = "LUCENE"; // Placeholder for algorithm name + + public ArcadeLuceneCrossClassIndexEngine(int indexId, Storage storage, String indexName) { // Changed OStorage + this.indexId = indexId; + this.storage = storage; + this.indexName = indexName; + } + + @Override + public void init(IndexMetadata metadata) {} // Changed OIndexMetadata + + @Override + public void flush() {} + + @Override + public int getId() { + return indexId; + } + + // FIXME: IndexEngineData equivalent in ArcadeDB? + @Override + public void create(TransactionContext atomicOperation, Object data) throws IOException {} // Changed OAtomicOperation, IndexEngineData + + @Override + public void delete(TransactionContext atomicOperation) {} // Changed OAtomicOperation + + // FIXME: IndexEngineData equivalent in ArcadeDB? + @Override + public void load(Object data) {} // Changed IndexEngineData + + @Override + public boolean remove(TransactionContext atomicOperation, Object key) { // Changed OAtomicOperation + return false; + } + + @Override + public void clear(TransactionContext atomicOperation) {} // Changed OAtomicOperation + + @Override + public void close() {} + + @Override + public Object get(Object key) { + // FIXME: This method requires significant refactoring once dependent classes are updated + // (LuceneKeyAndMetadata, ArcadeLuceneFullTextIndex, ArcadeLuceneMultiFieldQueryParser, OLuceneIndexEngineUtils, LuceneResultSet) + + final LuceneKeyAndMetadata keyAndMeta = (LuceneKeyAndMetadata) key; // FIXME + final Document arcadedbMetadata = keyAndMeta.metadata; // ArcadeDB Document // FIXME + final List excludes = + Optional.ofNullable(arcadedbMetadata.>getProperty("excludes")) + .orElse(Collections.emptyList()); + final List includes = + Optional.ofNullable(arcadedbMetadata.>getProperty("includes")) + .orElse(Collections.emptyList()); + + final Collection indexes = // Changed OIndex to Index + DatabaseThreadLocal.INSTANCE // Changed ODatabaseRecordThreadLocal + .get() + .getSchema() // Changed getMetadata().getIndexManager() + .getIndexes() + .stream() + .filter(i -> !excludes.contains(i.getName())) + .filter(i -> includes.isEmpty() || includes.contains(i.getName())) + .collect(Collectors.toList()); + + final ArcadeLucenePerFieldAnalyzerWrapper globalAnalyzer = // Changed OLucenePerFieldAnalyzerWrapper + new ArcadeLucenePerFieldAnalyzerWrapper(new StandardAnalyzer()); + + final List globalFields = new ArrayList(); + final List globalReaders = new ArrayList(); + final Map types = new HashMap<>(); // Changed OType to Type + + try { + for (Index index : indexes) { // Changed OIndex to Index + // FIXME: index.getAlgorithm() might be different, DocumentType.INDEX_TYPE.FULLTEXT might be different + if (index.getAlgorithm().equalsIgnoreCase(LUCENE_ALGORITHM) + && index.getType().equalsIgnoreCase(DocumentType.INDEX_TYPE.FULLTEXT.toString())) { + + final IndexDefinition definition = index.getDefinition(); // Changed OIndexDefinition + final String typeName = definition.getTypeName(); // Changed getClassName + + String[] indexFields = + definition.getFields().toArray(new String[definition.getFields().size()]); + + for (int i = 0; i < indexFields.length; i++) { + String field = indexFields[i]; + types.put(typeName + "." + field, definition.getTypes()[i]); + globalFields.add(typeName + "." + field); + } + + ArcadeLuceneFullTextIndex fullTextIndex = (ArcadeLuceneFullTextIndex) index.getAssociatedIndex(); // Changed OLuceneFullTextIndex, getInternal() + + globalAnalyzer.add((ArcadeLucenePerFieldAnalyzerWrapper) fullTextIndex.queryAnalyzer()); // FIXME: queryAnalyzer might not be directly on ArcadeLuceneFullTextIndex + + globalReaders.add(fullTextIndex.searcher().getIndexReader()); // FIXME: searcher might not be directly on ArcadeLuceneFullTextIndex + } + } + + if (globalReaders.isEmpty()) { + return new LuceneResultSet(this, null, arcadedbMetadata); // FIXME: LuceneResultSet + } + + IndexReader indexReader = new MultiReader(globalReaders.toArray(new IndexReader[] {})); + IndexSearcher searcher = new IndexSearcher(indexReader); + + Map boost = + Optional.ofNullable(arcadedbMetadata.>getProperty("boost")) + .orElse(new HashMap<>()); + + // FIXME: ArcadeLuceneMultiFieldQueryParser needs refactoring + ArcadeLuceneMultiFieldQueryParser p = + new ArcadeLuceneMultiFieldQueryParser( + types, globalFields.toArray(new String[] {}), globalAnalyzer, boost); + + p.setAllowLeadingWildcard( + Optional.ofNullable(arcadedbMetadata.getProperty("allowLeadingWildcard")).orElse(false)); + p.setSplitOnWhitespace( + Optional.ofNullable(arcadedbMetadata.getProperty("splitOnWhitespace")).orElse(true)); + + Object params = keyAndMeta.key.getKeys().get(0); // FIXME: keyAndMeta.key structure might change + Query query = p.parse(params.toString()); + + // FIXME: OLuceneIndexEngineUtils.buildSortFields needs refactoring + final List sortFields = Collections.emptyList(); // Placeholder + // final List fields = OLuceneIndexEngineUtils.buildSortFields(arcadedbMetadata); + + LuceneQueryContext ctx = new LuceneQueryContext(null, searcher, query, sortFields); // FIXME + return new LuceneResultSet(this, ctx, arcadedbMetadata); // FIXME + } catch (IOException e) { + logger.log(Level.SEVERE, "unable to create multi-reader", e); + } catch (ParseException e) { + logger.log(Level.SEVERE, "unable to parse query", e); + } + return null; + } + + @Override + public void put(TransactionContext atomicOperation, Object key, Object value) {} // Changed OAtomicOperation + + @Override + public void put(TransactionContext atomicOperation, Object key, RID value) {} // Changed OAtomicOperation, ORID + + @Override + public boolean remove(TransactionContext atomicOperation, Object key, RID value) { // Changed OAtomicOperation, ORID + return false; + } + + @Override + public void update( // Changed OAtomicOperation, OIndexKeyUpdater + TransactionContext atomicOperation, Object key, IndexKeyUpdater updater) {} + + @Override + public boolean validatedPut( // Changed OAtomicOperation, ORID, IndexEngineValidator + TransactionContext atomicOperation, + Object key, + RID value, + IndexValidator validator) { + return false; + } + + @Override + public Stream> iterateEntriesBetween( // Changed ORawPair, ORID, IndexEngineValuesTransformer + Object rangeFrom, + boolean fromInclusive, + Object rangeTo, + boolean toInclusive, + boolean ascSortOrder, + IndexValuesTransformer transformer) { + return Stream.empty(); + } + + @Override + public Stream> iterateEntriesMajor( // Changed ORawPair, ORID, IndexEngineValuesTransformer + Object fromKey, + boolean isInclusive, + boolean ascSortOrder, + IndexValuesTransformer transformer) { + return Stream.empty(); + } + + @Override + public Stream> iterateEntriesMinor( // Changed ORawPair, ORID, IndexEngineValuesTransformer + Object toKey, + boolean isInclusive, + boolean ascSortOrder, + IndexValuesTransformer transformer) { + return Stream.empty(); + } + + @Override + public Stream> stream(IndexValuesTransformer valuesTransformer) { // Changed ORawPair, ORID + return Stream.empty(); + } + + @Override + public Stream> descStream(IndexValuesTransformer valuesTransformer) { // Changed ORawPair, ORID + return Stream.empty(); + } + + @Override + public Stream keyStream() { + return Stream.empty(); + } + + @Override + public long size(IndexValuesTransformer transformer) { // Changed IndexEngineValuesTransformer + return 0; + } + + @Override + public boolean hasRangeQuerySupport() { + return false; + } + + @Override + public String getName() { + return indexName; + } + + @Override + public boolean acquireAtomicExclusiveLock(Object key) { + return false; + } + + @Override + public String getIndexNameByKey(Object key) { + return null; + } + + @Override + public String indexName() { + return indexName; + } + + @Override + public void onRecordAddedToResultSet( // Changed parameter types + LuceneQueryContext queryContext, // FIXME + RecordId recordId, // Changed OContextualRecordId + Document ret, // Lucene Document + final ScoreDoc score) { + + // FIXME: RecordId in ArcadeDB does not have setContext. How to pass this data? + // recordId.setContext( + // new HashMap() { + // { + // Map frag = queryContext.getFragments(); + // frag.entrySet().stream() + // .forEach( + // f -> { + // TextFragment[] fragments = f.getValue(); + // StringBuilder hlField = new StringBuilder(); + // for (int j = 0; j < fragments.length; j++) { + // if ((fragments[j] != null) && (fragments[j].getScore() > 0)) { + // hlField.append(fragments[j].toString()); + // } + // } + // put("$" + f.getKey() + "_hl", hlField.toString()); + // }); + // put("$score", score.score); + // } + // }); + } + + @Override + public Document buildDocument(Object key, Identifiable value) { // Changed OIdentifiable, Lucene Document + return null; + } + + @Override + public Query buildQuery(Object query) { + return null; + } + + @Override + public Analyzer indexAnalyzer() { + return null; + } + + @Override + public Analyzer queryAnalyzer() { + return null; + } + + @Override + public boolean remove(Object key, Identifiable value) { // Changed OIdentifiable + return false; + } + + @Override + public IndexSearcher searcher() { + return null; + } + + @Override + public void release(IndexSearcher searcher) {} + + @Override + public Set getInTx(Object key, LuceneTxChanges changes) { // Changed OIdentifiable, OLuceneTxChanges + return null; + } + + @Override + public long sizeInTx(LuceneTxChanges changes) { // Changed OLuceneTxChanges + return 0; + } + + @Override + public LuceneTxChanges buildTxChanges() throws IOException { // Changed OLuceneTxChanges + return null; + } + + @Override + public Query deleteQuery(Object key, Identifiable value) { // Changed OIdentifiable + return null; + } + + @Override + public boolean isCollectionIndex() { + return false; + } + + @Override + public void freeze(boolean throwException) {} + + @Override + public void release() {} + + @Override + public void updateUniqueIndexVersion(Object key) {} + + @Override + public int getUniqueIndexVersion(Object key) { + return 0; + } + + @Override + public boolean remove(Object key) { + return false; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java new file mode 100644 index 0000000000..3388defc9d --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java @@ -0,0 +1,328 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * Copyright 2014 Orient Technologies. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.arcadedb.lucene.engine; + +import static com.arcadedb.lucene.builder.LuceneQueryBuilder.EMPTY_METADATA; // FIXME: LuceneQueryBuilder needs refactoring + +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; +import com.arcadedb.database.RecordId; +import com.arcadedb.database.TransactionContext; // For AtomicOperation +import com.arcadedb.document.Document; // ArcadeDB Document +import com.arcadedb.engine.Storage; +import com.arcadedb.exception.IndexException; // Changed exception +import com.arcadedb.index.CompositeKey; +import com.arcadedb.index.IndexKeyUpdater; +import com.arcadedb.index.IndexMetadata; +import com.arcadedb.index.IndexValuesTransformer; +import com.arcadedb.index.engine.IndexValidator; +import com.arcadedb.lucene.builder.LuceneDocumentBuilder; // FIXME: Needs refactoring +import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.ArcadeLuceneIndexTransformer; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.LuceneResultSet; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneIndexType; +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.lucene.query.LuceneQueryContext; +import com.arcadedb.lucene.tx.LuceneTxChanges; +import com.arcadedb.query.sql.executor.CommandContext; +import com.arcadedb.query.sql.parser.ParseException; +import com.arcadedb.schema.Type; // For manual index field creation +import com.arcadedb.utility.Pair; // Changed from ORawPair +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Stream; +import org.apache.lucene.document.Document; // Lucene Document +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.highlight.TextFragment; +import org.apache.lucene.store.Directory; + +public class ArcadeLuceneFullTextIndexEngine extends OLuceneIndexEngineAbstract implements LuceneIndexEngine { // Changed class, base, and interface + private static final Logger logger = + Logger.getLogger(ArcadeLuceneFullTextIndexEngine.class.getName()); // Changed logger + + private final LuceneDocumentBuilder builder; // FIXME: Needs refactoring + private LuceneQueryBuilder queryBuilder; // FIXME: Needs refactoring + private final AtomicLong bonsayFileId = new AtomicLong(0); // TODO: Review if bonsayFileId is still relevant in ArcadeDB context + + public ArcadeLuceneFullTextIndexEngine(Storage storage, String idxName, int id) { // Changed OStorage + super(storage, idxName); // FIXME: OLuceneIndexEngineAbstract constructor might have changed + builder = new LuceneDocumentBuilder(); // FIXME: Needs refactoring + } + + @Override + public void init(IndexMetadata im) { // Changed OIndexMetadata + super.init(im.getName(), im.getType(), im.getDefinition(), im.isAutomatic(), im.getMetadata()); // FIXME: super.init might have changed + // FIXME: getMetadata() on IndexMetadata might be different from OIndexMetadata.getMetadata() + // queryBuilder = new LuceneQueryBuilder(im.getMetadata()); // FIXME: Needs refactoring and correct metadata access + if (im.getDefinition() != null && im.getDefinition().getOptions() != null) { + queryBuilder = new LuceneQueryBuilder(new Document(getDatabase(), im.getDefinition().getOptions())); // FIXME Needs correct metadata Document + } else { + queryBuilder = new LuceneQueryBuilder(new Document(getDatabase())); // Empty metadata if not available + } + } + + @Override + public IndexWriter createIndexWriter(Directory directory) throws IOException { + // FIXME: OLuceneIndexWriterFactory needs to be ArcadeLuceneIndexWriterFactory + // OLuceneIndexWriterFactory fc = new OLuceneIndexWriterFactory(); + // logger.log(Level.FINE, "Creating Lucene index in ''{0}''...", directory); + // return fc.createIndexWriter(directory, metadata, indexAnalyzer()); + throw new UnsupportedOperationException("ArcadeLuceneIndexWriterFactory not yet implemented"); + } + + @Override + public void onRecordAddedToResultSet( // Changed parameter types + final LuceneQueryContext queryContext, + final RecordId recordId, // Changed OContextualRecordId + final Document ret, // Lucene Document + final ScoreDoc score) { + HashMap data = new HashMap(); + + final Map frag = queryContext.getFragments(); + frag.forEach( + (key, fragments) -> { + final StringBuilder hlField = new StringBuilder(); + for (final TextFragment fragment : fragments) { + if ((fragment != null) && (fragment.getScore() > 0)) { + hlField.append(fragment.toString()); + } + } + data.put("$" + key + "_hl", hlField.toString()); + }); + data.put("$score", score.score); + + // recordId.setContext(data); // FIXME: RecordId in ArcadeDB does not have setContext. How to pass this data? + // This might need a wrapper class or different result handling. + } + + @Override + public boolean remove(final TransactionContext atomicOperation, final Object key) { // Changed OAtomicOperation + return remove(key); + } + + @Override + public boolean remove(TransactionContext atomicOperation, Object key, RID value) { // Changed OAtomicOperation, ORID + return remove(key, value); + } + + @Override + public Object get(final Object key) { + return getInTx(key, null); + } + + @Override + public void update( // Changed OAtomicOperation, OIndexKeyUpdater + final TransactionContext atomicOperation, + final Object key, + final IndexKeyUpdater updater) { + // FIXME: bonsayFileId might not be relevant. updater.update might change. + put(atomicOperation, key, updater.update(null, bonsayFileId).getValue()); + } + + @Override + public void put(final TransactionContext atomicOperation, final Object key, final Object value) { // Changed OAtomicOperation + updateLastAccess(); + openIfClosed(); + final Document doc = buildDocument(key, (Identifiable) value); // Lucene Document + addDocument(doc); + } + + @Override + public void put(TransactionContext atomicOperation, Object key, RID value) { // Changed OAtomicOperation, ORID + updateLastAccess(); + openIfClosed(); + final Document doc = buildDocument(key, value); // Lucene Document + addDocument(doc); + } + + @Override + public boolean validatedPut( // Changed OAtomicOperation, ORID, IndexEngineValidator + TransactionContext atomicOperation, + Object key, + RID value, + IndexValidator validator) { + throw new UnsupportedOperationException( + "Validated put is not supported by ArcadeLuceneFullTextIndexEngine"); + } + + @Override + public Stream> iterateEntriesBetween( // Changed ORawPair, ORID, IndexEngineValuesTransformer + Object rangeFrom, + boolean fromInclusive, + Object rangeTo, + boolean toInclusive, + boolean ascSortOrder, + IndexValuesTransformer transformer) { + // FIXME: OLuceneResultSet and LuceneIndexTransformer need refactoring + return ArcadeLuceneIndexTransformer.transformToStream((LuceneResultSet) get(rangeFrom), rangeFrom); + } + + private Set getResults( // Changed OIdentifiable, OCommandContext, OLuceneTxChanges, ODocument + final Query query, + final CommandContext context, + final LuceneTxChanges changes, + final Document metadata) { // ArcadeDB Document for metadata + // sort + // FIXME: OLuceneIndexEngineUtils.buildSortFields needs refactoring + // final List fields = OLuceneIndexEngineUtils.buildSortFields(metadata); + final List fields = null; // Placeholder + final IndexSearcher luceneSearcher = searcher(); + final LuceneQueryContext queryContext = + new LuceneQueryContext(context, luceneSearcher, query, fields).withChanges(changes); + // FIXME: OLuceneResultSet needs refactoring to LuceneResultSet + return new LuceneResultSet(this, queryContext, metadata); + } + + @Override + public Stream> iterateEntriesMajor( // Changed ORawPair, ORID, IndexEngineValuesTransformer + Object fromKey, + boolean isInclusive, + boolean ascSortOrder, + IndexValuesTransformer transformer) { + return null; + } + + @Override + public Stream> iterateEntriesMinor( // Changed ORawPair, ORID, IndexEngineValuesTransformer + Object toKey, + boolean isInclusive, + boolean ascSortOrder, + IndexValuesTransformer transformer) { + return null; + } + + @Override + public boolean hasRangeQuerySupport() { + return false; + } + + @Override + public void updateUniqueIndexVersion(Object key) { + // not implemented + } + + @Override + public int getUniqueIndexVersion(Object key) { + return 0; // not implemented + } + + @Override + public Document buildDocument(Object key, Identifiable value) { // Changed OIdentifiable, Lucene Document + if (indexDefinition.isAutomatic()) { + // FIXME: builder (LuceneDocumentBuilder) needs refactoring + // return builder.build(indexDefinition, key, value, collectionFields, metadata); + throw new UnsupportedOperationException("Automatic index document building not yet fully refactored."); + } else { + return putInManualindex(key, value); + } + } + + private static Document putInManualindex(Object key, Identifiable oIdentifiable) { // Changed OIdentifiable, Lucene Document + Document doc = new Document(); // Lucene Document + doc.add(ArcadeLuceneIndexType.createRidField(oIdentifiable)); + doc.add(ArcadeLuceneIndexType.createIdField(oIdentifiable, key)); + + // FIXME: This manual field creation is CRITICAL and needs to use ArcadeLuceneIndexType.createFields + // with proper Type resolution for each object 'o'. + if (key instanceof CompositeKey) { // Changed OCompositeKey + List keys = ((CompositeKey) key).getKeys(); + int k = 0; + for (Object o : keys) { + // Determine Type of 'o' here. For now, defaulting to String. + // Type type = Type.STRING; // Placeholder - this needs to be dynamic + // doc.add(ArcadeLuceneIndexType.createFields("k" + k, o, Field.Store.YES, false, type)); + doc.add(ArcadeLuceneIndexType.createField("k" + k, o, Field.Store.YES)); // Simplified call, needs type + k++; + } + } else if (key instanceof Collection) { + @SuppressWarnings("unchecked") + Collection keys = (Collection) key; + int k = 0; + for (Object o : keys) { + // Determine Type of 'o' here. For now, defaulting to String. + // Type type = Type.STRING; // Placeholder - this needs to be dynamic + // doc.add(ArcadeLuceneIndexType.createFields("k" + k, o, Field.Store.YES, false, type)); + doc.add(ArcadeLuceneIndexType.createField("k" + k, o, Field.Store.YES)); // Simplified call, needs type + k++; + } + } else { + // Determine Type of 'key' here. For now, defaulting to String. + // Type type = Type.STRING; // Placeholder - this needs to be dynamic + // doc.add(ArcadeLuceneIndexType.createFields("k0", key, Field.Store.NO, false, type)); + doc.add(ArcadeLuceneIndexType.createField("k0", key, Field.Store.NO)); // Simplified call, needs type + } + return doc; + } + + @Override + public Query buildQuery(final Object maybeQuery) { + try { + if (maybeQuery instanceof String) { + // FIXME: queryBuilder (LuceneQueryBuilder) needs refactoring + return queryBuilder.query(indexDefinition, (String) maybeQuery, new Document(getDatabase()) /*EMPTY_METADATA*/, queryAnalyzer()); + } else { + LuceneKeyAndMetadata q = (LuceneKeyAndMetadata) maybeQuery; // FIXME: LuceneKeyAndMetadata needs refactoring + // FIXME: queryBuilder (LuceneQueryBuilder) needs refactoring + return queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer()); + } + } catch (final ParseException e) { + throw new IndexException("Error parsing query", e); // Changed exception + } + } + + @Override + public Set getInTx(Object key, LuceneTxChanges changes) { // Changed OIdentifiable, OLuceneTxChanges + updateLastAccess(); + openIfClosed(); + try { + if (key instanceof LuceneKeyAndMetadata) { // FIXME: LuceneKeyAndMetadata needs refactoring + LuceneKeyAndMetadata q = (LuceneKeyAndMetadata) key; + // FIXME: queryBuilder (LuceneQueryBuilder) needs refactoring + Query query = queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer()); + + CommandContext commandContext = q.key.getContext(); // FIXME: LuceneKeyAndMetadata.key might not have getContext + return getResults(query, commandContext, changes, q.metadata); + + } else { + // FIXME: queryBuilder (LuceneQueryBuilder) needs refactoring + Query query = queryBuilder.query(indexDefinition, key, new Document(getDatabase()) /*EMPTY_METADATA*/, queryAnalyzer()); + + CommandContext commandContext = null; + if (key instanceof LuceneCompositeKey) { // FIXME: LuceneCompositeKey needs refactoring + commandContext = ((LuceneCompositeKey) key).getContext(); + } + return getResults(query, commandContext, changes, new Document(getDatabase())/*EMPTY_METADATA*/); + } + } catch (ParseException e) { + throw new IndexException("Error parsing lucene query", e); // Changed exception + } + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/LuceneIndexEngine.java similarity index 53% rename from lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngine.java rename to lucene/src/main/java/com/arcadedb/lucene/engine/LuceneIndexEngine.java index 7336b359b9..49e1c5db19 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngine.java +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/LuceneIndexEngine.java @@ -1,6 +1,7 @@ /* * * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * Copyright 2014 Orient Technologies. * * * * Licensed under the Apache License, Version 2.0 (the "License"); * * you may not use this file except in compliance with the License. @@ -18,12 +19,12 @@ package com.arcadedb.lucene.engine; -import com.arcadedb.lucene.query.OLuceneQueryContext; -import com.arcadedb.lucene.tx.OLuceneTxChanges; -import com.arcadedb.database.OIdentifiable; -import com.arcadedb.database.id.OContextualRecordId; -import com.arcadedb.database.index.engine.OIndexEngine; -import com.arcadedb.database.storage.impl.local.OFreezableStorageComponent; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RecordId; +import com.arcadedb.engine.WALFile; // For Freezeable +import com.arcadedb.index.IndexEngine; +import com.arcadedb.lucene.query.LuceneQueryContext; // Will be refactored +import com.arcadedb.lucene.tx.LuceneTxChanges; // Will be refactored import java.io.IOException; import java.util.Set; import org.apache.lucene.analysis.Analyzer; @@ -33,14 +34,14 @@ import org.apache.lucene.search.ScoreDoc; /** Created by Enrico Risa on 04/09/15. */ -public interface OLuceneIndexEngine extends OIndexEngine, OFreezableStorageComponent { +public interface LuceneIndexEngine extends IndexEngine, WALFile.Freezeable { // Changed interface name and extended interfaces String indexName(); - void onRecordAddedToResultSet( - OLuceneQueryContext queryContext, OContextualRecordId recordId, Document ret, ScoreDoc score); + void onRecordAddedToResultSet( // Changed parameter types + LuceneQueryContext queryContext, RecordId recordId, Document ret, ScoreDoc score); - Document buildDocument(Object key, OIdentifiable value); + Document buildDocument(Object key, Identifiable value); // Changed parameter type Query buildQuery(Object query); @@ -48,7 +49,7 @@ void onRecordAddedToResultSet( Analyzer queryAnalyzer(); - boolean remove(Object key, OIdentifiable value); + boolean remove(Object key, Identifiable value); // Changed parameter type boolean remove(Object key); @@ -56,13 +57,13 @@ void onRecordAddedToResultSet( void release(IndexSearcher searcher); - Set getInTx(Object key, OLuceneTxChanges changes); + Set getInTx(Object key, LuceneTxChanges changes); // Changed parameter and return types - long sizeInTx(OLuceneTxChanges changes); + long sizeInTx(LuceneTxChanges changes); // Changed parameter type - OLuceneTxChanges buildTxChanges() throws IOException; + LuceneTxChanges buildTxChanges() throws IOException; // Changed return type - Query deleteQuery(Object key, OIdentifiable value); + Query deleteQuery(Object key, Identifiable value); // Changed parameter type boolean isCollectionIndex(); } diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneCrossClassIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneCrossClassIndexEngine.java deleted file mode 100644 index 31a8811ddd..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneCrossClassIndexEngine.java +++ /dev/null @@ -1,399 +0,0 @@ -package com.arcadedb.lucene.engine; - -import static com.arcadedb.lucene.OLuceneIndexFactory.LUCENE_ALGORITHM; - -import com.arcadedb.common.log.OLogManager; -import com.arcadedb.common.log.OLogger; -import com.arcadedb.common.util.ORawPair; -import com.arcadedb.lucene.analyzer.OLucenePerFieldAnalyzerWrapper; -import com.arcadedb.lucene.collections.OLuceneResultSet; -import com.arcadedb.lucene.index.OLuceneFullTextIndex; -import com.arcadedb.lucene.parser.OLuceneMultiFieldQueryParser; -import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; -import com.arcadedb.lucene.query.OLuceneQueryContext; -import com.arcadedb.lucene.tx.OLuceneTxChanges; -import com.arcadedb.database.config.IndexEngineData; -import com.arcadedb.database.ODatabaseRecordThreadLocal; -import com.arcadedb.database.OIdentifiable; -import com.arcadedb.database.id.OContextualRecordId; -import com.arcadedb.database.id.ORID; -import com.arcadedb.database.index.OIndex; -import com.arcadedb.database.index.OIndexDefinition; -import com.arcadedb.database.index.OIndexKeyUpdater; -import com.arcadedb.database.index.OIndexMetadata; -import com.arcadedb.database.index.engine.IndexEngineValidator; -import com.arcadedb.database.index.engine.IndexEngineValuesTransformer; -import com.arcadedb.database.metadata.schema.OClass; -import com.arcadedb.database.metadata.schema.OType; -import com.arcadedb.database.record.impl.ODocument; -import com.arcadedb.database.storage.OStorage; -import com.arcadedb.database.storage.impl.local.paginated.atomicoperations.OAtomicOperation; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiReader; -import org.apache.lucene.queryparser.classic.ParseException; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.highlight.TextFragment; - -/** - * Created by frank on 03/11/2016. - */ -public class OLuceneCrossClassIndexEngine implements OLuceneIndexEngine { - private static final OLogger logger = - OLogManager.instance().logger(OLuceneCrossClassIndexEngine.class); - private final OStorage storage; - private final String indexName; - private final int indexId; - - public OLuceneCrossClassIndexEngine(int indexId, OStorage storage, String indexName) { - this.indexId = indexId; - - this.storage = storage; - this.indexName = indexName; - } - - @Override - public void init(OIndexMetadata metadata) {} - - @Override - public void flush() {} - - @Override - public int getId() { - return indexId; - } - - @Override - public void create(OAtomicOperation atomicOperation, IndexEngineData data) throws IOException {} - - @Override - public void delete(OAtomicOperation atomicOperation) {} - - @Override - public void load(IndexEngineData data) {} - - @Override - public boolean remove(OAtomicOperation atomicOperation, Object key) { - return false; - } - - @Override - public void clear(OAtomicOperation atomicOperation) {} - - @Override - public void close() {} - - @Override - public Object get(Object key) { - - final OLuceneKeyAndMetadata keyAndMeta = (OLuceneKeyAndMetadata) key; - final ODocument metadata = keyAndMeta.metadata; - final List excludes = - Optional.ofNullable(metadata.>getProperty("excludes")) - .orElse(Collections.emptyList()); - final List includes = - Optional.ofNullable(metadata.>getProperty("includes")) - .orElse(Collections.emptyList()); - - final Collection indexes = - ODatabaseRecordThreadLocal.instance() - .get() - .getMetadata() - .getIndexManager() - .getIndexes() - .stream() - .filter(i -> !excludes.contains(i.getName())) - .filter(i -> includes.isEmpty() || includes.contains(i.getName())) - .collect(Collectors.toList()); - - final OLucenePerFieldAnalyzerWrapper globalAnalyzer = - new OLucenePerFieldAnalyzerWrapper(new StandardAnalyzer()); - - final List globalFields = new ArrayList(); - - final List globalReaders = new ArrayList(); - final Map types = new HashMap<>(); - - try { - for (OIndex index : indexes) { - - if (index.getAlgorithm().equalsIgnoreCase(LUCENE_ALGORITHM) - && index.getType().equalsIgnoreCase(OClass.INDEX_TYPE.FULLTEXT.toString())) { - - final OIndexDefinition definition = index.getDefinition(); - final String className = definition.getClassName(); - - String[] indexFields = - definition.getFields().toArray(new String[definition.getFields().size()]); - - for (int i = 0; i < indexFields.length; i++) { - String field = indexFields[i]; - - types.put(className + "." + field, definition.getTypes()[i]); - globalFields.add(className + "." + field); - } - - OLuceneFullTextIndex fullTextIndex = (OLuceneFullTextIndex) index.getInternal(); - - globalAnalyzer.add((OLucenePerFieldAnalyzerWrapper) fullTextIndex.queryAnalyzer()); - - globalReaders.add(fullTextIndex.searcher().getIndexReader()); - } - } - - IndexReader indexReader = new MultiReader(globalReaders.toArray(new IndexReader[] {})); - - IndexSearcher searcher = new IndexSearcher(indexReader); - - Map boost = - Optional.ofNullable(metadata.>getProperty("boost")) - .orElse(new HashMap<>()); - - OLuceneMultiFieldQueryParser p = - new OLuceneMultiFieldQueryParser( - types, globalFields.toArray(new String[] {}), globalAnalyzer, boost); - - p.setAllowLeadingWildcard( - Optional.ofNullable(metadata.getProperty("allowLeadingWildcard")).orElse(false)); - - p.setSplitOnWhitespace( - Optional.ofNullable(metadata.getProperty("splitOnWhitespace")).orElse(true)); - - Object params = keyAndMeta.key.getKeys().get(0); - - Query query = p.parse(params.toString()); - - final List fields = OLuceneIndexEngineUtils.buildSortFields(metadata); - - OLuceneQueryContext ctx = new OLuceneQueryContext(null, searcher, query, fields); - return new OLuceneResultSet(this, ctx, metadata); - } catch (IOException e) { - logger.error("unable to create multi-reader", e); - } catch (ParseException e) { - logger.error("unable to parse query", e); - } - - return null; - } - - @Override - public void put(OAtomicOperation atomicOperation, Object key, Object value) {} - - @Override - public void put(OAtomicOperation atomicOperation, Object key, ORID value) {} - - @Override - public boolean remove(OAtomicOperation atomicOperation, Object key, ORID value) { - return false; - } - - @Override - public void update( - OAtomicOperation atomicOperation, Object key, OIndexKeyUpdater updater) {} - - @Override - public boolean validatedPut( - OAtomicOperation atomicOperation, - Object key, - ORID value, - IndexEngineValidator validator) { - return false; - } - - @Override - public Stream> iterateEntriesBetween( - Object rangeFrom, - boolean fromInclusive, - Object rangeTo, - boolean toInclusive, - boolean ascSortOrder, - IndexEngineValuesTransformer transformer) { - return Stream.empty(); - } - - @Override - public Stream> iterateEntriesMajor( - Object fromKey, - boolean isInclusive, - boolean ascSortOrder, - IndexEngineValuesTransformer transformer) { - return Stream.empty(); - } - - @Override - public Stream> iterateEntriesMinor( - Object toKey, - boolean isInclusive, - boolean ascSortOrder, - IndexEngineValuesTransformer transformer) { - return Stream.empty(); - } - - @Override - public Stream> stream(IndexEngineValuesTransformer valuesTransformer) { - return Stream.empty(); - } - - @Override - public Stream> descStream(IndexEngineValuesTransformer valuesTransformer) { - return Stream.empty(); - } - - @Override - public Stream keyStream() { - return Stream.empty(); - } - - @Override - public long size(IndexEngineValuesTransformer transformer) { - return 0; - } - - @Override - public boolean hasRangeQuerySupport() { - return false; - } - - @Override - public String getName() { - return indexName; - } - - @Override - public boolean acquireAtomicExclusiveLock(Object key) { - return false; - } - - @Override - public String getIndexNameByKey(Object key) { - return null; - } - - @Override - public String indexName() { - return indexName; - } - - @Override - public void onRecordAddedToResultSet( - OLuceneQueryContext queryContext, - OContextualRecordId recordId, - Document ret, - final ScoreDoc score) { - - recordId.setContext( - new HashMap() { - { - Map frag = queryContext.getFragments(); - - frag.entrySet().stream() - .forEach( - f -> { - TextFragment[] fragments = f.getValue(); - StringBuilder hlField = new StringBuilder(); - for (int j = 0; j < fragments.length; j++) { - if ((fragments[j] != null) && (fragments[j].getScore() > 0)) { - hlField.append(fragments[j].toString()); - } - } - put("$" + f.getKey() + "_hl", hlField.toString()); - }); - - put("$score", score.score); - } - }); - } - - @Override - public Document buildDocument(Object key, OIdentifiable value) { - return null; - } - - @Override - public Query buildQuery(Object query) { - return null; - } - - @Override - public Analyzer indexAnalyzer() { - return null; - } - - @Override - public Analyzer queryAnalyzer() { - return null; - } - - @Override - public boolean remove(Object key, OIdentifiable value) { - return false; - } - - @Override - public IndexSearcher searcher() { - return null; - } - - @Override - public void release(IndexSearcher searcher) {} - - @Override - public Set getInTx(Object key, OLuceneTxChanges changes) { - return null; - } - - @Override - public long sizeInTx(OLuceneTxChanges changes) { - return 0; - } - - @Override - public OLuceneTxChanges buildTxChanges() throws IOException { - return null; - } - - @Override - public Query deleteQuery(Object key, OIdentifiable value) { - return null; - } - - @Override - public boolean isCollectionIndex() { - return false; - } - - @Override - public void freeze(boolean throwException) {} - - @Override - public void release() {} - - @Override - public void updateUniqueIndexVersion(Object key) {} - - @Override - public int getUniqueIndexVersion(Object key) { - return 0; - } - - @Override - public boolean remove(Object key) { - return false; - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneFullTextIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneFullTextIndexEngine.java deleted file mode 100644 index 646ac57992..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneFullTextIndexEngine.java +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.arcadedb.lucene.engine; - -import static com.arcadedb.lucene.builder.OLuceneQueryBuilder.EMPTY_METADATA; - -import com.arcadedb.common.exception.OException; -import com.arcadedb.common.log.OLogManager; -import com.arcadedb.common.log.OLogger; -import com.arcadedb.common.util.ORawPair; -import com.arcadedb.lucene.builder.OLuceneDocumentBuilder; -import com.arcadedb.lucene.builder.OLuceneIndexType; -import com.arcadedb.lucene.builder.OLuceneQueryBuilder; -import com.arcadedb.lucene.collections.LuceneIndexTransformer; -import com.arcadedb.lucene.collections.OLuceneCompositeKey; -import com.arcadedb.lucene.collections.OLuceneResultSet; -import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; -import com.arcadedb.lucene.query.OLuceneQueryContext; -import com.arcadedb.lucene.tx.OLuceneTxChanges; -import com.arcadedb.database.OCommandContext; -import com.arcadedb.database.OIdentifiable; -import com.arcadedb.database.id.OContextualRecordId; -import com.arcadedb.database.id.ORID; -import com.arcadedb.database.index.OCompositeKey; -import com.arcadedb.database.index.OIndexEngineException; -import com.arcadedb.database.index.OIndexKeyUpdater; -import com.arcadedb.database.index.OIndexMetadata; -import com.arcadedb.database.index.engine.IndexEngineValidator; -import com.arcadedb.database.index.engine.IndexEngineValuesTransformer; -import com.arcadedb.database.record.impl.ODocument; -import com.arcadedb.database.sql.parser.ParseException; -import com.arcadedb.database.storage.OStorage; -import com.arcadedb.database.storage.impl.local.paginated.atomicoperations.OAtomicOperation; -import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Stream; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.highlight.TextFragment; -import org.apache.lucene.store.Directory; - -public class OLuceneFullTextIndexEngine extends OLuceneIndexEngineAbstract { - private static final OLogger logger = - OLogManager.instance().logger(OLuceneFullTextIndexEngine.class); - - private final OLuceneDocumentBuilder builder; - private OLuceneQueryBuilder queryBuilder; - private final AtomicLong bonsayFileId = new AtomicLong(0); - - public OLuceneFullTextIndexEngine(OStorage storage, String idxName, int id) { - super(id, storage, idxName); - builder = new OLuceneDocumentBuilder(); - } - - @Override - public void init(OIndexMetadata im) { - super.init(im); - queryBuilder = new OLuceneQueryBuilder(im.getMetadata()); - } - - @Override - public IndexWriter createIndexWriter(Directory directory) throws IOException { - - OLuceneIndexWriterFactory fc = new OLuceneIndexWriterFactory(); - - logger.debug("Creating Lucene index in '%s'...", directory); - - return fc.createIndexWriter(directory, metadata, indexAnalyzer()); - } - - @Override - public void onRecordAddedToResultSet( - final OLuceneQueryContext queryContext, - final OContextualRecordId recordId, - final Document ret, - final ScoreDoc score) { - HashMap data = new HashMap(); - - final Map frag = queryContext.getFragments(); - frag.forEach( - (key, fragments) -> { - final StringBuilder hlField = new StringBuilder(); - for (final TextFragment fragment : fragments) { - if ((fragment != null) && (fragment.getScore() > 0)) { - hlField.append(fragment.toString()); - } - } - data.put("$" + key + "_hl", hlField.toString()); - }); - data.put("$score", score.score); - - recordId.setContext(data); - } - - @Override - public boolean remove(final OAtomicOperation atomicOperation, final Object key) { - return remove(key); - } - - @Override - public boolean remove(OAtomicOperation atomicOperation, Object key, ORID value) { - return remove(key, value); - } - - @Override - public Object get(final Object key) { - return getInTx(key, null); - } - - @Override - public void update( - final OAtomicOperation atomicOperation, - final Object key, - final OIndexKeyUpdater updater) { - put(atomicOperation, key, updater.update(null, bonsayFileId).getValue()); - } - - @Override - public void put(final OAtomicOperation atomicOperation, final Object key, final Object value) { - updateLastAccess(); - openIfClosed(); - final Document doc = buildDocument(key, (OIdentifiable) value); - addDocument(doc); - } - - @Override - public void put(OAtomicOperation atomicOperation, Object key, ORID value) { - updateLastAccess(); - openIfClosed(); - final Document doc = buildDocument(key, value); - addDocument(doc); - } - - @Override - public boolean validatedPut( - OAtomicOperation atomicOperation, - Object key, - ORID value, - IndexEngineValidator validator) { - throw new UnsupportedOperationException( - "Validated put is not supported by OLuceneFullTextIndexEngine"); - } - - @Override - public Stream> iterateEntriesBetween( - Object rangeFrom, - boolean fromInclusive, - Object rangeTo, - boolean toInclusive, - boolean ascSortOrder, - IndexEngineValuesTransformer transformer) { - return LuceneIndexTransformer.transformToStream((OLuceneResultSet) get(rangeFrom), rangeFrom); - } - - private Set getResults( - final Query query, - final OCommandContext context, - final OLuceneTxChanges changes, - final ODocument metadata) { - // sort - final List fields = OLuceneIndexEngineUtils.buildSortFields(metadata); - final IndexSearcher luceneSearcher = searcher(); - final OLuceneQueryContext queryContext = - new OLuceneQueryContext(context, luceneSearcher, query, fields).withChanges(changes); - return new OLuceneResultSet(this, queryContext, metadata); - } - - @Override - public Stream> iterateEntriesMajor( - Object fromKey, - boolean isInclusive, - boolean ascSortOrder, - IndexEngineValuesTransformer transformer) { - return null; - } - - @Override - public Stream> iterateEntriesMinor( - Object toKey, - boolean isInclusive, - boolean ascSortOrder, - IndexEngineValuesTransformer transformer) { - return null; - } - - @Override - public boolean hasRangeQuerySupport() { - return false; - } - - @Override - public void updateUniqueIndexVersion(Object key) { - // not implemented - } - - @Override - public int getUniqueIndexVersion(Object key) { - return 0; // not implemented - } - - @Override - public Document buildDocument(Object key, OIdentifiable value) { - if (indexDefinition.isAutomatic()) { - // builder.newBuild(index, key, value); - - return builder.build(indexDefinition, key, value, collectionFields, metadata); - } else { - return putInManualindex(key, value); - } - } - - private static Document putInManualindex(Object key, OIdentifiable oIdentifiable) { - Document doc = new Document(); - doc.add(OLuceneIndexType.createOldIdField(oIdentifiable)); - doc.add(OLuceneIndexType.createIdField(oIdentifiable, key)); - - if (key instanceof OCompositeKey) { - - List keys = ((OCompositeKey) key).getKeys(); - - int k = 0; - for (Object o : keys) { - doc.add(OLuceneIndexType.createField("k" + k, o, Field.Store.YES)); - k++; - } - } else if (key instanceof Collection) { - @SuppressWarnings("unchecked") - Collection keys = (Collection) key; - - int k = 0; - for (Object o : keys) { - doc.add(OLuceneIndexType.createField("k" + k, o, Field.Store.YES)); - k++; - } - } else { - doc.add(OLuceneIndexType.createField("k0", key, Field.Store.NO)); - } - return doc; - } - - @Override - public Query buildQuery(final Object maybeQuery) { - try { - if (maybeQuery instanceof String) { - return queryBuilder.query(indexDefinition, maybeQuery, EMPTY_METADATA, queryAnalyzer()); - } else { - OLuceneKeyAndMetadata q = (OLuceneKeyAndMetadata) maybeQuery; - return queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer()); - } - } catch (final ParseException e) { - throw OException.wrapException(new OIndexEngineException("Error parsing query"), e); - } - } - - @Override - public Set getInTx(Object key, OLuceneTxChanges changes) { - updateLastAccess(); - openIfClosed(); - try { - if (key instanceof OLuceneKeyAndMetadata) { - OLuceneKeyAndMetadata q = (OLuceneKeyAndMetadata) key; - Query query = queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer()); - - OCommandContext commandContext = q.key.getContext(); - return getResults(query, commandContext, changes, q.metadata); - - } else { - Query query = queryBuilder.query(indexDefinition, key, EMPTY_METADATA, queryAnalyzer()); - - OCommandContext commandContext = null; - if (key instanceof OLuceneCompositeKey) { - commandContext = ((OLuceneCompositeKey) key).getContext(); - } - return getResults(query, commandContext, changes, EMPTY_METADATA); - } - } catch (ParseException e) { - throw OException.wrapException(new OIndexEngineException("Error parsing lucene query"), e); - } - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngineAbstract.java b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngineAbstract.java new file mode 100644 index 0000000000..1818963685 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngineAbstract.java @@ -0,0 +1,671 @@ +/* + * Copyright 2014 Orient Technologies. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.arcadedb.lucene.engine; + +import com.arcadedb.GlobalConfiguration; +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.DatabaseThreadLocal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RecordId; +import com.arcadedb.document.Document; +import com.arcadedb.engine.PaginatedFile; +import com.arcadedb.engine.Storage; +import com.arcadedb.exception.ArcadeDBException; +import com.arcadedb.exception.IndexException; +import com.arcadedb.index.IndexCursor; +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.index.IndexKeyCursor; +import com.arcadedb.lucene.analyzer.ArcadeLuceneAnalyzerFactory; +import com.arcadedb.lucene.exception.LuceneIndexException; +import com.arcadedb.lucene.index.ArcadeLuceneIndexType; +import com.arcadedb.lucene.query.LuceneQueryContext; +import com.arcadedb.lucene.tx.LuceneTxChanges; +import com.arcadedb.query.sql.executor.CommandContext; +import com.arcadedb.schema.DocumentType; +import com.arcadedb.schema.Property; +import com.arcadedb.schema.Type; +import com.arcadedb.utility.FileUtils; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.TrackingIndexWriter; +import org.apache.lucene.search.*; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; + +import java.io.File; +import java.io.IOException; +import java.util.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static com.arcadedb.lucene.analyzer.ArcadeLuceneAnalyzerFactory.AnalyzerK +ind.INDEX; +import static com.arcadedb.lucene.analyzer.ArcadeLuceneAnalyzerFactory.AnalyzerK +ind.QUERY; + +public abstract class OLuceneIndexEngineAbstract /* extends OSharedResourceAd +aptiveExternal */ implements OLuceneIndexEngine { // FIXME + + public static final String RID = "RID"; + public static final String KEY = "KEY"; + public static final String STORED = "_STORED"; + + public static final String OLUCENE_BASE_DIR = "luceneIndexes"; + + protected final AtomicLong lastAccess; + protected SearcherManager searcherManager; + protected IndexDefinition index; + protected String name; + protected String clusterIndexName; + protected boolean automatic; + protected ControlledRealTimeReopenThread nrt; + protected Document metadata; + + protected Map collectionFields = new HashMap +(); + protected TimerTask commitTask; + protected AtomicBoolean closed = new AtomicBoolean(false); + protected Storage storage; + private long reopenToken; + private Analyzer indexAnalyzer; + private Analyzer queryAnalyzer; + private Directory directory; + private TrackingIndexWriter mgrWriter; + private long flushIndexInterval; + private long closeAfterInterval; + private long firstFlushAfter; + + public OLuceneIndexEngineAbstract(Storage storage, String indexName) { + this.storage = storage; + this.name = indexName; + + lastAccess = new AtomicLong(System.currentTimeMillis()); + + closed = new AtomicBoolean(true); + + } + + // TODO: move to utility class + public static void sendTotalHits(String indexName, CommandContext context, int + totalHits) { + if (context != null) { + + if (context.getVariable("totalHits") == null) { + context.setVariable("totalHits", totalHits); + } else { + context.setVariable("totalHits", null); + } + context.setVariable((indexName + ".totalHits").replace(".", "_"), totalHit +s); + } + } + + // TODO: move to utility class + public static void sendLookupTime(String indexName, CommandContext context, fi +nal TopDocs docs, final Integer limit, + long startFetching) { + if (context != null) { + + final long finalTime = System.currentTimeMillis() - startFetching; + context.setVariable((indexName + ".lookupTime").replace(".", "_"), new Has +hMap() { + { + put("limit", limit); + put("totalTime", finalTime); + put("totalHits", docs.totalHits); + put("returnedHits", docs.scoreDocs.length); + if (!Float.isNaN(docs.getMaxScore())) { + put("maxScore", docs.getMaxScore()); + } + + } + }); + } + } + + protected void updateLastAccess() { + lastAccess.set(System.currentTimeMillis()); + } + + protected abstract IndexWriter openIndexWriter(Directory directory) throws IOE +xception; + + protected void addDocument(Document doc) { + try { + + reopenToken = mgrWriter.addDocument(doc); + } catch (IOException e) { + Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on adding + new document '" + doc + "' to Lucene index", e); + } + } + + @Override + public void init(String indexName, String indexType, IndexDefinition indexDefi +nition, boolean isAutomatic, Document metadata) { + + this.index = indexDefinition; + this.automatic = isAutomatic; + this.metadata = metadata; + + ArcadeLuceneAnalyzerFactory fc = new ArcadeLuceneAnalyzerFactory(); + indexAnalyzer = fc.createAnalyzer(indexDefinition, INDEX, metadata); + queryAnalyzer = fc.createAnalyzer(indexDefinition, QUERY, metadata); + + checkCollectionIndex(indexDefinition); + + if (metadata.containsField("flushIndexInterval")) { + flushIndexInterval = Integer.valueOf(metadata.field("flushIndexIn +terval")).longValue(); + } else { + flushIndexInterval = 10000l; + } + + if (metadata.containsField("closeAfterInterval")) { + closeAfterInterval = Integer.valueOf(metadata.field("closeAfterIn +terval")).longValue(); + } else { + closeAfterInterval = 20000l; + } + + if (metadata.containsField("firstFlushAfter")) { + firstFlushAfter = Integer.valueOf(metadata.field("firstFlushAfter +")).longValue(); + } else { + firstFlushAfter = 10000l; + } + + } + + private void scheduleCommitTask() { + commitTask = new TimerTask() { + @Override + public boolean cancel() { +// Logger.getLogger(getClass().getName()).info(" Cancelling commit task f +or index:: " + indexName()); + return super.cancel(); + } + + @Override + public void run() { + + if (System.currentTimeMillis() - lastAccess.get() > closeAfterInterval) +{ + +// Logger.getLogger(getClass().getName()).info(" Closing index:: " + in +dexName()); + close(); + } + if (!closed.get()) { + +// Logger.getLogger(getClass().getName()).info(" Flushing index:: " + i +ndexName()); + flush(); + } + } + }; + // FIXME + // Orient.instance().scheduleTask(commitTask, firstFlushAfter, flushIndexInt +erval); + getDatabase().getSchema().getScheduler().scheduleTask(commitTask, firstFlush +After, flushIndexInterval); + } + + private void checkCollectionIndex(IndexDefinition indexDefinition) { + + List fields = indexDefinition.getFields(); + + DocumentType aClass = getDatabase().getSchema().getType(indexDefinition.getT +ypeName()); + for (String field : fields) { + Property property = aClass.getProperty(field); + + if (property.getType().isEmbedded() && property.getLinkedType() != null) { + collectionFields.put(field, true); + } else { + collectionFields.put(field, false); + } + } + } + + protected void reOpen() throws IOException { + + if (mgrWriter != null && mgrWriter.getIndexWriter().isOpen() && directory in +stanceof RAMDirectory) { + // don't waste time reopening an in memory index + return; + } + open(); + } + + protected DatabaseInternal getDatabase() { + return DatabaseThreadLocal.INSTANCE.get(); + } + + private synchronized void open() throws IOException { + + if (!closed.get()) + return; + + ArcadeLuceneDirectoryFactory directoryFactory = new ArcadeLuceneDirectoryFac +tory(); // FIXME OLuceneDirectoryFactory + + directory = directoryFactory.createDirectory(getDatabase(), name, metadata); + + final IndexWriter indexWriter = createIndexWriter(directory); + mgrWriter = new TrackingIndexWriter(indexWriter); + searcherManager = new SearcherManager(indexWriter, true, null); + + reopenToken = 0; + + startNRT(); + + closed.set(false); + + flush(); + + scheduleCommitTask(); + + } + + private void startNRT() { + nrt = new ControlledRealTimeReopenThread(mgrWriter, searcherManager, 60.00, +0.1); + nrt.setDaemon(true); + nrt.start(); + } + + private void closeNRT() { + if (nrt != null) { + nrt.interrupt(); + nrt.close(); + } + } + + private void cancelCommitTask() { + if (commitTask != null) { + commitTask.cancel(); + } + } + + private void closeSearchManager() throws IOException { + if (searcherManager != null) { + searcherManager.close(); + } + } + + private void commitAndCloseWriter() throws IOException { + if (mgrWriter != null && mgrWriter.getIndexWriter().isOpen()) { + mgrWriter.getIndexWriter().commit(); + mgrWriter.getIndexWriter().close(); + closed.set(true); + } + } + + protected abstract IndexWriter createIndexWriter(Directory directory) throws I +OException; + + @Override + public void flush() { + + try { + if (mgrWriter != null && mgrWriter.getIndexWriter().isOpen()) + mgrWriter.getIndexWriter().commit(); + + } catch (IOException e) { + Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on flushi +ng Lucene index", e); + } catch (Throwable e) { + Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on flushi +ng Lucene index", e); + } + + } + + @Override + public void create(com.arcadedb.serializer.BinarySerializer valueSerializer, b +oolean isAutomatic, Type[] keyTypes, boolean nullPointerSupport, + com.arcadedb.serializer.BinarySerializer keySerializer, int keySize, Set clustersToIndex, Map engineProperties, + Document metadata) { + } + + @Override + public void delete() { + updateLastAccess(); + openIfClosed(); + + if (mgrWriter != null && mgrWriter.getIndexWriter() != null) { + + try { + mgrWriter.getIndexWriter().deleteUnusedFiles(); + } catch (IOException e) { + e.printStackTrace(); + } + close(); + } + + final DatabaseInternal database = getDatabase(); + deleteIndexFolder(indexName(), database); + } + + private void deleteIndexFolder(String indexName, DatabaseInternal database) { +// FIXME OLocalPaginatedStorage + File f = new File(getIndexPath(database, indexName)); + FileUtils.deleteRecursively(f); + f = new File(getIndexBasePath(database)); + FileUtils.deleteFolderIfEmpty(f); + } + + @Override + public String indexName() { + return name; + } + + private String getIndexPath(DatabaseInternal database, String indexName) { // F +IXME OLocalPaginatedStorage + return database.getDatabasePath() + File.separator + OLUCENE_BASE_DIR + File +.separator + indexName; // FIXME getStoragePath + } + + protected String getIndexBasePath(DatabaseInternal database) { // FIXME OLocal +PaginatedStorage + return database.getDatabasePath() + File.separator + OLUCENE_BASE_DIR; // FIX +ME getStoragePath + } + + public abstract void onRecordAddedToResultSet(LuceneQueryContext queryContext, + RecordId recordId, Document ret, + ScoreDoc score); + + @Override + public Analyzer indexAnalyzer() { + return indexAnalyzer; + } + + @Override + public Analyzer queryAnalyzer() { + return queryAnalyzer; + } + + @Override + public boolean remove(Object key, Identifiable value) { + updateLastAccess(); + openIfClosed(); + + Query query = deleteQuery(key, value); + if (query != null) + deleteDocument(query); + return true; + } + + protected void deleteDocument(Query query) { + try { + reopenToken = mgrWriter.deleteDocuments(query); + if (!mgrWriter.getIndexWriter().hasDeletions()) { + Logger.getLogger(getClass().getName()) + .log(Level.SEVERE, "Error on deleting document by query '" + query + + "' to Lucene index", new IndexException("Error deleting document")); + } + } catch (IOException e) { + Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on deleti +ng document by query '" + query + "' to Lucene index", e); + } + } + + protected boolean isCollectionDelete() { + boolean collectionDelete = false; + for (Boolean aBoolean : collectionFields.values()) { + collectionDelete = collectionDelete || aBoolean; + } + return collectionDelete; + } + + protected void openIfClosed() { + if (closed.get()) { +// Logger.getLogger(getClass().getName()).info("open closed index:: " + ind +exName()); + + try { + reOpen(); + } catch (IOException e) { + Logger.getLogger(getClass().getName()).log(Level.SEVERE, "error while o +pening closed index:: " + indexName(), e); + + } + } + } + + @Override + public boolean isCollectionIndex() { + return isCollectionDelete(); + } + + @Override + public IndexSearcher searcher() { + try { + updateLastAccess(); + openIfClosed(); + nrt.waitForGeneration(reopenToken); + IndexSearcher searcher = searcherManager.acquire(); + return searcher; + } catch (Exception e) { + Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on get se +archer from Lucene index", e); + throw new LuceneIndexException("Error on get searcher from Lucene index", +e); + } + + } + + @Override + public long sizeInTx(LuceneTxChanges changes) { + IndexSearcher searcher = searcher(); + try { + IndexReader reader = searcher.getIndexReader(); + + return changes == null ? reader.numDocs() : reader.numDocs() + changes.get +NumDocs(); + } finally { + + release(searcher); + } + } + + @Override + public LuceneTxChanges buildTxChanges() throws IOException { + if (isCollectionDelete()) { + // FIXME + // return new OLuceneTxChangesMultiRid(this, createIndexWriter(new RAMDire +ctory()), createIndexWriter(new RAMDirectory())); + return null; + } else { + // FIXME + // return new OLuceneTxChangesSingleRid(this, createIndexWriter(new RAMDire +ctory()), createIndexWriter(new RAMDirectory())); + return null; + } + } + + @Override + public Query deleteQuery(Object key, Identifiable value) { + updateLastAccess(); + openIfClosed(); + if (isCollectionDelete()) { + return ArcadeLuceneIndexType.createDeleteQuery(value, index.getFields(), +key); + } + return ArcadeLuceneIndexType.createQueryId(value); + } + + @Override + public void deleteWithoutLoad(String indexName) { + internalDelete(indexName); + } + + protected void internalDelete(String indexName) { + if (mgrWriter != null && mgrWriter.getIndexWriter().isOpen()) { + close(); + } + + final DatabaseInternal database = getDatabase(); + deleteIndexFolder(indexName, database); + } + + @Override + public void load(String indexName, com.arcadedb.serializer.BinarySerializer v +alueSerializer, boolean isAutomatic, com.arcadedb.serializer.BinarySerializer k +eySerializer, + Type[] keyTypes, boolean nullPointerSupport, int keySize, Map engineProperties) { + // initIndex(indexName, indexDefinition, isAutomatic, metadata); + } + + @Override + public void clear() { + updateLastAccess(); + openIfClosed(); + try { + reopenToken = mgrWriter.deleteAll(); + } catch (IOException e) { + Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on cleari +ng Lucene index", e); + } + } + + @Override + public synchronized void close() { + if (closed.get()) + return; + + try { +// Logger.getLogger(getClass().getName()).info("Closing Lucene index '" + t +his.name + "'..."); + + closeNRT(); + + closeSearchManager(); + + commitAndCloseWriter(); + +// Logger.getLogger(getClass().getName()).info("Closed Lucene index '" + th +is.name); + cancelCommitTask(); + + } catch (Throwable e) { + Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on closin +g Lucene index", e); + } + } + + @Override + public IndexCursor descCursor(ValuesTransformer valuesTransformer) { + throw new UnsupportedOperationException("Cannot iterate over a lucene index" +); + } + + @Override + public IndexCursor cursor(ValuesTransformer valuesTransformer) { + throw new UnsupportedOperationException("Cannot iterate over a lucene index" +); + } + + @Override + public IndexKeyCursor keyCursor() { + throw new UnsupportedOperationException("Cannot iterate over a lucene index" +); + } + + public long size(final ValuesTransformer transformer) { + return sizeInTx(null); + } + + protected void release(IndexSearcher searcher) { + updateLastAccess(); + openIfClosed(); + try { + searcherManager.release(searcher); + } catch (IOException e) { + Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on releas +ing index searcher of Lucene index", e); + } + } + + @Override + public int getVersion() { + return 0; + } + + @Override + public String getName() { + return name; + } + + @Override + public boolean acquireAtomicExclusiveLock(Object key) { + return true; // do nothing + } + + @Override + public String getIndexNameByKey(final Object key) { + return name; + } + + private String getIndexPath(DatabaseInternal database) { // FIXME OLocalPagina +tedStorage + return getIndexPath(database, name); + } + + protected Field.Store isToStore(String f) { + return collectionFields.get(f) ? Field.Store.YES : Field.Store.NO; + } + + @Override + public void freeze(boolean throwException) { + + try { + closeNRT(); + cancelCommitTask(); + commitAndCloseWriter(); + } catch (IOException e) { + Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on freezi +ng Lucene index:: " + indexName(), e); + } + + } + + @Override + public void release() { + try { + close(); + reOpen(); + } catch (IOException e) { + Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on releas +ing Lucene index:: " + indexName(), e); + } + } + + @Override + public boolean isFrozen() { + return closed.get(); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassFunctionsFactory.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassFunctionsFactory.java similarity index 55% rename from lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassFunctionsFactory.java rename to lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassFunctionsFactory.java index cd2ac218fb..5a24641965 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassFunctionsFactory.java +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassFunctionsFactory.java @@ -1,5 +1,6 @@ /* * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * Copyright 2023 Arcade Data Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,11 +16,13 @@ */ package com.arcadedb.lucene.functions; -import com.arcadedb.database.sql.functions.OSQLFunctionFactoryTemplate; +import com.arcadedb.query.sql.SQLFunctionRegistry; // Assuming this is the ArcadeDB equivalent -public class OLuceneCrossClassFunctionsFactory extends OSQLFunctionFactoryTemplate { +// FIXME: The actual function class (ArcadeLuceneCrossClassSearchFunction) will need to be created/refactored separately. - public OLuceneCrossClassFunctionsFactory() { - register(new OLuceneCrossClassSearchFunction()); +public class ArcadeLuceneCrossClassFunctionsFactory { // Changed class name + + public static void onStartup() { // Changed to a static method for registration + SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneCrossClassSearchFunction()); // FIXME: Placeholder for refactored class } } diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java new file mode 100644 index 0000000000..8cfe4060e0 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java @@ -0,0 +1,237 @@ +package com.arcadedb.lucene.functions; + +// import static com.arcadedb.lucene.OLuceneCrossClassIndexFactory.LUCENE_CROSS_CLASS; // FIXME Define or import +import com.arcadedb.database.DatabaseContext; +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; // Changed +import com.arcadedb.document.Document; // Changed +import com.arcadedb.index.Index; // Changed +import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring +import com.arcadedb.lucene.engine.ArcadeLuceneCrossClassIndexEngine; // Changed +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring (used as type in old code, though engine is likely target) +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.query.sql.parser.FromClause; // Changed +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; // Changed +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * This function uses the CrossClassIndex to search documents across all the Lucene indexes defined in a database + *

+ * Created by frank on 19/02/2016. + */ +public class ArcadeLuceneCrossClassSearchFunction extends ArcadeLuceneSearchFunctionTemplate { // Changed base class + private static final Logger logger = + Logger.getLogger(ArcadeLuceneCrossClassSearchFunction.class.getName()); // Changed + + public static final String NAME = "search_cross"; // Changed from SEARCH_CROSS + private static final String LUCENE_CROSS_CLASS_ALGORITHM = "LUCENE_CROSS_CLASS"; // Placeholder + + public ArcadeLuceneCrossClassSearchFunction() { + super(NAME, 1, 2); // query, [metadata] + } + + // searchForIndex in the template expects args for index name. This class doesn't use that. + // It finds a specific *kind* of index (cross class). + // So, the searchForIndex from the template is not suitable. + // This function might not be a good fit for ArcadeLuceneSearchFunctionTemplate if it cannot provide a single index. + // However, if ArcadeLuceneCrossClassIndexEngine is treated as *the* index, it could work. + + @Override + public Iterable searchFromTarget( // Changed + FromClause target, // Target is ignored by this function as it's cross-class + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, // Changed + Expression... args) { // Changed + + ArcadeLuceneCrossClassIndexEngine engine = getCrossClassEngine(ctx); // FIXME: Needs robust way to get this engine + + if (engine == null) { + logger.warning("Lucene Cross Class Index Engine not found."); + return Collections.emptySet(); + } + + Expression expression = args[0]; + String query = (String) expression.execute((Result) null, ctx); // Changed + + Document metadata = getMetadata(args, ctx, 1); // Changed, metadata is args[1] + + // The engine's 'get' method should return Iterable or similar + // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring + Object result = engine.get( + new LuceneKeyAndMetadata( + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata)); + + if (result instanceof Iterable) { + return (Iterable) result; + } + return Collections.emptySet(); + } + + @Override + public Object execute( + Object iThis, + Identifiable currentRecord, // Changed + Object currentResult, + Object[] params, + CommandContext ctx) { // Changed + + ArcadeLuceneCrossClassIndexEngine engine = getCrossClassEngine(ctx); // FIXME + + if (engine == null) { + logger.warning("Lucene Cross Class Index Engine not found for execute."); + return Collections.emptySet(); + } + + String query = (String) params[0]; + Document metadata = getMetadata(params, 1); // Changed + + // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring + Object result = engine.get( + new LuceneKeyAndMetadata( + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata)); + + return result; + } + + private Document getMetadata(Expression[] args, CommandContext ctx, int metadataParamIndex) { // Changed + if (args.length > metadataParamIndex) { + // Assuming getMetadata from ArcadeLuceneSearchFunctionTemplate is suitable + return super.getMetadata(args[metadataParamIndex], ctx); + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + } + + private Document getMetadata(Object[] params, int metadataParamIndex) { // Changed + if (params.length > metadataParamIndex) { + if (params[metadataParamIndex] instanceof Map) { + return new Document().fromMap((Map) params[metadataParamIndex]); + } else if (params[metadataParamIndex] instanceof String) { + return new Document().fromJSON((String) params[metadataParamIndex]); + } else if (params[metadataParamIndex] instanceof Document) { + return (Document) params[metadataParamIndex]; + } + // Fallback or error if type is not recognized + try { + return new Document().fromJSON(params[metadataParamIndex].toString()); + } catch (Exception e) { + // ignore + } + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + } + + // This method is problematic as the template expects an ArcadeLuceneFullTextIndex. + // This function uses a different kind of engine. + // Returning null tells the template that direct indexed execution (via that specific index type) is not possible. + @Override + protected ArcadeLuceneFullTextIndex searchForIndex( // FIXME: This signature might not be appropriate for this class + FromClause target, CommandContext ctx, Expression... args) { + return null; // This function doesn't use a single, standard Lucene full-text index from the target. + // It uses the ArcadeLuceneCrossClassIndexEngine. + } + + // Helper to get the specific cross-class engine instance + // This assumes there's a way to identify and retrieve this engine. + // It might be registered with a specific name or type. + private ArcadeLuceneCrossClassIndexEngine getCrossClassEngine(CommandContext ctx) { // FIXME + DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify + Collection indexes = database.getSchema().getIndexes(); + for (Index index : indexes) { + // FIXME: Need a reliable way to identify the CrossClassEngine. + // This could be by a specific name, or if the engine itself is registered as an Index. + // The original code checked index.getAlgorithm().equalsIgnoreCase(LUCENE_CROSS_CLASS) + // and then cast index.getInternal() to OLuceneFullTextIndex, which seems problematic as the + // cross class engine is not a typical "full text index" on a specific class. + // For now, assuming the ArcadeLuceneCrossClassIndexEngine might be registered as an Index itself + // with a specific algorithm name. + if (index.getAlgorithm().equalsIgnoreCase(LUCENE_CROSS_CLASS_ALGORITHM) && index instanceof ArcadeLuceneCrossClassIndexEngine) { + return (ArcadeLuceneCrossClassIndexEngine) index; + } + // Alternative: if the engine is not an Index, how is it accessed? + // Perhaps it's a global component or registered differently. + // The original code `(OLuceneFullTextIndex) index.getInternal()` suggests the index itself was a shell. + } + // Fallback: Try to find an index whose *engine* is the cross-class one. + // This is speculative. + for (Index index : indexes) { + if (index.getAssociatedIndex() instanceof ArcadeLuceneCrossClassIndexEngine) { // getAssociatedIndex might be getEngine() + return (ArcadeLuceneCrossClassIndexEngine) index.getAssociatedIndex(); + } + } + return null; + } + + + @Override + public String getSyntax() { + // logger.debug("syntax"); // Logging in getSyntax is unusual + return NAME + "('', [ ])"; + } + + // Other overrides from OIndexableSQLFunction (estimate, canExecuteInline, etc.) + // The original class had specific implementations for these. + // If extending ArcadeLuceneSearchFunctionTemplate, these might be inherited or need specific overrides. + // For now, relying on template's (which has FIXMEs) or needing specific ones here. + + @Override + public long estimate( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { + // Cross-class estimation is complex. Returning a default or trying to get a count from the engine. + ArcadeLuceneCrossClassIndexEngine engine = getCrossClassEngine(ctx); + if (engine != null) { + // FIXME: The engine might need a size estimation method + // return engine.sizeEstimate(args...); + } + return super.estimate(target, operator, rightValue, ctx, args); // Fallback to template's estimate + } + + @Override + public boolean allowsIndexedExecution( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { + // This function *always* uses its specialized engine, so it's "indexed" in that sense. + return getCrossClassEngine(ctx) != null; + } + @Override + public boolean canExecuteInline( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { + return false; // Cross class search is likely too complex for simple inline execution + } + + @Override + public boolean shouldExecuteAfterSearch( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { + return false; + } + +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsFactory.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsFactory.java new file mode 100644 index 0000000000..ca707bae1a --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsFactory.java @@ -0,0 +1,32 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.arcadedb.lucene.functions; + +import com.arcadedb.query.sql.SQLFunctionRegistry; // Assuming this is the ArcadeDB equivalent + +// FIXME: The actual function classes (e.g., ArcadeLuceneSearchOnIndexFunction) will need to be created/refactored separately. +// For now, we are just changing the instantiation call. + +public class ArcadeLuceneFunctionsFactory { // Changed class name + + public static void onStartup() { // Changed to a static method for registration + SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneSearchOnIndexFunction()); // FIXME: Placeholder for refactored class + SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneSearchOnFieldsFunction()); // FIXME: Placeholder for refactored class + SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneSearchOnClassFunction()); // FIXME: Placeholder for refactored class + SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneSearchMoreLikeThisFunction()); // FIXME: Placeholder for refactored class + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsUtils.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsUtils.java new file mode 100644 index 0000000000..df3f5b7307 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsUtils.java @@ -0,0 +1,64 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.database.DatabaseContext; // Assuming CommandContext provides access to Database +import com.arcadedb.database.DatabaseInternal; // Changed +import com.arcadedb.index.Index; // Changed +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.schema.Schema; // Changed +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 13/02/2017. */ +public class ArcadeLuceneFunctionsUtils { // Changed class name + public static final String MEMORY_INDEX = "_memoryIndex"; + + protected static ArcadeLuceneFullTextIndex searchForIndex(Expression[] args, CommandContext ctx) { // Changed types + final String indexName = (String) args[0].execute((Result) null, ctx); // Changed types + return getLuceneFullTextIndex(ctx, indexName); + } + + protected static ArcadeLuceneFullTextIndex getLuceneFullTextIndex( // Changed types + final CommandContext ctx, final String indexName) { + // Assuming CommandContext gives access to DatabaseInternal instance + final DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify how to get DatabaseInternal from CommandContext + // database.activateOnCurrentThread(); // This might not be needed or done differently in ArcadeDB + + final Schema schema = database.getSchema(); // Changed OMetadataInternal + + // FIXME: metadata.getIndexManagerInternal().getIndex(documentDatabase, indexName) changed to schema.getIndex() + // Also, the casting and type checking for ArcadeLuceneFullTextIndex needs ArcadeLuceneFullTextIndex to be properly defined and refactored. + final Index index = schema.getIndex(indexName); + + if (!(index instanceof ArcadeLuceneFullTextIndex)) { // FIXME + throw new IllegalArgumentException("Not a valid Lucene index:: " + indexName); + } + return (ArcadeLuceneFullTextIndex) index; // FIXME + } + + public static MemoryIndex getOrCreateMemoryIndex(CommandContext ctx) { // Changed OCommandContext + MemoryIndex memoryIndex = (MemoryIndex) ctx.getVariable(MEMORY_INDEX); + if (memoryIndex == null) { + memoryIndex = new MemoryIndex(); + ctx.setVariable(MEMORY_INDEX, memoryIndex); + } + memoryIndex.reset(); + return memoryIndex; + } + + public static String doubleEscape(final String s) { + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.length(); ++i) { + final char c = s.charAt(i); + if (c == 92 || c == 43 || c == 45 || c == 33 || c == 40 || c == 41 || c == 58 || c == 94 + || c == 91 || c == 93 || c == 34 || c == 123 || c == 125 || c == 126 || c == 42 || c == 63 + || c == 124 || c == 38 || c == 47) { + sb.append('\\'); + sb.append('\\'); + } + sb.append(c); + } + return sb.toString(); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java new file mode 100644 index 0000000000..140ccbb28e --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java @@ -0,0 +1,114 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.database.Identifiable; // Changed +import com.arcadedb.document.Document; // Changed +import com.arcadedb.lucene.collections.LuceneResultSet; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.function.IndexableSQLFunction; // Assuming this exists +import com.arcadedb.query.sql.function.SQLFunctionAbstract; // Assuming this is the base class +import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.query.sql.parser.FromClause; // Changed +import java.util.Map; + +/** Created by frank on 25/05/2017. */ +// Changed base class and interface +public abstract class ArcadeLuceneSearchFunctionTemplate extends SQLFunctionAbstract + implements IndexableSQLFunction { + + public ArcadeLuceneSearchFunctionTemplate(String iName, int iMinParams, int iMaxParams) { + super(iName, iMinParams, iMaxParams); + } + + // FIXME: Signature of these methods depends heavily on the actual ArcadeDB interfaces for IndexableSQLFunction + @Override + public boolean canExecuteInline( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { // Changed parameter types + return allowsIndexedExecution(target, operator, rightValue, ctx, args); + } + + @Override + public boolean allowsIndexedExecution( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { // Changed parameter types + ArcadeLuceneFullTextIndex index = searchForIndex(target, ctx, args); // FIXME + return index != null; + } + + @Override + public boolean shouldExecuteAfterSearch( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { // Changed parameter types + return false; + } + + @Override + public long estimate( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { // Changed parameter types + + // FIXME: searchFromTarget is not defined in this template, assuming it's from OIndexableSQLFunction or a subclass + // For now, commenting out as its direct equivalent/necessity in ArcadeDB is unclear without seeing concrete function implementation + /* + Iterable a = searchFromTarget(target, operator, rightValue, ctx, args); // Changed OIdentifiable + if (a instanceof LuceneResultSet) { // FIXME + return ((LuceneResultSet) a).size(); // FIXME + } + long count = 0; + for (Object o : a) { + count++; + } + return count; + */ + return 0; // Placeholder + } + + protected Document getMetadata(Expression metadata, CommandContext ctx) { // Changed ODocument, OExpression, OCommandContext + final Object md = metadata.execute((Result) null, ctx); // Changed OResult + if (md instanceof Document) { // Changed ODocument + return (Document) md; + } else if (md instanceof Map) { + return new Document().fromMap((Map) md); // Changed ODocument + } else if (md instanceof String) { + try { + return new Document().fromJSON((String) md); // Changed ODocument + } catch (Exception e) { + // It might not be a JSON string, but the raw metadata string itself (e.g. analyzer class name) + // This part needs careful review based on how metadata is actually passed and used. + // For now, returning a document with a field containing the string. + Document doc = new Document(); + doc.set("metadata", (String) md); // FIXME: Review this fallback for non-JSON metadata strings + return doc; + } + } else if (metadata != null) { + // Fallback if metadata is not null but not a recognized type, try its string representation as JSON + try { + return new Document().fromJSON(metadata.toString()); // Changed ODocument + } catch (Exception e) { + Document doc = new Document(); + doc.set("metadata", metadata.toString()); // FIXME: Review this fallback + return doc; + } + } + return new Document(); // Empty document if null or unparseable + } + + // Changed OLuceneFullTextIndex, OFromClause, OCommandContext, OExpression + protected abstract ArcadeLuceneFullTextIndex searchForIndex( // FIXME + FromClause target, CommandContext ctx, Expression... args); +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchMoreLikeThisFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchMoreLikeThisFunction.java new file mode 100644 index 0000000000..1b67424217 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchMoreLikeThisFunction.java @@ -0,0 +1,389 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.database.Database; // Changed ODatabaseSession to Database +import com.arcadedb.database.DatabaseContext; // For context access +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; // Changed +import com.arcadedb.document.Document; // Changed +import com.arcadedb.document.Element; // Changed +import com.arcadedb.exception.ArcadeDBException; // Changed +import com.arcadedb.index.Index; // Changed +import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneIndexType; // For RID field name +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.function.IndexableSQLFunction; // Assuming +import com.arcadedb.query.sql.function.SQLFunctionAbstract; // Assuming +import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.query.sql.parser.FromClause; // Changed +import com.arcadedb.query.sql.parser.FromItem; // Changed +import com.arcadedb.query.sql.parser.Identifier; +import com.arcadedb.schema.DocumentType; +import com.arcadedb.schema.Schema; +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.mlt.MoreLikeThis; +import org.apache.lucene.queryparser.classic.QueryParser; // Used for escape +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; // Directly use BooleanQuery.Builder +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; + +/** Created by frank on 15/01/2017. */ +public class ArcadeLuceneSearchMoreLikeThisFunction extends ArcadeLuceneSearchFunctionTemplate // Changed base + implements IndexableSQLFunction { // Assuming from template + + private static final Logger logger = + Logger.getLogger(ArcadeLuceneSearchMoreLikeThisFunction.class.getName()); // Changed + + public static final String NAME = "search_more_like_this"; // Changed name + + public ArcadeLuceneSearchMoreLikeThisFunction() { + super(NAME, 1, 2); // params: rids, [metadata] + } + + @Override + public String getName() { + return NAME; + } + + @Override + public Object execute( // FIXME: Signature might change + Object iThis, + Identifiable iCurrentRecord, // Changed + Object iCurrentResult, + Object[] params, + CommandContext ctx) { // Changed + + // This function's logic in OrientDB was to check if iCurrentRecord is similar to records identified by RIDs in params[0]. + // This seems more like a filter for a WHERE clause rather than a direct result-producing function. + // The return type 'boolean' suggests this. + + if (!(iCurrentRecord instanceof Document)) { // Changed + return false; + } + String className = ((Document) iCurrentRecord).getTypeName(); // Changed + ArcadeLuceneFullTextIndex index = this.searchForIndex(ctx, className); // FIXME + + if (index == null) return false; // Cannot perform MLT without an index + + IndexSearcher searcher = index.searcher(); // FIXME + if (searcher == null) return false; + + Document metadata = getMetadataDoc(params, 1); // metadata is params[1] // Changed + + List ridsAsString = parseRidsObj(ctx, params[0]); + if (ridsAsString.isEmpty()) return false; + + List others = // Changed ORecord to Identifiable + ridsAsString.stream() + .map(ridStr -> (Identifiable) new RID(ctx.getDatabase(), ridStr)) // Changed ORecordId + .map(id -> ctx.getDatabase().lookupByRID(id.getIdentity(), true).getRecord()) // Load record // Changed + .filter(r -> r instanceof Element) // Ensure it's an element + .collect(Collectors.toList()); + + MoreLikeThis mlt = buildMoreLikeThis(index, searcher, metadata); // FIXME + + BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); // Changed + + // The MLT query should be built against the content of 'others' + // And then we check if iCurrentRecord matches this mltQuery. + // This is different from how 'searchFromTarget' works. + + // This part seems to generate a query based on the 'others' documents + addLikeQueries(others, mlt, queryBuilder, ctx.getDatabase()); // Changed + + Query mltQuery = queryBuilder.build(); + if (mltQuery.toString().isEmpty()) { // No terms generated if documents are empty or too common/rare + return false; + } + + // Now, check if iCurrentRecord matches the mltQuery. + // This requires indexing iCurrentRecord in-memory. + MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx); + org.apache.lucene.document.Document luceneDoc = index.buildDocument(null, iCurrentRecord); // FIXME: Key might be needed or different buildDocument signature + if (luceneDoc != null) { + for (org.apache.lucene.index.IndexableField field : luceneDoc.getFields()) { + memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer()); // FIXME + } + } else { + return false; + } + return memoryIndex.search(mltQuery) > 0.0f; + } + + @Override + public String getSyntax() { + return NAME + "( , [ ] )"; // Corrected syntax + } + + @Override + public Iterable searchFromTarget( // Changed + FromClause target, // Changed + BinaryCompareOperator operator, // Changed + Object rightValue, + CommandContext ctx, // Changed + Expression... args) { // Changed + + ArcadeLuceneFullTextIndex index = this.searchForIndex(target, ctx, args); // FIXME + + if (index == null) return Collections.emptySet(); + + IndexSearcher searcher = index.searcher(); // FIXME + if (searcher == null) return Collections.emptySet(); + + + Expression ridExpression = args[0]; + Document metadata = getMetadataFromExpression(args, ctx, 1); // metadata is args[1] // Changed + + List ridsAsString = parseRids(ctx, ridExpression); + if (ridsAsString.isEmpty()) return Collections.emptySet(); + + List others = // Changed + ridsAsString.stream() + .map(ridStr -> (Identifiable) new RID(ctx.getDatabase(), ridStr)) // Changed + .map(id -> ctx.getDatabase().lookupByRID(id.getIdentity(), true).getRecord()) // Load record // Changed + .filter(r -> r instanceof Element) + .collect(Collectors.toList()); + + MoreLikeThis mlt = buildMoreLikeThis(index, searcher, metadata); // FIXME + + BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); // Changed + + excludeOtherFromResults(ridsAsString, queryBuilder); // Keep input RIDs out of results + + addLikeQueries(others, mlt, queryBuilder, ctx.getDatabase()); // Changed + + Query mltQuery = queryBuilder.build(); + if (mltQuery.toString().isEmpty()) return Collections.emptySet(); + + + // Execute the mltQuery against the main index + // FIXME: index.getInternal().getRids() needs to be replaced + // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring + // This part is highly dependent on how ArcadeLuceneFullTextIndex exposes search capabilities + try (Stream rids = // Changed + index + .getAssociatedIndex() // Assuming + .getRids( // This method might not exist + new LuceneKeyAndMetadata( // FIXME + new LuceneCompositeKey(Arrays.asList(mltQuery.toString())).setContext(ctx), // FIXME + metadata))) { + return rids.map(rid -> (Identifiable) rid).collect(Collectors.toSet()); // Changed + } catch (Exception e) { + logger.log(Level.SEVERE, "Error executing MoreLikeThis query via getRids", e); + return Collections.emptySet(); + } + } + + private List parseRids(CommandContext ctx, Expression expression) { // Changed + Object expResult = expression.execute((Result) null, ctx); // Changed + return parseRidsObj(ctx, expResult); + } + + private List parseRidsObj(CommandContext ctx, Object expResult) { // Changed + if (expResult instanceof Identifiable) { // Changed + return Collections.singletonList(((Identifiable) expResult).getIdentity().toString()); + } + + Iterator iter; // Wildcard for iterator type + if (expResult instanceof Iterable) { + iter = ((Iterable) expResult).iterator(); + } else if (expResult instanceof Iterator) { + iter = (Iterator) expResult; + } else { + return Collections.emptyList(); + } + + List rids = new ArrayList<>(); + while (iter.hasNext()) { + Object item = iter.next(); + if (item instanceof Result) { // Changed + if (((Result) item).isElement()) { + ((Result) item).getIdentity().ifPresent(id -> rids.add(id.toString())); // Changed + } else { + Set properties = ((Result) item).getPropertyNames(); + if (properties.size() == 1) { + Object val = ((Result) item).getProperty(properties.iterator().next()); + if (val instanceof Identifiable) { // Changed + rids.add(((Identifiable) val).getIdentity().toString()); + } + } + } + } else if (item instanceof Identifiable) { // Changed + rids.add(((Identifiable) item).getIdentity().toString()); + } + } + return rids; + } + + private Document getMetadataDoc(Object[] params, int metadataParamIndex) { // Changed + if (params.length > metadataParamIndex) { + if (params[metadataParamIndex] instanceof Map) { + return new Document().fromMap((Map) params[metadataParamIndex]); + } else if (params[metadataParamIndex] instanceof String) { + return new Document().fromJSON((String) params[metadataParamIndex]); + } + return new Document().fromJSON(params[metadataParamIndex].toString()); + } + return new Document(); // Empty if not present + } + + private Document getMetadataFromExpression(Expression[] args, CommandContext ctx, int metadataParamIndex) { // Changed + if (args.length > metadataParamIndex) { + return getMetadata(args[metadataParamIndex], ctx); // Calls method from ArcadeLuceneSearchFunctionTemplate + } + return new Document(); // Empty if not present + } + + + private MoreLikeThis buildMoreLikeThis( // Changed + ArcadeLuceneFullTextIndex index, IndexSearcher searcher, Document metadata) { // FIXME + + try { + MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader()); + + mlt.setAnalyzer(index.queryAnalyzer()); // FIXME + + // FIXME: index.getDefinition() might be different + mlt.setFieldNames( + Optional.ofNullable(metadata.>getProperty("fieldNames")) + .orElse(index.getDefinition().getFields()) + .toArray(new String[] {})); + + mlt.setMaxQueryTerms( + Optional.ofNullable(metadata.getProperty("maxQueryTerms")) + .orElse(MoreLikeThis.DEFAULT_MAX_QUERY_TERMS)); + // ... (rest of MoreLikeThis setters, ensure getProperty types match) + mlt.setMinTermFreq( + Optional.ofNullable(metadata.getProperty("minTermFreq")) + .orElse(MoreLikeThis.DEFAULT_MIN_TERM_FREQ)); + mlt.setMaxDocFreq( + Optional.ofNullable(metadata.getProperty("maxDocFreq")) + .orElse(MoreLikeThis.DEFAULT_MAX_DOC_FREQ)); + mlt.setMinDocFreq( + Optional.ofNullable(metadata.getProperty("minDocFreq")) + .orElse(MoreLikeThis.DEFAULT_MIN_DOC_FREQ)); // Corrected from DEFAULT_MAX_DOC_FREQ + mlt.setBoost( + Optional.ofNullable(metadata.getProperty("boost")) + .orElse(MoreLikeThis.DEFAULT_BOOST)); + mlt.setBoostFactor( + Optional.ofNullable(metadata.getProperty("boostFactor")).orElse(1f)); + mlt.setMaxWordLen( + Optional.ofNullable(metadata.getProperty("maxWordLen")) + .orElse(MoreLikeThis.DEFAULT_MAX_WORD_LENGTH)); + mlt.setMinWordLen( + Optional.ofNullable(metadata.getProperty("minWordLen")) + .orElse(MoreLikeThis.DEFAULT_MIN_WORD_LENGTH)); + // setMaxNumTokensParsed was removed in later Lucene versions, check alternatives if needed. + // mlt.setMaxNumTokensParsed( + // Optional.ofNullable(metadata.getProperty("maxNumTokensParsed")) + // .orElse(MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED)); + mlt.setStopWords( + (Set) + Optional.ofNullable(metadata.get("stopWords")) // Simpler get for Set + .orElse(MoreLikeThis.DEFAULT_STOP_WORDS)); + + + return mlt; + } catch (IOException e) { + throw ArcadeDBException.wrapException(new ArcadeDBException("Lucene IO Exception"), e); // Changed + } + } + + private void addLikeQueries( // Changed + List others, MoreLikeThis mlt, BooleanQuery.Builder queryBuilder, Database database) { // Changed + others.stream() + .filter(id -> id instanceof Element) // ensure it's an element to get properties + .map(id -> (Element) id) + .forEach( + element -> + Arrays.stream(mlt.getFieldNames()) // These are the fields to check for similarity + .forEach( + fieldName -> { + Object propertyValue = element.getProperty(fieldName); + if (propertyValue != null) { + try { + // MoreLikeThis.like() can take a String directly for a field's content + Query fieldQuery = mlt.like(fieldName, new StringReader(propertyValue.toString())); + if (!fieldQuery.toString().isEmpty()) // Check if anything was generated + queryBuilder.add(fieldQuery, Occur.SHOULD); + } catch (IOException e) { + logger.log(Level.SEVERE, "Error during Lucene MoreLikeThis query generation for field " + fieldName, e); + } + } + })); + } + + private void excludeOtherFromResults(List ridsAsString, BooleanQuery.Builder queryBuilder) { // Changed + ridsAsString.stream() + .forEach( + rid -> + queryBuilder.add( // Use ArcadeLuceneIndexType.RID for consistency + new TermQuery(new Term(ArcadeLuceneIndexType.RID, QueryParser.escape(rid))), Occur.MUST_NOT)); + } + + // searchForIndex from OLuceneSearchFunctionTemplate should be used or overridden if different logic needed for target. + // The private helpers here were specific to how OLuceneSearchMoreLikeThisFunction determined its index. + // For now, relying on the overridden searchForIndex from ArcadeLuceneSearchFunctionTemplate. + // If this function *always* uses class name from context (iThis) for 'execute' and target for 'searchFromTarget', + // then the template's searchForIndex might need to be made non-abstract or this class needs its own. + // The original OLuceneSearchMoreLikeThisFunction had its own searchForIndex. + + @Override + protected ArcadeLuceneFullTextIndex searchForIndex( // Changed + FromClause target, CommandContext ctx, Expression... args) { // FIXME + FromItem item = target.getItem(); // Changed + Identifier identifier = item.getIdentifier(); // Changed + String className = identifier.getStringValue(); + return searchForIndex(ctx, className); // Calls private helper + } + + private ArcadeLuceneFullTextIndex searchForIndex(CommandContext ctx, String className) { // Changed + DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify + // database.activateOnCurrentThread(); // May not be needed + + Schema schema = database.getSchema(); // Changed + DocumentType docType = schema.getType(className); // Changed + + if (docType == null) { + return null; + } + + List indices = // Changed + docType.getIndexes(true).stream() + .filter(idx -> idx instanceof ArcadeLuceneFullTextIndex) // FIXME + .map(idx -> (ArcadeLuceneFullTextIndex) idx) // FIXME + .collect(Collectors.toList()); + + if (indices.size() > 1) { + // Consider if a more specific index selection is needed, e.g. one covering certain fields if provided in metadata + throw new IllegalArgumentException("Too many full-text Lucene indices on class: " + className + ". Disambiguate or configure."); + } + return indices.size() == 0 ? null : indices.get(0); + } + + + // estimate, canExecuteInline, allowsIndexedExecution, shouldExecuteAfterSearch + // are inherited from ArcadeLuceneSearchFunctionTemplate. + // Their default implementations in the template might need review for this specific function's behavior. + // E.g., allowsIndexedExecution for MLT depends on finding *an* index on the class to get an IndexReader. +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnClassFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnClassFunction.java new file mode 100644 index 0000000000..a6a893a127 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnClassFunction.java @@ -0,0 +1,242 @@ +package com.arcadedb.lucene.functions; + +// Static import from ArcadeLuceneFunctionsUtils if getOrCreateMemoryIndex is public there, or keep local. +// For now, assuming it's accessible via ArcadeLuceneFunctionsUtils. +// import static com.arcadedb.lucene.functions.ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex; + +import com.arcadedb.database.DatabaseContext; // Assuming CommandContext provides access to Database +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; // Changed +import com.arcadedb.document.Document; // Changed +import com.arcadedb.document.Element; // Changed +import com.arcadedb.index.Index; // Changed +import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.executor.ResultInternal; // Changed +import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.query.sql.parser.FromClause; // Changed +import com.arcadedb.query.sql.parser.FromItem; // Changed +import com.arcadedb.schema.DocumentType; // Changed +import com.arcadedb.schema.Schema; // Changed +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 15/01/2017. */ +public class ArcadeLuceneSearchOnClassFunction extends ArcadeLuceneSearchFunctionTemplate { // Changed base class + + public static final String NAME = "search_class"; + + public ArcadeLuceneSearchOnClassFunction() { + super(NAME, 1, 2); // Original params: className, query, [metadata] - now query, [metadata] as class comes from context + // However, the original code takes classname as param for searchForIndex, + // but in execute it gets class from iThis. + // The original super was (NAME, 1, 2) -> (query, [metadata]), class was implicit from target. + // Let's stick to (NAME, 2, 3) -> (className, query, [metadata]) for now if it's a global function. + // If it's context aware (iThis), then (query, [metadata]) is fine. + // The original `search_class(, , [ ])` + // super(NAME, 2, 3); // (className, query, [metadata]) + // The original code for OLuceneSearchOnClassFunction used (NAME, 1, 2) + // and derived className from `iThis` in `execute` or from `target` in `searchFromTarget`. + // Let's keep the original arity and rely on context for class name. + super(NAME, 1, 2); + } + + @Override + public String getName() { + return NAME; + } + + // canExecuteInline from template is likely fine if it relies on searchForIndex. + + @Override + public Object execute( // FIXME: Signature might change + Object iThis, + Identifiable iCurrentRecord, // Changed + Object iCurrentResult, + Object[] params, + CommandContext ctx) { // Changed + + Result result; // Changed + if (iThis instanceof Result) { + result = (Result) iThis; + } else if (iThis instanceof Identifiable) { + result = new ResultInternal((Identifiable) iThis); // Changed + } else { + // Cannot determine current record or class, perhaps throw error or return false + return false; + } + + if (!result.getElement().isPresent()) return false; + Element element = result.getElement().get(); // Changed + if (element.getType() == null) return false; // Changed, was getSchemaType().isPresent() + + String className = element.getType().getName(); // Changed + + ArcadeLuceneFullTextIndex index = searchForIndex(ctx, className); // FIXME + + if (index == null) return false; + + String query = (String) params[0]; + + MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx); + + // FIXME: index.getDefinition() might be different. + List key = + index.getDefinition().getFields().stream() + .map(s -> element.getProperty(s)) + .collect(Collectors.toList()); + + // FIXME: index.buildDocument and index.indexAnalyzer might not exist or have different signatures + org.apache.lucene.document.Document luceneDoc = index.buildDocument(key, iCurrentRecord); + if (luceneDoc != null) { + for (IndexableField field : luceneDoc.getFields()) { + // Simplified, assuming stringValue is appropriate. Lucene's MemoryIndex.addField handles various IndexableField types. + memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer()); + } + } + + + Document metadata = getMetadataDoc(params); // Changed + // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring + LuceneKeyAndMetadata keyAndMetadata = + new LuceneKeyAndMetadata( + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); + + // FIXME: index.buildQuery might not exist or have different signature + return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; + } + + private Document getMetadataDoc(Object[] params) { // Changed + if (params.length == 2) { // Original used params[1] for metadata if arity was 2 (query, metadata) + if (params[1] instanceof Map) { + return new Document().fromMap((Map) params[1]); // Changed + } else if (params[1] instanceof String) { + return new Document().fromJSON((String) params[1]); + } + return new Document().fromJSON(params[1].toString()); + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); //LuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + public String getSyntax() { + // Original was "SEARCH_INDEX( indexName, [ metdatada {} ] )" which seems incorrect for search_class + return "search_class( , [ ] )"; // Class is implicit from context + } + + @Override + public boolean filterResult() { + return true; + } + + // FIXME: This method's signature and logic are highly dependent on ArcadeDB's IndexableSQLFunction interface + @Override + public Iterable searchFromTarget( // Changed + FromClause target, // Changed + BinaryCompareOperator operator, // Changed + Object rightValue, + CommandContext ctx, // Changed + Expression... args) { // Changed + + // In this context, the class comes from the target FromClause + ArcadeLuceneFullTextIndex index = searchForIndex(target, ctx, args); // FIXME + + Expression expression = args[0]; // Query is the first argument to the function + String query = (String) expression.execute((Result) null, ctx); // Changed + + if (index != null) { + Document meta = getMetadata(args, ctx, 1); // Metadata is the second argument (index 1) if present + + List luceneResultSet; // Changed + try (Stream rids = // Changed + // FIXME: index.getInternal().getRids() needs to be replaced with ArcadeDB equivalent + // This whole block is highly dependent on ArcadeLuceneFullTextIndex and LuceneKeyAndMetadata refactoring + index + .getAssociatedIndex() // Assuming getAssociatedIndex() is the way + .getRids( // This method might not exist on ArcadeDB's Index interface + new LuceneKeyAndMetadata( // FIXME + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { // FIXME + luceneResultSet = rids.collect(Collectors.toList()); + } + return luceneResultSet; + } + return Collections.emptySet(); + } + + private Document getMetadata(Expression[] args, CommandContext ctx, int metadataParamIndex) { // Changed types + if (args.length > metadataParamIndex) { + return getMetadata(args[metadataParamIndex], ctx); // Calls method from ArcadeLuceneSearchFunctionTemplate + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + protected ArcadeLuceneFullTextIndex searchForIndex( // Changed types + FromClause target, CommandContext ctx, Expression... args) { // FIXME + FromItem item = target.getItem(); // Changed + + // This function determines the class from the target (FROM clause) + String className = item.getIdentifier().getStringValue(); // Changed + + return searchForIndex(ctx, className); // Calls private helper + } + + private ArcadeLuceneFullTextIndex searchForIndex(CommandContext ctx, String className) { // Changed types + DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify + // database.activateOnCurrentThread(); // May not be needed + + Schema schema = database.getSchema(); // Changed + DocumentType docType = schema.getType(className); // Changed + + if (docType == null) { + return null; + } + + List indices = // Changed + docType.getIndexes(true).stream() // getIndexes(true) for all indexes including supertypes + .filter(idx -> idx instanceof ArcadeLuceneFullTextIndex) // FIXME + .map(idx -> (ArcadeLuceneFullTextIndex) idx) // FIXME + .collect(Collectors.toList()); + + if (indices.size() > 1) { + // Try to find an index that is defined ONLY on this class, not subclasses/supertypes if possible + // Or, if multiple, pick one based on a convention (e.g. specific fields) + // For now, this logic is simplified. + // Original code just picked the first one if only one, or threw error. + // We might need a more sophisticated way if multiple Lucene indexes can exist on a class hierarchy. + for (ArcadeLuceneFullTextIndex idx : indices) { + if (idx.getDefinition().getTypeName().equals(className)) { // Check if index is defined on this exact class + return idx; + } + } + // If no index is defined directly on this class, but inherited, it might be ambiguous. + // However, the original code's filter `dbMetadata.getImmutableSchemaSnapshot().getClass(className).getIndexes()` + // would only get indexes directly on that class. + // `docType.getIndexes(true)` gets all. Let's refine to match original more closely for now: + indices = docType.getIndexes(false).stream() // false = only indexes defined on this type + .filter(idx -> idx instanceof ArcadeLuceneFullTextIndex) // FIXME + .map(idx -> (ArcadeLuceneFullTextIndex) idx) // FIXME + .collect(Collectors.toList()); + if (indices.size() > 1) { + throw new IllegalArgumentException("Too many full-text indices on given class: " + className + ". Specify the index name using search_index function."); + } + } + + + return indices.size() == 0 ? null : indices.get(0); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnFieldsFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnFieldsFunction.java new file mode 100644 index 0000000000..3c6123a32b --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnFieldsFunction.java @@ -0,0 +1,291 @@ +package com.arcadedb.lucene.functions; + +// import static com.arcadedb.lucene.functions.ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex; // Assuming public access + +import com.arcadedb.database.DatabaseContext; // Assuming CommandContext provides access to Database +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; // Changed +import com.arcadedb.document.Document; // Changed +import com.arcadedb.document.Element; // Changed +import com.arcadedb.index.Index; // Changed +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.executor.ResultInternal; // Changed +import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.query.sql.parser.FromClause; // Changed +import com.arcadedb.query.sql.parser.FromItem; // Changed +import com.arcadedb.query.sql.parser.Identifier; +import com.arcadedb.schema.DocumentType; // Changed +import com.arcadedb.schema.Schema; // Changed +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 15/01/2017. */ +public class ArcadeLuceneSearchOnFieldsFunction extends ArcadeLuceneSearchFunctionTemplate { // Changed base class + + public static final String NAME = "search_fields"; + + public ArcadeLuceneSearchOnFieldsFunction() { + // Original params: fieldNames, query, [metadata] + // Class name is derived from context (iThis or target) + super(NAME, 2, 3); + } + + @Override + public String getName() { + return NAME; + } + + @Override + public Object execute( // FIXME: Signature might change + Object iThis, + Identifiable iCurrentRecord, // Changed + Object iCurrentResult, + Object[] params, + CommandContext ctx) { // Changed + + Result result; // Changed + if (iThis instanceof Result) { + result = (Result) iThis; + } else if (iThis instanceof Identifiable) { + result = new ResultInternal((Identifiable) iThis); // Changed + } else { + return false; // Cannot determine current record + } + + if (!result.getElement().isPresent()) return false; + Element element = result.getElement().get(); // Changed + if (element.getType() == null) return false; // Changed + String className = element.getType().getName(); // Changed + + @SuppressWarnings("unchecked") + List fieldNames = (List) params[0]; + + // Note: searchForIndex here might not be strictly necessary if we always build an in-memory index from the current record's fields. + // However, the original code uses it to get definition and analyzer. + ArcadeLuceneFullTextIndex index = searchForIndex(className, ctx, fieldNames); // FIXME + + if (index == null) { + // If no pre-existing index matches, we might still proceed if we can get a default analyzer + // or one from metadata, but building a Lucene document without an IndexDefinition is problematic. + // For now, returning false if no suitable index is found to provide an analyzer/definition. + // This part might need a different strategy for on-the-fly indexing without a backing index. + return false; + } + + String query; + if (params.length < 2 || params[1] == null) { // query is params[1] + query = null; + } else { + query = params[1].toString(); + } + + MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx); + + // FIXME: This part needs to build a Lucene document using ONLY the specified fieldNames + // from the 'element', and using the types from the schema for those fields. + // The 'key' concept from OLuceneSearchOnIndexFunction is not directly applicable here in the same way. + // index.buildDocument(key, iCurrentRecord) is not right for this context. + org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document(); + DocumentType docType = element.getType(); + if (docType != null) { + for(String fieldName : fieldNames) { + if (element.has(fieldName)) { + Object fieldValue = element.getProperty(fieldName); + com.arcadedb.schema.Property prop = docType.getProperty(fieldName); + Type fieldType = prop != null ? prop.getType() : Type.STRING; // Default to string if no prop + // FIXME: ArcadeLuceneIndexType.createFields needs correct store/sort parameters. + // Assuming Field.Store.YES and no sorting for memory index fields for now. + List fields = ArcadeLuceneIndexType.createFields(fieldName, fieldValue, Field.Store.YES, false, fieldType); + for(org.apache.lucene.document.Field f : fields) { + luceneDoc.add(f); + } + } + } + } + + if (luceneDoc.getFields().isEmpty()) return false; // No fields were added + + // Add all fields from the created luceneDoc to memoryIndex + for (IndexableField field : luceneDoc.getFields()) { + // Simplified, assuming stringValue is appropriate for all, which is not robust. + // MemoryIndex.addField handles various IndexableField types, so this might be okay if createFields returns typed fields. + memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer()); // FIXME: index.indexAnalyzer() dependency + } + + + Document metadata = getMetadataDoc(params, 2); // metadata is params[2] + // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring + LuceneKeyAndMetadata keyAndMetadata = + new LuceneKeyAndMetadata( + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); + + // FIXME: index.buildQuery might not exist or have different signature + return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; + } + + private Document getMetadataDoc(Object[] params, int metadataParamIndex) { // Changed + if (params.length > metadataParamIndex) { + if (params[metadataParamIndex] instanceof Map) { + return new Document().fromMap((Map) params[metadataParamIndex]); // Changed + } else if (params[metadataParamIndex] instanceof String) { + return new Document().fromJSON((String) params[metadataParamIndex]); + } + return new Document().fromJSON(params[metadataParamIndex].toString()); + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + public String getSyntax() { + return "search_fields( , , [ ] )"; // Class is implicit + } + + // searchFromTarget and related metadata method from template might not be directly applicable + // as this function operates on specified fields of current record using MemoryIndex. + // If it were to support indexed execution, it would need to find a covering persistent index. + @Override + public Iterable searchFromTarget( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { + + // This function, as implemented in execute(), builds an in-memory index for the current record. + // For it to be "indexable" in a broader query, it would need to find a persistent Lucene index + // that covers the requested fields for the target class. + ArcadeLuceneFullTextIndex index = searchForIndex(target, ctx, args); // FIXME + + // First arg (args[0]) is fieldNamesList, second (args[1]) is query + if (args.length < 2) throw new IllegalArgumentException("search_fields requires at least fieldNames and query parameters."); + + @SuppressWarnings("unchecked") + // List fieldNames = (List) args[0].execute((Result) null, ctx); // This is how searchForIndex gets it. + // We need the query string here. + Expression queryExpression = args[1]; + String query = (String) queryExpression.execute((Result) null, ctx); + + + if (index != null && query != null) { + Document meta = getMetadata(args, ctx, 2); // Metadata is third arg (index 2) + Set luceneResultSet; // Changed + try (Stream rids = // Changed + // FIXME: index.getInternal().getRids() needs to be replaced + index + .getAssociatedIndex() + .getRids( // This method might not exist + new LuceneKeyAndMetadata( // FIXME + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { // FIXME + luceneResultSet = rids.collect(Collectors.toSet()); + } + return luceneResultSet; + } + // Original threw RuntimeException, returning empty set might be safer for unhandled cases. + return Collections.emptySet(); + } + + private Document getMetadata(Expression[] args, CommandContext ctx, int metadataParamIndex) { // Changed + if (args.length > metadataParamIndex) { + return getMetadata(args[metadataParamIndex], ctx); // Calls method from ArcadeLuceneSearchFunctionTemplate + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + } + + + @Override + protected ArcadeLuceneFullTextIndex searchForIndex( // Changed types + FromClause target, CommandContext ctx, Expression... args) { // FIXME + // First argument to the function (args[0]) is the list of field names + if (args == null || args.length == 0) { + throw new IllegalArgumentException("Field names list parameter is missing."); + } + Object fieldNamesParam = args[0].execute((Result) null, ctx); + if (!(fieldNamesParam instanceof List)) { + throw new IllegalArgumentException("Field names parameter must be a list."); + } + @SuppressWarnings("unchecked") + List fieldNames = (List) fieldNamesParam; + + FromItem item = target.getItem(); // Changed + Identifier identifier = item.getIdentifier(); // Changed + String className = identifier.getStringValue(); + + return searchForIndex(className, ctx, fieldNames); // Calls private helper + } + + private ArcadeLuceneFullTextIndex searchForIndex( // Changed types + String className, CommandContext ctx, List fieldNames) { + DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify + // database.activateOnCurrentThread(); // May not be needed + + Schema schema = database.getSchema(); // Changed + DocumentType docType = schema.getType(className); // Changed + + if (docType == null) { + return null; + } + List indices = // Changed + docType.getIndexes(true).stream() // getIndexes(true) for all indexes including supertypes + .filter(idx -> idx instanceof ArcadeLuceneFullTextIndex) // FIXME + .map(idx -> (ArcadeLuceneFullTextIndex) idx) // FIXME + .filter(idx -> intersect(idx.getDefinition().getFields(), fieldNames)) + .collect(Collectors.toList()); + + if (indices.size() > 1) { + // If multiple indexes match (e.g. one on [f1], another on [f2], and we search [f1,f2]) + // This logic might need refinement. For now, it implies any single index covering *at least one* field. + // The original code would throw "too many indices matching given field name" only if multiple INDIVIDUAL indexes + // were found that EACH satisfy the intersect condition. + // A more robust approach might be to find the "best" covering index or combine results if that makes sense. + // For now, sticking to "if any index covers any of the fields, and there's only one such index" + // The original code finds an index if ANY of its fields are in fieldNames. + // If multiple such indexes exist, it's an error. + + // Let's find the one with the most matching fields? Or just the first one? + // The original code would throw if 'indices.size() > 1'. + throw new IllegalArgumentException( + "Too many Lucene indices on class '" + className + "' match the specified fields: " + String.join(",", fieldNames) + + ". Specify a single target index using search_index()."); + } + + return indices.size() == 0 ? null : indices.get(0); + } + + // intersection and intersect methods are helpers, can remain as they are (generic) + public List intersection(List list1, List list2) { + List list = new ArrayList(); + for (T t : list1) { + if (list2.contains(t)) { + list.add(t); + } + } + return list; + } + + public boolean intersect(List list1, List list2) { + for (T t : list1) { + if (list2.contains(t)) { + return true; + } + } + return false; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java new file mode 100644 index 0000000000..eab77225ec --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java @@ -0,0 +1,186 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.database.DatabaseContext; // Assuming CommandContext provides access to Database +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; +import com.arcadedb.document.Document; // Changed +import com.arcadedb.index.Index; // Changed +import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.executor.ResultInternal; // Changed +import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.query.sql.parser.FromClause; // Changed +import com.arcadedb.query.sql.parser.FromItem; // Changed +import com.arcadedb.query.sql.parser.Identifier; // Changed +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 15/01/2017. */ +public class ArcadeLuceneSearchOnIndexFunction extends ArcadeLuceneSearchFunctionTemplate { // Changed base class + + // public static final String MEMORY_INDEX = "_memoryIndex"; // Already in ArcadeLuceneFunctionsUtils + public static final String NAME = "search_index"; // OrientDB's name was luceneMatch, but class name implies search_index + + public ArcadeLuceneSearchOnIndexFunction() { + super(NAME, 2, 3); // Using "search_index" as function name + } + + @Override + public String getName() { + return NAME; + } + + @Override + public Object execute( // FIXME: Signature might change based on actual SQLFunctionAbstract in ArcadeDB + Object iThis, + Identifiable iCurrentRecord, // Changed + Object iCurrentResult, + Object[] params, + CommandContext ctx) { // Changed + if (iThis instanceof RID) { // Changed + iThis = ((RID) iThis).getRecord(); + } + if (iThis instanceof Identifiable) { // Changed + iThis = new ResultInternal((Identifiable) iThis); // Changed + } + Result result = (Result) iThis; // Changed + + String indexName = (String) params[0]; + + ArcadeLuceneFullTextIndex index = searchForIndex(ctx, indexName); // FIXME + + if (index == null) return false; + + String query = (String) params[1]; + + MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx); // Use refactored util + + // FIXME: index.getDefinition() might be different. + List key = + index.getDefinition().getFields().stream() + .map(s -> result.getProperty(s)) + .collect(Collectors.toList()); + + // FIXME: index.buildDocument and index.indexAnalyzer might not exist or have different signatures + // This part is highly dependent on ArcadeLuceneFullTextIndex refactoring. + org.apache.lucene.document.Document luceneDoc = index.buildDocument(key, iCurrentRecord); + if (luceneDoc != null) { + for (IndexableField field : luceneDoc.getFields()) { + memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer()); // Simplified, assuming stringValue is appropriate + } + } + + Document metadata = getMetadataDoc(params); // Changed ODocument + // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring + LuceneKeyAndMetadata keyAndMetadata = + new LuceneKeyAndMetadata( + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); + + // FIXME: index.buildQuery might not exist or have different signature + return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; + } + + private Document getMetadataDoc(Object[] params) { // Changed ODocument + if (params.length == 3) { + if (params[2] instanceof Map) { + return new Document().fromMap((Map) params[2]); // Changed + } else if (params[2] instanceof String) { + return new Document().fromJSON((String) params[2]); + } + // Fallback for other types, or throw error + return new Document().fromJSON(params[2].toString()); + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA needs to be accessible or defined differently + return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + } + + // getOrCreateMemoryIndex was moved to ArcadeLuceneFunctionsUtils + + @Override + public String getSyntax() { + return "search_index( , , [ ] )"; // Updated syntax + } + + @Override + public boolean filterResult() { + return true; + } + + // FIXME: This method's signature and logic are highly dependent on ArcadeDB's IndexableSQLFunction interface + @Override + public Iterable searchFromTarget( // Changed + FromClause target, // Changed + BinaryCompareOperator operator, // Changed + Object rightValue, + CommandContext ctx, // Changed + Expression... args) { // Changed + + ArcadeLuceneFullTextIndex index = searchForIndex(target, ctx, args); // FIXME + + Expression expression = args[1]; + String query = (String) expression.execute((Result) null, ctx); // Changed + if (index != null && query != null) { + + Document meta = getMetadata(args, ctx); // Changed + + List luceneResultSet; // Changed + try (Stream rids = // Changed + // FIXME: index.getInternal().getRids() needs to be replaced with ArcadeDB equivalent + // This whole block is highly dependent on ArcadeLuceneFullTextIndex and LuceneKeyAndMetadata refactoring + index + .getAssociatedIndex() // Assuming getAssociatedIndex() is the way, or index might be the LuceneIndexEngine itself + .getRids( // This method might not exist on ArcadeDB's Index interface or ArcadeLuceneFullTextIndex + new LuceneKeyAndMetadata( // FIXME + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { // FIXME + luceneResultSet = rids.collect(Collectors.toList()); + } + + return luceneResultSet; + } + return Collections.emptyList(); + } + + private Document getMetadata(Expression[] args, CommandContext ctx) { // Changed types + if (args.length == 3) { + return getMetadata(args[2], ctx); // Calls the method in ArcadeLuceneSearchFunctionTemplate + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + protected ArcadeLuceneFullTextIndex searchForIndex( // Changed types + FromClause target, CommandContext ctx, Expression... args) { // FIXME + + FromItem item = target.getItem(); // Changed + Identifier identifier = item.getIdentifier(); // Changed + // FIXME: This was calling a private searchForIndex, now it should call the one from ArcadeLuceneFunctionsUtils or similar. + // For now, assuming the util class will be used by the concrete implementations. + // This abstract method in template might need rethinking or this class needs its own way to get the index. + // Let's assume for now it will use the utility. + String indexNameFromArg = (String) args[0].execute((Result) null, ctx); + // String className = identifier.getStringValue(); // This would be the class from FROM clause + // We need the index name from the function argument. + return ArcadeLuceneFunctionsUtils.getLuceneFullTextIndex(ctx, indexNameFromArg); // FIXME + } + + // Removed private searchForIndex methods, assuming logic will consolidate or use ArcadeLuceneFunctionsUtils + + // getResult(OCommandContext) is part of OSQLFunction and likely not needed if SQLFunctionAbstract is different + // @Override + // public Object getResult(CommandContext ctx) { // Changed OCommandContext + // return super.getResult(ctx); + // } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassSearchFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassSearchFunction.java deleted file mode 100644 index 484616e332..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassSearchFunction.java +++ /dev/null @@ -1,181 +0,0 @@ -package com.arcadedb.lucene.functions; - -import static com.arcadedb.lucene.OLuceneCrossClassIndexFactory.LUCENE_CROSS_CLASS; - -import com.arcadedb.common.log.OLogManager; -import com.arcadedb.common.log.OLogger; -import com.arcadedb.lucene.builder.OLuceneQueryBuilder; -import com.arcadedb.lucene.collections.OLuceneCompositeKey; -import com.arcadedb.lucene.index.OLuceneFullTextIndex; -import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; -import com.arcadedb.database.OCommandContext; -import com.arcadedb.database.ODatabaseDocumentInternal; -import com.arcadedb.database.OIdentifiable; -import com.arcadedb.database.id.ORID; -import com.arcadedb.database.index.OIndex; -import com.arcadedb.database.record.impl.ODocument; -import com.arcadedb.database.sql.executor.OResult; -import com.arcadedb.database.sql.functions.OIndexableSQLFunction; -import com.arcadedb.database.sql.functions.OSQLFunctionAbstract; -import com.arcadedb.database.sql.parser.OBinaryCompareOperator; -import com.arcadedb.database.sql.parser.OExpression; -import com.arcadedb.database.sql.parser.OFromClause; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -/** - * This function uses the CrossClassIndex to search documents across all the Lucene indexes defined in a database - *

- * Created by frank on 19/02/2016. - */ -public class OLuceneCrossClassSearchFunction extends OSQLFunctionAbstract - implements OIndexableSQLFunction { - private static final OLogger logger = - OLogManager.instance().logger(OLuceneCrossClassSearchFunction.class); - - public static final String NAME = "SEARCH_CROSS"; - - public OLuceneCrossClassSearchFunction() { - super(NAME, 1, 2); - } - - @Override - public Iterable searchFromTarget( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - - OLuceneFullTextIndex fullTextIndex = searchForIndex(ctx); - - OExpression expression = args[0]; - String query = (String) expression.execute((OResult) null, ctx); - - if (fullTextIndex != null) { - - ODocument metadata = getMetadata(args); - List luceneResultSet; - try (Stream rids = - fullTextIndex - .getInternal() - .getRids( - new OLuceneKeyAndMetadata( - new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata))) { - luceneResultSet = rids.collect(Collectors.toList()); - } - return luceneResultSet; - } - return Collections.emptySet(); - } - - @Override - public long estimate( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - return 1L; - } - - @Override - public boolean canExecuteInline( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - return false; - } - - @Override - public boolean allowsIndexedExecution( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - return true; - } - - @Override - public boolean shouldExecuteAfterSearch( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - return false; - } - - protected OLuceneFullTextIndex searchForIndex(OCommandContext ctx) { - - Collection indexes = - ((ODatabaseDocumentInternal) ctx.getDatabase()) - .getMetadata() - .getIndexManager() - .getIndexes(); - for (OIndex index : indexes) { - if (index.getInternal() instanceof OLuceneFullTextIndex) { - if (index.getAlgorithm().equalsIgnoreCase(LUCENE_CROSS_CLASS)) { - return (OLuceneFullTextIndex) index; - } - } - } - return null; - } - - private ODocument getMetadata(OExpression[] args) { - if (args.length == 2) { - return new ODocument().fromJSON(args[1].toString()); - } - return OLuceneQueryBuilder.EMPTY_METADATA; - } - - @Override - public Object execute( - Object iThis, - OIdentifiable currentRecord, - Object currentResult, - Object[] params, - OCommandContext ctx) { - - OLuceneFullTextIndex fullTextIndex = searchForIndex(ctx); - - String query = (String) params[0]; - - if (fullTextIndex != null) { - - ODocument metadata = getMetadata(params); - - Collection luceneResultSet = - fullTextIndex.get( - new OLuceneKeyAndMetadata( - new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata)); - - return luceneResultSet; - } - return Collections.emptySet(); - } - - private ODocument getMetadata(Object[] params) { - - if (params.length == 2) { - return new ODocument().fromMap((Map) params[1]); - } - - return OLuceneQueryBuilder.EMPTY_METADATA; - } - - @Override - public String getSyntax() { - logger.debug("syntax"); - return "SEARCH_CROSS('', {metadata})"; - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsFactory.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsFactory.java deleted file mode 100644 index 2251ba9e31..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsFactory.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.arcadedb.lucene.functions; - -import com.arcadedb.database.sql.functions.OSQLFunctionFactoryTemplate; - -public class OLuceneFunctionsFactory extends OSQLFunctionFactoryTemplate { - public OLuceneFunctionsFactory() { - register(new OLuceneSearchOnIndexFunction()); - register(new OLuceneSearchOnFieldsFunction()); - register(new OLuceneSearchOnClassFunction()); - register(new OLuceneSearchMoreLikeThisFunction()); - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsUtils.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsUtils.java deleted file mode 100644 index f7d2c33646..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsUtils.java +++ /dev/null @@ -1,60 +0,0 @@ -package com.arcadedb.lucene.functions; - -import com.arcadedb.lucene.index.OLuceneFullTextIndex; -import com.arcadedb.database.OCommandContext; -import com.arcadedb.database.ODatabaseDocumentInternal; -import com.arcadedb.database.metadata.OMetadataInternal; -import com.arcadedb.database.sql.executor.OResult; -import com.arcadedb.database.sql.parser.OExpression; -import org.apache.lucene.index.memory.MemoryIndex; - -/** Created by frank on 13/02/2017. */ -public class OLuceneFunctionsUtils { - public static final String MEMORY_INDEX = "_memoryIndex"; - - protected static OLuceneFullTextIndex searchForIndex(OExpression[] args, OCommandContext ctx) { - final String indexName = (String) args[0].execute((OResult) null, ctx); - return getLuceneFullTextIndex(ctx, indexName); - } - - protected static OLuceneFullTextIndex getLuceneFullTextIndex( - final OCommandContext ctx, final String indexName) { - final ODatabaseDocumentInternal documentDatabase = - (ODatabaseDocumentInternal) ctx.getDatabase(); - documentDatabase.activateOnCurrentThread(); - final OMetadataInternal metadata = documentDatabase.getMetadata(); - - final OLuceneFullTextIndex index = - (OLuceneFullTextIndex) - metadata.getIndexManagerInternal().getIndex(documentDatabase, indexName); - if (!(index instanceof OLuceneFullTextIndex)) { - throw new IllegalArgumentException("Not a valid Lucene index:: " + indexName); - } - return index; - } - - public static MemoryIndex getOrCreateMemoryIndex(OCommandContext ctx) { - MemoryIndex memoryIndex = (MemoryIndex) ctx.getVariable(MEMORY_INDEX); - if (memoryIndex == null) { - memoryIndex = new MemoryIndex(); - ctx.setVariable(MEMORY_INDEX, memoryIndex); - } - memoryIndex.reset(); - return memoryIndex; - } - - public static String doubleEscape(final String s) { - final StringBuilder sb = new StringBuilder(); - for (int i = 0; i < s.length(); ++i) { - final char c = s.charAt(i); - if (c == 92 || c == 43 || c == 45 || c == 33 || c == 40 || c == 41 || c == 58 || c == 94 - || c == 91 || c == 93 || c == 34 || c == 123 || c == 125 || c == 126 || c == 42 || c == 63 - || c == 124 || c == 38 || c == 47) { - sb.append('\\'); - sb.append('\\'); - } - sb.append(c); - } - return sb.toString(); - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchFunctionTemplate.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchFunctionTemplate.java deleted file mode 100644 index 774e252023..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchFunctionTemplate.java +++ /dev/null @@ -1,90 +0,0 @@ -package com.arcadedb.lucene.functions; - -import com.arcadedb.lucene.collections.OLuceneResultSet; -import com.arcadedb.lucene.index.OLuceneFullTextIndex; -import com.arcadedb.database.OCommandContext; -import com.arcadedb.database.OIdentifiable; -import com.arcadedb.database.record.impl.ODocument; -import com.arcadedb.database.sql.executor.OResult; -import com.arcadedb.database.sql.functions.OIndexableSQLFunction; -import com.arcadedb.database.sql.functions.OSQLFunctionAbstract; -import com.arcadedb.database.sql.parser.OBinaryCompareOperator; -import com.arcadedb.database.sql.parser.OExpression; -import com.arcadedb.database.sql.parser.OFromClause; -import java.util.Map; - -/** Created by frank on 25/05/2017. */ -public abstract class OLuceneSearchFunctionTemplate extends OSQLFunctionAbstract - implements OIndexableSQLFunction { - - public OLuceneSearchFunctionTemplate(String iName, int iMinParams, int iMaxParams) { - super(iName, iMinParams, iMaxParams); - } - - @Override - public boolean canExecuteInline( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - return allowsIndexedExecution(target, operator, rightValue, ctx, args); - } - - @Override - public boolean allowsIndexedExecution( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - OLuceneFullTextIndex index = searchForIndex(target, ctx, args); - return index != null; - } - - @Override - public boolean shouldExecuteAfterSearch( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - return false; - } - - @Override - public long estimate( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - - Iterable a = searchFromTarget(target, operator, rightValue, ctx, args); - if (a instanceof OLuceneResultSet) { - return ((OLuceneResultSet) a).size(); - } - long count = 0; - for (Object o : a) { - count++; - } - - return count; - } - - protected ODocument getMetadata(OExpression metadata, OCommandContext ctx) { - final Object md = metadata.execute((OResult) null, ctx); - if (md instanceof ODocument) { - return (ODocument) md; - } else if (md instanceof Map) { - return new ODocument().fromMap((Map) md); - } else if (md instanceof String) { - return new ODocument().fromJSON((String) md); - } else { - return new ODocument().fromJSON(metadata.toString()); - } - } - - protected abstract OLuceneFullTextIndex searchForIndex( - OFromClause target, OCommandContext ctx, OExpression... args); -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchMoreLikeThisFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchMoreLikeThisFunction.java deleted file mode 100644 index 813ee9f0e1..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchMoreLikeThisFunction.java +++ /dev/null @@ -1,396 +0,0 @@ -package com.arcadedb.lucene.functions; - -import com.arcadedb.common.exception.OException; -import com.arcadedb.common.io.OIOException; -import com.arcadedb.common.log.OLogManager; -import com.arcadedb.common.log.OLogger; -import com.arcadedb.lucene.collections.OLuceneCompositeKey; -import com.arcadedb.lucene.index.OLuceneFullTextIndex; -import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; -import com.arcadedb.database.OCommandContext; -import com.arcadedb.database.ODatabaseSession; -import com.arcadedb.database.OIdentifiable; -import com.arcadedb.database.id.ORID; -import com.arcadedb.database.id.ORecordId; -import com.arcadedb.database.metadata.OMetadataInternal; -import com.arcadedb.database.record.OElement; -import com.arcadedb.database.record.ORecord; -import com.arcadedb.database.record.impl.ODocument; -import com.arcadedb.database.sql.executor.OResult; -import com.arcadedb.database.sql.functions.OIndexableSQLFunction; -import com.arcadedb.database.sql.functions.OSQLFunctionAbstract; -import com.arcadedb.database.sql.parser.OBinaryCompareOperator; -import com.arcadedb.database.sql.parser.OExpression; -import com.arcadedb.database.sql.parser.OFromClause; -import com.arcadedb.database.sql.parser.OFromItem; -import java.io.IOException; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.lucene.index.Term; -import org.apache.lucene.queries.mlt.MoreLikeThis; -import org.apache.lucene.queryparser.classic.QueryParser; -import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.BooleanQuery.Builder; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; - -/** Created by frank on 15/01/2017. */ -public class OLuceneSearchMoreLikeThisFunction extends OSQLFunctionAbstract - implements OIndexableSQLFunction { - - private static final OLogger logger = - OLogManager.instance().logger(OLuceneSearchMoreLikeThisFunction.class); - - public static final String NAME = "search_more"; - - public OLuceneSearchMoreLikeThisFunction() { - super(OLuceneSearchMoreLikeThisFunction.NAME, 1, 2); - } - - @Override - public String getName() { - return OLuceneSearchMoreLikeThisFunction.NAME; - } - - @Override - public Object execute( - Object iThis, - OIdentifiable iCurrentRecord, - Object iCurrentResult, - Object[] params, - OCommandContext ctx) { - - // TODO: slow implementation can be made faster - if (!(iCurrentRecord instanceof ODocument)) { - return false; - } - String className = ((ODocument) iCurrentRecord).getClassName(); - OLuceneFullTextIndex index = this.searchForIndex(ctx, className); - - if (index == null) return Collections.emptySet(); - - IndexSearcher searcher = index.searcher(); - - ODocument metadata = new ODocument((Map) params[1]); - - List ridsAsString = parseRidsObj(ctx, params[0]); - - List others = - ridsAsString.stream() - .map( - rid -> { - ORecordId recordId = new ORecordId(); - - recordId.fromString(rid); - return recordId; - }) - .map(id -> id.getRecord()) - .collect(Collectors.toList()); - - MoreLikeThis mlt = buildMoreLikeThis(index, searcher, metadata); - - Builder queryBuilder = new Builder(); - - excludeOtherFromResults(ridsAsString, queryBuilder); - - ODatabaseSession contest = ctx.getDatabase(); - addLikeQueries(others, mlt, queryBuilder, contest); - - Query mltQuery = queryBuilder.build(); - - Set luceneResultSet; - try (Stream rids = - index - .getInternal() - .getRids( - new OLuceneKeyAndMetadata( - new OLuceneCompositeKey(Arrays.asList(mltQuery.toString())).setContext(ctx), - metadata))) { - luceneResultSet = rids.collect(Collectors.toSet()); - } - - return luceneResultSet.contains(iCurrentRecord); - } - - @Override - public String getSyntax() { - return "SEARCH_MORE( [rids], [ metdatada {} ] )"; - } - - @Override - public Iterable searchFromTarget( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - - OLuceneFullTextIndex index = this.searchForIndex(target, ctx); - - if (index == null) return Collections.emptySet(); - - IndexSearcher searcher = index.searcher(); - - OExpression expression = args[0]; - - ODocument metadata = parseMetadata(args); - - List ridsAsString = parseRids(ctx, expression); - - List others = - ridsAsString.stream() - .map( - rid -> { - ORecordId recordId = new ORecordId(); - - recordId.fromString(rid); - return recordId; - }) - .map(id -> id.getRecord()) - .collect(Collectors.toList()); - - MoreLikeThis mlt = buildMoreLikeThis(index, searcher, metadata); - - Builder queryBuilder = new Builder(); - - excludeOtherFromResults(ridsAsString, queryBuilder); - - ODatabaseSession contest = ctx.getDatabase(); - addLikeQueries(others, mlt, queryBuilder, contest); - - Query mltQuery = queryBuilder.build(); - - Set luceneResultSet; - try (Stream rids = - index - .getInternal() - .getRids( - new OLuceneKeyAndMetadata( - new OLuceneCompositeKey(Arrays.asList(mltQuery.toString())).setContext(ctx), - metadata))) { - luceneResultSet = rids.collect(Collectors.toSet()); - } - - return luceneResultSet; - } - - private List parseRids(OCommandContext ctx, OExpression expression) { - - Object expResult = expression.execute((OResult) null, ctx); - return parseRidsObj(ctx, expResult); - } - - private List parseRidsObj(OCommandContext ctx, Object expResult) { - // single rind - if (expResult instanceof OIdentifiable) { - return Collections.singletonList(((OIdentifiable) expResult).getIdentity().toString()); - } - - Iterator iter; - if (expResult instanceof Iterable) { - iter = ((Iterable) expResult).iterator(); - } else if (expResult instanceof Iterator) { - iter = (Iterator) expResult; - } else { - return Collections.emptyList(); - } - - List rids = new ArrayList<>(); - while (iter.hasNext()) { - Object item = iter.next(); - if (item instanceof OResult) { - if (((OResult) item).isElement()) { - rids.add(((OResult) item).getIdentity().get().toString()); - } else { - Set properties = ((OResult) item).getPropertyNames(); - if (properties.size() == 1) { - Object val = ((OResult) item).getProperty(properties.iterator().next()); - if (val instanceof OIdentifiable) { - rids.add(((OIdentifiable) val).getIdentity().toString()); - } - } - } - } else if (item instanceof OIdentifiable) { - rids.add(((OIdentifiable) item).getIdentity().toString()); - } - } - return rids; - } - - private ODocument parseMetadata(OExpression[] args) { - ODocument metadata = new ODocument(); - if (args.length == 2) { - metadata.fromJSON(args[1].toString()); - } - return metadata; - } - - private MoreLikeThis buildMoreLikeThis( - OLuceneFullTextIndex index, IndexSearcher searcher, ODocument metadata) { - - try { - MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader()); - - mlt.setAnalyzer(index.queryAnalyzer()); - - mlt.setFieldNames( - Optional.ofNullable(metadata.>getProperty("fieldNames")) - .orElse(index.getDefinition().getFields()) - .toArray(new String[] {})); - - mlt.setMaxQueryTerms( - Optional.ofNullable(metadata.getProperty("maxQueryTerms")) - .orElse(MoreLikeThis.DEFAULT_MAX_QUERY_TERMS)); - - mlt.setMinTermFreq( - Optional.ofNullable(metadata.getProperty("minTermFreq")) - .orElse(MoreLikeThis.DEFAULT_MIN_TERM_FREQ)); - - mlt.setMaxDocFreq( - Optional.ofNullable(metadata.getProperty("maxDocFreq")) - .orElse(MoreLikeThis.DEFAULT_MAX_DOC_FREQ)); - - mlt.setMinDocFreq( - Optional.ofNullable(metadata.getProperty("minDocFreq")) - .orElse(MoreLikeThis.DEFAULT_MAX_DOC_FREQ)); - - mlt.setBoost( - Optional.ofNullable(metadata.getProperty("boost")) - .orElse(MoreLikeThis.DEFAULT_BOOST)); - - mlt.setBoostFactor( - Optional.ofNullable(metadata.getProperty("boostFactor")).orElse(1f)); - - mlt.setMaxWordLen( - Optional.ofNullable(metadata.getProperty("maxWordLen")) - .orElse(MoreLikeThis.DEFAULT_MAX_WORD_LENGTH)); - - mlt.setMinWordLen( - Optional.ofNullable(metadata.getProperty("minWordLen")) - .orElse(MoreLikeThis.DEFAULT_MIN_WORD_LENGTH)); - - mlt.setMaxNumTokensParsed( - Optional.ofNullable(metadata.getProperty("maxNumTokensParsed")) - .orElse(MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED)); - - mlt.setStopWords( - (Set) - Optional.ofNullable(metadata.getProperty("stopWords")) - .orElse(MoreLikeThis.DEFAULT_STOP_WORDS)); - - return mlt; - } catch (IOException e) { - throw OException.wrapException(new OIOException("Lucene IO Exception"), e); - } - } - - private void addLikeQueries( - List others, MoreLikeThis mlt, Builder queryBuilder, ODatabaseSession contest) { - others.stream() - .map(or -> contest.load(or)) - .forEach( - element -> - Arrays.stream(mlt.getFieldNames()) - .forEach( - fieldName -> { - String property = element.getProperty(fieldName); - try { - Query fieldQuery = mlt.like(fieldName, new StringReader(property)); - if (!fieldQuery.toString().isEmpty()) - queryBuilder.add(fieldQuery, Occur.SHOULD); - } catch (IOException e) { - // FIXME handle me! - logger.error("Error during Lucene query generation", e); - } - })); - } - - private void excludeOtherFromResults(List ridsAsString, Builder queryBuilder) { - ridsAsString.stream() - .forEach( - rid -> - queryBuilder.add( - new TermQuery(new Term("RID", QueryParser.escape(rid))), Occur.MUST_NOT)); - } - - private OLuceneFullTextIndex searchForIndex(OFromClause target, OCommandContext ctx) { - OFromItem item = target.getItem(); - - String className = item.getIdentifier().getStringValue(); - - return searchForIndex(ctx, className); - } - - private OLuceneFullTextIndex searchForIndex(OCommandContext ctx, String className) { - OMetadataInternal dbMetadata = - (OMetadataInternal) ctx.getDatabase().activateOnCurrentThread().getMetadata(); - - List indices = - dbMetadata.getImmutableSchemaSnapshot().getClass(className).getIndexes().stream() - .filter(idx -> idx instanceof OLuceneFullTextIndex) - .map(idx -> (OLuceneFullTextIndex) idx) - .collect(Collectors.toList()); - - if (indices.size() > 1) { - throw new IllegalArgumentException("too many full-text indices on given class: " + className); - } - - return indices.size() == 0 ? null : indices.get(0); - } - - @Override - public long estimate( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - OLuceneFullTextIndex index = this.searchForIndex(target, ctx); - - if (index != null) return index.size(); - return 0; - } - - @Override - public boolean canExecuteInline( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - return false; - } - - @Override - public boolean allowsIndexedExecution( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - - OLuceneFullTextIndex index = this.searchForIndex(target, ctx); - - return index != null; - } - - @Override - public boolean shouldExecuteAfterSearch( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - return false; - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnClassFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnClassFunction.java deleted file mode 100644 index e9e6f21e04..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnClassFunction.java +++ /dev/null @@ -1,184 +0,0 @@ -package com.arcadedb.lucene.functions; - -import static com.arcadedb.lucene.functions.OLuceneFunctionsUtils.getOrCreateMemoryIndex; - -import com.arcadedb.lucene.builder.OLuceneQueryBuilder; -import com.arcadedb.lucene.collections.OLuceneCompositeKey; -import com.arcadedb.lucene.index.OLuceneFullTextIndex; -import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; -import com.arcadedb.database.OCommandContext; -import com.arcadedb.database.OIdentifiable; -import com.arcadedb.database.id.ORID; -import com.arcadedb.database.metadata.OMetadataInternal; -import com.arcadedb.database.record.OElement; -import com.arcadedb.database.record.impl.ODocument; -import com.arcadedb.database.sql.executor.OResult; -import com.arcadedb.database.sql.executor.OResultInternal; -import com.arcadedb.database.sql.parser.OBinaryCompareOperator; -import com.arcadedb.database.sql.parser.OExpression; -import com.arcadedb.database.sql.parser.OFromClause; -import com.arcadedb.database.sql.parser.OFromItem; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.memory.MemoryIndex; - -/** Created by frank on 15/01/2017. */ -public class OLuceneSearchOnClassFunction extends OLuceneSearchFunctionTemplate { - - public static final String NAME = "search_class"; - - public OLuceneSearchOnClassFunction() { - super(NAME, 1, 2); - } - - @Override - public String getName() { - return NAME; - } - - @Override - public boolean canExecuteInline( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - return true; - } - - @Override - public Object execute( - Object iThis, - OIdentifiable iCurrentRecord, - Object iCurrentResult, - Object[] params, - OCommandContext ctx) { - - OResult result; - if (iThis instanceof OResult) { - result = (OResult) iThis; - } else { - result = new OResultInternal((OIdentifiable) iThis); - } - - if (!result.getElement().isPresent()) return false; - OElement element = result.getElement().get(); - if (!element.getSchemaType().isPresent()) return false; - - String className = element.getSchemaType().get().getName(); - - OLuceneFullTextIndex index = searchForIndex(ctx, className); - - if (index == null) return false; - - String query = (String) params[0]; - - MemoryIndex memoryIndex = getOrCreateMemoryIndex(ctx); - - List key = - index.getDefinition().getFields().stream() - .map(s -> element.getProperty(s)) - .collect(Collectors.toList()); - - for (IndexableField field : index.buildDocument(key, iCurrentRecord).getFields()) { - memoryIndex.addField(field, index.indexAnalyzer()); - } - - ODocument metadata = getMetadata(params); - OLuceneKeyAndMetadata keyAndMetadata = - new OLuceneKeyAndMetadata( - new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); - - return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; - } - - private ODocument getMetadata(Object[] params) { - - if (params.length == 2) { - return new ODocument().fromMap((Map) params[1]); - } - - return OLuceneQueryBuilder.EMPTY_METADATA; - } - - @Override - public String getSyntax() { - return "SEARCH_INDEX( indexName, [ metdatada {} ] )"; - } - - @Override - public boolean filterResult() { - return true; - } - - @Override - public Iterable searchFromTarget( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - - OLuceneFullTextIndex index = searchForIndex(target, ctx); - - OExpression expression = args[0]; - String query = (String) expression.execute((OResult) null, ctx); - - if (index != null) { - - ODocument metadata = getMetadata(args, ctx); - - List luceneResultSet; - try (Stream rids = - index - .getInternal() - .getRids( - new OLuceneKeyAndMetadata( - new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata))) { - luceneResultSet = rids.collect(Collectors.toList()); - } - - return luceneResultSet; - } - return Collections.emptySet(); - } - - private ODocument getMetadata(OExpression[] args, OCommandContext ctx) { - if (args.length == 2) { - return getMetadata(args[1], ctx); - } - return OLuceneQueryBuilder.EMPTY_METADATA; - } - - @Override - protected OLuceneFullTextIndex searchForIndex( - OFromClause target, OCommandContext ctx, OExpression... args) { - OFromItem item = target.getItem(); - - String className = item.getIdentifier().getStringValue(); - - return searchForIndex(ctx, className); - } - - private OLuceneFullTextIndex searchForIndex(OCommandContext ctx, String className) { - OMetadataInternal dbMetadata = - (OMetadataInternal) ctx.getDatabase().activateOnCurrentThread().getMetadata(); - - List indices = - dbMetadata.getImmutableSchemaSnapshot().getClass(className).getIndexes().stream() - .filter(idx -> idx instanceof OLuceneFullTextIndex) - .map(idx -> (OLuceneFullTextIndex) idx) - .collect(Collectors.toList()); - - if (indices.size() > 1) { - throw new IllegalArgumentException("too many full-text indices on given class: " + className); - } - - return indices.size() == 0 ? null : indices.get(0); - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnFieldsFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnFieldsFunction.java deleted file mode 100644 index 7ebe6e7b27..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnFieldsFunction.java +++ /dev/null @@ -1,200 +0,0 @@ -package com.arcadedb.lucene.functions; - -import static com.arcadedb.lucene.functions.OLuceneFunctionsUtils.getOrCreateMemoryIndex; - -import com.arcadedb.lucene.builder.OLuceneQueryBuilder; -import com.arcadedb.lucene.collections.OLuceneCompositeKey; -import com.arcadedb.lucene.index.OLuceneFullTextIndex; -import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; -import com.arcadedb.database.OCommandContext; -import com.arcadedb.database.OIdentifiable; -import com.arcadedb.database.id.ORID; -import com.arcadedb.database.metadata.OMetadataInternal; -import com.arcadedb.database.record.OElement; -import com.arcadedb.database.record.impl.ODocument; -import com.arcadedb.database.sql.executor.OResult; -import com.arcadedb.database.sql.executor.OResultInternal; -import com.arcadedb.database.sql.parser.OBinaryCompareOperator; -import com.arcadedb.database.sql.parser.OExpression; -import com.arcadedb.database.sql.parser.OFromClause; -import com.arcadedb.database.sql.parser.OFromItem; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.memory.MemoryIndex; - -/** Created by frank on 15/01/2017. */ -public class OLuceneSearchOnFieldsFunction extends OLuceneSearchFunctionTemplate { - - public static final String NAME = "search_fields"; - - public OLuceneSearchOnFieldsFunction() { - super(NAME, 2, 3); - } - - @Override - public String getName() { - return NAME; - } - - @Override - public Object execute( - Object iThis, - OIdentifiable iCurrentRecord, - Object iCurrentResult, - Object[] params, - OCommandContext ctx) { - - if (iThis instanceof ORID) { - iThis = ((ORID) iThis).getRecord(); - } - if (iThis instanceof OIdentifiable) { - iThis = new OResultInternal((OIdentifiable) iThis); - } - OResult result = (OResult) iThis; - - if (!result.getElement().isPresent()) return false; - OElement element = result.getElement().get(); - if (!element.getSchemaType().isPresent()) return false; - String className = element.getSchemaType().get().getName(); - List fieldNames = (List) params[0]; - - OLuceneFullTextIndex index = searchForIndex(className, ctx, fieldNames); - - if (index == null) return false; - - String query; - if (params[1] == null) { - query = null; - } else { - query = (String) params[1].toString(); - } - - MemoryIndex memoryIndex = getOrCreateMemoryIndex(ctx); - - List key = - index.getDefinition().getFields().stream() - .map(s -> element.getProperty(s)) - .collect(Collectors.toList()); - - for (IndexableField field : index.buildDocument(key, iCurrentRecord).getFields()) { - memoryIndex.addField(field, index.indexAnalyzer()); - } - - ODocument metadata = getMetadata(params); - OLuceneKeyAndMetadata keyAndMetadata = - new OLuceneKeyAndMetadata( - new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); - - return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; - } - - private ODocument getMetadata(Object[] params) { - - if (params.length == 3) { - return new ODocument().fromMap((Map) params[2]); - } - - return OLuceneQueryBuilder.EMPTY_METADATA; - } - - @Override - public String getSyntax() { - return "SEARCH_INDEX( indexName, [ metdatada {} ] )"; - } - - @Override - public Iterable searchFromTarget( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - - OLuceneFullTextIndex index = searchForIndex(target, ctx, args); - - OExpression expression = args[1]; - Object query = expression.execute((OResult) null, ctx); - if (index != null) { - - ODocument meta = getMetadata(args, ctx); - Set luceneResultSet; - try (Stream rids = - index - .getInternal() - .getRids( - new OLuceneKeyAndMetadata( - new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { - luceneResultSet = rids.collect(Collectors.toSet()); - } - - return luceneResultSet; - } - throw new RuntimeException(); - } - - private ODocument getMetadata(OExpression[] args, OCommandContext ctx) { - if (args.length == 3) { - return getMetadata(args[2], ctx); - } - return OLuceneQueryBuilder.EMPTY_METADATA; - } - - @Override - protected OLuceneFullTextIndex searchForIndex( - OFromClause target, OCommandContext ctx, OExpression... args) { - List fieldNames = (List) args[0].execute((OResult) null, ctx); - OFromItem item = target.getItem(); - String className = item.getIdentifier().getStringValue(); - - return searchForIndex(className, ctx, fieldNames); - } - - private OLuceneFullTextIndex searchForIndex( - String className, OCommandContext ctx, List fieldNames) { - OMetadataInternal dbMetadata = - (OMetadataInternal) ctx.getDatabase().activateOnCurrentThread().getMetadata(); - - List indices = - dbMetadata.getImmutableSchemaSnapshot().getClass(className).getIndexes().stream() - .filter(idx -> idx instanceof OLuceneFullTextIndex) - .map(idx -> (OLuceneFullTextIndex) idx) - .filter(idx -> intersect(idx.getDefinition().getFields(), fieldNames)) - .collect(Collectors.toList()); - - if (indices.size() > 1) { - throw new IllegalArgumentException( - "too many indices matching given field name: " + String.join(",", fieldNames)); - } - - return indices.size() == 0 ? null : indices.get(0); - } - - public List intersection(List list1, List list2) { - List list = new ArrayList(); - - for (T t : list1) { - if (list2.contains(t)) { - list.add(t); - } - } - - return list; - } - - public boolean intersect(List list1, List list2) { - - for (T t : list1) { - if (list2.contains(t)) { - return true; - } - } - - return false; - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnIndexFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnIndexFunction.java deleted file mode 100644 index c27b3ea8ff..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnIndexFunction.java +++ /dev/null @@ -1,198 +0,0 @@ -package com.arcadedb.lucene.functions; - -import com.arcadedb.lucene.builder.OLuceneQueryBuilder; -import com.arcadedb.lucene.collections.OLuceneCompositeKey; -import com.arcadedb.lucene.index.OLuceneFullTextIndex; -import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; -import com.arcadedb.database.OCommandContext; -import com.arcadedb.database.ODatabaseDocumentInternal; -import com.arcadedb.database.OIdentifiable; -import com.arcadedb.database.id.ORID; -import com.arcadedb.database.index.OIndex; -import com.arcadedb.database.record.impl.ODocument; -import com.arcadedb.database.sql.executor.OResult; -import com.arcadedb.database.sql.executor.OResultInternal; -import com.arcadedb.database.sql.parser.OBinaryCompareOperator; -import com.arcadedb.database.sql.parser.OExpression; -import com.arcadedb.database.sql.parser.OFromClause; -import com.arcadedb.database.sql.parser.OFromItem; -import com.arcadedb.database.sql.parser.OIdentifier; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.memory.MemoryIndex; - -/** Created by frank on 15/01/2017. */ -public class OLuceneSearchOnIndexFunction extends OLuceneSearchFunctionTemplate { - - public static final String MEMORY_INDEX = "_memoryIndex"; - - public static final String NAME = "search_index"; - - public OLuceneSearchOnIndexFunction() { - super(NAME, 2, 3); - } - - @Override - public String getName() { - return NAME; - } - - @Override - public Object execute( - Object iThis, - OIdentifiable iCurrentRecord, - Object iCurrentResult, - Object[] params, - OCommandContext ctx) { - if (iThis instanceof ORID) { - iThis = ((ORID) iThis).getRecord(); - } - if (iThis instanceof OIdentifiable) { - iThis = new OResultInternal((OIdentifiable) iThis); - } - OResult result = (OResult) iThis; - - String indexName = (String) params[0]; - - OLuceneFullTextIndex index = searchForIndex(ctx, indexName); - - if (index == null) return false; - - String query = (String) params[1]; - - MemoryIndex memoryIndex = getOrCreateMemoryIndex(ctx); - - List key = - index.getDefinition().getFields().stream() - .map(s -> result.getProperty(s)) - .collect(Collectors.toList()); - - for (IndexableField field : index.buildDocument(key, iCurrentRecord).getFields()) { - memoryIndex.addField(field, index.indexAnalyzer()); - } - - ODocument metadata = getMetadata(params); - OLuceneKeyAndMetadata keyAndMetadata = - new OLuceneKeyAndMetadata( - new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); - - return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; - } - - private ODocument getMetadata(Object[] params) { - - if (params.length == 3) { - return new ODocument().fromMap((Map) params[2]); - } - - return OLuceneQueryBuilder.EMPTY_METADATA; - } - - private MemoryIndex getOrCreateMemoryIndex(OCommandContext ctx) { - MemoryIndex memoryIndex = (MemoryIndex) ctx.getVariable(MEMORY_INDEX); - if (memoryIndex == null) { - memoryIndex = new MemoryIndex(); - ctx.setVariable(MEMORY_INDEX, memoryIndex); - } - - memoryIndex.reset(); - return memoryIndex; - } - - @Override - public String getSyntax() { - return "SEARCH_INDEX( indexName, [ metdatada {} ] )"; - } - - @Override - public boolean filterResult() { - return true; - } - - @Override - public Iterable searchFromTarget( - OFromClause target, - OBinaryCompareOperator operator, - Object rightValue, - OCommandContext ctx, - OExpression... args) { - - OLuceneFullTextIndex index = searchForIndex(target, ctx, args); - - OExpression expression = args[1]; - String query = (String) expression.execute((OResult) null, ctx); - if (index != null && query != null) { - - ODocument meta = getMetadata(args, ctx); - - List luceneResultSet; - try (Stream rids = - index - .getInternal() - .getRids( - new OLuceneKeyAndMetadata( - new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { - luceneResultSet = rids.collect(Collectors.toList()); - } - - return luceneResultSet; - } - return Collections.emptyList(); - } - - private ODocument getMetadata(OExpression[] args, OCommandContext ctx) { - if (args.length == 3) { - return getMetadata(args[2], ctx); - } - return OLuceneQueryBuilder.EMPTY_METADATA; - } - - @Override - protected OLuceneFullTextIndex searchForIndex( - OFromClause target, OCommandContext ctx, OExpression... args) { - - OFromItem item = target.getItem(); - OIdentifier identifier = item.getIdentifier(); - return searchForIndex(identifier.getStringValue(), ctx, args); - } - - private OLuceneFullTextIndex searchForIndex( - String className, OCommandContext ctx, OExpression... args) { - - String indexName = (String) args[0].execute((OResult) null, ctx); - - final ODatabaseDocumentInternal database = (ODatabaseDocumentInternal) ctx.getDatabase(); - OIndex index = - database - .getMetadata() - .getIndexManagerInternal() - .getClassIndex(database, className, indexName); - - if (index != null && index.getInternal() instanceof OLuceneFullTextIndex) { - return (OLuceneFullTextIndex) index; - } - - return null; - } - - private OLuceneFullTextIndex searchForIndex(OCommandContext ctx, String indexName) { - final ODatabaseDocumentInternal database = (ODatabaseDocumentInternal) ctx.getDatabase(); - OIndex index = database.getMetadata().getIndexManagerInternal().getIndex(database, indexName); - - if (index != null && index.getInternal() instanceof OLuceneFullTextIndex) { - return (OLuceneFullTextIndex) index; - } - - return null; - } - - @Override - public Object getResult(OCommandContext ctx) { - return super.getResult(ctx); - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java new file mode 100644 index 0000000000..241fed17d6 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java @@ -0,0 +1,220 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * Copyright 2014 Orient Technologies. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.arcadedb.lucene.index; // Changed package + +import com.arcadedb.database.Identifiable; // Changed import +import com.arcadedb.database.RID; // Changed import +import com.arcadedb.document.Document; // ArcadeDB Document +import com.arcadedb.exception.ArcadeDBException; // Changed import +import com.arcadedb.index.CompositeKey; // Changed import +import com.arcadedb.index.IndexDefinition; // Changed import +import com.arcadedb.schema.Type; // Changed import +import java.io.UnsupportedEncodingException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Date; +import java.util.List; +import java.util.Locale; +import org.apache.lucene.document.Field; // Lucene Document Field +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; + +/** Created by enricorisa on 21/03/14. */ +public class ArcadeLuceneIndexType { // Changed class name + public static final String RID_HASH = "_RID_HASH"; + public static final String RID = "_RID"; // Defined locally + public static final String KEY = "_KEY"; // Defined locally + + public static Field createField( // Simplified, assuming store is passed correctly by caller for specific needs + final String fieldName, final Object value, final Field.Store store) { + if (fieldName.startsWith("_CLASS") || fieldName.startsWith("_CLUSTER")) { + return new StringField(fieldName, value.toString(), store); + } + // Defaulting to TextField, assuming analysis. Use StringField if non-analyzed is the default. + return new TextField(fieldName, value.toString(), store); + } + + public static String extractId(org.apache.lucene.document.Document doc) { // Lucene Document + String value = doc.get(RID_HASH); + if (value != null) { + int pos = value.indexOf("|"); + if (pos > 0) { + return value.substring(0, pos); + } else { + return value; + } + } else { + return null; + } + } + + public static Field createIdField(final Identifiable id, final Object key) { // Changed Identifiable + return new StringField(RID_HASH, genValueId(id, key), Field.Store.YES); + } + + public static Field createRidField(final Identifiable id) { // Renamed from createOldIdField, Changed Identifiable + return new StringField(RID, id.getIdentity().toString(), Field.Store.YES); + } + + public static String genValueId(final Identifiable id, final Object key) { // Changed Identifiable + String value = id.getIdentity().toString() + "|"; + value += hashKey(key); + return value; + } + + public static List createFields( + String fieldName, Object value, Field.Store store, Boolean sort, Type type) { // Added Type parameter + List luceneFields = new ArrayList<>(); + + if (value instanceof Number) { + Number number = (Number) value; + if (type == Type.LONG || value instanceof Long) { + luceneFields.add(new LongPoint(fieldName, number.longValue())); + luceneFields.add(new NumericDocValuesField(fieldName, number.longValue())); // For sorting/faceting + if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.longValue())); + } else if (type == Type.FLOAT || value instanceof Float) { + luceneFields.add(new FloatPoint(fieldName, number.floatValue())); + luceneFields.add(new FloatDocValuesField(fieldName, number.floatValue())); // For sorting/faceting + if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.floatValue())); + } else if (type == Type.DOUBLE || value instanceof Double) { + luceneFields.add(new DoublePoint(fieldName, number.doubleValue())); + luceneFields.add(new DoubleDocValuesField(fieldName, number.doubleValue())); // For sorting/faceting + if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.doubleValue())); + } else { // INTEGER, SHORT, BYTE + luceneFields.add(new IntPoint(fieldName, number.intValue())); + luceneFields.add(new NumericDocValuesField(fieldName, number.longValue())); // Use long for DV for all integer types + if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.intValue())); + } + // Optionally, add the original value as a TextField if it needs to be searchable as text + // luceneFields.add(new TextField(fieldName, value.toString(), store)); + } else if (type == Type.DATETIME || type == Type.DATE || value instanceof Date) { + long time = (value instanceof Date) ? ((Date) value).getTime() : Long.parseLong(value.toString()); + luceneFields.add(new LongPoint(fieldName, time)); + luceneFields.add(new NumericDocValuesField(fieldName, time)); // For sorting/faceting + if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, time)); + // Optionally, add the original value as a TextField + // luceneFields.add(new TextField(fieldName, value.toString(), store)); + } else if (type == Type.STRING || value instanceof String) { + String stringValue = value.toString(); + luceneFields.add(new TextField(fieldName, stringValue, store)); // Analyzed + // Or use StringField for non-analyzed: + // luceneFields.add(new StringField(fieldName, stringValue, store)); + if (Boolean.TRUE.equals(sort)) { + luceneFields.add(new SortedDocValuesField(fieldName, new BytesRef(stringValue))); + } + } else { + // Default to TextField for other types or if type is null + luceneFields.add(new TextField(fieldName, value.toString(), store)); + if (Boolean.TRUE.equals(sort)) { + luceneFields.add(new SortedDocValuesField(fieldName, new BytesRef(value.toString()))); + } + } + return luceneFields; + } + + public static Query createExactQuery(IndexDefinition index, Object key) { // Changed OIndexDefinition + Query query = null; + if (key instanceof String) { + final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + if (index.getFields().size() > 0) { + for (String idx : index.getFields()) { + queryBuilder.add(new TermQuery(new Term(idx, key.toString())), BooleanClause.Occur.SHOULD); + } + } else { + queryBuilder.add(new TermQuery(new Term(KEY, key.toString())), BooleanClause.Occur.SHOULD); + } + query = queryBuilder.build(); + } else if (key instanceof CompositeKey) { // Changed OCompositeKey + final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + int i = 0; + CompositeKey keys = (CompositeKey) key; + for (String idx : index.getFields()) { + String val = (String) keys.getKeys().get(i); // Assuming keys are strings + queryBuilder.add(new TermQuery(new Term(idx, val)), BooleanClause.Occur.MUST); + i++; + } + query = queryBuilder.build(); + } + return query; + } + + public static Query createQueryId(Identifiable value) { // Changed OIdentifiable + return new TermQuery(new Term(RID, value.getIdentity().toString())); + } + + public static Query createQueryId(Identifiable value, Object key) { // Changed OIdentifiable + return new TermQuery(new Term(RID_HASH, genValueId(value, key))); + } + + public static String hashKey(Object key) { + try { + String keyString; + if (key instanceof Document) { // Changed ODocument to ArcadeDB Document + keyString = ((Document) key).toJSON().toString(); // Assuming toJSON returns JSON object + } else { + keyString = key.toString(); + } + MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); + byte[] bytes = sha256.digest(keyString.getBytes("UTF-8")); + return Base64.getEncoder().encodeToString(bytes); + } catch (NoSuchAlgorithmException e) { + throw ArcadeDBException.wrapException(new ArcadeDBException("fail to find sha algorithm"), e); // Changed exception + } catch (UnsupportedEncodingException e) { + throw ArcadeDBException.wrapException(new ArcadeDBException("fail to find utf-8 encoding"), e); // Changed exception + } + } + + public static Query createDeleteQuery( // Changed OIdentifiable, ODocument + Identifiable value, List fields, Object key, com.arcadedb.document.Document metadata) { + + // TODO Implementation of Composite keys with Collection + final BooleanQuery.Builder filter = new BooleanQuery.Builder(); + final BooleanQuery.Builder builder = new BooleanQuery.Builder(); + // TODO: Condition on Id and field key only for backward compatibility + if (value != null) { + builder.add(createQueryId(value), BooleanClause.Occur.MUST); + } + String field = fields.iterator().next(); + builder.add( + new TermQuery(new Term(field, key.toString().toLowerCase(Locale.ENGLISH))), + BooleanClause.Occur.MUST); + + filter.add(builder.build(), BooleanClause.Occur.SHOULD); + if (value != null) { + filter.add(createQueryId(value, key), BooleanClause.Occur.SHOULD); + } + return filter.build(); + } +} From 6d3c21f130c5919d7d96c5f97a6819fd22c7983c Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 1 Jun 2025 09:19:39 +0000 Subject: [PATCH 3/6] Refactor Lucene module: Phase 1 - Structural Refactoring This commit includes the initial structural refactoring of the migrated Lucene module from OrientDB to ArcadeDB. **Key Changes Made:** 1. **Project Setup & Missing File:** * I added the missing `OLuceneIndexEngineAbstract.java` to the codebase. 2. **Core Engine Refactoring (`com.arcadedb.lucene.engine`):** * I renamed and refactored `OLuceneIndexEngineAbstract.java`, updating package declarations, imports, and replacing many OrientDB APIs with ArcadeDB equivalents. Logging switched to `java.util.logging.Logger`. * I refactored `OLuceneIndexEngine.java` (interface) to `LuceneIndexEngine.java`. * I structurally refactored `OLuceneFullTextIndexEngine.java` to `ArcadeLuceneFullTextIndexEngine.java`. * I structurally refactored `OLuceneCrossClassIndexEngine.java` to `ArcadeLuceneCrossClassIndexEngine.java`. * These engine classes are now using ArcadeDB types, but are heavily dependent on other unrefactored components (builders, query classes, etc.), marked with `FIXME` comments. 3. **Analyzer Refactoring (`com.arcadedb.lucene.analyzer`):** * `OLuceneAnalyzerFactory.java` refactored to `ArcadeLuceneAnalyzerFactory.java`. * `OLucenePerFieldAnalyzerWrapper.java` refactored to `ArcadeLucenePerFieldAnalyzerWrapper.java`. 4. **Index Type Definition (`com.arcadedb.lucene.index`):** * `OLuceneIndexType.java` refactored to `ArcadeLuceneIndexType.java` (moved from `builder` package). * I significantly modernized Lucene field creation logic to use Point fields and DocValues fields as per Lucene 10.x best practices. This class now requires callers to provide `com.arcadedb.schema.Type` for field creation. 5. **SQL Function Refactoring (`com.arcadedb.lucene.functions`):** * Function factories (`OLuceneFunctionsFactory`, `OLuceneCrossClassFunctionsFactory`) refactored to use `SQLFunctionRegistry.INSTANCE.register()`. * Template and utility classes (`OLuceneSearchFunctionTemplate`, `OLuceneFunctionsUtils`) structurally refactored. * All concrete SQL function classes (`OLuceneSearchOnIndexFunction`, `OLuceneSearchOnClassFunction`, `OLuceneSearchOnFieldsFunction`, `OLuceneSearchMoreLikeThisFunction`, `OLuceneCrossClassSearchFunction`) were structurally refactored with updated names, base classes, and ArcadeDB type usage. Logic for dynamic field indexing in `ArcadeLuceneSearchOnFieldsFunction` was improved. * All function classes have `FIXME` comments highlighting dependencies on unrefactored components and assumptions about ArcadeDB's SQL function infrastructure. **Overall Status:** I have completed an initial structural refactoring for the majority of the migrated Java files. This involved: * Renaming files and classes. * Updating package declarations. * Replacing OrientDB imports and API calls with ArcadeDB equivalents where direct mappings were clear. * Switching to `java.util.logging.Logger`. * Identifying and marking dependencies and areas requiring further work with `FIXME` comments. The module is not yet compilable or functional. The next steps involve refactoring the remaining dependent classes (index implementations, query/builder classes, transaction components) and then addressing the `FIXME` comments to resolve dependencies and implement core logic. This commit represents a significant portion of the initial "find and replace" and structural adaptation phase of the migration. From eb459f899b76316bfa116d3bb4798f10ed41e954 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 1 Jun 2025 10:19:13 +0000 Subject: [PATCH 4/6] Jules was unable to complete the task in time. Please review the work done so far and provide feedback for Jules to continue. --- .../ArcadeLuceneIndexFactoryHandler.java | 74 ++- .../lucene/builder/LuceneDocumentBuilder.java | 298 ++++++++++++ .../lucene/builder/LuceneQueryBuilder.java | 168 +++++++ .../ArcadeLuceneCrossClassIndexEngine.java | 27 +- .../engine/ArcadeLuceneEngineUtils.java | 160 +++++++ .../ArcadeLuceneFullTextIndexEngine.java | 91 ++-- .../ArcadeLuceneCrossClassSearchFunction.java | 72 ++- .../index/ArcadeLuceneFullTextIndex.java | 445 +++++++++++++----- .../lucene/index/ArcadeLuceneIndexType.java | 147 +++++- .../ArcadeLuceneMultiFieldQueryParser.java | 183 +++++++ .../lucene/query/LuceneIndexCursor.java | 281 ++++++++--- .../lucene/query/LuceneKeyAndMetadata.java | 84 ++++ .../lucene/query/LuceneQueryContext.java | 179 +++++++ .../lucene/query/OLuceneQueryContext.java | 138 ------ .../arcadedb/lucene/tx/LuceneTxChanges.java | 108 +++++ .../lucene/tx/LuceneTxChangesAbstract.java | 322 +++++++++++++ .../lucene/tx/LuceneTxChangesMultiRid.java | 191 ++++++++ .../lucene/tx/LuceneTxChangesSingleRid.java | 203 ++++++++ .../arcadedb/lucene/tx/OLuceneTxChanges.java | 52 -- .../lucene/tx/OLuceneTxChangesAbstract.java | 74 --- .../lucene/tx/OLuceneTxChangesMultiRid.java | 108 ----- .../lucene/tx/OLuceneTxChangesSingleRid.java | 92 ---- .../arcadedb/lucene/util/LuceneDateTools.java | 130 +++++ 23 files changed, 2907 insertions(+), 720 deletions(-) create mode 100644 lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/builder/LuceneQueryBuilder.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneEngineUtils.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/parser/ArcadeLuceneMultiFieldQueryParser.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/query/LuceneKeyAndMetadata.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/query/OLuceneQueryContext.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChanges.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesAbstract.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesMultiRid.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesSingleRid.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChanges.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesAbstract.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesMultiRid.java delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesSingleRid.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/util/LuceneDateTools.java diff --git a/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java index 3edb15da20..091a3fcb9f 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java +++ b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java @@ -10,25 +10,75 @@ public class ArcadeLuceneIndexFactoryHandler implements IndexFactoryHandler { + public static final String LUCENE_FULL_TEXT_ALGORITHM = "LUCENE"; // Or just "LUCENE" + public static final String LUCENE_CROSS_CLASS_ALGORITHM = "LUCENE_CROSS_CLASS"; + + @Override public IndexInternal create(IndexBuilder builder) { DatabaseInternal database = builder.getDatabase(); String indexName = builder.getIndexName(); - boolean unique = builder.isUnique(); - // Schema.INDEX_TYPE indexType = builder.getIndexType(); // This is implicitly "FULL_TEXT" for this handler - Type[] keyTypes = builder.getKeyTypes(); - Map properties = builder.getProperties(); - String filePath = builder.getFilePath(); + // boolean unique = builder.isUnique(); // Unique is part of IndexDefinition + // Type[] keyTypes = builder.getKeyTypes(); // Key types are part of IndexDefinition + + // The IndexDefinition is the primary source of truth for index properties. + IndexDefinition definition = builder.getIndexDefinition(); + if (definition == null) { + // This case should ideally be prevented by the schema/builder logic before reaching here. + // If it can happen, we might need to construct a minimal definition. + // For now, assuming builder provides a valid definition or enough info to create one. + // If builder.build() is called before this, definition should be set. + // If this factory *is* part of builder.build(), then builder has all components. + throw new IllegalArgumentException("IndexDefinition is required to create a Lucene index."); + } + + // Algorithm is now part of IndexDefinition + // String algorithm = definition.getAlgorithm() != null ? definition.getAlgorithm() : LUCENE_FULL_TEXT_ALGORITHM; + // The factory is usually registered for a specific algorithm, so this check might be redundant + // if this factory is only invoked for "LUCENE" or "LUCENE_CROSS_CLASS". + // The constructor for ArcadeLuceneFullTextIndex is: + // (DatabaseInternal db, String name, String typeName, IndexDefinition definition, + // String filePath, PaginatedFile metadataFile, PaginatedFile[] dataFiles, + // PaginatedFile[] treeFiles, int fileId, int pageSize, + // TransactionContext.AtomicOperation atomicOperation) + // The IndexBuilder provides most of these. + // typeName here is the schema type name the index is on, not the index type/algorithm. - String analyzerClassName = org.apache.lucene.analysis.standard.StandardAnalyzer.class.getName(); - if (properties != null && properties.containsKey("analyzer")) { - analyzerClassName = properties.get("analyzer"); + // filePath should be determined by the system, often databasePath + indexFileName + String filePath = builder.getFilePath(); + if (filePath == null) { + filePath = database.getDatabasePath() + java.io.File.separator + builder.getFileName(); } - // The actual ArcadeLuceneFullTextIndex will need to be instantiated here. - // Its constructor will need to be defined to accept these parameters. - // Adding filePath and keyTypes to the constructor call. - return new com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex(database, indexName, unique, analyzerClassName, filePath, keyTypes); + + // For PaginatedFile parameters, they are usually managed by the Storage engine. + // For a Lucene index, it might not directly use these ArcadeDB PaginatedFile structures + // for its main data, but it might have a metadata file. + // The IndexBuilder should provide these if they are standard. + // If Lucene manages its own files in 'filePath', some of these might be null or placeholders. + + // Let's assume the builder provides what's needed for the generic parts of an index. + // The specific engine (Lucene) will manage its own data files within its directory (filePath). + + // The old constructor of ArcadeLuceneFullTextIndex took: + // (DatabaseInternal database, String name, boolean unique, String analyzerClassName, String filePath, Type[] keyTypes) + // This has been changed to the standard one. + // We need to ensure that IndexDefinition within builder has all necessary info (like analyzer). + // Analyzer is typically stored in definition.getOptions().get("analyzer") + + return new ArcadeLuceneFullTextIndex( + database, + indexName, + definition.getTypeName(), // Class/Type name this index is on + definition, + filePath, + builder.getMetadataFile(), // from IndexBuilder + builder.getDataFiles(), // from IndexBuilder + builder.getTreeFiles(), // from IndexBuilder (likely null/unused for Lucene) + builder.getFileId(), // from IndexBuilder + builder.getPageSize(), // from IndexBuilder (might be less relevant for Lucene) + null // AtomicOperation: build is usually outside a TX or handles its own. + ); } } diff --git a/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java new file mode 100644 index 0000000000..778d4dd511 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java @@ -0,0 +1,298 @@ +/* + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.arcadedb.lucene.builder; + +import com.arcadedb.database.Database; +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.document.Document; // ArcadeDB Document +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.lucene.index.ArcadeLuceneIndexType; +import com.arcadedb.schema.DocumentType; +import com.arcadedb.schema.Property; +import com.arcadedb.schema.Type; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +import org.apache.lucene.document.Field; // Lucene Field +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; + +public class LuceneDocumentBuilder { + + private static final Logger logger = Logger.getLogger(LuceneDocumentBuilder.class.getName()); + + public org.apache.lucene.document.Document build(IndexDefinition indexDefinition, + Object key, // The key used for indexing (can be composite) + Identifiable identifiableValue, // The record to index + Map collectionFields, // Info about collection fields (if needed, from old engine) + com.arcadedb.document.Document metadata) { // Query/index time metadata + + org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document(); + + // Add RID field + if (identifiableValue != null && identifiableValue.getIdentity() != null) { + luceneDoc.add(ArcadeLuceneIndexType.createRidField(identifiableValue)); + } + + // Add KEY field(s) if the key is provided and the index is not on specific fields (manual index style) + // For automatic indexes, the key is usually derived from the document's fields. + if (key != null && (indexDefinition.getFields() == null || indexDefinition.getFields().isEmpty())) { + // This logic is more for manual indexes where 'key' is the value being indexed. + // For automatic indexes on document fields, this 'key' might be redundant or handled differently. + // Assuming KEY field stores the string representation of the key for now. + luceneDoc.add(new StringField(ArcadeLuceneIndexType.KEY, key.toString(), Field.Store.YES)); + } + + + if (identifiableValue instanceof com.arcadedb.document.Document) { + com.arcadedb.document.Document record = (com.arcadedb.document.Document) identifiableValue; + DatabaseInternal db = record.getDatabase(); + DocumentType recordType = record.getType(); + + List fieldsToIndex = indexDefinition.getFields(); + if (fieldsToIndex == null || fieldsToIndex.isEmpty()) { + // If no specific fields defined for index (e.g. manual index), + // and we already added KEY, then we might be done for primary content for this key. + // However, if the 'value' (record) itself should have its fields indexed, + // then fieldsToIndex should probably default to all fields of the record. + // This part depends on the semantics of "automatic" vs "manual" Lucene indexes. + // For now, if no fields are in definition, we assume only KEY and RID are added. + } else { + for (String fieldName : fieldsToIndex) { + if (!record.has(fieldName)) { + continue; + } + Object fieldValue = record.get(fieldName); + if (fieldValue == null) { + continue; + } + + Property property = recordType != null ? recordType.getProperty(fieldName) : null; + Type fieldType = property != null ? property.getType() : Type.STRING; // Default to STRING if no schema type + + // Determine if field should be stored and sorted based on index definition options or metadata + boolean storeField = isToStore(indexDefinition, fieldName, metadata); + boolean sortField = isToSort(indexDefinition, fieldName, metadata); + + if (fieldValue instanceof Collection && (fieldType == Type.EMBEDDEDLIST || fieldType == Type.EMBEDDEDSET || fieldType == Type.LIST)) { + Collection collection = (Collection) fieldValue; + Type linkedType = (property != null && property.getOfType() != null) ? property.getOfType() : null; + + if (linkedType == null && !collection.isEmpty()) { // Try to infer from first element if not specified in schema + Object firstElement = collection.iterator().next(); + if (firstElement instanceof Document) linkedType = Type.EMBEDDED; // Or specific DocumentType if available + else if (firstElement != null) linkedType = Type.getTypeByValue(firstElement); + } + + if (linkedType != null && linkedType != Type.EMBEDDED && linkedType != Type.EMBEDDEDMAP) { // Scalar list/set + for (Object item : collection) { + if (item != null) { + List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item, + storeField ? Field.Store.YES : Field.Store.NO, + sortField, // Note: Sorting on multi-value fields needs specific Lucene setup. + // createFields will add DocValues for the type if sortField is true. + linkedType); + for (Field f : itemFields) { + luceneDoc.add(f); + } + } + } + } else { // EMBEDDEDLIST/SET of Documents, or list of EMBEDDEDMAP (unlikely for direct indexing here) + // FIXME: Implement flattening strategy for embedded documents in collections. + // Example: fieldName_embeddedField. This needs recursive calls or a helper. + // For now, logging a warning and indexing toString() for each item if it's a Document. + logger.warning("Full indexing of embedded documents within collection '" + fieldName + "' is not yet implemented. Indexing toString()."); + if (linkedType == Type.EMBEDDED || (linkedType == null && collection.iterator().hasNext() && collection.iterator().next() instanceof Document)) { + for (Object item : collection) { + if (item != null) { + List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item.toString(), + storeField ? Field.Store.YES : Field.Store.NO, false, Type.STRING); // Index as string + for (Field f : itemFields) { luceneDoc.add(f); } + } + } + } + } + } else if (fieldValue instanceof Map && fieldType == Type.EMBEDDEDMAP) { + // FIXME: Implement flattening strategy for embedded maps. + // Example: fieldName_mapKey_embeddedField or index map entries as JSON/string. + logger.warning("Indexing embedded maps is not yet fully implemented for field: " + fieldName + ". Indexing toString()."); + List mapFields = ArcadeLuceneIndexType.createFields(fieldName, fieldValue.toString(), + storeField ? Field.Store.YES : Field.Store.NO, false, Type.STRING); + for (Field f : mapFields) { luceneDoc.add(f); } + + } else if (fieldValue instanceof Document && fieldType == Type.EMBEDDED) { + // FIXME: Implement flattening strategy for single embedded documents. + // Example: fieldName_embeddedField. This needs recursive calls or a helper. + logger.warning("Indexing single embedded documents is not yet fully implemented for field: " + fieldName + ". Indexing toString()."); + List embeddedFields = ArcadeLuceneIndexType.createFields(fieldName, fieldValue.toString(), + storeField ? Field.Store.YES : Field.Store.NO, false, Type.STRING); + for (Field f : embeddedFields) { luceneDoc.add(f); } + } else { // Scalar field + List luceneFields = ArcadeLuceneIndexType.createFields(fieldName, fieldValue, + storeField ? Field.Store.YES : Field.Store.NO, + sortField, + fieldType); + for (Field f : luceneFields) { + luceneDoc.add(f); + } + } + } + } + } else if (identifiableValue != null) { + // If the value is an Identifiable but not a Document (e.g. just an RID for a manual index key) + // and fields are defined in the index, this implies we should load the document + // and then process its fields. This case should ideally be handled by the caller + // by passing the actual Document record. + // If only key and RID are indexed for non-Document identifiables, current logic is okay. + } + + + // Add _CLASS field if type is available + String typeName = indexDefinition.getTypeName(); + if (typeName != null && !typeName.isEmpty()) { + luceneDoc.add(new StringField("_CLASS", typeName, Field.Store.YES)); // Non-analyzed + } + + // Log usage of collectionFields if it's passed but not deeply integrated yet + if (collectionFields != null && !collectionFields.isEmpty()) { + // The `collectionFields` map (from OrientDB's engine) indicated if a field was a collection of simple types. + // This information might be used to guide specific tokenization or if ArcadeLuceneIndexType.createFields + // needs more hints for collections of scalars vs. collections of embeddeds, though getType and getOfType should cover most cases. + // For now, just logging its presence. + logger.finer("Received 'collectionFields' map, but its specific nuanced behavior is not fully implemented beyond standard collection handling: " + collectionFields); + } + + return luceneDoc; + } + + /** + * Determines if a field should be stored in the Lucene index based on index definition options. + } + } + } + } + } else if (identifiableValue != null) { + // If the value is an Identifiable but not a Document (e.g. just an RID for a manual index key) + // and fields are defined in the index, this implies we should load the document + // and then process its fields. This case should ideally be handled by the caller + // by passing the actual Document record. + // If only key and RID are indexed for non-Document identifiables, current logic is okay. + } + + + // Add _CLASS field if type is available + String typeName = indexDefinition.getTypeName(); + if (typeName != null && !typeName.isEmpty()) { + luceneDoc.add(new StringField("_CLASS", typeName, Field.Store.YES)); // Non-analyzed + } + + + return luceneDoc; + } + + /** + * Determines if a field should be stored in the Lucene index based on index definition options. + * Convention: + * - "storeFields": "*" or "ALL" means store all. + * - "storeFields": "fieldA,fieldB" means store only these. + * - "dontStoreFields": "fieldC,fieldD" means do not store these (takes precedence). + * - "store.": "true" or "false" for field-specific setting. + * Defaults to Field.Store.NO if not specified otherwise for full-text search efficiency. + */ + private boolean isToStore(IndexDefinition indexDefinition, String fieldName, com.arcadedb.document.Document metadata) { + Map options = indexDefinition.getOptions(); + // Query-time metadata can override index-time options + if (metadata != null) { + Object fieldSpecificStoreMeta = metadata.get("store." + fieldName); + if (fieldSpecificStoreMeta != null) return Boolean.parseBoolean(fieldSpecificStoreMeta.toString()); + + List queryStoredFields = metadata.get("storedFields"); // Assuming list of strings + if (queryStoredFields != null) { + if (queryStoredFields.contains(fieldName)) return true; + if (queryStoredFields.contains("*") || queryStoredFields.contains("ALL")) return true; + } + List queryDontStoreFields = metadata.get("dontStoreFields"); + if (queryDontStoreFields != null && queryDontStoreFields.contains(fieldName)) return false; + } + + // Index definition options + if (options != null) { + String fieldSpecificStoreOpt = options.get("store." + fieldName); + if (fieldSpecificStoreOpt != null) return Boolean.parseBoolean(fieldSpecificStoreOpt); + + String dontStoreFieldsOpt = options.get("dontStoreFields"); + if (dontStoreFieldsOpt != null) { + List dontStoreList = Arrays.asList(dontStoreFieldsOpt.toLowerCase().split("\\s*,\\s*")); + if (dontStoreList.contains(fieldName.toLowerCase())) return false; + } + + String storeFieldsOpt = options.get("storeFields"); + if (storeFieldsOpt != null) { + if ("*".equals(storeFieldsOpt) || "ALL".equalsIgnoreCase(storeFieldsOpt)) return true; + List storeList = Arrays.asList(storeFieldsOpt.toLowerCase().split("\\s*,\\s*")); + if (storeList.contains(fieldName.toLowerCase())) return true; + // If storeFields is specified but doesn't list this field, and no "*" or "ALL", assume don't store (unless dontStoreFields also doesn't list it). + // This means explicit list in storeFields acts as a whitelist if present. + return false; + } + } + // Default if no specific rules found: DO NOT STORE fields unless specified. + return false; + } + + /** + * Determines if a field should have DocValues for sorting. + * Convention: + * - "sortableFields": "*" or "ALL" (less common for global sortability). + * - "sortableFields": "fieldA,fieldB". + * - "sort.": "true" or "false". + * Defaults to false. + */ + private boolean isToSort(IndexDefinition indexDefinition, String fieldName, com.arcadedb.document.Document metadata) { + Map options = indexDefinition.getOptions(); + // Query-time metadata can override index-time options + if (metadata != null) { + Object fieldSpecificSortMeta = metadata.get("sort." + fieldName); + if (fieldSpecificSortMeta != null) return Boolean.parseBoolean(fieldSpecificSortMeta.toString()); + + List querySortableFields = metadata.get("sortableFields"); // Assuming list of strings + if (querySortableFields != null) { + if (querySortableFields.contains("*") || querySortableFields.contains("ALL")) return true; + if (querySortableFields.contains(fieldName)) return true; + } + } + + // Index definition options + if (options != null) { + String fieldSpecificSortOpt = options.get("sort." + fieldName); + if (fieldSpecificSortOpt != null) return Boolean.parseBoolean(fieldSpecificSortOpt); + + String sortableFieldsOpt = options.get("sortableFields"); + if (sortableFieldsOpt != null) { + if ("*".equals(sortableFieldsOpt) || "ALL".equalsIgnoreCase(sortableFieldsOpt)) return true; + List sortList = Arrays.asList(sortableFieldsOpt.toLowerCase().split("\\s*,\\s*")); + if (sortList.contains(fieldName.toLowerCase())) return true; + // If sortableFields is specified but doesn't list this field, and no "*" or "ALL", assume not sortable. + return false; + } + } + return false; // Default to not sortable + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneQueryBuilder.java b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneQueryBuilder.java new file mode 100644 index 0000000000..294647b19d --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneQueryBuilder.java @@ -0,0 +1,168 @@ +/* + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.arcadedb.lucene.builder; + +import com.arcadedb.database.Database; +import com.arcadedb.database.DatabaseInternal; // Required for schema access +import com.arcadedb.document.Document; // ArcadeDB Document +import com.arcadedb.index.CompositeKey; +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.lucene.parser.ArcadeLuceneMultiFieldQueryParser; // FIXME: Needs refactoring +import com.arcadedb.schema.DocumentType; +import com.arcadedb.schema.Property; +import com.arcadedb.schema.Schema; +import com.arcadedb.schema.Type; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; // For string ranges, newStringRange +import org.apache.lucene.index.Term; +// Import Point field range queries +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.DoublePoint; + + +public class LuceneQueryBuilder { + + private static final Logger logger = Logger.getLogger(LuceneQueryBuilder.class.getName()); + public static final Document EMPTY_METADATA = new Document(null); // Assuming Document can be db-less for this constant + + private final boolean allowLeadingWildcard; + private final boolean splitOnWhitespace; + + public LuceneQueryBuilder(Document metadata) { + if (metadata == null) { + metadata = EMPTY_METADATA; + } + this.allowLeadingWildcard = Boolean.TRUE.equals(metadata.get("allowLeadingWildcard")); + // Lucene's StandardQueryParser and MultiFieldQueryParser split on whitespace by default. + // This setting in OrientDB was more about how the string was fed *to* the parser or if specific syntax implied no split. + // For now, assuming default Lucene behavior is mostly fine. If specific "phrase" vs "term" logic is needed from splitOnWhitespace, + // it would affect how the query string is constructed or which parser is used. + this.splitOnWhitespace = Boolean.TRUE.equals(metadata.get("splitOnWhitespace")); // Default true + } + + public Query query(IndexDefinition indexDefinition, Object key, Document metadata, Analyzer analyzer, DatabaseInternal database) throws ParseException { + if (key == null) { + throw new IllegalArgumentException("Query key cannot be null"); + } + if (metadata == null) { + metadata = EMPTY_METADATA; + } + + String[] fields = indexDefinition.getFields().toArray(new String[0]); + if (fields.length == 0) { + // Default to a common field if not specified, e.g. "_all" or a convention + // This case needs clarification based on how schema-less Lucene indexes were handled. + // For now, let's assume if no fields, it might be a special query type or error. + // Or, if key is string, it searches default fields of the parser. + // For now, if no fields defined in index, and key is String, let parser use its default field. + // This requires parser to be configured with default field(s). + // fields = new String[] { "_DEFAULT_SEARCH_FIELD" }; // Placeholder for default search field + logger.warning("Querying Lucene index " + indexDefinition.getName() + " with no fields defined in index definition. Query may not behave as expected."); + } + + Map fieldTypes = new HashMap<>(); + if (database != null && indexDefinition.getTypeName() != null) { + Schema schema = database.getSchema(); + DocumentType docType = schema.getType(indexDefinition.getTypeName()); + if (docType != null) { + for (String fieldName : indexDefinition.getFields()) { + Property prop = docType.getProperty(fieldName); + if (prop != null) { + fieldTypes.put(fieldName, prop.getType()); + } else { + fieldTypes.put(fieldName, Type.STRING); // Default if property not found in schema + } + } + } else { + for (String fieldName : indexDefinition.getFields()) { + fieldTypes.put(fieldName, Type.STRING); // Default if type not found + } + } + } else { + for (String fieldName : indexDefinition.getFields()) { + fieldTypes.put(fieldName, Type.STRING); // Default if no DB or typeName + } + } + + + if (key instanceof String) { + // ArcadeLuceneMultiFieldQueryParser is now available. + ArcadeLuceneMultiFieldQueryParser parser = new ArcadeLuceneMultiFieldQueryParser(fieldTypes, fields, analyzer); + parser.setAllowLeadingWildcard(allowLeadingWildcard); + // this.splitOnWhitespace is available but MultiFieldQueryParser handles split on whitespace by default. + // If specific behavior like "always phrase if false" is needed, parser logic would be more complex. + // For now, assuming standard MFQP behavior is sufficient. + // if (!this.splitOnWhitespace) { /* Potentially use different parser or pre-process query string */ } + + Map boost = metadata.get("boost", Map.class); + if (boost != null) { + parser.setBoosts(boost); + } + return parser.parse((String) key); + + } else if (key instanceof CompositeKey) { + CompositeKey compositeKey = (CompositeKey) key; + List keys = compositeKey.getKeys(); + BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); + + if (keys.size() != fields.length) { + throw new IllegalArgumentException("CompositeKey size does not match index definition fields count."); + } + + for (int i = 0; i < keys.size(); i++) { + Object partKey = keys.get(i); + String fieldName = fields[i]; + Type fieldType = fieldTypes.getOrDefault(fieldName, Type.STRING); + + if (partKey != null) { + Query partQuery = com.arcadedb.lucene.index.ArcadeLuceneIndexType.createExactFieldQuery(fieldName, partKey, fieldType, database); + booleanQuery.add(partQuery, BooleanClause.Occur.MUST); + } + } + return booleanQuery.build(); + } + // FIXME: Add support for specific range query objects if defined (this would be a new key instanceof MyCustomRangeObject) + // else if (key instanceof ...) { + // MyCustomRange range = (MyCustomRange) key; + // String fieldName = range.getField(); + // Type fieldType = fieldTypes.getOrDefault(fieldName, Type.STRING); + // if (fieldType.isNumeric()) { + // if (fieldType == Type.LONG || fieldType == Type.INTEGER || fieldType == Type.SHORT || fieldType == Type.BYTE || fieldType == Type.DATETIME || fieldType == Type.DATE) { + // return LongPoint.newRangeQuery(fieldName, (Long)range.getLower(), (Long)range.getUpper()); + // } // Add other numeric types + // } else if (fieldType == Type.STRING) { + // return TermRangeQuery.newStringRange(fieldName, range.getLower().toString(), range.getUpper().toString(), range.isLowerInclusive(), range.isUpperInclusive()); + // } + // } + + // Default fallback or throw exception for unsupported key types + logger.warning("Unsupported key type for Lucene query: " + key.getClass().getName() + ". Attempting TermQuery on toString()."); + return new TermQuery(new Term(fields.length > 0 ? fields[0] : "_DEFAULT_", key.toString())); // Fallback, likely not useful + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java index 03c4c19a20..8a023552a0 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java @@ -26,6 +26,7 @@ import com.arcadedb.schema.DocumentType; // Changed from OClass import com.arcadedb.schema.Type; // Changed from OType import com.arcadedb.utility.Pair; // Changed from ORawPair +import com.arcadedb.lucene.engine.ArcadeLuceneEngineUtils; // Added import import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -61,6 +62,8 @@ public class ArcadeLuceneCrossClassIndexEngine implements LuceneIndexEngine { // private final String indexName; private final int indexId; private static final String LUCENE_ALGORITHM = "LUCENE"; // Placeholder for algorithm name + private IndexMetadata markerIndexMetadata; // Optional: if you need to store it + public ArcadeLuceneCrossClassIndexEngine(int indexId, Storage storage, String indexName) { // Changed OStorage this.indexId = indexId; @@ -69,7 +72,26 @@ public ArcadeLuceneCrossClassIndexEngine(int indexId, Storage storage, String in } @Override - public void init(IndexMetadata metadata) {} // Changed OIndexMetadata + public void init(IndexMetadata metadata) { // Changed OIndexMetadata + // This engine orchestrates queries across other Lucene indexes. + // It doesn't manage its own Lucene directory or writers in the same way + // a full-text index engine does. + // The 'metadata' here belongs to the "marker" index that caused this + // cross-class engine to be instantiated. + + this.markerIndexMetadata = metadata; // Store if needed for any config + + // For now, primarily log initialization. + // Any specific configurations for the cross-class behavior that might + // be stored in the markerIndexMetadata.getOptions() could be parsed here. + logger.info("ArcadeLuceneCrossClassIndexEngine initialized for marker index: " + (metadata != null ? metadata.getName() : "null")); + + // Example: If you had a default list of fields to use for cross-class searches + // if not specified in query metadata, you could load it from metadata.getOptions(). + // Map options = metadata.getOptions(); + // String defaultFieldsStr = options.get("crossClassDefaultFields"); + // if (defaultFieldsStr != null) { ... parse and store ... } + } @Override public void flush() {} @@ -182,8 +204,7 @@ public Object get(Object key) { Object params = keyAndMeta.key.getKeys().get(0); // FIXME: keyAndMeta.key structure might change Query query = p.parse(params.toString()); - // FIXME: OLuceneIndexEngineUtils.buildSortFields needs refactoring - final List sortFields = Collections.emptyList(); // Placeholder + final List sortFields = ArcadeLuceneEngineUtils.buildSortFields(arcadedbMetadata, null, DatabaseThreadLocal.INSTANCE.get()); // final List fields = OLuceneIndexEngineUtils.buildSortFields(arcadedbMetadata); LuceneQueryContext ctx = new LuceneQueryContext(null, searcher, query, sortFields); // FIXME diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneEngineUtils.java b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneEngineUtils.java new file mode 100644 index 0000000000..e9aa4127c5 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneEngineUtils.java @@ -0,0 +1,160 @@ +package com.arcadedb.lucene.engine; + +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.document.Document; +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.schema.Property; +import com.arcadedb.schema.Type; +import com.arcadedb.schema.DocumentType; + +import org.apache.lucene.search.SortField; +// Corrected import for SortField.Type +// import org.apache.lucene.search.SortField.первый; // This was incorrect in the prompt +// No, SortField.Type is an enum inside SortField, direct import not needed for Type itself, +// but rather SortField.Type.INT etc. + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +public class ArcadeLuceneEngineUtils { + + private static final Logger logger = Logger.getLogger(ArcadeLuceneEngineUtils.class.getName()); + + /** + * Builds a list of Lucene SortField objects based on sorting criteria + * specified in the metadata document. + * + * @param arcadedbMetadata The metadata document, typically from query options. + * Expected to contain a "sort" or "orderBy" field. + * The value can be a String (e.g., "fieldA ASC, fieldB DESC") + * or a List of Maps (e.g., [{"field": "fieldA", "direction": "ASC"}, ...]). + * @param indexDefinition Optional: The index definition, used to infer field types for sorting if not specified. + * @param database Optional: The database instance, used to get schema for type inference. + * @return A list of Lucene SortField objects. + */ + public static List buildSortFields(Document arcadedbMetadata, IndexDefinition indexDefinition, DatabaseInternal database) { + List sortFields = new ArrayList<>(); + if (arcadedbMetadata == null) { + return sortFields; + } + + Object sortCriteria = arcadedbMetadata.get("sort"); + if (sortCriteria == null) { + sortCriteria = arcadedbMetadata.get("orderBy"); + } + + if (sortCriteria == null) { + return sortFields; + } + + if (sortCriteria instanceof String) { + // Parse string like "fieldA ASC, fieldB DESC" + String[] criteria = ((String) sortCriteria).split(","); + for (String criterion : criteria) { + String[] parts = criterion.trim().split("\\s+"); // Use \\s+ for one or more spaces + String fieldName = parts[0].trim(); + if (fieldName.isEmpty()) continue; + + boolean reverse = parts.length > 1 && "DESC".equalsIgnoreCase(parts[1].trim()); + + SortField.Type sortType = inferSortType(fieldName, indexDefinition, database); + sortFields.add(new SortField(fieldName, sortType, reverse)); + } + } else if (sortCriteria instanceof List) { + // Parse list of maps, e.g., [{"field": "fieldA", "direction": "ASC"}, ...] + try { + @SuppressWarnings("unchecked") // Generic type for list elements from Document.get() + List criteriaList = (List) sortCriteria; + for (Object criterionObj : criteriaList) { + if (criterionObj instanceof Map) { + @SuppressWarnings("unchecked") + Map criterion = (Map) criterionObj; + String fieldName = criterion.get("field"); + String direction = criterion.get("direction"); + if (fieldName != null && !fieldName.trim().isEmpty()) { + boolean reverse = "DESC".equalsIgnoreCase(direction); + SortField.Type sortType = inferSortType(fieldName.trim(), indexDefinition, database); + sortFields.add(new SortField(fieldName.trim(), sortType, reverse)); + } + } else if (criterionObj instanceof String) { // Support list of strings like ["fieldA ASC", "fieldB DESC"] + String[] parts = ((String)criterionObj).trim().split("\\s+"); + String fieldName = parts[0].trim(); + if (fieldName.isEmpty()) continue; + boolean reverse = parts.length > 1 && "DESC".equalsIgnoreCase(parts[1].trim()); + SortField.Type sortType = inferSortType(fieldName, indexDefinition, database); + sortFields.add(new SortField(fieldName, sortType, reverse)); + } + } + } catch (ClassCastException e) { + logger.warning("Could not parse 'sort' criteria from List due to unexpected element types: " + e.getMessage()); + } + } else { + logger.warning("Unsupported 'sort' criteria format: " + sortCriteria.getClass().getName()); + } + + return sortFields; + } + + /** + * Infers the Lucene SortField.Type for a given field name. + * + * @param fieldName The name of the field. + * @param indexDefinition Optional: The index definition containing schema information. + * @param database Optional: The database instance for schema lookup. + * @return The inferred SortField.Type, defaults to STRING if type cannot be determined. + */ + private static SortField.Type inferSortType(String fieldName, IndexDefinition indexDefinition, DatabaseInternal database) { + // Special Lucene sort field for relevance score + if ("score".equalsIgnoreCase(fieldName) || SortField.FIELD_SCORE.toString().equals(fieldName)) { + return SortField.Type.SCORE; + } + // Special Lucene sort field for document order + if (SortField.FIELD_DOC.toString().equals(fieldName)) { + return SortField.Type.DOC; + } + + if (database != null && indexDefinition != null && indexDefinition.getTypeName() != null) { + DocumentType docType = database.getSchema().getType(indexDefinition.getTypeName()); + if (docType != null) { + Property property = docType.getProperty(fieldName); + if (property != null) { + Type propertyType = property.getType(); + switch (propertyType) { + case INTEGER: + case SHORT: + case BYTE: + return SortField.Type.INT; + case LONG: + case DATETIME: // Assuming DATETIME is stored as long epoch millis for sorting + case DATE: // Assuming DATE is stored as long epoch millis for sorting + return SortField.Type.LONG; + case FLOAT: + return SortField.Type.FLOAT; + case DOUBLE: + return SortField.Type.DOUBLE; + case STRING: + case TEXT: + case ENUM: + case UUID: // UUIDs are often sorted as strings + case BINARY: // Might be sorted as string, or custom if specific byte order needed + return SortField.Type.STRING; + // Add other types as needed, e.g., CUSTOM for specific comparators + // BOOLEAN is not directly sortable with a standard SortField.Type unless mapped to INT/STRING + default: + logger.finer("Cannot infer specific Lucene SortField.Type for ArcadeDB Type " + propertyType + " on field '" + fieldName + "'. Defaulting to STRING."); + return SortField.Type.STRING; + } + } else { + logger.finer("Property '" + fieldName + "' not found in type '" + indexDefinition.getTypeName() + "'. Defaulting to STRING sort type."); + } + } else { + logger.finer("DocumentType '" + indexDefinition.getTypeName() + "' not found in schema. Defaulting to STRING sort type for field '" + fieldName + "'."); + } + } + // Default if schema info is unavailable or field not found + logger.finer("Insufficient schema information for field '" + fieldName + "'. Defaulting to STRING sort type."); + return SortField.Type.STRING; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java index 3388defc9d..50924874c2 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java @@ -72,8 +72,10 @@ public class ArcadeLuceneFullTextIndexEngine extends OLuceneIndexEngineAbstract private LuceneQueryBuilder queryBuilder; // FIXME: Needs refactoring private final AtomicLong bonsayFileId = new AtomicLong(0); // TODO: Review if bonsayFileId is still relevant in ArcadeDB context - public ArcadeLuceneFullTextIndexEngine(Storage storage, String idxName, int id) { // Changed OStorage - super(storage, idxName); // FIXME: OLuceneIndexEngineAbstract constructor might have changed + // Removed 'id' parameter as it's not used by the superclass OLuceneIndexEngineAbstract + // and not used internally in this class. + public ArcadeLuceneFullTextIndexEngine(Storage storage, String idxName) { // Changed OStorage + super(storage, idxName); builder = new LuceneDocumentBuilder(); // FIXME: Needs refactoring } @@ -237,49 +239,76 @@ public int getUniqueIndexVersion(Object key) { @Override public Document buildDocument(Object key, Identifiable value) { // Changed OIdentifiable, Lucene Document if (indexDefinition.isAutomatic()) { - // FIXME: builder (LuceneDocumentBuilder) needs refactoring - // return builder.build(indexDefinition, key, value, collectionFields, metadata); - throw new UnsupportedOperationException("Automatic index document building not yet fully refactored."); + // builder is an instance of LuceneDocumentBuilder + // LuceneDocumentBuilder.build expects: IndexDefinition, Object key, Identifiable value, Map collectionFields, Document metadata + // collectionFields and metadata are available as protected members from OLuceneIndexEngineAbstract + return builder.build(indexDefinition, key, value, this.collectionFields, this.metadata); } else { return putInManualindex(key, value); } } private static Document putInManualindex(Object key, Identifiable oIdentifiable) { // Changed OIdentifiable, Lucene Document - Document doc = new Document(); // Lucene Document - doc.add(ArcadeLuceneIndexType.createRidField(oIdentifiable)); - doc.add(ArcadeLuceneIndexType.createIdField(oIdentifiable, key)); + Document luceneDoc = new Document(); // Lucene Document + luceneDoc.add(ArcadeLuceneIndexType.createRidField(oIdentifiable)); + // The ID field for manual indexes might store the key itself if simple, or a hash if complex. + // createIdField might be more about a specific format if needed. + // For now, let's assume the key itself or its parts are added below with specific field names. + // If a single "ID" field representing the whole key is desired for searching the key: + // luceneDoc.add(ArcadeLuceneIndexType.createIdField(oIdentifiable, key)); - // FIXME: This manual field creation is CRITICAL and needs to use ArcadeLuceneIndexType.createFields - // with proper Type resolution for each object 'o'. - if (key instanceof CompositeKey) { // Changed OCompositeKey + + if (key instanceof CompositeKey) { List keys = ((CompositeKey) key).getKeys(); - int k = 0; - for (Object o : keys) { - // Determine Type of 'o' here. For now, defaulting to String. - // Type type = Type.STRING; // Placeholder - this needs to be dynamic - // doc.add(ArcadeLuceneIndexType.createFields("k" + k, o, Field.Store.YES, false, type)); - doc.add(ArcadeLuceneIndexType.createField("k" + k, o, Field.Store.YES)); // Simplified call, needs type - k++; + // If this manual index has a definition with field names for composite parts: + List definedFields = null; + // Type[] definedTypes = null; // Not directly available for manual index key parts in IndexDefinition easily + // if (indexDefinition != null) { // indexDefinition is not available in this static context directly + // definedFields = indexDefinition.getFields(); + // // definedTypes = indexDefinition.getTypes(); // This is for the main value, not necessarily for key parts + // } + + for (int i = 0; i < keys.size(); i++) { + Object subKey = keys.get(i); + if (subKey == null) continue; + String fieldName = (definedFields != null && i < definedFields.size()) ? definedFields.get(i) : "k" + i; + Type type = Type.getTypeByValue(subKey); + // For manual keys, typically store and index them. Sorting is less common for manual keys. + List fields = ArcadeLuceneIndexType.createFields(fieldName, subKey, Field.Store.YES, false, type); + for (Field f : fields) { + luceneDoc.add(f); + } } } else if (key instanceof Collection) { @SuppressWarnings("unchecked") Collection keys = (Collection) key; - int k = 0; - for (Object o : keys) { - // Determine Type of 'o' here. For now, defaulting to String. - // Type type = Type.STRING; // Placeholder - this needs to be dynamic - // doc.add(ArcadeLuceneIndexType.createFields("k" + k, o, Field.Store.YES, false, type)); - doc.add(ArcadeLuceneIndexType.createField("k" + k, o, Field.Store.YES)); // Simplified call, needs type - k++; + int i = 0; + for (Object item : keys) { + if (item == null) continue; + String fieldName = "k" + i; // Implicit field name for collection items + Type type = Type.getTypeByValue(item); + List fields = ArcadeLuceneIndexType.createFields(fieldName, item, Field.Store.YES, false, type); + for (Field f : fields) { + luceneDoc.add(f); + } + i++; } - } else { - // Determine Type of 'key' here. For now, defaulting to String. - // Type type = Type.STRING; // Placeholder - this needs to be dynamic - // doc.add(ArcadeLuceneIndexType.createFields("k0", key, Field.Store.NO, false, type)); - doc.add(ArcadeLuceneIndexType.createField("k0", key, Field.Store.NO)); // Simplified call, needs type + } else if (key != null) { + // Single key + // String fieldName = (indexDefinition != null && !indexDefinition.getFields().isEmpty()) ? indexDefinition.getFields().get(0) : "k0"; + String fieldName = "k0"; // Default field name for single manual key + Type type = Type.getTypeByValue(key); + // Store.NO was used in original for single key; this means it's indexed but not retrievable from Lucene doc. + // Let's make it configurable or default to YES for consistency if this key is what user searches. + // For now, keeping Store.NO to match original hint, but this is questionable. + // If it's the actual key to be searched, it should likely be YES or its components stored. + // Given createFields also adds Point fields which are not stored, this might be okay. + List fields = ArcadeLuceneIndexType.createFields(fieldName, key, Field.Store.NO, false, type); + for (Field f : fields) { + luceneDoc.add(f); + } } - return doc; + return luceneDoc; } @Override diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java index 8cfe4060e0..9e5fba63a6 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java @@ -38,6 +38,8 @@ public class ArcadeLuceneCrossClassSearchFunction extends ArcadeLuceneSearchFunc public static final String NAME = "search_cross"; // Changed from SEARCH_CROSS private static final String LUCENE_CROSS_CLASS_ALGORITHM = "LUCENE_CROSS_CLASS"; // Placeholder + private ArcadeLuceneCrossClassIndexEngine crossClassEngineInstance = null; // Cache for the engine + public ArcadeLuceneCrossClassSearchFunction() { super(NAME, 1, 2); // query, [metadata] } @@ -146,33 +148,53 @@ protected ArcadeLuceneFullTextIndex searchForIndex( // FIXME: This signature mig } // Helper to get the specific cross-class engine instance - // This assumes there's a way to identify and retrieve this engine. - // It might be registered with a specific name or type. - private ArcadeLuceneCrossClassIndexEngine getCrossClassEngine(CommandContext ctx) { // FIXME - DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify - Collection indexes = database.getSchema().getIndexes(); - for (Index index : indexes) { - // FIXME: Need a reliable way to identify the CrossClassEngine. - // This could be by a specific name, or if the engine itself is registered as an Index. - // The original code checked index.getAlgorithm().equalsIgnoreCase(LUCENE_CROSS_CLASS) - // and then cast index.getInternal() to OLuceneFullTextIndex, which seems problematic as the - // cross class engine is not a typical "full text index" on a specific class. - // For now, assuming the ArcadeLuceneCrossClassIndexEngine might be registered as an Index itself - // with a specific algorithm name. - if (index.getAlgorithm().equalsIgnoreCase(LUCENE_CROSS_CLASS_ALGORITHM) && index instanceof ArcadeLuceneCrossClassIndexEngine) { - return (ArcadeLuceneCrossClassIndexEngine) index; - } - // Alternative: if the engine is not an Index, how is it accessed? - // Perhaps it's a global component or registered differently. - // The original code `(OLuceneFullTextIndex) index.getInternal()` suggests the index itself was a shell. + private ArcadeLuceneCrossClassIndexEngine getCrossClassEngine(CommandContext ctx) { + if (this.crossClassEngineInstance != null && this.crossClassEngineInstance.getDatabase() == ctx.getDatabase()) { + // Ensure cached engine is for the same database instance, though typically SQL functions are per-query. + // If function instances are per-query, caching might offer little benefit unless getCrossClassEngine is called multiple times in one execution. + // If functions are singletons, then caching is more useful but needs to be thread-safe or per-database-instance. + // For now, simple instance caching. If SQLFunctions are per-query, this cache won't persist across queries. + return this.crossClassEngineInstance; + } + + DatabaseInternal database = null; + if (ctx instanceof DatabaseContext) { // Check if CommandContext is or provides DatabaseContext + database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); + } else if (ctx.getDatabase() instanceof DatabaseInternal) { // Standard way to get Database + database = (DatabaseInternal) ctx.getDatabase(); + } + + if (database == null) { + logger.warning("Database not found in CommandContext for getCrossClassEngine. CommandContext type: " + ctx.getClass().getName()); + return null; } - // Fallback: Try to find an index whose *engine* is the cross-class one. - // This is speculative. - for (Index index : indexes) { - if (index.getAssociatedIndex() instanceof ArcadeLuceneCrossClassIndexEngine) { // getAssociatedIndex might be getEngine() - return (ArcadeLuceneCrossClassIndexEngine) index.getAssociatedIndex(); + + for (Index idx : database.getSchema().getIndexes()) { + IndexEngine engine = idx.getAssociatedIndex(); // Index.getAssociatedIndex() returns IndexEngine + if (engine instanceof ArcadeLuceneCrossClassIndexEngine) { + this.crossClassEngineInstance = (ArcadeLuceneCrossClassIndexEngine) engine; + logger.fine("Found ArcadeLuceneCrossClassIndexEngine via associated engine of index: " + idx.getName()); + return this.crossClassEngineInstance; + } + // Check if the index itself is a wrapper for the engine (less likely with getAssociatedIndex) + // or if algorithm matches (if factory handler associates this engine type with an algorithm for a "marker" index) + if (LUCENE_CROSS_CLASS_ALGORITHM.equals(idx.getAlgorithm())) { + if (engine instanceof ArcadeLuceneCrossClassIndexEngine) { // Should be true if factory did its job + this.crossClassEngineInstance = (ArcadeLuceneCrossClassIndexEngine) engine; + logger.fine("Found ArcadeLuceneCrossClassIndexEngine via algorithm on index: " + idx.getName()); + return this.crossClassEngineInstance; + } else if (engine == null && idx instanceof ArcadeLuceneCrossClassIndexEngine) { + // This case is if the Index object itself *is* the engine, which is not standard for ArcadeDB. + // But keeping a check for robustness during refactoring. + this.crossClassEngineInstance = (ArcadeLuceneCrossClassIndexEngine) idx; + logger.warning("Found ArcadeLuceneCrossClassIndexEngine directly as an Index instance (unusual): " + idx.getName()); + return this.crossClassEngineInstance; + } } - } + } + + logger.warning("ArcadeLuceneCrossClassIndexEngine not found. Ensure an index of type '" + LUCENE_CROSS_CLASS_ALGORITHM + + "' (which uses this engine) is defined, or that an existing index correctly associates this engine."); return null; } diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java index 520cf75087..7c9f759cbf 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java +++ b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java @@ -3,6 +3,10 @@ import com.arcadedb.database.DatabaseInternal; import com.arcadedb.database.Identifiable; import com.arcadedb.database.RID; +import com.arcadedb.database.TransactionContext; +import com.arcadedb.document.Document; +import com.arcadedb.engine.PaginatedFile; // For constructor, might not be directly used by Lucene +import com.arcadedb.engine.Storage; import com.arcadedb.index.Index; import com.arcadedb.index.IndexCursor; import com.arcadedb.index.IndexException; @@ -10,147 +14,265 @@ import com.arcadedb.index.RangeIndexCursor; import com.arcadedb.index.TypeIndex; import com.arcadedb.index.engine.IndexEngine; -import com.arcadedb.schema.IndexBuilder; // Added for build method +import com.arcadedb.lucene.engine.ArcadeLuceneFullTextIndexEngine; // Changed from OLuceneFullTextIndexEngine +import com.arcadedb.lucene.engine.LuceneIndexEngine; // The refactored interface +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; +import com.arcadedb.schema.IndexBuilder; import com.arcadedb.schema.IndexDefinition; +import com.arcadedb.schema.Schema; import com.arcadedb.schema.Type; -import com.arcadedb.tx.TransactionContext; -import java.io.IOException; // Added for compact +import java.io.IOException; +import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; + public class ArcadeLuceneFullTextIndex implements IndexInternal { private final DatabaseInternal database; private final String name; - private final boolean unique; - private final String analyzerClassName; - private final String filePath; - private final Type[] keyTypes; - // Other fields like IndexDefinition, IndexEngine, pageSize, nullStrategy, etc. - private IndexDefinition definition; // Will be set by setMetadata or build - - public ArcadeLuceneFullTextIndex(DatabaseInternal database, String name, boolean unique, String analyzerClassName, String filePath, Type[] keyTypes) { - this.database = database; + private IndexDefinition definition; + private String filePath; // Path where Lucene index files are stored + private int fileId; // ArcadeDB fileId, might not be directly used by Lucene files themselves + private PaginatedFile metadataFile; // For ArcadeDB metadata about this index + + private LuceneIndexEngine engine; // Changed type to interface + private STATUS status = STATUS.OFFLINE; + + // Moved constants to ArcadeLuceneIndexFactoryHandler + // public static final String LUCENE_ALGORITHM = "LUCENE"; + + + // Constructor matching AbstractIndex an IndexFactory might call + public ArcadeLuceneFullTextIndex(DatabaseInternal db, String name, String typeName, IndexDefinition definition, + String filePath, PaginatedFile metadataFile, PaginatedFile[] dataFiles, + PaginatedFile[] treeFiles, int fileId, int pageSize, + TransactionContext.AtomicOperation atomicOperation) { + this.database = db; this.name = name; - this.unique = unique; - this.analyzerClassName = analyzerClassName; - this.filePath = filePath; // Store filePath - this.keyTypes = keyTypes; // Store keyTypes - // Further initialization for Lucene engine would go here. - // This constructor might be called by the handler, then setMetadata/build by schema loading/creation. + this.definition = definition; + this.filePath = filePath; // Should be directory for Lucene + this.metadataFile = metadataFile; // ArcadeDB own metadata for this index + this.fileId = fileId; + // pageSize, dataFiles, treeFiles might be less relevant for Lucene which manages its own files. + + // Engine initialization is deferred to lazyInit or build/load + } + + private void lazyInit() { + if (engine == null) { + // Determine if this is part of an active transaction and if an engine instance already exists for this TX. + if (database.isTransactionActive() && database.getTransaction().getInvolvedIndexEngine(getName()) instanceof LuceneIndexEngine) { + this.engine = (LuceneIndexEngine) database.getTransaction().getInvolvedIndexEngine(getName()); + if (this.engine == null) { // Should not happen if getInvolvedIndexEngine returned one + throw new IndexException("Cannot find transactional Lucene engine for index " + getName() + " though it was marked as involved."); + } + } else { + String algorithm = getAlgorithm(); // Uses the overridden getAlgorithm() + com.arcadedb.document.Document engineMetadataDoc = new com.arcadedb.document.Document(database); + if (this.definition != null && this.definition.getOptions() != null) { + engineMetadataDoc.fromMap(this.definition.getOptions()); + } + + if (com.arcadedb.lucene.ArcadeLuceneIndexFactoryHandler.LUCENE_CROSS_CLASS_ALGORITHM.equalsIgnoreCase(algorithm)) { + ArcadeLuceneCrossClassIndexEngine crossEngine = new ArcadeLuceneCrossClassIndexEngine(this.fileId, database.getStorage(), this.name); + + // Construct IndexMetadata Pojo for crossEngine.init() + // OLuceneCrossClassIndexEngine.init takes IndexMetadata. + // IndexMetadata needs: name, typeName (class this index is on, can be null for cross-class marker), List propertyNames, Type[] keyTypes, String algorithm, boolean isAutomatic, Map options + IndexMetadata im = new IndexMetadata( + this.name, + this.definition.getPropertyNames(), + this.definition.getKeyTypes(), + this.definition.getOptions() + ); + im.setTypeName(this.definition.getTypeName()); // May be null if truly cross-class and not bound to a type + im.setAlgorithm(algorithm); + im.setIsAutomatic(this.isAutomatic()); + im.setUnique(this.isUnique()); + im.setNullStrategy(this.getNullStrategy()); + // Add other relevant properties from 'this.definition' to 'im' if needed by crossEngine.init() + + crossEngine.init(im); + this.engine = crossEngine; + } else { // Default to LUCENE_FULL_TEXT_ALGORITHM + ArcadeLuceneFullTextIndexEngine ftEngine = new ArcadeLuceneFullTextIndexEngine(database.getStorage(), name); + // OLuceneIndexEngineAbstract.init expects: String indexName, String indexType(algorithm), IndexDefinition, boolean isAutomatic, Document metadata + ftEngine.init(getName(), algorithm, definition, isAutomatic(), engineMetadataDoc); + this.engine = ftEngine; + } + } + this.status = STATUS.ONLINE; + } } - // --- IndexInternal Methods --- @Override public String getAssociatedFileName() { - return filePath; // Return stored filePath + return filePath; } @Override public void build(IndexBuilder builder) { - // This method is typically called when an index is being built from scratch. - // The IndexBuilder contains all necessary information. - // this.definition = builder.getIndexDefinition(); // Or create one - // Initialize/create the Lucene IndexWriter and other resources here. - throw new UnsupportedOperationException("Not yet implemented: build"); + this.definition = builder.getIndexDefinition(); + // filePath might be set by IndexBuilder or derived, ensure it's correct for Lucene (a directory path) + this.filePath = builder.getFilePath() != null ? builder.getFilePath() : database.getDatabasePath() + "/" + builder.getFileName(); + this.fileId = builder.getFileId(); // Get fileId from builder + + lazyInit(); // Initialize engine + try { + Document engineMetadata = new Document(database); + if (this.definition.getOptions() != null) { + engineMetadata.fromMap(this.definition.getOptions()); + } + + // Parameters for engine.create: + // valueSerializer, keySerializer: null for Lucene as it handles its own types. + // keyTypes: from definition + // nullPointerSupport: from definition + // propertyNames.size(): as keySize (number of indexed fields) + // clustersToIndex: from definition + // options: from definition + engine.create( + null, // valueSerializer + this.isAutomatic(), + this.getKeyTypes(), + this.getDefinition().isNullStrategyNode(), // nullPointerSupport + null, // keySerializer + this.getDefinition().getPropertyNames() != null ? this.getDefinition().getPropertyNames().size() : 0, // keySize + this.getDefinition().getClustersToIndex(), // clustersToIndex (might be null) + this.getDefinition().getOptions(), // engineProperties + engineMetadata // metadata Document for engine + ); + this.status = STATUS.ONLINE; + } catch (Exception e) { + throw new IndexException("Error during Lucene index build for index '" + getName() + "'", e); + } } @Override public void setMetadata(IndexDefinition definition, String filePath, int pageSize, byte nullStrategy) { this.definition = definition; - // this.filePath = filePath; // Already set in constructor, ensure consistency or update - // this.pageSize = pageSize; - // this.nullStrategy = nullStrategy; - throw new UnsupportedOperationException("Not yet implemented: setMetadata"); + this.filePath = filePath; + // pageSize and nullStrategy are part of definition or handled by Lucene engine differently. + // This method is usually for loading existing index metadata. + // We might need to re-init or load the engine here. + if (engine != null) { + engine.close(); // Close existing engine if any + } + engine = null; // Reset engine + lazyInit(); // Re-initialize with new metadata + // engine.load(...) might be relevant here if this implies loading an existing index. } @Override public STATUS getStatus() { - // Return current status, e.g., from engine - throw new UnsupportedOperationException("Not yet implemented: getStatus"); + return status; } - @Override public void setStatus(STATUS status) { - // Set current status, e.g., on engine - throw new UnsupportedOperationException("Not yet implemented: setStatus"); + this.status = status; + // Potentially pass this to the engine if it has its own status } @Override public void close() { - // Release Lucene resources (IndexWriter, IndexSearcher, Directory) - throw new UnsupportedOperationException("Not yet implemented: close"); + if (engine != null) { + engine.close(); + engine = null; + } + status = STATUS.OFFLINE; } @Override public void drop() { - // Remove Lucene index files from disk. - // Unregister from schema should be handled by Schema.dropIndex() calling this. - throw new UnsupportedOperationException("Not yet implemented: drop"); + if (engine != null) { + engine.delete(); // Engine handles file deletion + engine = null; + } + // Additional cleanup of ArcadeDB metadata files if any (e.g., this.metadataFile) + // This is usually handled by Schema.dropIndex calling this. + status = STATUS.OFFLINE; } @Override public int getFileId() { - // Lucene might not use file IDs in the same way ArcadeDB's native engine does. - // Return a sentinel or appropriate value. - return -1; + return fileId; // Or a specific ID for Lucene structure if different } @Override public T getComponent(String name, Class type) { - // Used for accessing underlying components, might be relevant for engine access. - throw new UnsupportedOperationException("Not yet implemented: getComponent"); + if (type.isAssignableFrom(engine.getClass())) { + return type.cast(engine); + } + return null; } @Override public Type[] getKeyTypes() { - return keyTypes; // Return stored keyTypes + return definition != null ? definition.getKeyTypes() : null; } @Override public byte[] getBinaryKeyTypes() { - // Convert Type[] to byte[] if necessary for serialization, or return null if not used. - throw new UnsupportedOperationException("Not yet implemented: getBinaryKeyTypes"); + // Lucene doesn't use this in the same way as binary comparable keys. + return null; } @Override public void setTypeIndex(TypeIndex typeIndex) { - // Associated with schema type's index list. - throw new UnsupportedOperationException("Not yet implemented: setTypeIndex"); + // Associated with schema type's index list. Store if needed. } @Override public TypeIndex getTypeIndex() { - throw new UnsupportedOperationException("Not yet implemented: getTypeIndex"); + return null; // Retrieve if stored } @Override public void scheduleCompaction() { - // Lucene has its own merging/optimization, might not map directly. - throw new UnsupportedOperationException("Not yet implemented: scheduleCompaction"); + // Lucene has IndexWriter.forceMerge or IndexWriter.maybeMerge. + // This could be a trigger for that. + lazyInit(); + // engine.forceMerge(); // FIXME: Add such a method to engine interface if needed } @Override public String getMostRecentFileName() { - // Relates to WAL, might not be applicable or needs specific handling for Lucene. - throw new UnsupportedOperationException("Not yet implemented: getMostRecentFileName"); + return null; // Not directly applicable } @Override public Map toJSON() { // Serialize index configuration/stats to JSON. - throw new UnsupportedOperationException("Not yet implemented: toJSON"); + // Include name, type, definition, engine stats. + Map json = new java.util.HashMap<>(); + json.put("name", getName()); + json.put("typeName", getTypeName()); + json.put("algorithm", getAlgorithm()); + if (definition != null) { + json.put("definition", definition.getOptions()); // Or more detailed definition + } + if (engine != null) { + // FIXME: engine should provide some stats or config + // json.put("engineStats", engine.getStats()); + } + return json; } @Override public Index getAssociatedIndex() { - // For sub-indexes, typically null for a main index. return null; } @@ -162,201 +284,288 @@ public String getName() { } @Override - public String getTypeName() { - // This should return the algorithm name, e.g., "LUCENE" - // return ArcadeLuceneLifecycleManager.LUCENE_ALGORITHM; // If constant is accessible - return "LUCENE"; // Or get from definition if set + public String getTypeName() { // This should be the Type's name this index is on, not algorithm + return definition != null ? definition.getTypeName() : null; } + @Override + public String getAlgorithm() { + // Return the actual algorithm from the definition if available + return (definition != null && definition.getAlgorithm() != null) ? + definition.getAlgorithm() : + com.arcadedb.lucene.ArcadeLuceneIndexFactoryHandler.LUCENE_FULL_TEXT_ALGORITHM; + } + + @Override public IndexDefinition getDefinition() { - // Return the stored IndexDefinition - if (this.definition == null) { - throw new UnsupportedOperationException("IndexDefinition not set for index: " + name); - } - return this.definition; + return definition; } @Override public boolean isUnique() { - return this.unique; + return definition != null && definition.isUnique(); // Lucene full-text usually not unique } @Override public List getPropertyNames() { - // Get from IndexDefinition - if (this.definition == null) throw new UnsupportedOperationException("Definition not set"); - return this.definition.getPropertyNames(); + return definition != null ? definition.getPropertyNames() : Collections.emptyList(); } @Override public long countEntries() { - // Count documents in Lucene index - throw new UnsupportedOperationException("Not yet implemented: countEntries"); + lazyInit(); + // engine.size(null) or engine.sizeInTx(null) + // The ValuesTransformer is for OrientDB's SBTree based indexes. For Lucene, it's just a doc count. + return engine.size(null); + } + + public long getRecordCount() { // From OLuceneFullTextIndex + return countEntries(); } + @Override public IndexCursor get(Object[] keys) { - // Perform Lucene search - throw new UnsupportedOperationException("Not yet implemented: get"); + lazyInit(); + if (keys == null || keys.length == 0 || keys[0] == null) { + throw new IllegalArgumentException("Lucene query key cannot be null."); + } + // Assuming keys[0] is the query string or a LuceneKeyAndMetadata object + // FIXME: This needs to adapt to how LuceneKeyAndMetadata is structured and if options are passed + Object queryKey = keys[0]; + Document metadata = null; + if (keys.length > 1 && keys[1] instanceof Map) { + metadata = new Document(database, (Map) keys[1]); + } else if (keys.length > 1 && keys[1] instanceof Document) { + metadata = (Document) keys[1]; + } + + // The engine's get method: Set getInTx(Object key, LuceneTxChanges changes) + // This needs to be wrapped in an IndexCursor. + // The key for engine.getInTx is likely LuceneKeyAndMetadata + // FIXME: Construct LuceneKeyAndMetadata correctly + LuceneKeyAndMetadata keyAndMeta = new LuceneKeyAndMetadata(queryKey, metadata, null); // Assuming CommandContext can be null here + + Set results = engine.getInTx(keyAndMeta, null); // Passing null for changes if not in tx or tx changes not used + return new LuceneIndexCursor(results.iterator()); // FIXME: LuceneIndexCursor needs to be implemented } @Override public IndexCursor get(Object[] keys, int limit) { - throw new UnsupportedOperationException("Not yet implemented: get with limit"); + // FIXME: Implement limit. Lucene TopDocs can handle this. + // This will require engine.getInTx or a similar method to accept a limit. + lazyInit(); + if (keys == null || keys.length == 0 || keys[0] == null) { + throw new IllegalArgumentException("Lucene query key cannot be null."); + } + Object queryKey = keys[0]; + Document metadata = new Document(database); // Default empty metadata + if (keys.length > 1 && keys[1] instanceof Map) { + metadata.fromMap((Map) keys[1]); + } else if (keys.length > 1 && keys[1] instanceof Document) { + metadata = (Document) keys[1]; + } + if (limit > 0) { + metadata.set("limit", limit); // Pass limit via metadata + } + LuceneKeyAndMetadata keyAndMeta = new LuceneKeyAndMetadata(queryKey, metadata, null); + Set results = engine.getInTx(keyAndMeta, null); + return new LuceneIndexCursor(results.iterator()); // FIXME: LuceneIndexCursor } @Override public Stream getRidsStream(Object[] keys) { - throw new UnsupportedOperationException("Not yet implemented: getRidsStream"); + IndexCursor cursor = get(keys); + return cursor.ridsStream(); } + public Set get(Object key) { // From OLuceneFullTextIndex, matching engine's getInTx + lazyInit(); + // This 'key' is likely LuceneKeyAndMetadata or the raw query string. + return engine.getInTx(key, null); // Assuming null for LuceneTxChanges if not in a tx context for this call + } + + public Set getRids(Object key) { // New method, if useful + lazyInit(); + // This 'key' is likely LuceneKeyAndMetadata or the raw query string. + // engine.getInTx returns Set + return engine.getInTx(key, null).stream().map(Identifiable::getIdentity).collect(Collectors.toSet()); + } + + @Override public RangeIndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded) { - throw new UnsupportedOperationException("Not yet implemented: range"); + throw new UnsupportedOperationException("Range queries are not directly supported by Lucene full-text index in this manner. Use Lucene query syntax."); } @Override public RangeIndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded, int limit) { - throw new UnsupportedOperationException("Not yet implemented: range with limit"); + throw new UnsupportedOperationException("Range queries are not directly supported by Lucene full-text index in this manner. Use Lucene query syntax."); } @Override public IndexCursor iterator(boolean ascendingOrder) { - // Iterate all documents - throw new UnsupportedOperationException("Not yet implemented: iterator"); + throw new UnsupportedOperationException("Full iteration is not typically efficient for Lucene. Use a match_all query if needed."); } @Override public IndexCursor iterator(boolean ascendingOrder, Object[] fromKey, boolean fromKeyInclusive) { - throw new UnsupportedOperationException("Not yet implemented: iterator with fromKey"); + throw new UnsupportedOperationException("Full iteration is not typically efficient for Lucene."); } @Override public IndexCursor descendingIterator() { - throw new UnsupportedOperationException("Not yet implemented: descendingIterator"); + throw new UnsupportedOperationException("Full iteration is not typically efficient for Lucene."); } @Override public IndexCursor descendingIterator(Object[] fromKey, boolean fromKeyInclusive) { - throw new UnsupportedOperationException("Not yet implemented: descendingIterator with fromKey"); + throw new UnsupportedOperationException("Full iteration is not typically efficient for Lucene."); } @Override public boolean supportsOrderedIterations() { - return false; // Lucene supports score-based ordering, key-based might not be natural. + return false; // Lucene orders by relevance score by default, not by key. } @Override public boolean isAutomatic() { - // Get from IndexDefinition - if (this.definition == null) throw new UnsupportedOperationException("Definition not set"); - return this.definition.isAutomatic(); + return definition != null && definition.isAutomatic(); } @Override public void setRebuilding(boolean rebuilding) { - // Set a flag if the index is rebuilding - throw new UnsupportedOperationException("Not yet implemented: setRebuilding"); + // Could set a flag or inform the engine } @Override public IndexEngine getEngine() { - // Return the LuceneIndexEngine instance associated with this index - throw new UnsupportedOperationException("Not yet implemented: getEngine"); + lazyInit(); + return engine; } @Override public boolean isValid() { - throw new UnsupportedOperationException("Not yet implemented: isValid"); + // Check if engine is initialized and Lucene index is readable + lazyInit(); + // FIXME: engine needs an isValid() or similar check + return engine != null; } @Override public Map getStats() { - // Return Lucene specific stats - throw new UnsupportedOperationException("Not yet implemented: getStats"); + // FIXME: engine should provide stats (num docs, etc.) + return Collections.emptyMap(); } @Override public void setStats(Map stats) { // Not typically set from outside - throw new UnsupportedOperationException("Not yet implemented: setStats"); } @Override public void compact() throws IOException { - // Trigger Lucene merge/optimize if applicable - throw new UnsupportedOperationException("Not yet implemented: compact"); + lazyInit(); + // engine.forceMerge(); // FIXME: Add to engine if needed } @Override public boolean isCompacting() { - // Check if Lucene merge/optimize is running - throw new UnsupportedOperationException("Not yet implemented: isCompacting"); + return false; // FIXME: engine should report this } @Override public List getFileIds() { - // Lucene manages its own files; this might not map directly. - throw new UnsupportedOperationException("Not yet implemented: getFileIds"); + return Collections.singletonList(fileId); // Main metadata file ID } @Override public int getPageSize() { - // Lucene doesn't use pages in the same way as ArcadeDB's native engine. - throw new UnsupportedOperationException("Not yet implemented: getPageSize"); + return -1; // Not page-based like ArcadeDB native } @Override public void setPageSize(int pageSize) { - throw new UnsupportedOperationException("Not yet implemented: setPageSize"); + // No-op for Lucene } @Override public byte getNullStrategy() { - // Get from IndexDefinition - if (this.definition == null) throw new UnsupportedOperationException("Definition not set"); - return this.definition.getNullStrategy().getValue(); + return definition != null ? definition.getNullStrategy().getValue() : Index.NULL_STRATEGY.ERROR.getValue(); } @Override public void setNullStrategy(byte nullStrategy) { - // Set in IndexDefinition (usually immutable after creation) - throw new UnsupportedOperationException("Not yet implemented: setNullStrategy"); + // Usually immutable } @Override public void set(TransactionContext tx, Object[] keys, RID[] rids) throws IndexException { - // Add entries to Lucene index - throw new UnsupportedOperationException("Not yet implemented: set"); + lazyInit(); + // This is for unique indexes usually. Lucene full-text is not typically unique. + // If used, it implies key -> RID mapping. + // For Lucene, it's document (derived from RID's record) -> indexed. + // This method needs careful interpretation for Lucene. + // Assuming keys[0] is the "key" to index (could be a document itself or fields) + // and rids[0] is the value. + if (keys == null || keys.length == 0 || rids == null || rids.length == 0) { + throw new IndexException("Keys and RIDs must be provided for Lucene set operation for index '" + getName() + "'."); + } + // Engine methods (put, remove) were refactored to take TransactionContext directly. + engine.put(tx, keys[0], rids[0]); } @Override public void remove(TransactionContext tx, Object[] keys, Identifiable rid) throws IndexException { - // Remove specific RID associated with keys - throw new UnsupportedOperationException("Not yet implemented: remove with rid"); + lazyInit(); + if (keys == null || keys.length == 0) { + throw new IndexException("Keys must be provided for Lucene remove operation for index '" + getName() + "'."); + } + // Engine methods (put, remove) were refactored to take TransactionContext directly. + if (rid != null) { + engine.remove(tx, keys[0], rid); + } else { + engine.remove(tx, keys[0]); // Remove all documents matching key + } } @Override public void remove(TransactionContext tx, Object[] keys) throws IndexException { - // Remove all RIDs associated with keys - throw new UnsupportedOperationException("Not yet implemented: remove"); + remove(tx, keys, null); // Remove all RIDs associated with these keys } @Override public IndexCursor range(boolean ascendingOrder) { - throw new UnsupportedOperationException("Not yet implemented: range without keys"); + throw new UnsupportedOperationException("Range queries without keys are not directly supported. Use a match_all query."); } @Override public IndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded, int limit, int skip) { - throw new UnsupportedOperationException("Not yet implemented: range with limit and skip"); + throw new UnsupportedOperationException("Range queries are not directly supported by Lucene full-text index in this manner. Use Lucene query syntax."); } @Override public int getAssociatedBucketId() { - // Lucene indexes are not directly associated with a single bucket in the same way. - return -1; // Or derive from schema/type if applicable + if (definition == null) return -1; + List bucketIds = definition.getBucketIds(); + return bucketIds != null && !bucketIds.isEmpty() ? bucketIds.get(0) : -1; + } + + // --- Lucene Specific Accessors --- + public IndexSearcher searcher() { + lazyInit(); + return engine.searcher(); + } + + public Analyzer indexAnalyzer() { + lazyInit(); + return engine.indexAnalyzer(); + } + + public Analyzer queryAnalyzer() { + lazyInit(); + return engine.queryAnalyzer(); } } diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java index 241fed17d6..50af17bf24 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java +++ b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java @@ -24,6 +24,7 @@ import com.arcadedb.index.CompositeKey; // Changed import import com.arcadedb.index.IndexDefinition; // Changed import import com.arcadedb.schema.Type; // Changed import +import com.arcadedb.lucene.util.LuceneDateTools; // Added import import java.io.UnsupportedEncodingException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -117,18 +118,29 @@ public static List createFields( luceneFields.add(new NumericDocValuesField(fieldName, number.longValue())); // Use long for DV for all integer types if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.intValue())); } - // Optionally, add the original value as a TextField if it needs to be searchable as text - // luceneFields.add(new TextField(fieldName, value.toString(), store)); - } else if (type == Type.DATETIME || type == Type.DATE || value instanceof Date) { - long time = (value instanceof Date) ? ((Date) value).getTime() : Long.parseLong(value.toString()); - luceneFields.add(new LongPoint(fieldName, time)); - luceneFields.add(new NumericDocValuesField(fieldName, time)); // For sorting/faceting - if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, time)); - // Optionally, add the original value as a TextField - // luceneFields.add(new TextField(fieldName, value.toString(), store)); - } else if (type == Type.STRING || value instanceof String) { + } else if (type == Type.DATETIME || type == Type.DATE) { + Long time = null; + if (value instanceof Date) { + time = ((Date) value).getTime(); + } else if (value instanceof Number) { + time = ((Number) value).longValue(); + } else if (value instanceof String) { + time = LuceneDateTools.parseDateTimeToMillis((String) value); + } + if (time != null) { + if (type == Type.DATE) { + time = LuceneDateTools.normalizeToDayEpochMillis(time); + } + luceneFields.add(new LongPoint(fieldName, time)); + luceneFields.add(new NumericDocValuesField(fieldName, time)); // For sorting/faceting + if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, time)); + } + // Optionally, add the original value as a TextField if searchable as text and not just date + // if (value != null && store == Field.Store.YES) luceneFields.add(new TextField(fieldName, value.toString(), store)); + } else if (type == Type.STRING || type == Type.TEXT || type == Type.ENUM || type == Type.UUID || value instanceof String) { // Added TEXT, ENUM, UUID String stringValue = value.toString(); - luceneFields.add(new TextField(fieldName, stringValue, store)); // Analyzed + // Defaulting to TextField (analyzed). If non-analyzed is preferred for some types (e.g. UUID, ENUM), use StringField. + luceneFields.add(new TextField(fieldName, stringValue, store)); // Or use StringField for non-analyzed: // luceneFields.add(new StringField(fieldName, stringValue, store)); if (Boolean.TRUE.equals(sort)) { @@ -217,4 +229,117 @@ public static Query createDeleteQuery( // Changed OIdentifiable, ODocument } return filter.build(); } + + /** + * Creates a Lucene Query for exact matching on a field, considering the field's schema type. + * + * @param fieldName The name of the field. + * @param value The value to match. + * @param type The ArcadeDB schema Type of the field. + * @param database The database instance (currently unused here, but might be useful for context or complex types). + * @return A Lucene Query. + */ + public static Query createExactFieldQuery(String fieldName, Object value, Type type, com.arcadedb.database.DatabaseInternal database) { + if (value == null) { + // Or handle as a specific query for null if Lucene supports it for the type, e.g. for checking existence. + // For now, a query that matches nothing or a specific "null value" term if that's how nulls are indexed. + // This behavior needs to align with how nulls are indexed by createFields. + // Assuming null means "match no specific value" for now, which might mean it's handled by query structure. + // A "must not exist" or "must exist" query is different. This is for "fieldName:null". + // Let's treat it as a TermQuery for "null" string for now if type is string, otherwise it's problematic for points. + if (type == Type.STRING || type == Type.TEXT || type == Type.ENUM) { + return new TermQuery(new Term(fieldName, "null")); // Or a special null marker if used during indexing + } + // For numeric/point types, matching "null" is usually done by ensuring the field *doesn't* exist, + // which is more complex (e.g., BooleanQuery with MUST_NOT(MatchAllDocs) + filter on field existence). + // For simplicity, an exact match for a null value on a point field should probably yield no results or error. + // Returning a query that matches nothing for non-string nulls. + return new BooleanQuery.Builder().build(); // Empty BooleanQuery matches nothing + } + + switch (type) { + case STRING: + case TEXT: + case ENUM: + case UUID: // UUIDs are typically indexed and queried as strings + return new TermQuery(new Term(fieldName, value.toString())); + case INTEGER: + if (value instanceof Number) { + return IntPoint.newExactQuery(fieldName, ((Number) value).intValue()); + } else { + try { + return IntPoint.newExactQuery(fieldName, Integer.parseInt(value.toString())); + } catch (NumberFormatException e) { + // Log warning, fallback to TermQuery + return new TermQuery(new Term(fieldName, value.toString())); + } + } + case LONG: + case DATETIME: // Assuming stored as long (epoch millis) + case DATE: // Assuming stored as long (epoch millis) + if (value instanceof Number) { + return LongPoint.newExactQuery(fieldName, ((Number) value).longValue()); + } else if (value instanceof java.util.Date) { + return LongPoint.newExactQuery(fieldName, ((java.util.Date) value).getTime()); + } else { + try { + Long time = null; + if (value instanceof String) { + time = LuceneDateTools.parseDateTimeToMillis((String) value); + } else { // Already Long or Date + // Handled by previous instanceof checks + } + if (time == null) { // Parsing failed or was not a convertible type + // Log warning or throw? For now, fallback to TermQuery on original string. + return new TermQuery(new Term(fieldName, value.toString())); + } + if (type == Type.DATE) { + time = LuceneDateTools.normalizeToDayEpochMillis(time); + } + return LongPoint.newExactQuery(fieldName, time); + } catch (NumberFormatException e) { // Should be caught by LuceneDateTools or earlier instanceof + return new TermQuery(new Term(fieldName, value.toString())); + } + } + case FLOAT: + if (value instanceof Number) { + return FloatPoint.newExactQuery(fieldName, ((Number) value).floatValue()); + } else { + try { + return FloatPoint.newExactQuery(fieldName, Float.parseFloat(value.toString())); + } catch (NumberFormatException e) { + return new TermQuery(new Term(fieldName, value.toString())); + } + } + case DOUBLE: + if (value instanceof Number) { + return DoublePoint.newExactQuery(fieldName, ((Number) value).doubleValue()); + } else { + try { + return DoublePoint.newExactQuery(fieldName, Double.parseDouble(value.toString())); + } catch (NumberFormatException e) { + return new TermQuery(new Term(fieldName, value.toString())); + } + } + case SHORT: + case BYTE: + if (value instanceof Number) { + return IntPoint.newExactQuery(fieldName, ((Number) value).intValue()); // Promote to IntPoint + } else { + try { + return IntPoint.newExactQuery(fieldName, Short.parseShort(value.toString())); + } catch (NumberFormatException e) { + return new TermQuery(new Term(fieldName, value.toString())); + } + } + case BOOLEAN: + // Lucene typically stores booleans as "T"/"F" or "true"/"false" in a StringField, + // or as 0/1 in a numeric field. Assuming string "true" or "false" as indexed by createFields default for strings. + return new TermQuery(new Term(fieldName, value.toString().toLowerCase(Locale.ENGLISH))); + default: + // For BINARY, EMBEDDED, LINK etc., default to TermQuery on string representation. + // This might not be effective unless specific string representations are indexed. + return new TermQuery(new Term(fieldName, value.toString())); + } + } } diff --git a/lucene/src/main/java/com/arcadedb/lucene/parser/ArcadeLuceneMultiFieldQueryParser.java b/lucene/src/main/java/com/arcadedb/lucene/parser/ArcadeLuceneMultiFieldQueryParser.java new file mode 100644 index 0000000000..99264342eb --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/parser/ArcadeLuceneMultiFieldQueryParser.java @@ -0,0 +1,183 @@ +/* + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.arcadedb.lucene.parser; + +import com.arcadedb.schema.Type; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.DateTools; // For date parsing, if needed +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermRangeQuery; // For newStringRange +import org.apache.lucene.util.BytesRef; + +import java.text.SimpleDateFormat; // Example for date parsing +import java.util.Date; // Example for date parsing +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class ArcadeLuceneMultiFieldQueryParser extends MultiFieldQueryParser { + + private static final Logger logger = Logger.getLogger(ArcadeLuceneMultiFieldQueryParser.class.getName()); + + private final Map fieldTypes; + + // Date format constants removed, will use LuceneDateTools + + public ArcadeLuceneMultiFieldQueryParser(Map fieldTypes, String[] fields, Analyzer analyzer, Map boosts) { + super(fields, analyzer, boosts); + this.fieldTypes = fieldTypes != null ? new HashMap<>(fieldTypes) : new HashMap<>(); + } + + public ArcadeLuceneMultiFieldQueryParser(Map fieldTypes, String[] fields, Analyzer analyzer) { + super(fields, analyzer); + this.fieldTypes = fieldTypes != null ? new HashMap<>(fieldTypes) : new HashMap<>(); + } + + protected Type getFieldType(String field) { + return fieldTypes.get(field); + } + + @Override + protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException { + Type fieldType = getFieldType(field); + + if (fieldType == null) { + logger.log(Level.FINE, "No type information for field {0} in range query, defaulting to string range.", field); + fieldType = Type.STRING; // Default to string range if type unknown + } + + // Lucene's default MultiFieldQueryParser uses TermRangeQuery for ranges on text fields. + // For specific data types, we need to create appropriate Point range queries. + + try { + switch (fieldType) { + case STRING: + case TEXT: + // For string ranges, ensure part1 and part2 are not null for TermRangeQuery.newStringRange + // The superclass handles * as open range for TermRangeQuery. + // If super.newRangeQuery is called, it will likely create a TermRangeQuery. + // TermRangeQuery.newStringRange is more explicit for string ranges. + BytesRef lowerTerm = part1 == null ? null : new BytesRef(part1); + BytesRef upperTerm = part2 == null ? null : new BytesRef(part2); + return TermRangeQuery.newStringRange(field, part1, part2, startInclusive, endInclusive); + + case INTEGER: + Integer lowerInt = (part1 == null || "*".equals(part1)) ? null : Integer.parseInt(part1); + Integer upperInt = (part2 == null || "*".equals(part2)) ? null : Integer.parseInt(part2); + return IntPoint.newRangeQuery(field, + lowerInt == null ? Integer.MIN_VALUE : (startInclusive ? lowerInt : lowerInt + 1), + upperInt == null ? Integer.MAX_VALUE : (endInclusive ? upperInt : upperInt - 1)); + + case LONG: + case DATETIME: + case DATE: + Long lowerLong = com.arcadedb.lucene.util.LuceneDateTools.parseDateTimeToMillis(part1); + Long upperLong = com.arcadedb.lucene.util.LuceneDateTools.parseDateTimeToMillis(part2); + + if (fieldType == Type.DATE) { + if (lowerLong != null) lowerLong = com.arcadedb.lucene.util.LuceneDateTools.normalizeToDayEpochMillis(lowerLong); + if (upperLong != null) upperLong = com.arcadedb.lucene.util.LuceneDateTools.normalizeToDayEpochMillis(upperLong); + } + + // Adjust for inclusive/exclusive after potential null from parsing + long actualLowerLong = lowerLong == null ? Long.MIN_VALUE : (startInclusive ? lowerLong : lowerLong + 1L); + if (lowerLong == null && "*".equals(part1)) actualLowerLong = Long.MIN_VALUE; // Explicit open start + else if (lowerLong == null && part1 != null) throw new ParseException("Cannot parse lower date range: " + part1); + + + long actualUpperLong = upperLong == null ? Long.MAX_VALUE : (endInclusive ? upperLong : upperLong - 1L); + if (upperLong == null && "*".equals(part2)) actualUpperLong = Long.MAX_VALUE; // Explicit open end + else if (upperLong == null && part2 != null) throw new ParseException("Cannot parse upper date range: " + part2); + + // Ensure lower is not greater than upper after adjustments if both are specified + if (lowerLong != null && upperLong != null && actualLowerLong > actualUpperLong) { + actualLowerLong = lowerLong; // Reset to original parsed if adjustments inverted range for point fields + actualUpperLong = upperLong; + // For point fields, if startInclusive=false means actual_low = low+1, endInclusive=false means actual_high = high-1 + // If after this actual_low > actual_high, it means no values can exist. + // Lucene's LongPoint.newRangeQuery handles this correctly by creating a query that matches nothing. + } + + return LongPoint.newRangeQuery(field, actualLowerLong, actualUpperLong); + case LONG: // Separate from DATE/DATETIME for clarity if parseDateTimeToMillis is too specific + Long lowerPlainLong = (part1 == null || "*".equals(part1)) ? null : Long.parseLong(part1); + Long upperPlainLong = (part2 == null || "*".equals(part2)) ? null : Long.parseLong(part2); + return LongPoint.newRangeQuery(field, + lowerPlainLong == null ? Long.MIN_VALUE : (startInclusive ? lowerPlainLong : lowerPlainLong + 1L), + upperPlainLong == null ? Long.MAX_VALUE : (endInclusive ? upperPlainLong : upperPlainLong - 1L)); + + case FLOAT: + Float lowerFloat = (part1 == null || "*".equals(part1)) ? null : Float.parseFloat(part1); + Float upperFloat = (part2 == null || "*".equals(part2)) ? null : Float.parseFloat(part2); + // Point queries are exclusive for lower, inclusive for upper by default with null/MIN/MAX handling. + // Adjusting for inclusive/exclusive: + float actualLowerFloat = lowerFloat == null ? Float.NEGATIVE_INFINITY : (startInclusive ? lowerFloat : Math.nextUp(lowerFloat)); + float actualUpperFloat = upperFloat == null ? Float.POSITIVE_INFINITY : (endInclusive ? upperFloat : Math.nextDown(upperFloat)); + return FloatPoint.newRangeQuery(field, actualLowerFloat, actualUpperFloat); + + + case DOUBLE: + Double lowerDouble = (part1 == null || "*".equals(part1)) ? null : Double.parseDouble(part1); + Double upperDouble = (part2 == null || "*".equals(part2)) ? null : Double.parseDouble(part2); + double actualLowerDouble = lowerDouble == null ? Double.NEGATIVE_INFINITY : (startInclusive ? lowerDouble : Math.nextUp(lowerDouble)); + double actualUpperDouble = upperDouble == null ? Double.POSITIVE_INFINITY : (endInclusive ? upperDouble : Math.nextDown(upperDouble)); + return DoublePoint.newRangeQuery(field, actualLowerDouble, actualUpperDouble); + + case SHORT: + case BYTE: + // Promote to IntPoint for querying, as Lucene has no ShortPoint/BytePoint + Integer lowerShortOrByte = (part1 == null || "*".equals(part1)) ? null : Integer.parseInt(part1); + Integer upperShortOrByte = (part2 == null || "*".equals(part2)) ? null : Integer.parseInt(part2); + return IntPoint.newRangeQuery(field, + lowerShortOrByte == null ? Integer.MIN_VALUE : (startInclusive ? lowerShortOrByte : lowerShortOrByte + 1), + upperShortOrByte == null ? Integer.MAX_VALUE : (endInclusive ? upperShortOrByte : upperShortOrByte - 1)); + + default: + logger.log(Level.WARNING, "Unhandled type {0} for field {1} in range query. Defaulting to string range.", new Object[]{fieldType, field}); + return TermRangeQuery.newStringRange(field, part1, part2, startInclusive, endInclusive); + } + } catch (NumberFormatException e) { + throw new ParseException("Could not parse number in range query for field " + field + ": " + e.getMessage()); + } + // Removed catch for java.text.ParseException as LuceneDateTools handles its own parsing issues or returns null + } + + // Date parsing helper removed, now using LuceneDateTools + + // Wildcard, Prefix, Fuzzy queries usually apply to text fields. + // The superclass versions are generally fine. If specific behavior is needed + // for non-text fields (e.g., to disallow or handle differently), + // these methods can be overridden. For now, relying on superclass. + + // @Override + // protected Query getWildcardQuery(String field, String termStr) throws ParseException { + // Type fieldType = getFieldType(field); + // if (fieldType != null && fieldType.isNumeric()) { + // // Wildcards on numeric points don't make sense. + // // Could throw error or return a MatchNoDocsQuery, or let super handle (might error). + // // For now, let super decide, it might try to parse termStr as a number. + // } + // return super.getWildcardQuery(field, termStr); + // } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java index 0c311f42d5..b9a0d08f63 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java +++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java @@ -1,113 +1,282 @@ package com.arcadedb.lucene.query; +import com.arcadedb.database.Database; import com.arcadedb.database.Identifiable; import com.arcadedb.database.RID; +import com.arcadedb.database.RecordId; // ArcadeDB RecordId for context import com.arcadedb.index.IndexCursor; +import com.arcadedb.lucene.engine.LuceneIndexEngine; // Assumed engine interface +import com.arcadedb.lucene.index.ArcadeLuceneIndexType; // For RID field name +import com.arcadedb.query.sql.executor.Result; +import com.arcadedb.query.sql.executor.ResultInternal; +import java.io.IOException; +import java.util.Collections; import java.util.Iterator; import java.util.Map; import java.util.NoSuchElementException; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; -// import org.apache.lucene.search.ScoreDoc; -// import org.apache.lucene.search.IndexSearcher; -// import org.apache.lucene.document.Document; -// import java.io.IOException; +import org.apache.lucene.document.Document; // Lucene Document +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TotalHits; public class LuceneIndexCursor implements IndexCursor { - // private ScoreDoc[] scoreDocs; - // private IndexSearcher searcher; - // private int currentIndex = 0; - // private Document currentDocument; - // private RID currentRID; + private static final Logger logger = Logger.getLogger(LuceneIndexCursor.class.getName()); - // public LuceneIndexCursor(ScoreDoc[] scoreDocs, IndexSearcher searcher) { - // this.scoreDocs = scoreDocs; - // this.searcher = searcher; - // // Potentially pre-fetch the first one or do it in hasNext/next - // } + private final LuceneQueryContext queryContext; + private final LuceneIndexEngine engine; // Engine for callbacks + private final com.arcadedb.document.Document metadata; // ArcadeDB Document for query metadata + + private ScoreDoc[] scoreDocs; + private IndexSearcher searcher; + private int currentIndex = -1; // Before the first element + private RID currentRID; + private float currentScore; + private Map currentProximityInfo; // For contextual data like highlights + + private TopDocs topDocs; + + + public LuceneIndexCursor(LuceneQueryContext queryContext, + LuceneIndexEngine engine, + com.arcadedb.document.Document metadata) { + this.queryContext = queryContext; + this.engine = engine; + this.metadata = metadata; + this.searcher = queryContext.getSearcher(); // Get the potentially transactional searcher + + executeSearch(); + } + + // Constructor for when results (Set) are already fetched, e.g. from engine.getInTx() + // This is a simplified cursor that iterates over pre-fetched RIDs without scores or Lucene docs. + private Iterator preFetchedResultsIterator; + private Identifiable currentPreFetched; + private int preFetchedCount; + + public LuceneIndexCursor(Set preFetchedResults) { + this.queryContext = null; // Not applicable + this.engine = null; // Not applicable + this.metadata = null; // Not applicable + if (preFetchedResults != null) { + this.preFetchedResultsIterator = preFetchedResults.iterator(); + this.preFetchedCount = preFetchedResults.size(); + } else { + this.preFetchedResultsIterator = Collections.emptyIterator(); + this.preFetchedCount = 0; + } + } + + + private void executeSearch() { + if (queryContext == null) return; // Should not happen if not using pre-fetched constructor + + try { + int limit = queryContext.getContext() != null ? queryContext.getContext().getLimit() : Integer.MAX_VALUE; + if (limit == -1) limit = Integer.MAX_VALUE; // SQL limit -1 means no limit + + if (queryContext.getSort() != null) { + this.topDocs = searcher.search(queryContext.getQuery(), limit, queryContext.getSort()); + } else { + this.topDocs = searcher.search(queryContext.getQuery(), limit); + } + this.scoreDocs = topDocs.scoreDocs; + } catch (IOException e) { + logger.log(Level.SEVERE, "Error executing Lucene search", e); + this.scoreDocs = new ScoreDoc[0]; // Empty results on error + this.topDocs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]); + } + } @Override public Object[] getKeys() { - // This would typically return the terms that matched for the current document, - // which might not be straightforward or always relevant for a Lucene full-text search result. - // Or, if the cursor iterates over specific keys that led to this document. - throw new UnsupportedOperationException("Not yet implemented: getKeys"); + // For Lucene, the "keys" are the search terms. This is not usually returned per document. + // If queryContext.getQuery() is available, one could try to extract terms, but it's complex. + if (currentRID != null) { + // Could potentially store the query that led to this hit if needed. + // For now, returning null as it's not a natural fit. + return null; + } + throw new NoSuchElementException("No current element or keys not applicable"); } @Override public Identifiable getRecord() { - // if (currentRID == null && currentDocument != null) { - // // Assuming RID is stored in a field, e.g., "RID" - // String ridString = currentDocument.get("RID"); - // if (ridString != null) { - // currentRID = new RID(null, ridString); // Database instance might be needed - // } - // } - // return currentRID; - throw new UnsupportedOperationException("Not yet implemented: getRecord"); + // In ArcadeDB, IndexCursor usually returns RIDs. The record is loaded by the caller. + // If this cursor *must* return the full record, a DB lookup is needed. + // For now, consistent with returning RID via next() and getRID(). + // This method could load and cache it if frequently used. + if (currentRID != null && queryContext != null && queryContext.getContext() != null) { + return queryContext.getContext().getDatabase().lookupByRID(currentRID, true); + } + if (currentPreFetched != null) { + return currentPreFetched; + } + return null; + } + + public RID getRID() { + if (currentRID != null) { + return currentRID; + } + if (currentPreFetched != null) { + return currentPreFetched.getIdentity(); + } + return null; } + @Override public Map getProperties() { - throw new UnsupportedOperationException("Not implemented for LuceneIndexCursor"); + // This could return highlights and score if structured appropriately. + // The currentProximityInfo is designed for this. + return currentProximityInfo != null ? currentProximityInfo : Collections.emptyMap(); } @Override - public int getScore() { - // if (currentIndex > 0 && currentIndex <= scoreDocs.length) { - // return (int) (scoreDocs[currentIndex -1].score * 1000); // Example scaling - // } - return 0; + public float getScore() { // Changed from int to float to match Lucene score + return currentScore; } @Override public boolean hasNext() { - // return currentIndex < scoreDocs.length; - throw new UnsupportedOperationException("Not yet implemented: hasNext"); + if (preFetchedResultsIterator != null) { + return preFetchedResultsIterator.hasNext(); + } + if (scoreDocs == null) { + return false; + } + return (currentIndex + 1) < scoreDocs.length; } @Override public Identifiable next() { - // if (!hasNext()) { - // throw new NoSuchElementException(); - // } - // try { - // currentDocument = searcher.doc(scoreDocs[currentIndex].doc); - // currentRID = null; // Reset so getRecord re-fetches it - // currentIndex++; - // return getRecord(); // This might need the database instance to load the actual record - // } catch (IOException e) { - // throw new RuntimeException("Error fetching document from Lucene index", e); - // } - throw new UnsupportedOperationException("Not yet implemented: next"); + if (preFetchedResultsIterator != null) { + if (!preFetchedResultsIterator.hasNext()) { + throw new NoSuchElementException(); + } + currentPreFetched = preFetchedResultsIterator.next(); + this.currentRID = currentPreFetched.getIdentity(); // Store RID + this.currentScore = 1.0f; // Pre-fetched results usually don't carry Lucene score directly + this.currentProximityInfo = Collections.singletonMap("$score", this.currentScore); + return currentPreFetched; // Or just currentRID if API prefers that + } + + if (!hasNext()) { + throw new NoSuchElementException(); + } + currentIndex++; + ScoreDoc scoreDoc = scoreDocs[currentIndex]; + try { + // Using storedFields().document() is preferred in modern Lucene for retrieving stored fields. + // searcher.doc(scoreDoc.doc) retrieves all (including non-stored if they were indexed in a certain way, but generally for stored). + Document luceneDoc = searcher.storedFields().document(scoreDoc.doc); + String ridString = luceneDoc.get(ArcadeLuceneIndexType.RID); // Use constant from ArcadeLuceneIndexType + + if (ridString == null) { + // Fallback or try another RID field if there are multiple conventions (e.g. from older data) + // For now, log and skip if primary RID field is missing. + logger.log(Level.WARNING, "Lucene document " + scoreDoc.doc + " is missing RID field (" + ArcadeLuceneIndexType.RID + ")"); + // Try to advance to next valid document or return null/throw + return next(); // Recursive call to try next, or could throw. Be careful with recursion. + } + + Database currentDb = queryContext != null && queryContext.getContext() != null ? queryContext.getContext().getDatabase() : null; + this.currentRID = new RID(currentDb, ridString); // Pass database if available for cluster info + this.currentScore = scoreDoc.score; + + // Prepare contextual data (score, highlights) + this.currentProximityInfo = new HashMap<>(); + this.currentProximityInfo.put("$score", this.currentScore); + + if (engine != null && queryContext != null) { + // The RecordId for context needs a way to carry this info if onRecordAddedToResultSet modifies it. + // Let's assume RecordId is primarily for identity, and contextual info is managed by this cursor + // or passed directly to some wrapper if needed. + // For now, `onRecordAddedToResultSet` might populate `queryContext.fragments` + // which we can then retrieve here if needed. + RecordId contextualRid = new RecordId(this.currentRID); // Create a new RecordId instance for context + + // Call engine callback to potentially populate highlights in queryContext or for other processing + engine.onRecordAddedToResultSet(queryContext, contextualRid, luceneDoc, scoreDoc); + + // Retrieve fragments if populated by the callback + if (queryContext.getFragments() != null && !queryContext.getFragments().isEmpty()) { + queryContext.getFragments().forEach((field, frags) -> { + if (frags != null && frags.length > 0) { + StringBuilder sb = new StringBuilder(); + for (org.apache.lucene.search.highlight.TextFragment frag : frags) { + if (frag != null && frag.getScore() > 0) { // frag.getScore() might not exist, check TextFragment API + sb.append(frag.toString()); + } + } + if (sb.length() > 0) { + this.currentProximityInfo.put("$" + field + "_hl", sb.toString()); + } + } + }); + queryContext.getFragments().clear(); // Clear for next record + } + } + + // IndexCursor traditionally returns Identifiable (which can be just the RID) + // If the caller needs the full record, they call getRecord(). + return this.currentRID; + + } catch (IOException e) { + throw new RuntimeException("Error fetching document from Lucene index", e); + } } @Override public void close() { - // Release any Lucene resources if necessary, e.g., if the searcher was context-specific. - // scoreDocs = null; - // searcher = null; + // Release Lucene resources if this cursor specifically acquired them. + // If searcher is managed by engine (e.g. via SearcherManager), + // this cursor typically doesn't close/release the searcher. + scoreDocs = null; + // searcher = null; // Don't nullify if it's shared from engine/queryContext } @Override - public long size() { - // return scoreDocs != null ? scoreDocs.length : 0; - throw new UnsupportedOperationException("Not yet implemented: size"); + public long getCount() { // Changed from size() to match typical usage for total hits + if (preFetchedResultsIterator != null) { + return preFetchedCount; + } + return topDocs != null && topDocs.totalHits != null ? topDocs.totalHits.value : 0; } + @Override + public long size() { // Kept for IndexCursor interface if it uses size() for current iteration count + return getCount(); + } + + @Override public void setLimit(int limit) { - throw new UnsupportedOperationException("Not supported after creation."); + // Limit should be applied during the search execution. + throw new UnsupportedOperationException("Limit must be set before search execution via CommandContext or metadata."); } @Override public int getLimit() { - return -1; // Or actual limit if supported + // Return the limit that was applied to this cursor's search + if (queryContext != null && queryContext.getContext() != null) { + return queryContext.getContext().getLimit(); + } + return -1; } @Override public boolean isPaginated() { - return true; // Or based on actual implementation + // Lucene TopDocs inherently supports pagination if the search is re-executed with 'searchAfter'. + // This simple cursor iterates a fixed set of top N docs. So, it's "paginated" in the sense + // that it represents one page of results. + return true; } } diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneKeyAndMetadata.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneKeyAndMetadata.java new file mode 100644 index 0000000000..861f7eecef --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneKeyAndMetadata.java @@ -0,0 +1,84 @@ +/* + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.arcadedb.lucene.query; + +import com.arcadedb.document.Document; +import com.arcadedb.index.CompositeKey; // If key can be a CompositeKey +import com.arcadedb.query.sql.executor.CommandContext; + +import java.util.Map; + +/** + * A container to pass a query key (which can be a simple string, + * a CompositeKey, or other structures) along with associated metadata Document. + * The metadata can include options for highlighting, sorting, limits, etc. + */ +public class LuceneKeyAndMetadata { + + public final Object key; + public final Document metadata; + private CommandContext context; // Optional command context + + /** + * Constructor. + * + * @param key The main query key (e.g., String, CompositeKey). + * @param metadata A Document containing additional query parameters and options. + */ + public LuceneKeyAndMetadata(Object key, Document metadata) { + this.key = key; + this.metadata = metadata != null ? metadata : new Document(null); // Ensure metadata is never null + } + + /** + * Constructor with command context. + * + * @param key The main query key. + * @param metadata A Document containing additional query parameters. + * @param context The SQL command execution context. + */ + public LuceneKeyAndMetadata(Object key, Document metadata, CommandContext context) { + this.key = key; + this.metadata = metadata != null ? metadata : new Document(null); // Ensure metadata is never null + this.context = context; + } + + + public Object getKey() { + return key; + } + + public Document getMetadata() { + return metadata; + } + + public CommandContext getContext() { + return context; + } + + public LuceneKeyAndMetadata setContext(CommandContext context) { + this.context = context; + return this; + } + + /** + * Helper to get metadata as a Map, typically for options. + * @return Map representation of metadata, or empty map if null. + */ + public Map getMetadataAsMap() { + return this.metadata.toMap(); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java new file mode 100644 index 0000000000..007154ccde --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java @@ -0,0 +1,179 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * Copyright 2014 Orient Technologies. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.query; + +import com.arcadedb.database.Identifiable; // Changed +import com.arcadedb.exception.ArcadeDBException; // Changed +import com.arcadedb.lucene.tx.LuceneTxChanges; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.highlight.TextFragment; + +/** Created by Enrico Risa on 08/01/15. */ +public class LuceneQueryContext { // Changed class name + private final CommandContext context; // Changed + private final IndexSearcher searcher; + private final Query query; + private final Sort sort; + private Optional changes; // FIXME: Needs refactoring + private HashMap fragments; + + public LuceneQueryContext( // Changed + final CommandContext context, final IndexSearcher searcher, final Query query) { + this(context, searcher, query, Collections.emptyList()); + } + + public LuceneQueryContext( // Changed + final CommandContext context, + final IndexSearcher searcher, + final Query query, + final List sortFields) { + this.context = context; + this.searcher = searcher; + this.query = query; + if (sortFields == null || sortFields.isEmpty()) { // Added null check + sort = null; + } else { + sort = new Sort(sortFields.toArray(new SortField[0])); // Changed to new SortField[0] + } + changes = Optional.empty(); + fragments = new HashMap<>(); + } + + public boolean isInTx() { + return changes.isPresent(); + } + + public LuceneQueryContext withChanges(final LuceneTxChanges changes) { // FIXME: Needs refactoring + this.changes = Optional.ofNullable(changes); + return this; + } + + public LuceneQueryContext addHighlightFragment( + final String field, final TextFragment[] fieldFragment) { + fragments.put(field, fieldFragment); + return this; + } + + public CommandContext getContext() { // Changed + return context; + } + + public Query getQuery() { + return query; + } + + public Optional getChanges() { // FIXME: Needs refactoring + return changes; + } + + public Sort getSort() { + return sort; + } + + public IndexSearcher getSearcher() { + // FIXME: LuceneTxChanges and its searcher() method need refactoring + return changes.map(c -> new IndexSearcher(multiReader(c))).orElse(searcher); + } + + private MultiReader multiReader(final LuceneTxChanges luceneTxChanges) { // FIXME: Needs refactoring + final IndexReader primaryReader = searcher.getIndexReader(); + // FIXME: luceneTxChanges.searcher() needs to be refactored and return an IndexSearcher + final IndexReader txReader = luceneTxChanges.searcher().getIndexReader(); + try { + // Lucene's MultiReader takes an array of IndexReaders. + // The boolean for sharing readers is gone in some modern versions, + // lifecycle of readers passed to MultiReader should be managed by the caller if they are not to be closed by MultiReader. + // However, if primaryReader and txReader are obtained just for this MultiReader, + // it might be okay for MultiReader to close them. + // The decRef logic was for when readers were shared. If they are not shared, it's not needed. + // Let's assume for now they are not shared and MultiReader can own them. + // If they are shared/managed elsewhere, then incRef/decRef or try-with-resources on the MultiReader is needed. + // For Lucene 9+, just passing readers is fine, their lifecycle is tricky. + // One common pattern is that MultiReader does NOT close the readers given to it by default. + // The `searcher.getIndexReader()` typically gives a reader that should not be closed by MultiReader if searcher is still live. + // `txReader` from `luceneTxChanges.searcher().getIndexReader()` also needs care. + // The original decRef implies they were "taken over". + // A safer approach for modern Lucene if readers are managed (e.g. by SearcherManager / NRTManager): + // DONT call decRef here. Ensure MultiReader is closed after use, and that it DOES NOT close its sub-readers + // if they are still managed externally. + // The constructor `new MultiReader(IndexReader... subReaders)` does NOT take ownership (doesn't close them). + + // Given the original decRef, it implies MultiReader was taking ownership. + // This is not standard for the varags MultiReader constructor. + // The constructor `MultiReader(IndexReader[] r, boolean closeSubReaders)` is gone. + // Let's assume the readers passed are temporary or their lifecycle is handled by the SearcherManager from which they came. + // If txReader is from a RAMDirectory, it's simpler. + // This part is tricky without knowing exactly how primaryReader and txReader are managed. + // For now, will replicate the structure but acknowledge the complexity. + // One option: increase ref count before passing to MultiReader, then MultiReader can decRef on its close. + // primaryReader.incRef(); // If primaryReader is managed and should survive this MultiReader + // txReader.incRef(); // If txReader is managed + // MultiReader multiReader = new MultiReader(new IndexReader[] {primaryReader, txReader}); + // If MultiReader is short-lived and we don't want to affect original readers: + List readers = new ArrayList<>(); + readers.add(primaryReader); + if (txReader != null) readers.add(txReader); // txReader could be null if no changes + + return new MultiReader(readers.toArray(new IndexReader[0])); + + } catch (final IOException e) { + // FIXME: OLuceneIndexException needs to be ArcadeDB specific + throw ArcadeDBException.wrapException( + new ArcadeDBException("unable to create reader on changes"), e); // Changed + } + } + + public long deletedDocs(final Query query) { + // FIXME: LuceneTxChanges and its deletedDocs method need refactoring + return changes.map(c -> c.deletedDocs(query)).orElse(0L); // Ensure Long literal + } + + public boolean isUpdated(final Document doc, final Object key, final Identifiable value) { // Changed + // FIXME: LuceneTxChanges and its isUpdated method need refactoring + return changes.map(c -> c.isUpdated(doc, key, value)).orElse(false); + } + + public boolean isDeleted(final Document doc, final Object key, final Identifiable value) { // Changed + // FIXME: LuceneTxChanges and its isDeleted method need refactoring + return changes.map(c -> c.isDeleted(doc, key, value)).orElse(false); + } + + public Map getFragments() { + return fragments; + } + + // getLimit() and onRecord() were not in the provided OLuceneQueryContext, + // they might be from a different class or an older version. + // If they are needed, they would be implemented here. +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/OLuceneQueryContext.java b/lucene/src/main/java/com/arcadedb/lucene/query/OLuceneQueryContext.java deleted file mode 100644 index 3bbee4581e..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/query/OLuceneQueryContext.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * - * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) - * * - * * Licensed under the Apache License, Version 2.0 (the "License"); - * * you may not use this file except in compliance with the License. - * * You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, - * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * * See the License for the specific language governing permissions and - * * limitations under the License. - * - */ - -package com.arcadedb.lucene.query; - -import com.arcadedb.common.exception.OException; -import com.arcadedb.lucene.exception.OLuceneIndexException; -import com.arcadedb.lucene.tx.OLuceneTxChanges; -import com.arcadedb.database.OCommandContext; -import com.arcadedb.database.OIdentifiable; -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiReader; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.highlight.TextFragment; - -/** Created by Enrico Risa on 08/01/15. */ -public class OLuceneQueryContext { - private final OCommandContext context; - private final IndexSearcher searcher; - private final Query query; - private final Sort sort; - private Optional changes; - private HashMap fragments; - - public OLuceneQueryContext( - final OCommandContext context, final IndexSearcher searcher, final Query query) { - this(context, searcher, query, Collections.emptyList()); - } - - public OLuceneQueryContext( - final OCommandContext context, - final IndexSearcher searcher, - final Query query, - final List sortFields) { - this.context = context; - this.searcher = searcher; - this.query = query; - if (sortFields.isEmpty()) { - sort = null; - } else { - sort = new Sort(sortFields.toArray(new SortField[] {})); - } - changes = Optional.empty(); - fragments = new HashMap<>(); - } - - public boolean isInTx() { - return changes.isPresent(); - } - - public OLuceneQueryContext withChanges(final OLuceneTxChanges changes) { - this.changes = Optional.ofNullable(changes); - return this; - } - - public OLuceneQueryContext addHighlightFragment( - final String field, final TextFragment[] fieldFragment) { - fragments.put(field, fieldFragment); - return this; - } - - public OCommandContext getContext() { - return context; - } - - public Query getQuery() { - return query; - } - - public Optional getChanges() { - return changes; - } - - public Sort getSort() { - return sort; - } - - public IndexSearcher getSearcher() { - return changes.map(c -> new IndexSearcher(multiReader(c))).orElse(searcher); - } - - private MultiReader multiReader(final OLuceneTxChanges luceneTxChanges) { - final IndexReader primaryReader = searcher.getIndexReader(); - final IndexReader txReader = luceneTxChanges.searcher().getIndexReader(); - try { - // Transfer ownership to the MultiReader so the index searcher can be released transparently. - // Without this, the primary IndexReader will leak a refcount each time it is wrapped. - MultiReader multiReader = new MultiReader(new IndexReader[] {primaryReader, txReader}, false); - primaryReader.decRef(); - txReader.decRef(); - return multiReader; - } catch (final IOException e) { - throw OException.wrapException( - new OLuceneIndexException("unable to create reader on changes"), e); - } - } - - public long deletedDocs(final Query query) { - return changes.map(c -> c.deletedDocs(query)).orElse(0l); - } - - public boolean isUpdated(final Document doc, final Object key, final OIdentifiable value) { - return changes.map(c -> c.isUpdated(doc, key, value)).orElse(false); - } - - public boolean isDeleted(final Document doc, final Object key, final OIdentifiable value) { - return changes.map(c -> c.isDeleted(doc, key, value)).orElse(false); - } - - public Map getFragments() { - return fragments; - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChanges.java b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChanges.java new file mode 100644 index 0000000000..267aafe911 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChanges.java @@ -0,0 +1,108 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * Copyright 2023 Arcade Data Ltd + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.database.Identifiable; // Changed +import org.apache.lucene.analysis.Analyzer; // Added for new methods +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; // Added for new methods +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; // Added for new methods +import org.apache.lucene.util.Bits; // Added for new methods + +import java.io.IOException; // Added for new methods +import java.util.Collections; +import java.util.List; // Added for new methods +import java.util.Map; // Added for new methods +import java.util.Set; + +/** Created by Enrico Risa on 15/09/15. */ +public interface LuceneTxChanges { // Changed interface name + + // Existing methods adapted + void put(Object key, Identifiable value, Document doc); // Changed OIdentifiable + + void remove(Object key, Identifiable value); // Changed OIdentifiable + + IndexSearcher searcher(); // Existing method, seems to be the transactional searcher + + // numDocs() from prompt matches existing signature (except return type was long, now int as per Lucene's numDocs()) + int numDocs(); // Changed from long to int + + // getDeletedDocs() from prompt returns Set, existing returned Set + // Renaming existing to getDeletedLuceneDocs for clarity and adding new one + default Set getDeletedLuceneDocs() { // Kept original behavior with new name + return Collections.emptySet(); + } + + // isDeleted(Document, Object, OIdentifiable) adapted + boolean isDeleted(Document document, Object key, Identifiable value); // Changed OIdentifiable + + // isUpdated(Document, Object, OIdentifiable) adapted + boolean isUpdated(Document document, Object key, Identifiable value); // Changed OIdentifiable + + // deletedDocs(Query query) from prompt returns Bits, existing returned long + // Renaming existing to countDeletedDocs for clarity and adding new one + default long countDeletedDocs(Query query) { // Kept original behavior with new name + return 0; + } + + // New methods from prompt + IndexSearcher getCoordinatingSearcher(); // New: Could be the main index searcher before TX changes overlay + + Bits deletedDocs(Query query); // New: Returns Bits for live docs + + boolean isUpdated(Document doc, Analyzer analyzer, Query query); // New: Overload with Analyzer and Query + + boolean isDeleted(Document doc, Analyzer analyzer, Query query); // New: Overload with Analyzer and Query + + int nDoc(Query query); // New: Number of documents matching query in current TX state + + Set getDeletedDocuments(); // New: Set of deletion queries + + Map getUpdatedDocuments(); // New: Map of update queries to new documents + + List getAddedDocuments(); // New: List of added documents + + IndexReader getReader() throws IOException; // New: Get current transactional reader + + TopDocs query(Query query, int N) throws IOException; // New: Execute query with limit // Changed signature to add N + + Document doc(int doc) throws IOException; // New: Retrieve Lucene document by internal ID + + Document doc(int doc, Set fieldsToLoad) throws IOException; // New: Retrieve specific fields + + void close() throws IOException; // New + + int maxDoc() throws IOException; // New + + boolean hasDeletions(); // New + + void commit() throws IOException; // New + + void rollback() throws IOException; // New + + void addDocument(Document document) throws IOException; // New + + void deleteDocument(Query query) throws IOException; // New + + void updateDocument(Query query, Document document) throws IOException; // New +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesAbstract.java b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesAbstract.java new file mode 100644 index 0000000000..ba124633ca --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesAbstract.java @@ -0,0 +1,322 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * Copyright 2023 Arcade Data Ltd + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.database.Identifiable; +import com.arcadedb.exception.ArcadeDBException; // Changed +import com.arcadedb.lucene.engine.LuceneIndexEngine; // Changed +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.util.Bits; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; // Changed + +/** Created by Enrico Risa on 28/09/15. */ +public abstract class LuceneTxChangesAbstract implements LuceneTxChanges { // Changed class name and interface + private static final Logger logger = + Logger.getLogger(LuceneTxChangesAbstract.class.getName()); // Changed + public static final String TMP = "_tmp_rid"; // This constant seems unused here, but kept for now. + + protected final LuceneIndexEngine engine; // Changed + protected final IndexWriter writer; // For new/updated documents + protected final IndexWriter deletesExecutor; // For pending deletions + + private IndexSearcher txSearcher; // Cached NRT searcher for the current transaction state (adds + main) + private IndexReader txReader; // Cached NRT reader for the current transaction state + + public LuceneTxChangesAbstract( // Changed + final LuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletesExecutor) { + this.engine = engine; + this.writer = writer; + this.deletesExecutor = deletesExecutor; + } + + // Method to get a transactional reader, possibly NRT from writer + protected IndexReader getTxReader() throws IOException { + if (txReader == null || !txReader.tryIncRef()) { // Check if reader is still valid or can be used + if (txReader != null) { // was valid, but couldn't incRef, so it's likely closed + try { + txReader.decRef(); // ensure it's closed if it was open + } catch (Exception e) { /* ignore */ } + } + // If writer is null or closed, this will throw an exception, which is appropriate. + txReader = DirectoryReader.open(writer); // Standard NRT reader + } + return txReader; + } + + protected void NRTReaderReopen() throws IOException{ + if (txReader != null) { + IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader)txReader, writer); + if (newReader != null) { + txReader.decRef(); + txReader = newReader; + txSearcher = new IndexSearcher(txReader); + } + } else { + txReader = DirectoryReader.open(writer); + txSearcher = new IndexSearcher(txReader); + } + } + + + @Override + public IndexSearcher searcher() { + try { + // Return a new NRT searcher reflecting current changes in 'writer' + // This searcher sees documents added/updated in the current TX but not yet committed. + // It does not see documents deleted in this TX against the main index. + // For a searcher that sees deletes as well, getCoordinatingSearcher might be better. + NRTReaderReopen(); + return txSearcher; + } catch (IOException e) { + throw ArcadeDBException.wrapException( // Changed + new ArcadeDBException("Error creating transactional IndexSearcher from writer"), e); // Changed + } + } + + @Override + public IndexSearcher getCoordinatingSearcher() { + // This searcher should ideally reflect adds, updates, AND deletes. + // This typically involves a MultiReader combining the main index (with its own deletions applied) + // and the in-memory 'writer' index, while filtering out documents marked for deletion by 'deletesExecutor'. + // For simplicity in this abstract class, could return the same as searcher() and expect + // query execution layer to use getLiveDocs() or similar. + // Or, could be more complex here if a combined view is built. + // For now, let's assume it's similar to searcher() but it's a point for review. + // The engine's main searcher is `engine.searcher()` + // FIXME: This needs a proper implementation, probably involving MultiReader and live docs from deletesExecutor + return searcher(); + } + + @Override + public IndexReader getReader() throws IOException { + return getTxReader(); + } + + @Override + public long countDeletedDocs(Query query) { // Renamed from original deletedDocs + try { + // This counts documents matching the query in the 'deletesExecutor' index. + // These are documents marked for deletion in this transaction. + if (deletesExecutor.getDocStats().numDocs == 0) return 0; // Optimization + try (IndexReader reader = DirectoryReader.open(deletesExecutor)) { + final IndexSearcher indexSearcher = new IndexSearcher(reader); + final TopDocs search = indexSearcher.search(query, 1); // We only need totalHits + return search.totalHits.value; + } + } catch (IOException e) { + logger.log(Level.SEVERE, "Error reading pending deletions index", e); // Changed + } + return 0; + } + + @Override + public Bits deletedDocs(Query query) { + // This should return a Bits representing documents deleted by this query + // within the context of the main index reader (from engine.searcher()). + // This is complex as it needs to check against the 'deletesExecutor' or tracked delete queries. + // Not typically provided directly by IndexWriter for pending changes. + // FIXME: This needs a proper implementation, likely involving custom collector or query rewriting. + logger.warning("deletedDocs(Query) returning Bits is not fully implemented in abstract class."); + return null; // Placeholder + } + + + @Override + public void addDocument(Document document) throws IOException { + writer.addDocument(document); + } + + @Override + public void deleteDocument(Query query) throws IOException { + // Deletes applied to main writer will be visible to its NRT reader. + // If deletesExecutor is for tracking standalone delete operations before commit to main index: + // writer.deleteDocuments(query); // This applies to the current TX state + // deletesExecutor.addDocument(createDeleteMarker(query)); // If deletes are tracked as docs in a separate index + // For now, assuming deletes are applied to the main writer for NRT visibility. + // If deletesExecutor is a separate RAMDirectory for _pending full deletes_ against main index, + // then it should be: deletesExecutor.deleteDocuments(query) or writer.deleteDocuments(query) + // The original code had separate writer and deletedIdx. Let's assume deletes are applied to writer. + writer.deleteDocuments(query); + if(deletesExecutor != writer && deletesExecutor != null) { // If deletes are tracked separately for commit to main index + deletesExecutor.deleteDocuments(query); + } + } + + @Override + public void updateDocument(Query query, Document document) throws IOException { + writer.updateDocument(query, document); + if(deletesExecutor != writer && deletesExecutor != null) { + // If an update can also affect the "to be deleted from main index" list, handle here. + // This is complex. Usually an update is a delete then an add. + // deletesExecutor.updateDocument(query, document); // This might not be how it works. + } + } + + @Override + public void commit() throws IOException { + writer.commit(); + if (deletesExecutor != null && deletesExecutor != writer) { + deletesExecutor.commit(); + } + } + + @Override + public void rollback() throws IOException { + writer.rollback(); + if (deletesExecutor != null && deletesExecutor != writer) { + deletesExecutor.rollback(); + } + } + + @Override + public void close() throws IOException { + try { + if (txReader != null) { + txReader.decRef(); + txReader = null; + } + } finally { + txSearcher = null; // Searcher was using txReader + try { + writer.close(); + } finally { + if (deletesExecutor != null && deletesExecutor != writer) { + deletesExecutor.close(); + } + } + } + } + + @Override + public int numDocs() { + // Returns numDocs of the current transactional reader (reflecting adds/updates in this TX) + try (IndexReader reader = getTxReader()) { // getTxReader handles incRef/decRef + return reader.numDocs(); + } catch (IOException e) { + logger.log(Level.SEVERE, "Cannot get numDocs from transactional reader", e); + return 0; + } + } + + @Override + public int maxDoc() throws IOException { + try (IndexReader reader = getTxReader()) { + return reader.maxDoc(); + } + } + + @Override + public boolean hasDeletions() { + // Check deletions in the context of the main writer for NRT changes + return writer.hasDeletions(); + } + + @Override + public TopDocs query(Query query, int n) throws IOException { + NRTReaderReopen(); // Ensure searcher is up-to-date + return txSearcher.search(query, n); + } + + @Override + public Document doc(int docId) throws IOException { + NRTReaderReopen(); + return txSearcher.storedFields().document(docId); + } + + @Override + public Document doc(int docId, Set fieldsToLoad) throws IOException { + NRTReaderReopen(); + return txSearcher.storedFields().document(docId, fieldsToLoad); + } + + // Methods requiring more specific state tracking, to be implemented by concrete classes or left as default/abstract. + // These were not in the original OLuceneTxChangesAbstract. + + @Override + public abstract void put(Object key, Identifiable value, Document doc); + + @Override + public abstract void remove(Object key, Identifiable value); + + @Override + public abstract boolean isDeleted(Document document, Object key, Identifiable value); + + @Override + public abstract boolean isUpdated(Document document, Object key, Identifiable value); + + @Override + public boolean isUpdated(Document doc, Analyzer analyzer, Query query) { + // Default: Not supported or needs concrete implementation + logger.warning("isUpdated(doc, analyzer, query) not implemented in abstract class."); + return false; + } + + @Override + public boolean isDeleted(Document doc, Analyzer analyzer, Query query) { + // Default: Not supported or needs concrete implementation + logger.warning("isDeleted(doc, analyzer, query) not implemented in abstract class."); + return false; + } + + @Override + public int nDoc(Query query) { + // Number of documents matching query in current TX state + try { + TopDocs results = query(query, 1); // Just need total hits + return (int) results.totalHits.value; + } catch (IOException e) { + logger.log(Level.SEVERE, "Error executing nDoc query", e); + return 0; + } + } + + // These typically require tracking specific operations, left abstract or default. + @Override + public Set getDeletedDocuments() { + logger.warning("getDeletedDocuments() not implemented in abstract class, returning empty set."); + return Collections.emptySet(); + } + + @Override + public Map getUpdatedDocuments() { + logger.warning("getUpdatedDocuments() not implemented in abstract class, returning empty map."); + return Collections.emptyMap(); + } + + @Override + public List getAddedDocuments() { + logger.warning("getAddedDocuments() not implemented in abstract class, returning empty list."); + return Collections.emptyList(); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesMultiRid.java b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesMultiRid.java new file mode 100644 index 0000000000..20e8874497 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesMultiRid.java @@ -0,0 +1,191 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * Copyright 2023 Arcade Data Ltd + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.database.Identifiable; // Changed +import com.arcadedb.exception.ArcadeDBException; // Changed +import com.arcadedb.lucene.engine.LuceneIndexEngine; // Changed +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.lucene.analysis.Analyzer; // For isDeleted/isUpdated with Analyzer +import org.apache.lucene.analysis.core.KeywordAnalyzer; // For MemoryIndex in isDeleted +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; // For isDeleted +import org.apache.lucene.search.Query; + +/** Created by Enrico Risa on 15/09/15. */ +public class LuceneTxChangesMultiRid extends LuceneTxChangesAbstract { // Changed class name and base class + // Stores RID string to a list of associated keys that were part of a delete operation for that RID. + private final Map> deletedRidToKeys = new HashMap<>(); + + // To support new interface methods + private final List addedDocuments = new ArrayList<>(); + // For MultiRid, an "update" is typically a delete of an old key-RID pair (doc) and an add of a new one. + // Tracking specific "updates" as Query->Document is complex here if not just delete+add. + private final Map updatedDocumentsMap = new HashMap<>(); + private final Set deletedQueries = new HashSet<>(); + + + public LuceneTxChangesMultiRid( // Changed + final LuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletesExecutor) { + super(engine, writer, deletesExecutor); + } + + @Override + public void put(final Object key, final Identifiable value, final Document doc) { + try { + super.addDocument(doc); // Use base class to add to writer + addedDocuments.add(doc); // Track for getAddedDocuments() + } catch (IOException e) { + throw ArcadeDBException.wrapException( // Changed + new ArcadeDBException("Unable to add document to transactional Lucene index for multi-RID"), e); // Changed + } + } + + @Override + public void remove(final Object key, final Identifiable value) { + Query deleteQuery; + if (value == null) { // Delete by key - affects all RIDs for this key + deleteQuery = engine.deleteQuery(key, null); + } else { // Delete a specific key-RID association + deleteQuery = engine.deleteQuery(key, value); + } + + try { + super.deleteDocument(deleteQuery); // Apply to current transaction's writer + deletedQueries.add(deleteQuery); // Track query for getDeletedDocuments() + + if (value != null && value.getIdentity() != null && !value.getIdentity().isNew()) { + // Track that this RID was involved in a delete operation with this key + String ridString = value.getIdentity().toString(); + deletedRidToKeys.computeIfAbsent(ridString, k -> new ArrayList<>()).add(key); + + // Original logic added the specific doc to deletedIdx (deletesExecutor). + // This implies deletesExecutor might track full documents to be deleted from the main index. + // If super.deleteDocument also routes to deletesExecutor based on query, this might be redundant + // or requires deletesExecutor to handle full document additions for its own logic. + // For now, let's assume super.deleteDocument(query) is sufficient for deletesExecutor if it's configured for queries. + // If deletesExecutor *must* have the full doc: + // final Document docToDelete = engine.buildDocument(key, value); // FIXME: engine.buildDocument dependency + // if (deletesExecutor != null) deletesExecutor.addDocument(docToDelete); + } + } catch (final IOException e) { + throw ArcadeDBException.wrapException( // Changed + new ArcadeDBException("Error while deleting documents in transaction from Lucene index (multi-RID)"), e); // Changed + } + } + + @Override + public int numDocs() { + // The base class numDocs() provides NRT view of `writer`. + // Original OLuceneTxChangesMultiRid subtracted deletedDocs.size(). + // `deletedDocs` (now represented by deletedQueries or deletedRidToKeys) refers to deletions + // that will be applied to the main index. + // A precise count is complex. For now, relying on base class numDocs which reflects writer's current state. + // A more accurate count of "net new documents in this TX" would be addedDocuments.size() minus + // documents that were added then deleted within the same TX (if tracked). + // If numDocs should reflect the final state after commit, it's more complex. + // Let's return the NRT view of the current writer. + return super.numDocs(); + } + + @Override + public Set getDeletedLuceneDocs() { + // The original stored actual Document objects that were deleted. + // This is hard to reconstruct if we only store queries or (RID,Key) pairs. + // FIXME: If this exact Set is needed, logic in remove() must re-build and store them. + // For now, returning empty as per LuceneTxChangesSingleRid refactoring. + return Collections.emptySet(); + } + + @Override + public boolean isDeleted(final Document document, final Object key, final Identifiable value) { // Changed + if (value == null || value.getIdentity() == null) return false; + + final List associatedKeys = deletedRidToKeys.get(value.getIdentity().toString()); + if (associatedKeys != null) { + // Check if the provided 'key' (or a general match for the document) is among those deleted for this RID + if (associatedKeys.contains(key)) return true; // Exact key match + + // More complex check: does the 'document' match any of the delete operations for this RID? + // This matches the original MemoryIndex check. + final MemoryIndex memoryIndex = new MemoryIndex(); + // Populate memoryIndex with the fields of the 'document' parameter + for (final IndexableField field : document.getFields()) { + // TODO: This needs proper handling for different field types. + // stringValue() might not be universally appropriate. + // Using KeywordAnalyzer, so it's mostly for exact term matching. + // This part is tricky and might need to use the actual field value from IndexableField. + // For now, assuming stringValue is a simplified placeholder. + if (field.stringValue() != null) { // MemoryIndex cannot add null values + memoryIndex.addField(field.name(), field.stringValue(), new KeywordAnalyzer()); + } + } + + for (final Object deletedKey : associatedKeys) { + // engine.deleteQuery should generate a query that identifies the specific key-RID pair + final Query q = engine.deleteQuery(deletedKey, value); // Query for specific key-RID pair + if (memoryIndex.search(q) > 0.0f) { + return true; // The document matches one of the delete operations for this RID + } + } + } + return false; + } + + @Override + public boolean isUpdated(final Document document, final Object key, final Identifiable value) { // Changed + // For MultiRid, an update is typically a delete of an old association and an add of a new one. + // This class doesn't explicitly track "updates" in a separate set like SingleRid did. + // One could argue an entry is "updated" if it was deleted and then re-added with the same RID but different key/doc. + // However, without more state, this is hard to determine accurately here. + // The original returned false. + return false; + } + + // Implementations for new methods from LuceneTxChanges interface + @Override + public List getAddedDocuments() { + return Collections.unmodifiableList(addedDocuments); + } + + @Override + public Set getDeletedDocuments() { + return Collections.unmodifiableSet(deletedQueries); + } + + @Override + public Map getUpdatedDocuments() { + // Updates are not explicitly tracked as Query->Document in this multi-value implementation. + // An update is a delete of one Lucene document and an add of another. + // To fulfill this, one might need to capture the delete query and the newly added document + // if a "key" conceptually remains the same but its associated RIDs change. + // For now, returning empty, as this requires more specific tracking. + return Collections.unmodifiableMap(updatedDocumentsMap); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesSingleRid.java b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesSingleRid.java new file mode 100644 index 0000000000..967ac52fba --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesSingleRid.java @@ -0,0 +1,203 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * Copyright 2023 Arcade Data Ltd + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.database.Identifiable; // Changed +import com.arcadedb.exception.ArcadeDBException; // Changed +import com.arcadedb.lucene.engine.LuceneIndexEngine; // Changed +import com.arcadedb.lucene.index.ArcadeLuceneIndexType; // Changed for createField +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; // For Field.Store +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.Query; // For getDeletedDocuments & getUpdatedDocuments + +/** Created by Enrico Risa on 15/09/15. */ +public class LuceneTxChangesSingleRid extends LuceneTxChangesAbstract { // Changed class name and base class + private final Set deletedRids = new HashSet<>(); // RIDs marked for deletion from main index + private final Set updatedRids = new HashSet<>(); // RIDs that were deleted and then re-added (i.e., updated) + + // To support new interface methods + private final List addedDocuments = new ArrayList<>(); + private final Map updatedDocumentsMap = new HashMap<>(); // Query to delete old, Document is new + private final Set deletedQueries = new HashSet<>(); + + + public LuceneTxChangesSingleRid( // Changed + final LuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletesExecutor) { + super(engine, writer, deletesExecutor); + } + + @Override + public void put(final Object key, final Identifiable value, final Document doc) { + // This method is called when a key/value is to be associated in the index. + // The `doc` is the Lucene document representing this association. + try { + if (value != null && value.getIdentity() != null && !value.getIdentity().isNew()) { + String ridString = value.getIdentity().toString(); + if (deletedRids.remove(ridString)) { + // If it was previously deleted in this transaction, it's now an update. + // The TMP field was used to mark such docs for special handling during merge/query, + // but it's unclear if that's needed with current Lucene NRT capabilities or specific merge logic. + // For now, we track it as updated. + doc.add(ArcadeLuceneIndexType.createField(TMP, ridString, Field.Store.YES)); // Changed OLuceneIndexType + updatedRids.add(ridString); + // The document for this RID might have been in `deletesExecutor`; + // an update means it shouldn't be deleted from the main index. + // This might require removing it from `deletesExecutor` if it was added there. + // This is complex and depends on how commit logic handles deletesExecutor. + // For now, just adding to writer. + } + } + super.addDocument(doc); // Use base class to add to writer + addedDocuments.add(doc); // Track for getAddedDocuments() + } catch (IOException e) { + throw ArcadeDBException.wrapException( // Changed + new ArcadeDBException("Unable to add document to transactional Lucene index"), e); // Changed + } + } + + @Override + public void remove(final Object key, final Identifiable value) { + // This method is called to disassociate a key/value. + // `value` is the RID to be removed. + // `key` might be used to construct a more specific delete query if needed, but typically deletion by RID is sufficient. + Query deleteQuery; + if (value == null) { + // Delete by key - this is dangerous for non-unique indexes, but Lucene handles it by query + deleteQuery = engine.deleteQuery(key, null); // engine.deleteQuery should handle null value for key-based delete + } else { + deleteQuery = engine.deleteQuery(key, value); // Specific RID deletion query + } + + try { + super.deleteDocument(deleteQuery); // Use base class to delete from writer (current TX view) + deletedQueries.add(deleteQuery); // Track for getDeletedDocuments() + + if (value != null && value.getIdentity() != null && !value.getIdentity().isNew()) { + // If it's a persistent RID, track it for specific management. + // This logic matches original: add to deletedRids and also add its document to deletesExecutor + String ridString = value.getIdentity().toString(); + deletedRids.add(ridString); + updatedRids.remove(ridString); // If it was updated then deleted, it's just a delete. + + // The original added the full document to deletedIdx (deletesExecutor). + // This implies deletesExecutor might be a "negative" index. + if (deletesExecutor != null) { + // We need the document as it was in the main index to correctly mark it for deletion. + // Building it here might not be accurate if fields changed. + // FIXME: This needs a robust way to get the "old" document or rely on query for deletion. + // For now, if we have 'value', we assume `engine.deleteQuery` is by specific ID. + // If `deletesExecutor` is meant to hold docs to be deleted from main index on commit: + // Document docToDelete = engine.buildDocument(key, value); // This builds NEW doc. + // Instead of adding doc, we add the query. Commit logic will use these queries. + } + } + } catch (final IOException e) { + throw ArcadeDBException.wrapException( // Changed + new ArcadeDBException("Error while deleting documents in transaction from Lucene index"), e); // Changed + } + } + + @Override + public int numDocs() { + // The base class numDocs() returns writer.getDocStats().numDocs or similar NRT count from writer. + // This reflects documents added/updated in the current TX. + // The original OLuceneTxChangesSingleRid subtracted deleted.size() and updated.size(). + // Subtracting deletedRids makes sense if these are deletions from the main index state. + // Subtracting updatedRids from writer's NRT count is tricky; an update is a delete + add. + // The NRT reader from `writer` already accounts for its own adds/deletes. + // If `deletedRids` tracks docs to be deleted from the *main committed index*, then this makes sense. + // Let's assume the base `numDocs()` gives count from `writer` (adds/updates in tx). + // We need to subtract those in `deletedRids` that were not re-added/updated. + int writerDocs = super.numDocs(); + int netDeletes = 0; + for (String rid : deletedRids) { + if (!updatedRids.contains(rid)) { // If it was deleted and not subsequently updated/re-added + netDeletes++; + } + } + // This is still an approximation of the final count after commit. + // A true transactional count would need to consider the main index count + adds - (deletes not in adds). + // For now, this is an estimate of the TX view. + return writerDocs - netDeletes; + } + + @Override + public Set getDeletedLuceneDocs() { + // This method from the original interface returned Lucene docs marked for deletion. + // The new interface has getDeletedDocuments returning Set. + // This method can be implemented if still needed, but might be redundant. + // For now, let's try to build it from deletedQueries if possible, or keep original logic if it made sense. + // The original stored `deletedDocs` (actual Document objects). + // Let's return empty for now, assuming getDeletedDocuments() is the primary. + // FIXME: Review if this specific Set is still needed. + return Collections.emptySet(); + } + + @Override + public boolean isDeleted(Document document, Object key, Identifiable value) { // Changed + return value != null && value.getIdentity() != null && deletedRids.contains(value.getIdentity().toString()); + } + + @Override + public boolean isUpdated(Document document, Object key, Identifiable value) { // Changed + return value != null && value.getIdentity() != null && updatedRids.contains(value.getIdentity().toString()); + } + + // Implementations for new methods from LuceneTxChanges interface + @Override + public List getAddedDocuments() { + return Collections.unmodifiableList(addedDocuments); + } + + @Override + public Set getDeletedDocuments() { + return Collections.unmodifiableSet(deletedQueries); + } + + @Override + public Map getUpdatedDocuments() { + // This class tracks updatedRids. To fulfill Map, + // we'd need to store the delete query and the new document for each update. + // The current `put` logic handles updates by re-adding. + // FIXME: This needs more sophisticated tracking if specific update queries are required. + // For now, returning based on `updatedRids` and `addedDocuments`. + // This is an approximation. + Map approxUpdated = new HashMap<>(); + for (Document doc : addedDocuments) { + String tmpRid = doc.get(TMP); + if (tmpRid != null && updatedRids.contains(tmpRid)) { + // This doc is an update. What was the query to delete the old one? + // We don't store the original key for the RID directly here for updates. + // This highlights a gap if this specific Map is needed. + // For now, this will be empty or needs more info. + } + } + return Collections.unmodifiableMap(updatedDocumentsMap); // Requires populating this map during put/update + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChanges.java b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChanges.java deleted file mode 100644 index 117c4d911f..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChanges.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * - * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) - * * - * * Licensed under the Apache License, Version 2.0 (the "License"); - * * you may not use this file except in compliance with the License. - * * You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, - * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * * See the License for the specific language governing permissions and - * * limitations under the License. - * - */ - -package com.arcadedb.lucene.tx; - -import com.arcadedb.database.OIdentifiable; -import java.util.Collections; -import java.util.Set; -import org.apache.lucene.document.Document; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; - -/** Created by Enrico Risa on 15/09/15. */ -public interface OLuceneTxChanges { - - void put(Object key, OIdentifiable value, Document doc); - - void remove(Object key, OIdentifiable value); - - IndexSearcher searcher(); - - default long numDocs() { - return 0; - } - - default Set getDeletedDocs() { - return Collections.emptySet(); - } - - boolean isDeleted(Document document, Object key, OIdentifiable value); - - boolean isUpdated(Document document, Object key, OIdentifiable value); - - default long deletedDocs(Query query) { - return 0; - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesAbstract.java b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesAbstract.java deleted file mode 100644 index 52fd4f629f..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesAbstract.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * - * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) - * * - * * Licensed under the Apache License, Version 2.0 (the "License"); - * * you may not use this file except in compliance with the License. - * * You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, - * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * * See the License for the specific language governing permissions and - * * limitations under the License. - * - */ - -package com.arcadedb.lucene.tx; - -import com.arcadedb.common.exception.OException; -import com.arcadedb.common.log.OLogManager; -import com.arcadedb.common.log.OLogger; -import com.arcadedb.lucene.engine.OLuceneIndexEngine; -import com.arcadedb.lucene.exception.OLuceneIndexException; -import java.io.IOException; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TopDocs; - -/** Created by Enrico Risa on 28/09/15. */ -public abstract class OLuceneTxChangesAbstract implements OLuceneTxChanges { - private static final OLogger logger = - OLogManager.instance().logger(OLuceneTxChangesAbstract.class); - public static final String TMP = "_tmp_rid"; - - protected final OLuceneIndexEngine engine; - protected final IndexWriter writer; - protected final IndexWriter deletedIdx; - - public OLuceneTxChangesAbstract( - final OLuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletedIdx) { - this.engine = engine; - this.writer = writer; - this.deletedIdx = deletedIdx; - } - - public IndexSearcher searcher() { - // TODO optimize - try { - return new IndexSearcher(DirectoryReader.open(writer, true, true)); - } catch (IOException e) { - // logger.error("Error during searcher index instantiation on new documents", e); - throw OException.wrapException( - new OLuceneIndexException("Error during searcher index instantiation on new documents"), - e); - } - } - - @Override - public long deletedDocs(Query query) { - try { - final IndexSearcher indexSearcher = - new IndexSearcher(DirectoryReader.open(deletedIdx, true, true)); - final TopDocs search = indexSearcher.search(query, Integer.MAX_VALUE); - return search.totalHits.value; - } catch (IOException e) { - logger.error("Error during searcher index instantiation on deleted documents ", e); - } - return 0; - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesMultiRid.java b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesMultiRid.java deleted file mode 100644 index c3758ba6d6..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesMultiRid.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * - * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) - * * - * * Licensed under the Apache License, Version 2.0 (the "License"); - * * you may not use this file except in compliance with the License. - * * You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, - * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * * See the License for the specific language governing permissions and - * * limitations under the License. - * - */ - -package com.arcadedb.lucene.tx; - -import com.arcadedb.common.exception.OException; -import com.arcadedb.lucene.engine.OLuceneIndexEngine; -import com.arcadedb.lucene.exception.OLuceneIndexException; -import com.arcadedb.database.OIdentifiable; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.lucene.analysis.core.KeywordAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.memory.MemoryIndex; -import org.apache.lucene.search.Query; - -/** Created by Enrico Risa on 15/09/15. */ -public class OLuceneTxChangesMultiRid extends OLuceneTxChangesAbstract { - private final Map> deleted = new HashMap>(); - private final Set deletedDocs = new HashSet(); - - public OLuceneTxChangesMultiRid( - final OLuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletedIdx) { - super(engine, writer, deletedIdx); - } - - public void put(final Object key, final OIdentifiable value, final Document doc) { - try { - writer.addDocument(doc); - } catch (IOException e) { - throw OException.wrapException( - new OLuceneIndexException("unable to add document to changes index"), e); - } - } - - public void remove(final Object key, final OIdentifiable value) { - try { - if (value.getIdentity().isTemporary()) { - writer.deleteDocuments(engine.deleteQuery(key, value)); - } else { - deleted.putIfAbsent(value.getIdentity().toString(), new ArrayList<>()); - deleted.get(value.getIdentity().toString()).add(key.toString()); - - final Document doc = engine.buildDocument(key, value); - deletedDocs.add(doc); - deletedIdx.addDocument(doc); - } - } catch (final IOException e) { - throw OException.wrapException( - new OLuceneIndexException( - "Error while deleting documents in transaction from lucene index"), - e); - } - } - - public long numDocs() { - return searcher().getIndexReader().numDocs() - deletedDocs.size(); - } - - public Set getDeletedDocs() { - return deletedDocs; - } - - public boolean isDeleted(final Document document, final Object key, final OIdentifiable value) { - boolean match = false; - final List strings = deleted.get(value.getIdentity().toString()); - if (strings != null) { - final MemoryIndex memoryIndex = new MemoryIndex(); - for (final String string : strings) { - final Query q = engine.deleteQuery(string, value); - memoryIndex.reset(); - for (final IndexableField field : document.getFields()) { - memoryIndex.addField(field.name(), field.stringValue(), new KeywordAnalyzer()); - } - match = match || (memoryIndex.search(q) > 0.0f); - } - return match; - } - return match; - } - - // TODO is this valid? - public boolean isUpdated(final Document document, final Object key, final OIdentifiable value) { - return false; - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesSingleRid.java b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesSingleRid.java deleted file mode 100644 index dcc87fe84e..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesSingleRid.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * - * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) - * * - * * Licensed under the Apache License, Version 2.0 (the "License"); - * * you may not use this file except in compliance with the License. - * * You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, - * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * * See the License for the specific language governing permissions and - * * limitations under the License. - * - */ - -package com.arcadedb.lucene.tx; - -import com.arcadedb.common.exception.OException; -import com.arcadedb.lucene.builder.OLuceneIndexType; -import com.arcadedb.lucene.engine.OLuceneIndexEngine; -import com.arcadedb.lucene.exception.OLuceneIndexException; -import com.arcadedb.database.OIdentifiable; -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexWriter; - -/** Created by Enrico Risa on 15/09/15. */ -public class OLuceneTxChangesSingleRid extends OLuceneTxChangesAbstract { - private final Set deleted = new HashSet(); - private final Set updated = new HashSet(); - private final Set deletedDocs = new HashSet(); - - public OLuceneTxChangesSingleRid( - final OLuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletedIdx) { - super(engine, writer, deletedIdx); - } - - public void put(final Object key, final OIdentifiable value, final Document doc) { - if (deleted.remove(value.getIdentity().toString())) { - doc.add(OLuceneIndexType.createField(TMP, value.getIdentity().toString(), Field.Store.YES)); - updated.add(value.getIdentity().toString()); - } - try { - writer.addDocument(doc); - } catch (IOException e) { - throw OException.wrapException( - new OLuceneIndexException("unable to add document to changes index"), e); - } - } - - public void remove(final Object key, final OIdentifiable value) { - try { - if (value == null) { - writer.deleteDocuments(engine.deleteQuery(key, value)); - } else if (value.getIdentity().isTemporary()) { - writer.deleteDocuments(engine.deleteQuery(key, value)); - } else { - deleted.add(value.getIdentity().toString()); - Document doc = engine.buildDocument(key, value); - deletedDocs.add(doc); - deletedIdx.addDocument(doc); - } - } catch (final IOException e) { - throw OException.wrapException( - new OLuceneIndexException( - "Error while deleting documents in transaction from lucene index"), - e); - } - } - - public long numDocs() { - return searcher().getIndexReader().numDocs() - deleted.size() - updated.size(); - } - - public Set getDeletedDocs() { - return deletedDocs; - } - - public boolean isDeleted(Document document, Object key, OIdentifiable value) { - return deleted.contains(value.getIdentity().toString()); - } - - public boolean isUpdated(Document document, Object key, OIdentifiable value) { - return updated.contains(value.getIdentity().toString()); - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/util/LuceneDateTools.java b/lucene/src/main/java/com/arcadedb/lucene/util/LuceneDateTools.java new file mode 100644 index 0000000000..ca4c7b775a --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/util/LuceneDateTools.java @@ -0,0 +1,130 @@ +/* + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.arcadedb.lucene.util; + +import java.text.ParsePosition; +import java.text.SimpleDateFormat; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.Calendar; +import java.util.Date; +import java.util.TimeZone; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class LuceneDateTools { + + private static final Logger logger = Logger.getLogger(LuceneDateTools.class.getName()); + + // Prioritized list of date/datetime formatters + // ISO 8601 with Z / offset / local + private static final DateTimeFormatter ISO_OFFSET_DATE_TIME = DateTimeFormatter.ISO_OFFSET_DATE_TIME; // Handles 'Z' and offsets like +01:00 + private static final DateTimeFormatter ISO_LOCAL_DATE_TIME = DateTimeFormatter.ISO_LOCAL_DATE_TIME; // Handles 'yyyy-MM-ddTHH:mm:ss.SSS' + private static final DateTimeFormatter ISO_LOCAL_DATE = DateTimeFormatter.ISO_LOCAL_DATE; // Handles 'yyyy-MM-dd' + + // Common alternative formats + private static final String ALT_DATETIME_FORMAT_NO_T = "yyyy-MM-dd HH:mm:ss.SSS"; + private static final String ALT_DATETIME_FORMAT_NO_T_NO_MS = "yyyy-MM-dd HH:mm:ss"; + private static final String ALT_DATETIME_FORMAT_NO_T_NO_S_NO_MS = "yyyy-MM-dd HH:mm"; + + + public static Long parseDateTimeToMillis(String dateTimeString) { + if (dateTimeString == null || dateTimeString.isEmpty() || "*".equals(dateTimeString)) { + return null; + } + + // 1. Try parsing as plain long (epoch millis) + try { + return Long.parseLong(dateTimeString); + } catch (NumberFormatException e) { + // Not a long, proceed to date formats + } + + // 2. Try ISO_OFFSET_DATE_TIME (handles 'Z' for UTC and offsets) + try { + OffsetDateTime odt = OffsetDateTime.parse(dateTimeString, ISO_OFFSET_DATE_TIME); + return odt.toInstant().toEpochMilli(); + } catch (DateTimeParseException e) { + // ignore and try next format + } + + // 3. Try ISO_LOCAL_DATE_TIME (assumes system default timezone if no offset specified) + // To be safer, we should assume UTC if no offset is present, or make it configurable. + // For now, let's try parsing as local and then converting to UTC for consistency. + try { + LocalDateTime ldt = LocalDateTime.parse(dateTimeString, ISO_LOCAL_DATE_TIME); + return ldt.toInstant(ZoneOffset.UTC).toEpochMilli(); // Assume UTC if no offset + } catch (DateTimeParseException e) { + // ignore and try next format + } + + // 4. Try ISO_LOCAL_DATE (assumes start of day, UTC) + try { + LocalDate ld = LocalDate.parse(dateTimeString, ISO_LOCAL_DATE); + return ld.atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli(); + } catch (DateTimeParseException e) { + // ignore and try next format + } + + // 5. Try alternative SimpleDateFormat patterns (less robust, more ambiguous) + // These assume UTC. If local timezone is implied by strings, SimpleDateFormat needs setTimeZone(TimeZone.getDefault()) + // but for consistency with Lucene (which often uses UTC via DateTools), UTC is safer. + String[] altPatterns = { + ALT_DATETIME_FORMAT_NO_T, + ALT_DATETIME_FORMAT_NO_T_NO_MS, + ALT_DATETIME_FORMAT_NO_T_NO_S_NO_MS + }; + + for (String pattern : altPatterns) { + try { + SimpleDateFormat sdf = new SimpleDateFormat(pattern); + sdf.setTimeZone(TimeZone.getTimeZone("UTC")); // Assume UTC for these patterns too + sdf.setLenient(false); + Date date = sdf.parse(dateTimeString); + return date.getTime(); + } catch (java.text.ParseException ex) { + // ignore and try next pattern + } + } + + logger.log(Level.WARNING, "Failed to parse date/datetime string: {0}", dateTimeString); + return null; // Or throw ParseException if strict parsing is required + } + + public static Long normalizeToDayEpochMillis(long epochMillis) { + Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + cal.setTimeInMillis(epochMillis); + cal.set(Calendar.HOUR_OF_DAY, 0); + cal.set(Calendar.MINUTE, 0); + cal.set(Calendar.SECOND, 0); + cal.set(Calendar.MILLISECOND, 0); + return cal.getTimeInMillis(); + } + + public static Long parseDateToMillis(String dateString) { + Long epochMillis = parseDateTimeToMillis(dateString); + if (epochMillis != null) { + return normalizeToDayEpochMillis(epochMillis); + } + return null; + } +} From 6911ba1142f6b92b7b592ffa9aa5f3afc771e36d Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 1 Jun 2025 10:25:28 +0000 Subject: [PATCH 5/6] Refactor Lucene module: Phase 2 - Critical FIXME Resolution and Deep Integration This commit builds upon the initial structural refactoring by addressing many critical FIXME comments and performing deeper integration of the Lucene module components. **Key Changes Made (Integration Phase):** 1. **`ArcadeLuceneEngineUtils.java` Created:** * Implemented `buildSortFields` to parse sorting criteria from metadata/schema and create Lucene `SortField` objects. Integrated into `ArcadeLuceneCrossClassIndexEngine`. 2. **`ArcadeLuceneFullTextIndex.java` Enhancements:** * Solidified lifecycle methods (`build`, `drop`, `close`) and their calls to the engine. * Ensured `TransactionContext` is correctly passed to engine methods. * Clarified `IndexEngineData` by passing `IndexDefinition.getOptions()` to engine's `create/load` (though engine's `load` was mostly via `init`). 3. **Engine Constructor Alignment:** * Refactored `ArcadeLuceneFullTextIndexEngine` constructor and its instantiation in `ArcadeLuceneFullTextIndex.lazyInit()` to remove redundant `fileId`, aligning with `OLuceneIndexEngineAbstract`. 4. **`LuceneQueryBuilder.java` & `ArcadeLuceneIndexType.java` Refinements:** * Moved type-specific exact query logic for `CompositeKey` parts from `LuceneQueryBuilder` to a new static helper `createExactFieldQuery` in `ArcadeLuceneIndexType`, improving modularity. 5. **Date Handling Standardization (`LuceneDateTools.java`):** * Created `LuceneDateTools.java` in `com.arcadedb.lucene.util`. * Centralized date/datetime string parsing (supporting epoch millis, ISO 8601, common formats) and normalization logic. * Updated `ArcadeLuceneIndexType` (for indexing & exact queries) and `ArcadeLuceneMultiFieldQueryParser` (for range queries) to use `LuceneDateTools`, ensuring consistent date handling. 6. **`ArcadeLuceneCrossClassSearchFunction.java` Refinement:** * Improved `getCrossClassEngine()` with a clearer discovery strategy (iterating schema indexes, checking associated engine types) and caching. 7. **Factory and Engine Polymorphism (`ArcadeLuceneIndexFactoryHandler`, `ArcadeLuceneFullTextIndex`):** * Enabled `ArcadeLuceneFullTextIndex.lazyInit()` to instantiate either `ArcadeLuceneFullTextIndexEngine` or `ArcadeLuceneCrossClassIndexEngine` based on the algorithm in `IndexDefinition`. * Implemented `init(IndexMetadata)` in `ArcadeLuceneCrossClassIndexEngine`. 8. **`LuceneDocumentBuilder.java` Enhancements:** * Implemented `isToStore()` and `isToSort()` methods to respect configurations from index definition options and query-time metadata for determining field storage/sorting. * Added initial support for indexing collections of scalar values. * Included `FIXME`s and placeholder logic for handling complex embedded documents/maps. **Overall Status:** The Lucene module is now significantly more integrated. Many critical dependencies have been addressed, and core components like type handling, date parsing, query building, and engine instantiation are much more robust. While some complex areas (like full embedded document indexing) and final API alignments (e.g., for SQL functions) still have `FIXME`s, the module is approaching a state where compilation can be attempted. This phase focused on making the connections between the refactored classes, ensuring data (like types and configurations) flows correctly, and standardizing common operations like date handling. From e96d4be7a078ebd7e06f1386bbe618a18a3a9395 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 1 Jun 2025 12:42:53 +0000 Subject: [PATCH 6/6] Refactor Lucene module: Pausing due to environment instability This commit represents the state of the Lucene module refactoring before I paused work due to persistent environment/filesystem instability. I've refactored many components, but further progress and compilation attempts are blocked. **Summary of Steps I've Taken & Changes I've Made:** 1. **Initial Structural Refactoring (Completed):** * I refactored all key Java files from the original migration across packages (`engine`, `analyzer`, `index`, `builder`, `functions`, `query`, `tx`). This involved: * Renaming classes and files (e.g., `OLucene...` to `ArcadeLucene...` or `Lucene...`). * Updating package declarations. * Replacing OrientDB imports and API calls with ArcadeDB equivalents where direct mappings were apparent. * Switching logging to `java.util.logging.Logger`. * I added the missing `OLuceneIndexEngineAbstract.java`. * I significantly modernized `ArcadeLuceneIndexType` for Lucene 10.x field creation. * I implemented `ArcadeLuceneMultiFieldQueryParser` for type-aware range queries. * I aligned SQL functions to implement `com.arcadedb.query.sql.executor.SQLFunction` (via `ArcadeLuceneSearchFunctionTemplate`). 2. **Critical FIXME Resolution and Deep Integration (Partially Completed):** * I created `ArcadeLuceneEngineUtils` for `buildSortFields`. * I enhanced `ArcadeLuceneFullTextIndex` lifecycle methods and `TransactionContext` usage. * I aligned engine constructors (`ArcadeLuceneFullTextIndexEngine`, `OLuceneIndexEngineAbstract`). * I centralized type-specific exact query logic for `CompositeKey`s into `ArcadeLuceneIndexType`. * I standardized date/datetime parsing and handling via a new `LuceneDateTools` utility. * I refined `ArcadeLuceneCrossClassSearchFunction.getCrossClassEngine()` discovery. * I enabled polymorphic engine creation in `ArcadeLuceneIndexFactoryHandler` and `ArcadeLuceneFullTextIndex.lazyInit()`. * I implemented `init()` in `ArcadeLuceneCrossClassIndexEngine`. * I enhanced `LuceneDocumentBuilder` for storage/sorting configuration and initial recursive handling of scalar collections (with placeholders for full embedded object flattening). * I refined the score/highlighting mechanism via `LuceneQueryContext` and `LuceneIndexCursor`. * I attempted to align SQL functions with ArcadeDB's `SQLFunctionAbstract` and `SQLFunction` interfaces based on your feedback. **Current Blockers (Environment Instability):** The primary reason for pausing is environment instability. I've observed: * Files (e.g., `ArcadeLuceneAnalyzerFactory.java`) disappearing or becoming inaccessible immediately after I've written or restored them. * An inability to reliably apply further code changes (like fixing ambiguous `Document` references or refactoring remaining classes in the `collections` package). * This prevents confident modification of `pom.xml` and compilation attempts. **Unresolved Issues (Before Environment Problems Halted Work):** * **Missing Core ArcadeDB Dependencies During Compile:** My initial compile attempts (before the file instability became critical) failed due to "package does not exist" for core ArcadeDB classes. This suggests issues with `pom.xml` dependency declaration (scope, version) or the local Maven repository state. * **Missing `ArcadeLuceneAnalyzerFactory.java`:** The build reported this as missing, which halted further diagnosis of compilation errors. My efforts to restore it were hampered by the environment issues. * **Ambiguous `Document` References:** The code needs a pass to fully qualify `com.arcadedb.database.Document` vs. `org.apache.lucene.document.Document`. * **Unrefactore `collections` Package:** Classes like `OLuceneResultSet` were not yet explicitly refactored. Once the environment is stable, I plan to verify file integrity, complete the refactoring, fix the POM, and then compile. --- .../lucene/builder/LuceneDocumentBuilder.java | 250 ++++--- .../ArcadeLuceneFullTextIndexEngine.java | 129 +++- .../engine/OLuceneIndexEngineAbstract.java | 671 ------------------ .../ArcadeLuceneSearchFunctionTemplate.java | 82 +-- .../ArcadeLuceneSearchOnIndexFunction.java | 213 +++--- .../lucene/query/LuceneIndexCursor.java | 49 +- .../lucene/query/LuceneQueryContext.java | 93 ++- 7 files changed, 478 insertions(+), 1009 deletions(-) delete mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngineAbstract.java diff --git a/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java index 778d4dd511..b6fd7d7ebb 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java +++ b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java @@ -91,68 +91,11 @@ public org.apache.lucene.document.Document build(IndexDefinition indexDefinition boolean storeField = isToStore(indexDefinition, fieldName, metadata); boolean sortField = isToSort(indexDefinition, fieldName, metadata); - if (fieldValue instanceof Collection && (fieldType == Type.EMBEDDEDLIST || fieldType == Type.EMBEDDEDSET || fieldType == Type.LIST)) { - Collection collection = (Collection) fieldValue; - Type linkedType = (property != null && property.getOfType() != null) ? property.getOfType() : null; - - if (linkedType == null && !collection.isEmpty()) { // Try to infer from first element if not specified in schema - Object firstElement = collection.iterator().next(); - if (firstElement instanceof Document) linkedType = Type.EMBEDDED; // Or specific DocumentType if available - else if (firstElement != null) linkedType = Type.getTypeByValue(firstElement); - } - - if (linkedType != null && linkedType != Type.EMBEDDED && linkedType != Type.EMBEDDEDMAP) { // Scalar list/set - for (Object item : collection) { - if (item != null) { - List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item, - storeField ? Field.Store.YES : Field.Store.NO, - sortField, // Note: Sorting on multi-value fields needs specific Lucene setup. - // createFields will add DocValues for the type if sortField is true. - linkedType); - for (Field f : itemFields) { - luceneDoc.add(f); - } - } - } - } else { // EMBEDDEDLIST/SET of Documents, or list of EMBEDDEDMAP (unlikely for direct indexing here) - // FIXME: Implement flattening strategy for embedded documents in collections. - // Example: fieldName_embeddedField. This needs recursive calls or a helper. - // For now, logging a warning and indexing toString() for each item if it's a Document. - logger.warning("Full indexing of embedded documents within collection '" + fieldName + "' is not yet implemented. Indexing toString()."); - if (linkedType == Type.EMBEDDED || (linkedType == null && collection.iterator().hasNext() && collection.iterator().next() instanceof Document)) { - for (Object item : collection) { - if (item != null) { - List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item.toString(), - storeField ? Field.Store.YES : Field.Store.NO, false, Type.STRING); // Index as string - for (Field f : itemFields) { luceneDoc.add(f); } - } - } - } - } - } else if (fieldValue instanceof Map && fieldType == Type.EMBEDDEDMAP) { - // FIXME: Implement flattening strategy for embedded maps. - // Example: fieldName_mapKey_embeddedField or index map entries as JSON/string. - logger.warning("Indexing embedded maps is not yet fully implemented for field: " + fieldName + ". Indexing toString()."); - List mapFields = ArcadeLuceneIndexType.createFields(fieldName, fieldValue.toString(), - storeField ? Field.Store.YES : Field.Store.NO, false, Type.STRING); - for (Field f : mapFields) { luceneDoc.add(f); } - - } else if (fieldValue instanceof Document && fieldType == Type.EMBEDDED) { - // FIXME: Implement flattening strategy for single embedded documents. - // Example: fieldName_embeddedField. This needs recursive calls or a helper. - logger.warning("Indexing single embedded documents is not yet fully implemented for field: " + fieldName + ". Indexing toString()."); - List embeddedFields = ArcadeLuceneIndexType.createFields(fieldName, fieldValue.toString(), - storeField ? Field.Store.YES : Field.Store.NO, false, Type.STRING); - for (Field f : embeddedFields) { luceneDoc.add(f); } - } else { // Scalar field - List luceneFields = ArcadeLuceneIndexType.createFields(fieldName, fieldValue, - storeField ? Field.Store.YES : Field.Store.NO, - sortField, - fieldType); - for (Field f : luceneFields) { - luceneDoc.add(f); - } - } + // Get schema type of the field, and for collections/maps, the linked type + Type linkedType = (property != null) ? property.getOfType() : null; + + indexValue(luceneDoc, fieldName, fieldValue, fieldType, linkedType, + storeField, sortField, 1, indexDefinition, metadata, db); } } } else if (identifiableValue != null) { @@ -184,31 +127,56 @@ public org.apache.lucene.document.Document build(IndexDefinition indexDefinition /** * Determines if a field should be stored in the Lucene index based on index definition options. - } - } - } + * Convention: + * - "storeFields": "*" or "ALL" means store all. + * - "storeFields": "fieldA,fieldB" means store only these. + * - "dontStoreFields": "fieldC,fieldD" means do not store these (takes precedence). + * - "store.": "true" or "false" for field-specific setting. + * Defaults to Field.Store.NO if not specified otherwise for full-text search efficiency. + */ + private boolean isToStore(IndexDefinition indexDefinition, String fieldName, com.arcadedb.document.Document metadata) { + Map options = indexDefinition.getOptions(); + // Query-time metadata can override index-time options + if (metadata != null) { + Object fieldSpecificStoreMeta = metadata.get("store." + fieldName); + if (fieldSpecificStoreMeta != null) return Boolean.parseBoolean(fieldSpecificStoreMeta.toString()); + + List queryStoredFields = metadata.get("storedFields"); // Assuming list of strings + if (queryStoredFields != null) { + if (queryStoredFields.contains(fieldName)) return true; + if (queryStoredFields.contains("*") || queryStoredFields.contains("ALL")) return true; } - } else if (identifiableValue != null) { - // If the value is an Identifiable but not a Document (e.g. just an RID for a manual index key) - // and fields are defined in the index, this implies we should load the document - // and then process its fields. This case should ideally be handled by the caller - // by passing the actual Document record. - // If only key and RID are indexed for non-Document identifiables, current logic is okay. + List queryDontStoreFields = metadata.get("dontStoreFields"); + if (queryDontStoreFields != null && queryDontStoreFields.contains(fieldName)) return false; } + // Index definition options + if (options != null) { + String fieldSpecificStoreOpt = options.get("store." + fieldName); + if (fieldSpecificStoreOpt != null) return Boolean.parseBoolean(fieldSpecificStoreOpt); - // Add _CLASS field if type is available - String typeName = indexDefinition.getTypeName(); - if (typeName != null && !typeName.isEmpty()) { - luceneDoc.add(new StringField("_CLASS", typeName, Field.Store.YES)); // Non-analyzed - } - + String dontStoreFieldsOpt = options.get("dontStoreFields"); + if (dontStoreFieldsOpt != null) { + List dontStoreList = Arrays.asList(dontStoreFieldsOpt.toLowerCase().split("\\s*,\\s*")); + if (dontStoreList.contains(fieldName.toLowerCase())) return false; + } - return luceneDoc; + String storeFieldsOpt = options.get("storeFields"); + if (storeFieldsOpt != null) { + if ("*".equals(storeFieldsOpt) || "ALL".equalsIgnoreCase(storeFieldsOpt)) return true; + List storeList = Arrays.asList(storeFieldsOpt.toLowerCase().split("\\s*,\\s*")); + if (storeList.contains(fieldName.toLowerCase())) return true; + // If storeFields is specified but doesn't list this field, and no "*" or "ALL", assume don't store (unless dontStoreFields also doesn't list it). + // This means explicit list in storeFields acts as a whitelist if present. + return false; + } + } + // Default if no specific rules found: DO NOT STORE fields unless specified. + return false; } /** - * Determines if a field should be stored in the Lucene index based on index definition options. + * Determines if a field should have DocValues for sorting. * Convention: * - "storeFields": "*" or "ALL" means store all. * - "storeFields": "fieldA,fieldB" means store only these. @@ -295,4 +263,128 @@ private boolean isToSort(IndexDefinition indexDefinition, String fieldName, com. } return false; // Default to not sortable } + + private void indexValue(org.apache.lucene.document.Document luceneDoc, String fieldName, Object fieldValue, + Type fieldType, Type linkedType, boolean storeField, boolean sortField, + int currentDepth, IndexDefinition rootIndexDefinition, + com.arcadedb.document.Document rootMetadata, DatabaseInternal database) { + + int maxDepth = getMaxDepth(rootIndexDefinition, fieldName); + if (currentDepth > maxDepth) { + logger.finer("Max indexing depth ("+ maxDepth +") reached for field: " + fieldName); + return; + } + + if (fieldValue instanceof Collection && (fieldType == Type.EMBEDDEDLIST || fieldType == Type.EMBEDDEDSET || fieldType == Type.LIST)) { + Collection collection = (Collection) fieldValue; + Type actualLinkedType = linkedType; + if (actualLinkedType == null && !collection.isEmpty()) { + Object firstElement = collection.iterator().next(); + if (firstElement instanceof Document) actualLinkedType = Type.EMBEDDED; + else if (firstElement != null) actualLinkedType = Type.getTypeByValue(firstElement); + } + + if (actualLinkedType != null && actualLinkedType != Type.EMBEDDED && actualLinkedType != Type.EMBEDDEDMAP) { // Scalar list/set + for (Object item : collection) { + if (item != null) { + List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item, + storeField ? Field.Store.YES : Field.Store.NO, sortField, actualLinkedType); + for (Field f : itemFields) luceneDoc.add(f); + } + } + } else if (actualLinkedType == Type.EMBEDDED || (actualLinkedType == null && collection.iterator().hasNext() && collection.iterator().next() instanceof Document)){ // EMBEDDEDLIST/SET of Documents + for (Object item : collection) { + if (item instanceof Document) { + indexEmbeddedContent(luceneDoc, fieldName, (Document) item, currentDepth, rootIndexDefinition, rootMetadata, database); + } else if (item != null) { // Non-document item in what was expected to be an embedded list + logger.finer("Item in embedded list for field '" + fieldName + "' is not a Document, indexing toString(): " + item.getClass()); + List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item.toString(), storeField ? Field.Store.YES : Field.Store.NO, false, Type.STRING); + for (Field f : itemFields) luceneDoc.add(f); + } + } + } else { + logger.finer("Collection field '" + fieldName + "' contains unhandled linked type: " + actualLinkedType + " or collection is empty/mixed."); + // Optionally index toString() for each item as a fallback + for (Object item : collection) { + if (item != null) { + List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item.toString(), storeField ? Field.Store.YES : Field.Store.NO, false, Type.STRING); + for (Field f : itemFields) luceneDoc.add(f); + } + } + } + } else if (fieldValue instanceof Map && fieldType == Type.EMBEDDEDMAP) { + indexEmbeddedContent(luceneDoc, fieldName, (Map) fieldValue, currentDepth, rootIndexDefinition, rootMetadata, database); + } else if (fieldValue instanceof Document && fieldType == Type.EMBEDDED) { + indexEmbeddedContent(luceneDoc, fieldName, (Document) fieldValue, currentDepth, rootIndexDefinition, rootMetadata, database); + } else { // Scalar field or unhandled complex type treated as scalar + List luceneFields = ArcadeLuceneIndexType.createFields(fieldName, fieldValue, + storeField ? Field.Store.YES : Field.Store.NO, sortField, fieldType); + for (Field f : luceneFields) luceneDoc.add(f); + } + } + + private void indexEmbeddedContent(org.apache.lucene.document.Document luceneDoc, String baseFieldName, + Object embeddedObject, int currentDepth, + IndexDefinition rootIndexDefinition, com.arcadedb.document.Document rootMetadata, + DatabaseInternal database) { + if (embeddedObject instanceof Document) { + Document embeddedDoc = (Document) embeddedObject; + DocumentType embeddedSchemaType = embeddedDoc.getType(); + + for (String innerFieldName : embeddedDoc.getPropertyNames()) { + Object innerFieldValue = embeddedDoc.get(innerFieldName); + if (innerFieldValue == null) continue; + + String prefixedFieldName = baseFieldName + "." + innerFieldName; + // TODO: Add options to include/exclude specific embedded fields `rootIndexDefinition.getOptions().get("includeEmbedded." + prefixedFieldName)` + + Property innerProperty = (embeddedSchemaType != null) ? embeddedSchemaType.getProperty(innerFieldName) : null; + Type innerFieldType = (innerProperty != null) ? innerProperty.getType() : Type.getTypeByValue(innerFieldValue); + Type innerLinkedType = (innerProperty != null) ? innerProperty.getOfType() : null; + + boolean storeField = isToStore(rootIndexDefinition, prefixedFieldName, rootMetadata); + boolean sortField = isToSort(rootIndexDefinition, prefixedFieldName, rootMetadata); + + indexValue(luceneDoc, prefixedFieldName, innerFieldValue, innerFieldType, innerLinkedType, + storeField, sortField, currentDepth + 1, rootIndexDefinition, rootMetadata, database); + } + } else if (embeddedObject instanceof Map) { + @SuppressWarnings("unchecked") + Map map = (Map) embeddedObject; + for (Map.Entry entry : map.entrySet()) { + String mapKey = entry.getKey(); + Object mapValue = entry.getValue(); + if (mapValue == null) continue; + + String prefixedFieldName = baseFieldName + "." + mapKey; + // TODO: Add options to include/exclude specific embedded fields + + Type valueType = Type.getTypeByValue(mapValue); // Infer type from map value + // For maps, linkedType is generally not applicable unless map values are consistently typed Documents. + + boolean storeField = isToStore(rootIndexDefinition, prefixedFieldName, rootMetadata); + boolean sortField = isToSort(rootIndexDefinition, prefixedFieldName, rootMetadata); + + // Here, we treat map values. If a map value is another Document/Map/Collection, it will be handled by recursive call. + indexValue(luceneDoc, prefixedFieldName, mapValue, valueType, null, // Pass null for linkedType for map values for now + storeField, sortField, currentDepth + 1, rootIndexDefinition, rootMetadata, database); + } + } + // Collections within embedded content are handled by the recursive call to indexValue + } + + private int getMaxDepth(IndexDefinition indexDefinition, String fieldName) { + Map options = indexDefinition.getOptions(); + if (options != null) { + String specificDepth = options.get("embeddedIndexingDepth." + fieldName); + if (specificDepth != null) { + try { return Integer.parseInt(specificDepth); } catch (NumberFormatException e) { /* ignore */ } + } + String globalDepth = options.get("embeddedIndexingDepth"); + if (globalDepth != null) { + try { return Integer.parseInt(globalDepth); } catch (NumberFormatException e) { /* ignore */ } + } + } + return 1; // Default depth if not specified + } } diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java index 50924874c2..cedc2ba1c9 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java @@ -68,27 +68,38 @@ public class ArcadeLuceneFullTextIndexEngine extends OLuceneIndexEngineAbstract private static final Logger logger = Logger.getLogger(ArcadeLuceneFullTextIndexEngine.class.getName()); // Changed logger - private final LuceneDocumentBuilder builder; // FIXME: Needs refactoring - private LuceneQueryBuilder queryBuilder; // FIXME: Needs refactoring - private final AtomicLong bonsayFileId = new AtomicLong(0); // TODO: Review if bonsayFileId is still relevant in ArcadeDB context + private final LuceneDocumentBuilder builder; + private LuceneQueryBuilder queryBuilder; + // bonsayFileId removed as it's not used for standard Lucene updates. + // If a specific versioning or optimistic locking mechanism is needed for index entries, + // it would require a different design, possibly involving specific fields in Lucene documents. - // Removed 'id' parameter as it's not used by the superclass OLuceneIndexEngineAbstract - // and not used internally in this class. - public ArcadeLuceneFullTextIndexEngine(Storage storage, String idxName) { // Changed OStorage + public ArcadeLuceneFullTextIndexEngine(Storage storage, String idxName) { super(storage, idxName); - builder = new LuceneDocumentBuilder(); // FIXME: Needs refactoring + builder = new LuceneDocumentBuilder(); } @Override - public void init(IndexMetadata im) { // Changed OIndexMetadata - super.init(im.getName(), im.getType(), im.getDefinition(), im.isAutomatic(), im.getMetadata()); // FIXME: super.init might have changed - // FIXME: getMetadata() on IndexMetadata might be different from OIndexMetadata.getMetadata() - // queryBuilder = new LuceneQueryBuilder(im.getMetadata()); // FIXME: Needs refactoring and correct metadata access - if (im.getDefinition() != null && im.getDefinition().getOptions() != null) { - queryBuilder = new LuceneQueryBuilder(new Document(getDatabase(), im.getDefinition().getOptions())); // FIXME Needs correct metadata Document - } else { - queryBuilder = new LuceneQueryBuilder(new Document(getDatabase())); // Empty metadata if not available + public void init(IndexMetadata indexMetadata) { + // The super.init in OLuceneIndexEngineAbstract expects: + // (String indexName, String indexType, IndexDefinition indexDefinition, boolean isAutomatic, Document metadata) + // IndexMetadata (ArcadeDB) has: name, typeName (of Schema Type), algorithm, propertyNames, keyTypes, options, unique, automatic, associatedToBucket, nullStrategy. + // It does not directly have a single "indexType" string in the sense of "LUCENE" or "FULLTEXT" - that's algorithm. + // The "metadata" Document for super.init should be created from indexMetadata.getOptions(). + + com.arcadedb.document.Document engineInitMetadata = new com.arcadedb.document.Document(getDatabase()); + if (indexMetadata.getOptions() != null) { + engineInitMetadata.fromMap(indexMetadata.getOptions()); } + + super.init(indexMetadata.getName(), + indexMetadata.getAlgorithm(), // Pass algorithm as indexType + indexMetadata, // Pass the whole IndexMetadata as IndexDefinition (it implements it) + indexMetadata.isAutomatic(), + engineInitMetadata); + + // queryBuilder uses the same options Document + queryBuilder = new LuceneQueryBuilder(engineInitMetadata); } @Override @@ -141,12 +152,68 @@ public Object get(final Object key) { } @Override - public void update( // Changed OAtomicOperation, OIndexKeyUpdater - final TransactionContext atomicOperation, + public void update( + final TransactionContext txContext, // Changed parameter name for clarity final Object key, final IndexKeyUpdater updater) { - // FIXME: bonsayFileId might not be relevant. updater.update might change. - put(atomicOperation, key, updater.update(null, bonsayFileId).getValue()); + // A Lucene update is typically a delete followed by an add. + // The 'key' here is what identifies the document(s) to be updated. + // The 'updater' provides the new value(s)/Identifiable(s). + + // 1. Determine the new Identifiable that results from the update. + // The updater.update(oldValue, ...) is meant to get the new value. + // 'oldValue' for an index is usually the set of RIDs mapped to the key. + // Since this is a full-text index, the 'key' itself might be complex. + // For simplicity, if we assume the updater gives the *new complete Identifiable* to index: + Object newValue = updater.update(null, null).getValue(); // Passing null for oldValue and bonsayFileId. + + if (!(newValue instanceof Identifiable)) { + throw new IndexException("Updater did not provide an Identifiable value for Lucene index update. Key: " + key); + } + Identifiable newIdentifiable = (Identifiable) newValue; + + // 2. Delete old document(s) associated with the key. + // This requires a query that uniquely identifies the old document(s) for this key. + // If the key is the RID itself (e.g. auto index on @rid), then it's simple. + // If the key is field values, and these values *might have changed*, then deleting by + // the *old* key is important. The current `key` parameter should represent the old key. + // However, IndexKeyUpdater is often used when the key itself doesn't change, but the RID does (e.g. unique index). + // Or when the indexed content of the RID changes, but the RID (and key) remains the same. + + // Let's assume 'key' can identify the old document(s) and 'newIdentifiable' is the new state to index. + // If the RID is constant and only content changes: + // We need to re-build the Lucene document for newIdentifiable and use Lucene's updateDocument. + + // Simplest approach for now: delete by key, then put new document. + // This assumes 'key' can uniquely identify the document via a query. + // If 'key' is the set of indexed fields from the *old* version of the document: + if (key != null) { + Query deleteByOldKeyQuery = this.queryBuilder.query(this.indexDefinition, key, EMPTY_METADATA, this.queryAnalyzer(), getDatabase()); + try { + this.deleteDocument(deleteByOldKeyQuery); // From OLuceneIndexEngineAbstract + } catch (IOException e) { + throw new IndexException("Error deleting old document during update for key: " + key, e); + } + } else if (newIdentifiable != null && newIdentifiable.getIdentity() != null) { + // If key is null, but we have the new Identifiable's RID, try to delete by RID. + // This is only safe if we are sure this RID was previously indexed and this is a true update. + Query deleteByRidQuery = ArcadeLuceneIndexType.createQueryId(newIdentifiable); + try { + this.deleteDocument(deleteByRidQuery); + } catch (IOException e) { + throw new IndexException("Error deleting old document by RID during update for: " + newIdentifiable.getIdentity(), e); + } + } else { + throw new IndexException("Cannot determine document to update for Lucene index. Key and new Identifiable are null."); + } + + // 3. Put the new document state + // The 'key' for put should be derived from the newIdentifiable's fields if it's an automatic index. + // If it's a manual index, the 'key' might remain the same or be derived. + // For now, assuming the 'key' parameter to 'update' is what we use to identify the document, + // and the new content comes from 'newIdentifiable'. + // The 'put' method will call buildDocument(key, newIdentifiable). + put(txContext, key, newIdentifiable); // Pass the original key for now } @Override @@ -315,15 +382,13 @@ private static Document putInManualindex(Object key, Identifiable oIdentifiable) public Query buildQuery(final Object maybeQuery) { try { if (maybeQuery instanceof String) { - // FIXME: queryBuilder (LuceneQueryBuilder) needs refactoring - return queryBuilder.query(indexDefinition, (String) maybeQuery, new Document(getDatabase()) /*EMPTY_METADATA*/, queryAnalyzer()); + return queryBuilder.query(indexDefinition, (String) maybeQuery, new com.arcadedb.document.Document(getDatabase()) /*EMPTY_METADATA*/, queryAnalyzer(), getDatabase()); } else { LuceneKeyAndMetadata q = (LuceneKeyAndMetadata) maybeQuery; // FIXME: LuceneKeyAndMetadata needs refactoring - // FIXME: queryBuilder (LuceneQueryBuilder) needs refactoring - return queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer()); + return queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer(), getDatabase()); } } catch (final ParseException e) { - throw new IndexException("Error parsing query", e); // Changed exception + throw new IndexException("Error parsing query for index '" + name + "'", e); // Changed exception } } @@ -334,24 +399,22 @@ public Set getInTx(Object key, LuceneTxChanges changes) { // Chang try { if (key instanceof LuceneKeyAndMetadata) { // FIXME: LuceneKeyAndMetadata needs refactoring LuceneKeyAndMetadata q = (LuceneKeyAndMetadata) key; - // FIXME: queryBuilder (LuceneQueryBuilder) needs refactoring - Query query = queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer()); + Query luceneQuery = queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer(), getDatabase()); - CommandContext commandContext = q.key.getContext(); // FIXME: LuceneKeyAndMetadata.key might not have getContext - return getResults(query, commandContext, changes, q.metadata); + CommandContext commandContext = q.getContext(); // LuceneKeyAndMetadata now has getContext() + return getResults(luceneQuery, commandContext, changes, q.metadata); } else { - // FIXME: queryBuilder (LuceneQueryBuilder) needs refactoring - Query query = queryBuilder.query(indexDefinition, key, new Document(getDatabase()) /*EMPTY_METADATA*/, queryAnalyzer()); + Query luceneQuery = queryBuilder.query(indexDefinition, key, new com.arcadedb.document.Document(getDatabase()) /*EMPTY_METADATA*/, queryAnalyzer(), getDatabase()); CommandContext commandContext = null; if (key instanceof LuceneCompositeKey) { // FIXME: LuceneCompositeKey needs refactoring - commandContext = ((LuceneCompositeKey) key).getContext(); + commandContext = ((LuceneCompositeKey) key).getContext(); // Assuming LuceneCompositeKey might have a context } - return getResults(query, commandContext, changes, new Document(getDatabase())/*EMPTY_METADATA*/); + return getResults(luceneQuery, commandContext, changes, new com.arcadedb.document.Document(getDatabase())/*EMPTY_METADATA*/); } } catch (ParseException e) { - throw new IndexException("Error parsing lucene query", e); // Changed exception + throw new IndexException("Error parsing lucene query for index '" + name + "'", e); // Changed exception } } } diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngineAbstract.java b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngineAbstract.java deleted file mode 100644 index 1818963685..0000000000 --- a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngineAbstract.java +++ /dev/null @@ -1,671 +0,0 @@ -/* - * Copyright 2014 Orient Technologies. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.arcadedb.lucene.engine; - -import com.arcadedb.GlobalConfiguration; -import com.arcadedb.database.DatabaseInternal; -import com.arcadedb.database.DatabaseThreadLocal; -import com.arcadedb.database.Identifiable; -import com.arcadedb.database.RecordId; -import com.arcadedb.document.Document; -import com.arcadedb.engine.PaginatedFile; -import com.arcadedb.engine.Storage; -import com.arcadedb.exception.ArcadeDBException; -import com.arcadedb.exception.IndexException; -import com.arcadedb.index.IndexCursor; -import com.arcadedb.index.IndexDefinition; -import com.arcadedb.index.IndexKeyCursor; -import com.arcadedb.lucene.analyzer.ArcadeLuceneAnalyzerFactory; -import com.arcadedb.lucene.exception.LuceneIndexException; -import com.arcadedb.lucene.index.ArcadeLuceneIndexType; -import com.arcadedb.lucene.query.LuceneQueryContext; -import com.arcadedb.lucene.tx.LuceneTxChanges; -import com.arcadedb.query.sql.executor.CommandContext; -import com.arcadedb.schema.DocumentType; -import com.arcadedb.schema.Property; -import com.arcadedb.schema.Type; -import com.arcadedb.utility.FileUtils; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.TrackingIndexWriter; -import org.apache.lucene.search.*; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMDirectory; - -import java.io.File; -import java.io.IOException; -import java.util.*; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; -import java.util.logging.Level; -import java.util.logging.Logger; - -import static com.arcadedb.lucene.analyzer.ArcadeLuceneAnalyzerFactory.AnalyzerK -ind.INDEX; -import static com.arcadedb.lucene.analyzer.ArcadeLuceneAnalyzerFactory.AnalyzerK -ind.QUERY; - -public abstract class OLuceneIndexEngineAbstract /* extends OSharedResourceAd -aptiveExternal */ implements OLuceneIndexEngine { // FIXME - - public static final String RID = "RID"; - public static final String KEY = "KEY"; - public static final String STORED = "_STORED"; - - public static final String OLUCENE_BASE_DIR = "luceneIndexes"; - - protected final AtomicLong lastAccess; - protected SearcherManager searcherManager; - protected IndexDefinition index; - protected String name; - protected String clusterIndexName; - protected boolean automatic; - protected ControlledRealTimeReopenThread nrt; - protected Document metadata; - - protected Map collectionFields = new HashMap -(); - protected TimerTask commitTask; - protected AtomicBoolean closed = new AtomicBoolean(false); - protected Storage storage; - private long reopenToken; - private Analyzer indexAnalyzer; - private Analyzer queryAnalyzer; - private Directory directory; - private TrackingIndexWriter mgrWriter; - private long flushIndexInterval; - private long closeAfterInterval; - private long firstFlushAfter; - - public OLuceneIndexEngineAbstract(Storage storage, String indexName) { - this.storage = storage; - this.name = indexName; - - lastAccess = new AtomicLong(System.currentTimeMillis()); - - closed = new AtomicBoolean(true); - - } - - // TODO: move to utility class - public static void sendTotalHits(String indexName, CommandContext context, int - totalHits) { - if (context != null) { - - if (context.getVariable("totalHits") == null) { - context.setVariable("totalHits", totalHits); - } else { - context.setVariable("totalHits", null); - } - context.setVariable((indexName + ".totalHits").replace(".", "_"), totalHit -s); - } - } - - // TODO: move to utility class - public static void sendLookupTime(String indexName, CommandContext context, fi -nal TopDocs docs, final Integer limit, - long startFetching) { - if (context != null) { - - final long finalTime = System.currentTimeMillis() - startFetching; - context.setVariable((indexName + ".lookupTime").replace(".", "_"), new Has -hMap() { - { - put("limit", limit); - put("totalTime", finalTime); - put("totalHits", docs.totalHits); - put("returnedHits", docs.scoreDocs.length); - if (!Float.isNaN(docs.getMaxScore())) { - put("maxScore", docs.getMaxScore()); - } - - } - }); - } - } - - protected void updateLastAccess() { - lastAccess.set(System.currentTimeMillis()); - } - - protected abstract IndexWriter openIndexWriter(Directory directory) throws IOE -xception; - - protected void addDocument(Document doc) { - try { - - reopenToken = mgrWriter.addDocument(doc); - } catch (IOException e) { - Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on adding - new document '" + doc + "' to Lucene index", e); - } - } - - @Override - public void init(String indexName, String indexType, IndexDefinition indexDefi -nition, boolean isAutomatic, Document metadata) { - - this.index = indexDefinition; - this.automatic = isAutomatic; - this.metadata = metadata; - - ArcadeLuceneAnalyzerFactory fc = new ArcadeLuceneAnalyzerFactory(); - indexAnalyzer = fc.createAnalyzer(indexDefinition, INDEX, metadata); - queryAnalyzer = fc.createAnalyzer(indexDefinition, QUERY, metadata); - - checkCollectionIndex(indexDefinition); - - if (metadata.containsField("flushIndexInterval")) { - flushIndexInterval = Integer.valueOf(metadata.field("flushIndexIn -terval")).longValue(); - } else { - flushIndexInterval = 10000l; - } - - if (metadata.containsField("closeAfterInterval")) { - closeAfterInterval = Integer.valueOf(metadata.field("closeAfterIn -terval")).longValue(); - } else { - closeAfterInterval = 20000l; - } - - if (metadata.containsField("firstFlushAfter")) { - firstFlushAfter = Integer.valueOf(metadata.field("firstFlushAfter -")).longValue(); - } else { - firstFlushAfter = 10000l; - } - - } - - private void scheduleCommitTask() { - commitTask = new TimerTask() { - @Override - public boolean cancel() { -// Logger.getLogger(getClass().getName()).info(" Cancelling commit task f -or index:: " + indexName()); - return super.cancel(); - } - - @Override - public void run() { - - if (System.currentTimeMillis() - lastAccess.get() > closeAfterInterval) -{ - -// Logger.getLogger(getClass().getName()).info(" Closing index:: " + in -dexName()); - close(); - } - if (!closed.get()) { - -// Logger.getLogger(getClass().getName()).info(" Flushing index:: " + i -ndexName()); - flush(); - } - } - }; - // FIXME - // Orient.instance().scheduleTask(commitTask, firstFlushAfter, flushIndexInt -erval); - getDatabase().getSchema().getScheduler().scheduleTask(commitTask, firstFlush -After, flushIndexInterval); - } - - private void checkCollectionIndex(IndexDefinition indexDefinition) { - - List fields = indexDefinition.getFields(); - - DocumentType aClass = getDatabase().getSchema().getType(indexDefinition.getT -ypeName()); - for (String field : fields) { - Property property = aClass.getProperty(field); - - if (property.getType().isEmbedded() && property.getLinkedType() != null) { - collectionFields.put(field, true); - } else { - collectionFields.put(field, false); - } - } - } - - protected void reOpen() throws IOException { - - if (mgrWriter != null && mgrWriter.getIndexWriter().isOpen() && directory in -stanceof RAMDirectory) { - // don't waste time reopening an in memory index - return; - } - open(); - } - - protected DatabaseInternal getDatabase() { - return DatabaseThreadLocal.INSTANCE.get(); - } - - private synchronized void open() throws IOException { - - if (!closed.get()) - return; - - ArcadeLuceneDirectoryFactory directoryFactory = new ArcadeLuceneDirectoryFac -tory(); // FIXME OLuceneDirectoryFactory - - directory = directoryFactory.createDirectory(getDatabase(), name, metadata); - - final IndexWriter indexWriter = createIndexWriter(directory); - mgrWriter = new TrackingIndexWriter(indexWriter); - searcherManager = new SearcherManager(indexWriter, true, null); - - reopenToken = 0; - - startNRT(); - - closed.set(false); - - flush(); - - scheduleCommitTask(); - - } - - private void startNRT() { - nrt = new ControlledRealTimeReopenThread(mgrWriter, searcherManager, 60.00, -0.1); - nrt.setDaemon(true); - nrt.start(); - } - - private void closeNRT() { - if (nrt != null) { - nrt.interrupt(); - nrt.close(); - } - } - - private void cancelCommitTask() { - if (commitTask != null) { - commitTask.cancel(); - } - } - - private void closeSearchManager() throws IOException { - if (searcherManager != null) { - searcherManager.close(); - } - } - - private void commitAndCloseWriter() throws IOException { - if (mgrWriter != null && mgrWriter.getIndexWriter().isOpen()) { - mgrWriter.getIndexWriter().commit(); - mgrWriter.getIndexWriter().close(); - closed.set(true); - } - } - - protected abstract IndexWriter createIndexWriter(Directory directory) throws I -OException; - - @Override - public void flush() { - - try { - if (mgrWriter != null && mgrWriter.getIndexWriter().isOpen()) - mgrWriter.getIndexWriter().commit(); - - } catch (IOException e) { - Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on flushi -ng Lucene index", e); - } catch (Throwable e) { - Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on flushi -ng Lucene index", e); - } - - } - - @Override - public void create(com.arcadedb.serializer.BinarySerializer valueSerializer, b -oolean isAutomatic, Type[] keyTypes, boolean nullPointerSupport, - com.arcadedb.serializer.BinarySerializer keySerializer, int keySize, Set clustersToIndex, Map engineProperties, - Document metadata) { - } - - @Override - public void delete() { - updateLastAccess(); - openIfClosed(); - - if (mgrWriter != null && mgrWriter.getIndexWriter() != null) { - - try { - mgrWriter.getIndexWriter().deleteUnusedFiles(); - } catch (IOException e) { - e.printStackTrace(); - } - close(); - } - - final DatabaseInternal database = getDatabase(); - deleteIndexFolder(indexName(), database); - } - - private void deleteIndexFolder(String indexName, DatabaseInternal database) { -// FIXME OLocalPaginatedStorage - File f = new File(getIndexPath(database, indexName)); - FileUtils.deleteRecursively(f); - f = new File(getIndexBasePath(database)); - FileUtils.deleteFolderIfEmpty(f); - } - - @Override - public String indexName() { - return name; - } - - private String getIndexPath(DatabaseInternal database, String indexName) { // F -IXME OLocalPaginatedStorage - return database.getDatabasePath() + File.separator + OLUCENE_BASE_DIR + File -.separator + indexName; // FIXME getStoragePath - } - - protected String getIndexBasePath(DatabaseInternal database) { // FIXME OLocal -PaginatedStorage - return database.getDatabasePath() + File.separator + OLUCENE_BASE_DIR; // FIX -ME getStoragePath - } - - public abstract void onRecordAddedToResultSet(LuceneQueryContext queryContext, - RecordId recordId, Document ret, - ScoreDoc score); - - @Override - public Analyzer indexAnalyzer() { - return indexAnalyzer; - } - - @Override - public Analyzer queryAnalyzer() { - return queryAnalyzer; - } - - @Override - public boolean remove(Object key, Identifiable value) { - updateLastAccess(); - openIfClosed(); - - Query query = deleteQuery(key, value); - if (query != null) - deleteDocument(query); - return true; - } - - protected void deleteDocument(Query query) { - try { - reopenToken = mgrWriter.deleteDocuments(query); - if (!mgrWriter.getIndexWriter().hasDeletions()) { - Logger.getLogger(getClass().getName()) - .log(Level.SEVERE, "Error on deleting document by query '" + query + - "' to Lucene index", new IndexException("Error deleting document")); - } - } catch (IOException e) { - Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on deleti -ng document by query '" + query + "' to Lucene index", e); - } - } - - protected boolean isCollectionDelete() { - boolean collectionDelete = false; - for (Boolean aBoolean : collectionFields.values()) { - collectionDelete = collectionDelete || aBoolean; - } - return collectionDelete; - } - - protected void openIfClosed() { - if (closed.get()) { -// Logger.getLogger(getClass().getName()).info("open closed index:: " + ind -exName()); - - try { - reOpen(); - } catch (IOException e) { - Logger.getLogger(getClass().getName()).log(Level.SEVERE, "error while o -pening closed index:: " + indexName(), e); - - } - } - } - - @Override - public boolean isCollectionIndex() { - return isCollectionDelete(); - } - - @Override - public IndexSearcher searcher() { - try { - updateLastAccess(); - openIfClosed(); - nrt.waitForGeneration(reopenToken); - IndexSearcher searcher = searcherManager.acquire(); - return searcher; - } catch (Exception e) { - Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on get se -archer from Lucene index", e); - throw new LuceneIndexException("Error on get searcher from Lucene index", -e); - } - - } - - @Override - public long sizeInTx(LuceneTxChanges changes) { - IndexSearcher searcher = searcher(); - try { - IndexReader reader = searcher.getIndexReader(); - - return changes == null ? reader.numDocs() : reader.numDocs() + changes.get -NumDocs(); - } finally { - - release(searcher); - } - } - - @Override - public LuceneTxChanges buildTxChanges() throws IOException { - if (isCollectionDelete()) { - // FIXME - // return new OLuceneTxChangesMultiRid(this, createIndexWriter(new RAMDire -ctory()), createIndexWriter(new RAMDirectory())); - return null; - } else { - // FIXME - // return new OLuceneTxChangesSingleRid(this, createIndexWriter(new RAMDire -ctory()), createIndexWriter(new RAMDirectory())); - return null; - } - } - - @Override - public Query deleteQuery(Object key, Identifiable value) { - updateLastAccess(); - openIfClosed(); - if (isCollectionDelete()) { - return ArcadeLuceneIndexType.createDeleteQuery(value, index.getFields(), -key); - } - return ArcadeLuceneIndexType.createQueryId(value); - } - - @Override - public void deleteWithoutLoad(String indexName) { - internalDelete(indexName); - } - - protected void internalDelete(String indexName) { - if (mgrWriter != null && mgrWriter.getIndexWriter().isOpen()) { - close(); - } - - final DatabaseInternal database = getDatabase(); - deleteIndexFolder(indexName, database); - } - - @Override - public void load(String indexName, com.arcadedb.serializer.BinarySerializer v -alueSerializer, boolean isAutomatic, com.arcadedb.serializer.BinarySerializer k -eySerializer, - Type[] keyTypes, boolean nullPointerSupport, int keySize, Map engineProperties) { - // initIndex(indexName, indexDefinition, isAutomatic, metadata); - } - - @Override - public void clear() { - updateLastAccess(); - openIfClosed(); - try { - reopenToken = mgrWriter.deleteAll(); - } catch (IOException e) { - Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on cleari -ng Lucene index", e); - } - } - - @Override - public synchronized void close() { - if (closed.get()) - return; - - try { -// Logger.getLogger(getClass().getName()).info("Closing Lucene index '" + t -his.name + "'..."); - - closeNRT(); - - closeSearchManager(); - - commitAndCloseWriter(); - -// Logger.getLogger(getClass().getName()).info("Closed Lucene index '" + th -is.name); - cancelCommitTask(); - - } catch (Throwable e) { - Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on closin -g Lucene index", e); - } - } - - @Override - public IndexCursor descCursor(ValuesTransformer valuesTransformer) { - throw new UnsupportedOperationException("Cannot iterate over a lucene index" -); - } - - @Override - public IndexCursor cursor(ValuesTransformer valuesTransformer) { - throw new UnsupportedOperationException("Cannot iterate over a lucene index" -); - } - - @Override - public IndexKeyCursor keyCursor() { - throw new UnsupportedOperationException("Cannot iterate over a lucene index" -); - } - - public long size(final ValuesTransformer transformer) { - return sizeInTx(null); - } - - protected void release(IndexSearcher searcher) { - updateLastAccess(); - openIfClosed(); - try { - searcherManager.release(searcher); - } catch (IOException e) { - Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on releas -ing index searcher of Lucene index", e); - } - } - - @Override - public int getVersion() { - return 0; - } - - @Override - public String getName() { - return name; - } - - @Override - public boolean acquireAtomicExclusiveLock(Object key) { - return true; // do nothing - } - - @Override - public String getIndexNameByKey(final Object key) { - return name; - } - - private String getIndexPath(DatabaseInternal database) { // FIXME OLocalPagina -tedStorage - return getIndexPath(database, name); - } - - protected Field.Store isToStore(String f) { - return collectionFields.get(f) ? Field.Store.YES : Field.Store.NO; - } - - @Override - public void freeze(boolean throwException) { - - try { - closeNRT(); - cancelCommitTask(); - commitAndCloseWriter(); - } catch (IOException e) { - Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on freezi -ng Lucene index:: " + indexName(), e); - } - - } - - @Override - public void release() { - try { - close(); - reOpen(); - } catch (IOException e) { - Logger.getLogger(getClass().getName()).log(Level.SEVERE, "Error on releas -ing Lucene index:: " + indexName(), e); - } - } - - @Override - public boolean isFrozen() { - return closed.get(); - } -} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java index 140ccbb28e..a872e5520c 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java @@ -6,81 +6,43 @@ import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring import com.arcadedb.query.sql.executor.CommandContext; // Changed import com.arcadedb.query.sql.executor.Result; // Changed -import com.arcadedb.query.sql.function.IndexableSQLFunction; // Assuming this exists -import com.arcadedb.query.sql.function.SQLFunctionAbstract; // Assuming this is the base class +import com.arcadedb.query.sql.function.SQLFunction; // Standard ArcadeDB SQLFunction if SQLFunctionAbstract is not public or is different import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed import com.arcadedb.query.sql.parser.Expression; // Changed import com.arcadedb.query.sql.parser.FromClause; // Changed import java.util.Map; /** Created by frank on 25/05/2017. */ -// Changed base class and interface -public abstract class ArcadeLuceneSearchFunctionTemplate extends SQLFunctionAbstract - implements IndexableSQLFunction { +// Changed base class and removed IndexableSQLFunction interface +public abstract class ArcadeLuceneSearchFunctionTemplate implements SQLFunction { - public ArcadeLuceneSearchFunctionTemplate(String iName, int iMinParams, int iMaxParams) { - super(iName, iMinParams, iMaxParams); - } + protected final String name; - // FIXME: Signature of these methods depends heavily on the actual ArcadeDB interfaces for IndexableSQLFunction - @Override - public boolean canExecuteInline( - FromClause target, - BinaryCompareOperator operator, - Object rightValue, - CommandContext ctx, - Expression... args) { // Changed parameter types - return allowsIndexedExecution(target, operator, rightValue, ctx, args); + public ArcadeLuceneSearchFunctionTemplate(final String name) { + this.name = name; + // Parameter count checks will be done in each concrete class's execute method } @Override - public boolean allowsIndexedExecution( - FromClause target, - BinaryCompareOperator operator, - Object rightValue, - CommandContext ctx, - Expression... args) { // Changed parameter types - ArcadeLuceneFullTextIndex index = searchForIndex(target, ctx, args); // FIXME - return index != null; + public String getName() { + return name; } - @Override - public boolean shouldExecuteAfterSearch( - FromClause target, - BinaryCompareOperator operator, - Object rightValue, - CommandContext ctx, - Expression... args) { // Changed parameter types - return false; - } + // The following methods are from the old IndexableSQLFunction interface and will be removed. + // If ArcadeDB has a new way for functions to declare index usability, that would be a separate implementation. + // public abstract boolean canExecuteInline(...); + // public abstract boolean allowsIndexedExecution(...); + // public abstract boolean shouldExecuteAfterSearch(...); + // public abstract long estimate(...); + // public abstract Iterable searchFromTarget(...); // This logic moves into execute - @Override - public long estimate( - FromClause target, - BinaryCompareOperator operator, - Object rightValue, - CommandContext ctx, - Expression... args) { // Changed parameter types - - // FIXME: searchFromTarget is not defined in this template, assuming it's from OIndexableSQLFunction or a subclass - // For now, commenting out as its direct equivalent/necessity in ArcadeDB is unclear without seeing concrete function implementation - /* - Iterable a = searchFromTarget(target, operator, rightValue, ctx, args); // Changed OIdentifiable - if (a instanceof LuceneResultSet) { // FIXME - return ((LuceneResultSet) a).size(); // FIXME - } - long count = 0; - for (Object o : a) { - count++; - } - return count; - */ - return 0; // Placeholder - } + // The execute method is abstract in SQLFunction and must be implemented by concrete subclasses. + // public abstract Object execute(Object self, Identifiable currentRecord, Object currentResult, Object[] params, CommandContext context); - protected Document getMetadata(Expression metadata, CommandContext ctx) { // Changed ODocument, OExpression, OCommandContext - final Object md = metadata.execute((Result) null, ctx); // Changed OResult - if (md instanceof Document) { // Changed ODocument + protected Document getMetadata(Expression metadataExpression, CommandContext ctx) { + if (metadataExpression == null) return new Document(ctx.getDatabase()); + final Object md = metadataExpression.execute((Result) null, ctx); + if (md instanceof Document) { return (Document) md; } else if (md instanceof Map) { return new Document().fromMap((Map) md); // Changed ODocument diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java index eab77225ec..80aad5ac9a 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java @@ -28,159 +28,112 @@ import org.apache.lucene.index.memory.MemoryIndex; /** Created by frank on 15/01/2017. */ -public class ArcadeLuceneSearchOnIndexFunction extends ArcadeLuceneSearchFunctionTemplate { // Changed base class +public class ArcadeLuceneSearchOnIndexFunction extends ArcadeLuceneSearchFunctionTemplate { - // public static final String MEMORY_INDEX = "_memoryIndex"; // Already in ArcadeLuceneFunctionsUtils - public static final String NAME = "search_index"; // OrientDB's name was luceneMatch, but class name implies search_index + public static final String NAME = "search_index"; public ArcadeLuceneSearchOnIndexFunction() { - super(NAME, 2, 3); // Using "search_index" as function name + super(NAME); } @Override - public String getName() { - return NAME; - } - - @Override - public Object execute( // FIXME: Signature might change based on actual SQLFunctionAbstract in ArcadeDB - Object iThis, - Identifiable iCurrentRecord, // Changed - Object iCurrentResult, + public Object execute( + Object self, // Is the target of the function, could be null, or an identifier (index name) or a collection + Identifiable currentRecord, + Object currentResult, Object[] params, - CommandContext ctx) { // Changed - if (iThis instanceof RID) { // Changed - iThis = ((RID) iThis).getRecord(); - } - if (iThis instanceof Identifiable) { // Changed - iThis = new ResultInternal((Identifiable) iThis); // Changed - } - Result result = (Result) iThis; // Changed - - String indexName = (String) params[0]; - - ArcadeLuceneFullTextIndex index = searchForIndex(ctx, indexName); // FIXME + CommandContext ctx) { - if (index == null) return false; + validateParameterCount(params, 2, 3); - String query = (String) params[1]; + String indexName = params[0].toString(); + String query = params[1].toString(); + Document metadata = params.length == 3 ? getMetadata((Expression) params[2], ctx) : new Document(ctx.getDatabase()); - MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx); // Use refactored util + ArcadeLuceneFullTextIndex index = ArcadeLuceneFunctionsUtils.getLuceneFullTextIndex(ctx, indexName); - // FIXME: index.getDefinition() might be different. - List key = - index.getDefinition().getFields().stream() - .map(s -> result.getProperty(s)) - .collect(Collectors.toList()); - - // FIXME: index.buildDocument and index.indexAnalyzer might not exist or have different signatures - // This part is highly dependent on ArcadeLuceneFullTextIndex refactoring. - org.apache.lucene.document.Document luceneDoc = index.buildDocument(key, iCurrentRecord); - if (luceneDoc != null) { - for (IndexableField field : luceneDoc.getFields()) { - memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer()); // Simplified, assuming stringValue is appropriate - } + if (index == null) { + // If used in a WHERE clause for a specific record, returning false means "filter out" + // If used as a standalone function returning a set, return empty set. + // The `filterResult` method in template handles boolean conversion. + return currentRecord != null ? false : Collections.emptySet(); } - Document metadata = getMetadataDoc(params); // Changed ODocument - // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring - LuceneKeyAndMetadata keyAndMetadata = - new LuceneKeyAndMetadata( - new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); - - // FIXME: index.buildQuery might not exist or have different signature - return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; - } - - private Document getMetadataDoc(Object[] params) { // Changed ODocument - if (params.length == 3) { - if (params[2] instanceof Map) { - return new Document().fromMap((Map) params[2]); // Changed - } else if (params[2] instanceof String) { - return new Document().fromJSON((String) params[2]); + // If currentRecord is not null, this function is likely used in a WHERE clause context. + // It needs to determine if the currentRecord matches the Lucene query *within its own fields*. + if (currentRecord != null && currentRecord.getIdentity() != null) { + MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx); + + // We need the Lucene Document for the currentRecord + // The 'key' for buildDocument in this context is not a separate key, but derived from the record itself if auto index. + // Or, if the index has specific fields, those are used. + // Since we are in context of a specific record, we use its fields. + org.apache.lucene.document.Document luceneDoc = index.buildDocument(null, currentRecord); // Pass null for key if derived from record + + if (luceneDoc != null) { + for (IndexableField field : luceneDoc.getFields()) { + // Simplified: use stringValue. Actual field data might be needed for MemoryIndex if not string. + // MemoryIndex.addField can take Analyzer, which it gets from the IndexableFieldType. + // If the field is not indexed with an analyzer (e.g. StringField), it's fine. + // If it is (e.g. TextField), index.indexAnalyzer() should be used. + // For simplicity, assuming MemoryIndex handles it or we use the general indexAnalyzer. + memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer()); + } + } else { + return false; // Cannot build Lucene doc for current record } - // Fallback for other types, or throw error - return new Document().fromJSON(params[2].toString()); - } - // FIXME: LuceneQueryBuilder.EMPTY_METADATA needs to be accessible or defined differently - return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; - } - // getOrCreateMemoryIndex was moved to ArcadeLuceneFunctionsUtils - - @Override - public String getSyntax() { - return "search_index( , , [ ] )"; // Updated syntax - } - - @Override - public boolean filterResult() { - return true; - } - - // FIXME: This method's signature and logic are highly dependent on ArcadeDB's IndexableSQLFunction interface - @Override - public Iterable searchFromTarget( // Changed - FromClause target, // Changed - BinaryCompareOperator operator, // Changed - Object rightValue, - CommandContext ctx, // Changed - Expression... args) { // Changed - - ArcadeLuceneFullTextIndex index = searchForIndex(target, ctx, args); // FIXME - - Expression expression = args[1]; - String query = (String) expression.execute((Result) null, ctx); // Changed - if (index != null && query != null) { - - Document meta = getMetadata(args, ctx); // Changed - - List luceneResultSet; // Changed - try (Stream rids = // Changed - // FIXME: index.getInternal().getRids() needs to be replaced with ArcadeDB equivalent - // This whole block is highly dependent on ArcadeLuceneFullTextIndex and LuceneKeyAndMetadata refactoring - index - .getAssociatedIndex() // Assuming getAssociatedIndex() is the way, or index might be the LuceneIndexEngine itself - .getRids( // This method might not exist on ArcadeDB's Index interface or ArcadeLuceneFullTextIndex - new LuceneKeyAndMetadata( // FIXME - new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { // FIXME - luceneResultSet = rids.collect(Collectors.toList()); + // The query here is the main Lucene query from params[1] + // Metadata for this specific sub-query within MemoryIndex. + LuceneKeyAndMetadata keyAndMeta = new LuceneKeyAndMetadata(query, metadata, ctx); + org.apache.lucene.search.Query luceneQuery = index.buildQuery(keyAndMeta); // Build query using index's config + + return memoryIndex.search(luceneQuery) > 0.0f; + } else { + // If currentRecord is null, this function is likely used to return a set of results from the specified index. + // This is the "searchFromTarget" equivalent. + LuceneKeyAndMetadata keyAndMeta = new LuceneKeyAndMetadata(query, metadata, ctx); + // The `index.get(keyAndMeta)` should return a LuceneResultSet or similar. + // The `ArcadeLuceneFullTextIndex.get(Object[])` was changed to return IndexCursor. + // We might need a direct way to execute a query via engine and get results. + // For now, assuming `index.get(keyAndMeta)` returns a Set or IndexCursor via engine. + + // The `get` method on `ArcadeLuceneFullTextIndex` takes `Object[] keys`. + // We need to wrap `keyAndMeta` or pass its components. + // Let's assume the engine's getInTx is what we want. + if (index.getEngine() instanceof LuceneIndexEngine) { + LuceneIndexEngine luceneEngine = (LuceneIndexEngine) index.getEngine(); + // LuceneKeyAndMetadata is already the 'key' for getInTx + return luceneEngine.getInTx(keyAndMeta, null); // Passing null for LuceneTxChanges for non-transactional view } - - return luceneResultSet; + return Collections.emptySet(); } - return Collections.emptyList(); } - private Document getMetadata(Expression[] args, CommandContext ctx) { // Changed types - if (args.length == 3) { - return getMetadata(args[2], ctx); // Calls the method in ArcadeLuceneSearchFunctionTemplate + private Document getMetadata(Object[] params, CommandContext ctx) { // Kept for direct param access if needed + if (params.length == 3 && params[2] != null) { + if (params[2] instanceof Map) { + return new Document(ctx.getDatabase()).fromMap((Map) params[2]); + } else if (params[2] instanceof String) { + return new Document(ctx.getDatabase()).fromJSON((String) params[2]); + } else if (params[2] instanceof Expression) { // If metadata is an expression + return getMetadata((Expression) params[2], ctx); + } else if (params[2] instanceof Document) { + return (Document) params[2]; + } + try { + return new Document(ctx.getDatabase()).fromJSON(params[2].toString()); + } catch (Exception e) { /* ignore, return empty */ } } - // FIXME: LuceneQueryBuilder.EMPTY_METADATA - return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + return new Document(ctx.getDatabase()); // LuceneQueryBuilder.EMPTY_METADATA; } + @Override - protected ArcadeLuceneFullTextIndex searchForIndex( // Changed types - FromClause target, CommandContext ctx, Expression... args) { // FIXME - - FromItem item = target.getItem(); // Changed - Identifier identifier = item.getIdentifier(); // Changed - // FIXME: This was calling a private searchForIndex, now it should call the one from ArcadeLuceneFunctionsUtils or similar. - // For now, assuming the util class will be used by the concrete implementations. - // This abstract method in template might need rethinking or this class needs its own way to get the index. - // Let's assume for now it will use the utility. - String indexNameFromArg = (String) args[0].execute((Result) null, ctx); - // String className = identifier.getStringValue(); // This would be the class from FROM clause - // We need the index name from the function argument. - return ArcadeLuceneFunctionsUtils.getLuceneFullTextIndex(ctx, indexNameFromArg); // FIXME + public String getSyntax() { + return getName() + "( , [, ] )"; } - // Removed private searchForIndex methods, assuming logic will consolidate or use ArcadeLuceneFunctionsUtils - - // getResult(OCommandContext) is part of OSQLFunction and likely not needed if SQLFunctionAbstract is different - // @Override - // public Object getResult(CommandContext ctx) { // Changed OCommandContext - // return super.getResult(ctx); - // } + // Removed searchFromTarget, estimate, canExecuteInline, allowsIndexedExecution, shouldExecuteAfterSearch + // searchForIndex is not needed here as index name is a direct parameter. } diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java index b9a0d08f63..8aee90bad4 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java +++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java @@ -195,36 +195,31 @@ public Identifiable next() { this.currentProximityInfo = new HashMap<>(); this.currentProximityInfo.put("$score", this.currentScore); - if (engine != null && queryContext != null) { - // The RecordId for context needs a way to carry this info if onRecordAddedToResultSet modifies it. - // Let's assume RecordId is primarily for identity, and contextual info is managed by this cursor - // or passed directly to some wrapper if needed. - // For now, `onRecordAddedToResultSet` might populate `queryContext.fragments` - // which we can then retrieve here if needed. - RecordId contextualRid = new RecordId(this.currentRID); // Create a new RecordId instance for context - - // Call engine callback to potentially populate highlights in queryContext or for other processing - engine.onRecordAddedToResultSet(queryContext, contextualRid, luceneDoc, scoreDoc); - - // Retrieve fragments if populated by the callback - if (queryContext.getFragments() != null && !queryContext.getFragments().isEmpty()) { - queryContext.getFragments().forEach((field, frags) -> { - if (frags != null && frags.length > 0) { - StringBuilder sb = new StringBuilder(); - for (org.apache.lucene.search.highlight.TextFragment frag : frags) { - if (frag != null && frag.getScore() > 0) { // frag.getScore() might not exist, check TextFragment API - sb.append(frag.toString()); - } - } - if (sb.length() > 0) { - this.currentProximityInfo.put("$" + field + "_hl", sb.toString()); - } - } - }); - queryContext.getFragments().clear(); // Clear for next record + if (queryContext != null && queryContext.isHighlightingEnabled()) { + if (engine != null && engine.queryAnalyzer() != null) { // Ensure we have an analyzer for highlighting + queryContext.setHighlightingAnalyzer(engine.queryAnalyzer()); // Use engine's query analyzer + + // We need an IndexReader to pass to getHighlights if it needs one. + // The searcher in queryContext already has one. + IndexReader reader = queryContext.getSearcher().getIndexReader(); + Map highlights = queryContext.getHighlights(luceneDoc, reader); + if (highlights != null && !highlights.isEmpty()) { + this.currentProximityInfo.putAll(highlights); + } + } else { + logger.warning("Highlighting enabled but no queryAnalyzer available from engine to set on LuceneQueryContext."); } } + // The engine.onRecordAddedToResultSet callback is now less critical for highlights, + // but can be kept if it serves other purposes (e.g. security, logging, complex context data). + // For now, let's assume its primary highlight-related role is superseded. + if (engine != null && queryContext != null) { + RecordId contextualRid = new RecordId(this.currentRID); + engine.onRecordAddedToResultSet(queryContext, contextualRid, luceneDoc, scoreDoc); + } + + // IndexCursor traditionally returns Identifiable (which can be just the RID) // If the caller needs the full record, they call getRecord(). return this.currentRID; diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java index 007154ccde..e4b4068f39 100644 --- a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java +++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java @@ -46,7 +46,13 @@ public class LuceneQueryContext { // Changed class name private final Query query; private final Sort sort; private Optional changes; // FIXME: Needs refactoring - private HashMap fragments; + // private HashMap fragments; // Replaced by on-demand highlighting + + // Highlighter components - to be initialized if highlighting is requested + private org.apache.lucene.search.highlight.Highlighter highlighter; + private org.apache.lucene.analysis.Analyzer highlightingAnalyzer; // Analyzer used for highlighting (might be queryAnalyzer) + private String[] highlightingFields; + public LuceneQueryContext( // Changed final CommandContext context, final IndexSearcher searcher, final Query query) { @@ -67,9 +73,49 @@ public LuceneQueryContext( // Changed sort = new Sort(sortFields.toArray(new SortField[0])); // Changed to new SortField[0] } changes = Optional.empty(); - fragments = new HashMap<>(); + // fragments = new HashMap<>(); // Not pre-cached anymore + + // Check metadata for highlighting setup + if (this.context != null && this.context.getVariable("highlight") instanceof Map) { + @SuppressWarnings("unchecked") + Map highlightParams = (Map) this.context.getVariable("highlight"); + // Simple setup for now, more advanced formatting can be added + // String preTag = (String) highlightParams.getOrDefault("preTag", ""); + // String postTag = (String) highlightParams.getOrDefault("postTag", ""); + // org.apache.lucene.search.highlight.Formatter formatter = new org.apache.lucene.search.highlight.SimpleHTMLFormatter(preTag, postTag); + org.apache.lucene.search.highlight.Formatter formatter = new org.apache.lucene.search.highlight.SimpleHTMLFormatter(); + org.apache.lucene.search.highlight.QueryScorer queryScorer = new org.apache.lucene.search.highlight.QueryScorer(query); + this.highlighter = new org.apache.lucene.search.highlight.Highlighter(formatter, queryScorer); + // Fragmenter: + // this.highlighter.setTextFragmenter(new org.apache.lucene.search.highlight.SimpleFragmenter(100)); // Example: 100 chars per fragment + + Object fieldsToHighlightObj = highlightParams.get("fields"); + if (fieldsToHighlightObj instanceof String) { + this.highlightingFields = ((String) fieldsToHighlightObj).split(","); + } else if (fieldsToHighlightObj instanceof List) { + @SuppressWarnings("unchecked") + List list = (List) fieldsToHighlightObj; + this.highlightingFields = list.toArray(new String[0]); + } + // Analyzer for highlighting should ideally be the one used for querying the specific fields. + // This is a simplification; a more robust solution would fetch field-specific analyzers. + // If the engine provides a general queryAnalyzer, use it. + // this.highlightingAnalyzer = searcher.getAnalyzer(); // This is not standard on IndexSearcher + // Analyzer must be passed or retrieved from engine/index definition + } + + } + + public boolean isHighlightingEnabled() { + return this.highlighter != null && this.highlightingFields != null && this.highlightingFields.length > 0; + } + + public LuceneQueryContext setHighlightingAnalyzer(Analyzer analyzer) { + this.highlightingAnalyzer = analyzer; + return this; } + public boolean isInTx() { return changes.isPresent(); } @@ -79,11 +125,7 @@ public LuceneQueryContext withChanges(final LuceneTxChanges changes) { // FIXME: return this; } - public LuceneQueryContext addHighlightFragment( - final String field, final TextFragment[] fieldFragment) { - fragments.put(field, fieldFragment); - return this; - } + // addHighlightFragment removed as highlights are generated on demand by getHighlights public CommandContext getContext() { // Changed return context; @@ -169,10 +211,43 @@ public boolean isDeleted(final Document doc, final Object key, final Identifiabl return changes.map(c -> c.isDeleted(doc, key, value)).orElse(false); } - public Map getFragments() { - return fragments; + /** + * Generates highlighted snippets for the given Lucene document and configured fields. + * Requires highlightingAnalyzer to be set. + */ + public Map getHighlights(Document luceneDoc, IndexReader reader) { + if (!isHighlightingEnabled() || luceneDoc == null || this.highlightingAnalyzer == null) { + return Collections.emptyMap(); + } + + Map highlights = new HashMap<>(); + for (String field : highlightingFields) { + String text = luceneDoc.get(field); + if (text != null) { + try { + // Get best fragments. Last param is maxNoFragments. + TextFragment[] frags = highlighter.getBestTextFragments(this.highlightingAnalyzer, field, text, 3); + StringBuilder sb = new StringBuilder(); + for (TextFragment frag : frags) { + if (frag != null && frag.getScore() > 0) { + sb.append(frag.toString()); + sb.append("... "); // Separator for multiple fragments + } + } + if (sb.length() > 0) { + highlights.put("$" + field + "_hl", sb.toString().trim()); + } + } catch (IOException | org.apache.lucene.search.highlight.InvalidTokenOffsetsException e) { + // Log error or handle as needed + System.err.println("Error highlighting field " + field + ": " + e.getMessage()); + } + } + } + return highlights; } + // getFragments() method removed, replaced by getHighlights() logic integrated into LuceneIndexCursor + // getLimit() and onRecord() were not in the provided OLuceneQueryContext, // they might be from a different class or an older version. // If they are needed, they would be implemented here.