diff --git a/lucene/pom.xml b/lucene/pom.xml new file mode 100644 index 0000000000..16d6341c39 --- /dev/null +++ b/lucene/pom.xml @@ -0,0 +1,150 @@ + + + 4.0.0 + + + com.arcadedb + arcadedb-parent + 25.6.1-SNAPSHOT + ../pom.xml + + + arcadedb-lucene + jar + ArcadeDB Lucene + Lucene full-text search engine integration for ArcadeDB. + + + + + + 10.2.1 + 0.8 + 1.20.0 + + + + + + com.arcadedb + arcadedb-engine + ${project.version} + + + + + + org.apache.lucene + lucene-core + ${lucene.version} + + + org.apache.lucene + lucene-analysis-common + ${lucene.version} + + + org.apache.lucene + lucene-queryparser + ${lucene.version} + + + org.apache.lucene + lucene-queries + ${lucene.version} + + + org.apache.lucene + lucene-misc + ${lucene.version} + + + org.apache.lucene + lucene-facet + ${lucene.version} + + + org.apache.lucene + lucene-memory + ${lucene.version} + + + org.apache.lucene + lucene-highlighter + ${lucene.version} + + + org.apache.lucene + lucene-codecs + ${lucene.version} + + + org.apache.lucene + lucene-backward-codecs + ${lucene.version} + + + org.apache.lucene + lucene-spatial-extras + ${lucene.version} + + + + + + org.locationtech.spatial4j + spatial4j + ${spatial4j.version} + + + org.locationtech.jts + jts-core + ${jts-core.version} + + + + + org.slf4j + slf4j-api + 1.7.36 + + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + true + true + + + + + + + + diff --git a/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java new file mode 100644 index 0000000000..091a3fcb9f --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java @@ -0,0 +1,84 @@ +package com.arcadedb.lucene; + +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.index.IndexFactoryHandler; +import com.arcadedb.index.IndexInternal; +import com.arcadedb.schema.IndexBuilder; +import com.arcadedb.schema.Type; +import 
com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; +import java.util.Map; + +public class ArcadeLuceneIndexFactoryHandler implements IndexFactoryHandler { + + public static final String LUCENE_FULL_TEXT_ALGORITHM = "LUCENE"; // Or just "LUCENE" + public static final String LUCENE_CROSS_CLASS_ALGORITHM = "LUCENE_CROSS_CLASS"; + + + @Override + public IndexInternal create(IndexBuilder builder) { + DatabaseInternal database = builder.getDatabase(); + String indexName = builder.getIndexName(); + // boolean unique = builder.isUnique(); // Unique is part of IndexDefinition + // Type[] keyTypes = builder.getKeyTypes(); // Key types are part of IndexDefinition + + // The IndexDefinition is the primary source of truth for index properties. + IndexDefinition definition = builder.getIndexDefinition(); + if (definition == null) { + // This case should ideally be prevented by the schema/builder logic before reaching here. + // If it can happen, we might need to construct a minimal definition. + // For now, assuming builder provides a valid definition or enough info to create one. + // If builder.build() is called before this, definition should be set. + // If this factory *is* part of builder.build(), then builder has all components. + throw new IllegalArgumentException("IndexDefinition is required to create a Lucene index."); + } + + // Algorithm is now part of IndexDefinition + // String algorithm = definition.getAlgorithm() != null ? definition.getAlgorithm() : LUCENE_FULL_TEXT_ALGORITHM; + // The factory is usually registered for a specific algorithm, so this check might be redundant + // if this factory is only invoked for "LUCENE" or "LUCENE_CROSS_CLASS". 
+ + // The constructor for ArcadeLuceneFullTextIndex is: + // (DatabaseInternal db, String name, String typeName, IndexDefinition definition, + // String filePath, PaginatedFile metadataFile, PaginatedFile[] dataFiles, + // PaginatedFile[] treeFiles, int fileId, int pageSize, + // TransactionContext.AtomicOperation atomicOperation) + // The IndexBuilder provides most of these. + // typeName here is the schema type name the index is on, not the index type/algorithm. + + // filePath should be determined by the system, often databasePath + indexFileName + String filePath = builder.getFilePath(); + if (filePath == null) { + filePath = database.getDatabasePath() + java.io.File.separator + builder.getFileName(); + } + + + // For PaginatedFile parameters, they are usually managed by the Storage engine. + // For a Lucene index, it might not directly use these ArcadeDB PaginatedFile structures + // for its main data, but it might have a metadata file. + // The IndexBuilder should provide these if they are standard. + // If Lucene manages its own files in 'filePath', some of these might be null or placeholders. + + // Let's assume the builder provides what's needed for the generic parts of an index. + // The specific engine (Lucene) will manage its own data files within its directory (filePath). + + // The old constructor of ArcadeLuceneFullTextIndex took: + // (DatabaseInternal database, String name, boolean unique, String analyzerClassName, String filePath, Type[] keyTypes) + // This has been changed to the standard one. + // We need to ensure that IndexDefinition within builder has all necessary info (like analyzer). 
+ // Analyzer is typically stored in definition.getOptions().get("analyzer") + + return new ArcadeLuceneFullTextIndex( + database, + indexName, + definition.getTypeName(), // Class/Type name this index is on + definition, + filePath, + builder.getMetadataFile(), // from IndexBuilder + builder.getDataFiles(), // from IndexBuilder + builder.getTreeFiles(), // from IndexBuilder (likely null/unused for Lucene) + builder.getFileId(), // from IndexBuilder + builder.getPageSize(), // from IndexBuilder (might be less relevant for Lucene) + null // AtomicOperation: build is usually outside a TX or handles its own. + ); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneLifecycleManager.java b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneLifecycleManager.java new file mode 100644 index 0000000000..35e0947279 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneLifecycleManager.java @@ -0,0 +1,46 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.arcadedb.lucene; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +// This class might serve as the main plugin class listed in plugin.json for initialization purposes, +// or handle lifecycle events if ArcadeDB's plugin API expects a specific class for that. +// For now, it's minimal. 
+public class ArcadeLuceneLifecycleManager { + private static final Logger logger = LoggerFactory.getLogger(ArcadeLuceneLifecycleManager.class); + + // This constant might be better placed in ArcadeLuceneIndexFactoryHandler or a shared constants class. + public static final String LUCENE_ALGORITHM = "LUCENE"; + + public ArcadeLuceneLifecycleManager() { + this(false); + } + + public ArcadeLuceneLifecycleManager(boolean manual) { + if (!manual) { + logger.info("ArcadeLuceneLifecycleManager initialized (manual: {}).", manual); + // Further initialization or listener registration logic specific to ArcadeDB's plugin system + // would go here if this class is the entry point. + } + } + + // Any necessary lifecycle methods (e.g., from a specific ArcadeDB plugin interface) would be here. + // For now, assuming it does not need to implement DatabaseListener directly. + // Drop logic for indexes of this type should be handled by the Index.drop() method. +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLuceneAnalyzerFactory.java b/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLuceneAnalyzerFactory.java new file mode 100644 index 0000000000..2bfcb49424 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLuceneAnalyzerFactory.java @@ -0,0 +1,137 @@ +package com.arcadedb.lucene.analyzer; + +import com.arcadedb.document.Document; +import com.arcadedb.exception.ArcadeDBException; +import com.arcadedb.exception.IndexException; +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.schema.Type; +import java.lang.reflect.Constructor; +import java.util.Collection; +import java.util.Locale; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.standard.StandardAnalyzer; + +/** Created by frank on 30/10/2015. 
*/ +public class ArcadeLuceneAnalyzerFactory { + private static final Logger logger = Logger.getLogger(ArcadeLuceneAnalyzerFactory.class.getName()); + + public Analyzer createAnalyzer( + final IndexDefinition index, final AnalyzerKind kind, final Document metadata) { + if (index == null) { + throw new IllegalArgumentException("Index must not be null"); + } + if (kind == null) { + throw new IllegalArgumentException("Analyzer kind must not be null"); + } + if (metadata == null) { + throw new IllegalArgumentException("Metadata must not be null"); + } + final String defaultAnalyzerFQN = metadata.getString("default"); + final String prefix = index.getTypeName() + "."; + + final OLucenePerFieldAnalyzerWrapper analyzer = // FIXME: Needs to be ArcadeLucenePerFieldAnalyzerWrapper + geLucenePerFieldPresetAnalyzerWrapperForAllFields(defaultAnalyzerFQN); + setDefaultAnalyzerForRequestedKind(index, kind, metadata, prefix, analyzer); + setSpecializedAnalyzersForEachField(index, kind, metadata, prefix, analyzer); + return analyzer; + } + + private OLucenePerFieldAnalyzerWrapper geLucenePerFieldPresetAnalyzerWrapperForAllFields( // FIXME: Needs to be ArcadeLucenePerFieldAnalyzerWrapper + final String defaultAnalyzerFQN) { + if (defaultAnalyzerFQN == null) { + return new OLucenePerFieldAnalyzerWrapper(new StandardAnalyzer()); // FIXME: Needs to be ArcadeLucenePerFieldAnalyzerWrapper + } else { + return new OLucenePerFieldAnalyzerWrapper(buildAnalyzer(defaultAnalyzerFQN)); // FIXME: Needs to be ArcadeLucenePerFieldAnalyzerWrapper + } + } + + private void setDefaultAnalyzerForRequestedKind( + final IndexDefinition index, + final AnalyzerKind kind, + final Document metadata, + final String prefix, + final OLucenePerFieldAnalyzerWrapper analyzer) { // FIXME: Needs to be ArcadeLucenePerFieldAnalyzerWrapper + final String specializedAnalyzerFQN = metadata.getString(kind.toString()); + if (specializedAnalyzerFQN != null) { + for (final String field : index.getFields()) { + 
analyzer.add(field, buildAnalyzer(specializedAnalyzerFQN)); + analyzer.add(prefix + field, buildAnalyzer(specializedAnalyzerFQN)); + } + } + } + + private void setSpecializedAnalyzersForEachField( + final IndexDefinition index, + final AnalyzerKind kind, + final Document metadata, + final String prefix, + final OLucenePerFieldAnalyzerWrapper analyzer) { // FIXME: Needs to be ArcadeLucenePerFieldAnalyzerWrapper + for (final String field : index.getFields()) { + final String analyzerName = field + "_" + kind.toString(); + final String analyzerStopwords = analyzerName + "_stopwords"; + + if (metadata.containsField(analyzerName) && metadata.containsField(analyzerStopwords)) { + final Collection stopWords = metadata.get(analyzerStopwords, Collection.class); + analyzer.add(field, buildAnalyzer(metadata.getString(analyzerName), stopWords)); + analyzer.add(prefix + field, buildAnalyzer(metadata.getString(analyzerName), stopWords)); + } else if (metadata.containsField(analyzerName)) { + analyzer.add(field, buildAnalyzer(metadata.getString(analyzerName))); + analyzer.add(prefix + field, buildAnalyzer(metadata.getString(analyzerName))); + } + } + } + + private Analyzer buildAnalyzer(final String analyzerFQN) { + try { + final Class classAnalyzer = Class.forName(analyzerFQN); + final Constructor constructor = classAnalyzer.getDeclaredConstructor(); + return (Analyzer) constructor.newInstance(); + } catch (final ClassNotFoundException e) { + throw new IndexException("Analyzer: " + analyzerFQN + " not found", e); + } catch (final NoSuchMethodException e) { + Class classAnalyzer; + try { + classAnalyzer = Class.forName(analyzerFQN); + //noinspection deprecation + return (Analyzer) classAnalyzer.newInstance(); + } catch (Exception e1) { + logger.log(Level.SEVERE, "Exception is suppressed, original exception is ", e); + //noinspection ThrowInsideCatchBlockWhichIgnoresCaughtException + throw new IndexException("Couldn't instantiate analyzer: public constructor not found", e1); + } + 
} catch (Exception e) { + logger.log( + Level.SEVERE, "Error on getting analyzer for Lucene index (continuing with StandardAnalyzer)", e); + return new StandardAnalyzer(); + } + } + + private Analyzer buildAnalyzer(final String analyzerFQN, final Collection stopwords) { + try { + final Class classAnalyzer = Class.forName(analyzerFQN); + final Constructor constructor = classAnalyzer.getDeclaredConstructor(CharArraySet.class); + return (Analyzer) constructor.newInstance(new CharArraySet(stopwords, true)); + } catch (final ClassNotFoundException e) { + throw new IndexException("Analyzer: " + analyzerFQN + " not found", e); + } catch (final NoSuchMethodException e) { + throw new IndexException("Couldn't instantiate analyzer: public constructor not found", e); + } catch (final Exception e) { + logger.log( + Level.SEVERE, "Error on getting analyzer for Lucene index (continuing with StandardAnalyzer)", e); + return new StandardAnalyzer(); + } + } + + public enum AnalyzerKind { + INDEX, + QUERY; + + @Override + public String toString() { + return name().toLowerCase(Locale.ENGLISH); + } + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLucenePerFieldAnalyzerWrapper.java b/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLucenePerFieldAnalyzerWrapper.java new file mode 100644 index 0000000000..ba140f59eb --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLucenePerFieldAnalyzerWrapper.java @@ -0,0 +1,89 @@ +package com.arcadedb.lucene.analyzer; + +import static com.arcadedb.lucene.engine.OLuceneIndexEngineAbstract.RID; // FIXME: This might need to be ArcadeDB specific constant if RID definition changes + +import com.arcadedb.lucene.index.ArcadeLuceneIndexType; // FIXME: Ensure this is the correct refactored class for OLuceneIndexType +import java.util.HashMap; +import java.util.Map; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.DelegatingAnalyzerWrapper; +import 
org.apache.lucene.analysis.core.KeywordAnalyzer; + +/** + * Created by frank on 10/12/15. + * + *

Doesn't allow to wrap components or readers. Thread local resources can be + delegated to the + * delegate analyzer, but not allocated on this analyzer (limit memory consumption). Uses a per + * field reuse strategy. + */ +public class ArcadeLucenePerFieldAnalyzerWrapper extends DelegatingAnalyzerWrapper { + private final Analyzer defaultDelegateAnalyzer; + private final Map fieldAnalyzers; + + /** + * Constructs with default analyzer. + * + * @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use + * the one provided here. + */ + public ArcadeLucenePerFieldAnalyzerWrapper(final Analyzer defaultAnalyzer) { + this(defaultAnalyzer, new HashMap<>()); + } + + /** + * Constructs with default analyzer and a map of analyzers to use for specific fields. + * + * @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use + * the one provided here. + * @param fieldAnalyzers a Map (String field name to the Analyzer) to be used for those fields + */ + public ArcadeLucenePerFieldAnalyzerWrapper( + final Analyzer defaultAnalyzer, final Map fieldAnalyzers) { + super(PER_FIELD_REUSE_STRATEGY); + this.defaultDelegateAnalyzer = defaultAnalyzer; + this.fieldAnalyzers = new HashMap<>(); + + this.fieldAnalyzers.putAll(fieldAnalyzers); + + this.fieldAnalyzers.put(RID, new KeywordAnalyzer()); + this.fieldAnalyzers.put(ArcadeLuceneIndexType.RID_HASH, new KeywordAnalyzer()); + this.fieldAnalyzers.put("_CLASS", new KeywordAnalyzer()); + this.fieldAnalyzers.put("_CLUSTER", new KeywordAnalyzer()); + this.fieldAnalyzers.put("_JSON", new KeywordAnalyzer()); + } + + @Override + protected Analyzer getWrappedAnalyzer(final String fieldName) { + final Analyzer analyzer = fieldAnalyzers.get(fieldName); + return (analyzer != null) ? 
analyzer : defaultDelegateAnalyzer; + } + + @Override + public String toString() { + return "ArcadeLucenePerFieldAnalyzerWrapper(" // Updated class name in toString + + fieldAnalyzers + + ", default=" + + defaultDelegateAnalyzer + + ")"; + } + + public ArcadeLucenePerFieldAnalyzerWrapper add(final String field, final Analyzer analyzer) { + fieldAnalyzers.put(field, analyzer); + return this; + } + + public ArcadeLucenePerFieldAnalyzerWrapper add(final ArcadeLucenePerFieldAnalyzerWrapper wrapper) { // Changed parameter type + fieldAnalyzers.putAll(wrapper.getAnalyzers()); + return this; + } + + public ArcadeLucenePerFieldAnalyzerWrapper remove(final String field) { + fieldAnalyzers.remove(field); + return this; + } + + protected Map getAnalyzers() { + return fieldAnalyzers; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java new file mode 100644 index 0000000000..b6fd7d7ebb --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java @@ -0,0 +1,390 @@ +/* + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.arcadedb.lucene.builder; + +import com.arcadedb.database.Database; +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.document.Document; // ArcadeDB Document +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.lucene.index.ArcadeLuceneIndexType; +import com.arcadedb.schema.DocumentType; +import com.arcadedb.schema.Property; +import com.arcadedb.schema.Type; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +import org.apache.lucene.document.Field; // Lucene Field +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; + +public class LuceneDocumentBuilder { + + private static final Logger logger = Logger.getLogger(LuceneDocumentBuilder.class.getName()); + + public org.apache.lucene.document.Document build(IndexDefinition indexDefinition, + Object key, // The key used for indexing (can be composite) + Identifiable identifiableValue, // The record to index + Map collectionFields, // Info about collection fields (if needed, from old engine) + com.arcadedb.document.Document metadata) { // Query/index time metadata + + org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document(); + + // Add RID field + if (identifiableValue != null && identifiableValue.getIdentity() != null) { + luceneDoc.add(ArcadeLuceneIndexType.createRidField(identifiableValue)); + } + + // Add KEY field(s) if the key is provided and the index is not on specific fields (manual index style) + // For automatic indexes, the key is usually derived from the document's fields. + if (key != null && (indexDefinition.getFields() == null || indexDefinition.getFields().isEmpty())) { + // This logic is more for manual indexes where 'key' is the value being indexed. + // For automatic indexes on document fields, this 'key' might be redundant or handled differently. 
+ // Assuming KEY field stores the string representation of the key for now. + luceneDoc.add(new StringField(ArcadeLuceneIndexType.KEY, key.toString(), Field.Store.YES)); + } + + + if (identifiableValue instanceof com.arcadedb.document.Document) { + com.arcadedb.document.Document record = (com.arcadedb.document.Document) identifiableValue; + DatabaseInternal db = record.getDatabase(); + DocumentType recordType = record.getType(); + + List fieldsToIndex = indexDefinition.getFields(); + if (fieldsToIndex == null || fieldsToIndex.isEmpty()) { + // If no specific fields defined for index (e.g. manual index), + // and we already added KEY, then we might be done for primary content for this key. + // However, if the 'value' (record) itself should have its fields indexed, + // then fieldsToIndex should probably default to all fields of the record. + // This part depends on the semantics of "automatic" vs "manual" Lucene indexes. + // For now, if no fields are in definition, we assume only KEY and RID are added. + } else { + for (String fieldName : fieldsToIndex) { + if (!record.has(fieldName)) { + continue; + } + Object fieldValue = record.get(fieldName); + if (fieldValue == null) { + continue; + } + + Property property = recordType != null ? recordType.getProperty(fieldName) : null; + Type fieldType = property != null ? property.getType() : Type.STRING; // Default to STRING if no schema type + + // Determine if field should be stored and sorted based on index definition options or metadata + boolean storeField = isToStore(indexDefinition, fieldName, metadata); + boolean sortField = isToSort(indexDefinition, fieldName, metadata); + + // Get schema type of the field, and for collections/maps, the linked type + Type linkedType = (property != null) ? 
property.getOfType() : null; + + indexValue(luceneDoc, fieldName, fieldValue, fieldType, linkedType, + storeField, sortField, 1, indexDefinition, metadata, db); + } + } + } else if (identifiableValue != null) { + // If the value is an Identifiable but not a Document (e.g. just an RID for a manual index key) + // and fields are defined in the index, this implies we should load the document + // and then process its fields. This case should ideally be handled by the caller + // by passing the actual Document record. + // If only key and RID are indexed for non-Document identifiables, current logic is okay. + } + + + // Add _CLASS field if type is available + String typeName = indexDefinition.getTypeName(); + if (typeName != null && !typeName.isEmpty()) { + luceneDoc.add(new StringField("_CLASS", typeName, Field.Store.YES)); // Non-analyzed + } + + // Log usage of collectionFields if it's passed but not deeply integrated yet + if (collectionFields != null && !collectionFields.isEmpty()) { + // The `collectionFields` map (from OrientDB's engine) indicated if a field was a collection of simple types. + // This information might be used to guide specific tokenization or if ArcadeLuceneIndexType.createFields + // needs more hints for collections of scalars vs. collections of embeddeds, though getType and getOfType should cover most cases. + // For now, just logging its presence. + logger.finer("Received 'collectionFields' map, but its specific nuanced behavior is not fully implemented beyond standard collection handling: " + collectionFields); + } + + return luceneDoc; + } + + /** + * Determines if a field should be stored in the Lucene index based on index definition options. + * Convention: + * - "storeFields": "*" or "ALL" means store all. + * - "storeFields": "fieldA,fieldB" means store only these. + * - "dontStoreFields": "fieldC,fieldD" means do not store these (takes precedence). + * - "store.": "true" or "false" for field-specific setting. 
+ * Defaults to Field.Store.NO if not specified otherwise for full-text search efficiency. + */ + private boolean isToStore(IndexDefinition indexDefinition, String fieldName, com.arcadedb.document.Document metadata) { + Map options = indexDefinition.getOptions(); + // Query-time metadata can override index-time options + if (metadata != null) { + Object fieldSpecificStoreMeta = metadata.get("store." + fieldName); + if (fieldSpecificStoreMeta != null) return Boolean.parseBoolean(fieldSpecificStoreMeta.toString()); + + List queryStoredFields = metadata.get("storedFields"); // Assuming list of strings + if (queryStoredFields != null) { + if (queryStoredFields.contains(fieldName)) return true; + if (queryStoredFields.contains("*") || queryStoredFields.contains("ALL")) return true; + } + List queryDontStoreFields = metadata.get("dontStoreFields"); + if (queryDontStoreFields != null && queryDontStoreFields.contains(fieldName)) return false; + } + + // Index definition options + if (options != null) { + String fieldSpecificStoreOpt = options.get("store." + fieldName); + if (fieldSpecificStoreOpt != null) return Boolean.parseBoolean(fieldSpecificStoreOpt); + + String dontStoreFieldsOpt = options.get("dontStoreFields"); + if (dontStoreFieldsOpt != null) { + List dontStoreList = Arrays.asList(dontStoreFieldsOpt.toLowerCase().split("\\s*,\\s*")); + if (dontStoreList.contains(fieldName.toLowerCase())) return false; + } + + String storeFieldsOpt = options.get("storeFields"); + if (storeFieldsOpt != null) { + if ("*".equals(storeFieldsOpt) || "ALL".equalsIgnoreCase(storeFieldsOpt)) return true; + List storeList = Arrays.asList(storeFieldsOpt.toLowerCase().split("\\s*,\\s*")); + if (storeList.contains(fieldName.toLowerCase())) return true; + // If storeFields is specified but doesn't list this field, and no "*" or "ALL", assume don't store (unless dontStoreFields also doesn't list it). + // This means explicit list in storeFields acts as a whitelist if present. 
+        return false;
+      }
+    }
+    // Default if no specific rules found: DO NOT STORE fields unless specified.
+    return false;
+  }
+
+  /**
+   * Determines if a field should have DocValues for sorting.
+   * Convention:
+   * - "sortableFields": "*" or "ALL" (less common for global sortability).
+   * - "sortableFields": "fieldA,fieldB".
+   * - "sort.": "true" or "false" for field-specific setting.
+   * Defaults to false. Query-time metadata overrides index-time options.
+   */
+  private boolean isToSort(IndexDefinition indexDefinition, String fieldName, com.arcadedb.document.Document metadata) {
+    Map options = indexDefinition.getOptions();
+    // Query-time metadata can override index-time options
+    if (metadata != null) {
+      Object fieldSpecificSortMeta = metadata.get("sort."
+ fieldName); + if (fieldSpecificSortMeta != null) return Boolean.parseBoolean(fieldSpecificSortMeta.toString()); + + List querySortableFields = metadata.get("sortableFields"); // Assuming list of strings + if (querySortableFields != null) { + if (querySortableFields.contains("*") || querySortableFields.contains("ALL")) return true; + if (querySortableFields.contains(fieldName)) return true; + } + } + + // Index definition options + if (options != null) { + String fieldSpecificSortOpt = options.get("sort." + fieldName); + if (fieldSpecificSortOpt != null) return Boolean.parseBoolean(fieldSpecificSortOpt); + + String sortableFieldsOpt = options.get("sortableFields"); + if (sortableFieldsOpt != null) { + if ("*".equals(sortableFieldsOpt) || "ALL".equalsIgnoreCase(sortableFieldsOpt)) return true; + List sortList = Arrays.asList(sortableFieldsOpt.toLowerCase().split("\\s*,\\s*")); + if (sortList.contains(fieldName.toLowerCase())) return true; + // If sortableFields is specified but doesn't list this field, and no "*" or "ALL", assume not sortable. 
+ return false; + } + } + return false; // Default to not sortable + } + + private void indexValue(org.apache.lucene.document.Document luceneDoc, String fieldName, Object fieldValue, + Type fieldType, Type linkedType, boolean storeField, boolean sortField, + int currentDepth, IndexDefinition rootIndexDefinition, + com.arcadedb.document.Document rootMetadata, DatabaseInternal database) { + + int maxDepth = getMaxDepth(rootIndexDefinition, fieldName); + if (currentDepth > maxDepth) { + logger.finer("Max indexing depth ("+ maxDepth +") reached for field: " + fieldName); + return; + } + + if (fieldValue instanceof Collection && (fieldType == Type.EMBEDDEDLIST || fieldType == Type.EMBEDDEDSET || fieldType == Type.LIST)) { + Collection collection = (Collection) fieldValue; + Type actualLinkedType = linkedType; + if (actualLinkedType == null && !collection.isEmpty()) { + Object firstElement = collection.iterator().next(); + if (firstElement instanceof Document) actualLinkedType = Type.EMBEDDED; + else if (firstElement != null) actualLinkedType = Type.getTypeByValue(firstElement); + } + + if (actualLinkedType != null && actualLinkedType != Type.EMBEDDED && actualLinkedType != Type.EMBEDDEDMAP) { // Scalar list/set + for (Object item : collection) { + if (item != null) { + List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item, + storeField ? 
Field.Store.YES : Field.Store.NO, sortField, actualLinkedType); + for (Field f : itemFields) luceneDoc.add(f); + } + } + } else if (actualLinkedType == Type.EMBEDDED || (actualLinkedType == null && collection.iterator().hasNext() && collection.iterator().next() instanceof Document)){ // EMBEDDEDLIST/SET of Documents + for (Object item : collection) { + if (item instanceof Document) { + indexEmbeddedContent(luceneDoc, fieldName, (Document) item, currentDepth, rootIndexDefinition, rootMetadata, database); + } else if (item != null) { // Non-document item in what was expected to be an embedded list + logger.finer("Item in embedded list for field '" + fieldName + "' is not a Document, indexing toString(): " + item.getClass()); + List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item.toString(), storeField ? Field.Store.YES : Field.Store.NO, false, Type.STRING); + for (Field f : itemFields) luceneDoc.add(f); + } + } + } else { + logger.finer("Collection field '" + fieldName + "' contains unhandled linked type: " + actualLinkedType + " or collection is empty/mixed."); + // Optionally index toString() for each item as a fallback + for (Object item : collection) { + if (item != null) { + List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item.toString(), storeField ? Field.Store.YES : Field.Store.NO, false, Type.STRING); + for (Field f : itemFields) luceneDoc.add(f); + } + } + } + } else if (fieldValue instanceof Map && fieldType == Type.EMBEDDEDMAP) { + indexEmbeddedContent(luceneDoc, fieldName, (Map) fieldValue, currentDepth, rootIndexDefinition, rootMetadata, database); + } else if (fieldValue instanceof Document && fieldType == Type.EMBEDDED) { + indexEmbeddedContent(luceneDoc, fieldName, (Document) fieldValue, currentDepth, rootIndexDefinition, rootMetadata, database); + } else { // Scalar field or unhandled complex type treated as scalar + List luceneFields = ArcadeLuceneIndexType.createFields(fieldName, fieldValue, + storeField ? 
Field.Store.YES : Field.Store.NO, sortField, fieldType); + for (Field f : luceneFields) luceneDoc.add(f); + } + } + + private void indexEmbeddedContent(org.apache.lucene.document.Document luceneDoc, String baseFieldName, + Object embeddedObject, int currentDepth, + IndexDefinition rootIndexDefinition, com.arcadedb.document.Document rootMetadata, + DatabaseInternal database) { + if (embeddedObject instanceof Document) { + Document embeddedDoc = (Document) embeddedObject; + DocumentType embeddedSchemaType = embeddedDoc.getType(); + + for (String innerFieldName : embeddedDoc.getPropertyNames()) { + Object innerFieldValue = embeddedDoc.get(innerFieldName); + if (innerFieldValue == null) continue; + + String prefixedFieldName = baseFieldName + "." + innerFieldName; + // TODO: Add options to include/exclude specific embedded fields `rootIndexDefinition.getOptions().get("includeEmbedded." + prefixedFieldName)` + + Property innerProperty = (embeddedSchemaType != null) ? embeddedSchemaType.getProperty(innerFieldName) : null; + Type innerFieldType = (innerProperty != null) ? innerProperty.getType() : Type.getTypeByValue(innerFieldValue); + Type innerLinkedType = (innerProperty != null) ? innerProperty.getOfType() : null; + + boolean storeField = isToStore(rootIndexDefinition, prefixedFieldName, rootMetadata); + boolean sortField = isToSort(rootIndexDefinition, prefixedFieldName, rootMetadata); + + indexValue(luceneDoc, prefixedFieldName, innerFieldValue, innerFieldType, innerLinkedType, + storeField, sortField, currentDepth + 1, rootIndexDefinition, rootMetadata, database); + } + } else if (embeddedObject instanceof Map) { + @SuppressWarnings("unchecked") + Map map = (Map) embeddedObject; + for (Map.Entry entry : map.entrySet()) { + String mapKey = entry.getKey(); + Object mapValue = entry.getValue(); + if (mapValue == null) continue; + + String prefixedFieldName = baseFieldName + "." 
+ mapKey; + // TODO: Add options to include/exclude specific embedded fields + + Type valueType = Type.getTypeByValue(mapValue); // Infer type from map value + // For maps, linkedType is generally not applicable unless map values are consistently typed Documents. + + boolean storeField = isToStore(rootIndexDefinition, prefixedFieldName, rootMetadata); + boolean sortField = isToSort(rootIndexDefinition, prefixedFieldName, rootMetadata); + + // Here, we treat map values. If a map value is another Document/Map/Collection, it will be handled by recursive call. + indexValue(luceneDoc, prefixedFieldName, mapValue, valueType, null, // Pass null for linkedType for map values for now + storeField, sortField, currentDepth + 1, rootIndexDefinition, rootMetadata, database); + } + } + // Collections within embedded content are handled by the recursive call to indexValue + } + + private int getMaxDepth(IndexDefinition indexDefinition, String fieldName) { + Map options = indexDefinition.getOptions(); + if (options != null) { + String specificDepth = options.get("embeddedIndexingDepth." + fieldName); + if (specificDepth != null) { + try { return Integer.parseInt(specificDepth); } catch (NumberFormatException e) { /* ignore */ } + } + String globalDepth = options.get("embeddedIndexingDepth"); + if (globalDepth != null) { + try { return Integer.parseInt(globalDepth); } catch (NumberFormatException e) { /* ignore */ } + } + } + return 1; // Default depth if not specified + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneQueryBuilder.java b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneQueryBuilder.java new file mode 100644 index 0000000000..294647b19d --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneQueryBuilder.java @@ -0,0 +1,168 @@ +/* + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.arcadedb.lucene.builder; + +import com.arcadedb.database.Database; +import com.arcadedb.database.DatabaseInternal; // Required for schema access +import com.arcadedb.document.Document; // ArcadeDB Document +import com.arcadedb.index.CompositeKey; +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.lucene.parser.ArcadeLuceneMultiFieldQueryParser; // FIXME: Needs refactoring +import com.arcadedb.schema.DocumentType; +import com.arcadedb.schema.Property; +import com.arcadedb.schema.Schema; +import com.arcadedb.schema.Type; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; // For string ranges, newStringRange +import org.apache.lucene.index.Term; +// Import Point field range queries +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.DoublePoint; + + +public class LuceneQueryBuilder { + + private static final Logger logger = Logger.getLogger(LuceneQueryBuilder.class.getName()); + public static final Document EMPTY_METADATA = new Document(null); // Assuming Document can be db-less for this constant + + private 
final boolean allowLeadingWildcard; + private final boolean splitOnWhitespace; + + public LuceneQueryBuilder(Document metadata) { + if (metadata == null) { + metadata = EMPTY_METADATA; + } + this.allowLeadingWildcard = Boolean.TRUE.equals(metadata.get("allowLeadingWildcard")); + // Lucene's StandardQueryParser and MultiFieldQueryParser split on whitespace by default. + // This setting in OrientDB was more about how the string was fed *to* the parser or if specific syntax implied no split. + // For now, assuming default Lucene behavior is mostly fine. If specific "phrase" vs "term" logic is needed from splitOnWhitespace, + // it would affect how the query string is constructed or which parser is used. + this.splitOnWhitespace = Boolean.TRUE.equals(metadata.get("splitOnWhitespace")); // Default true + } + + public Query query(IndexDefinition indexDefinition, Object key, Document metadata, Analyzer analyzer, DatabaseInternal database) throws ParseException { + if (key == null) { + throw new IllegalArgumentException("Query key cannot be null"); + } + if (metadata == null) { + metadata = EMPTY_METADATA; + } + + String[] fields = indexDefinition.getFields().toArray(new String[0]); + if (fields.length == 0) { + // Default to a common field if not specified, e.g. "_all" or a convention + // This case needs clarification based on how schema-less Lucene indexes were handled. + // For now, let's assume if no fields, it might be a special query type or error. + // Or, if key is string, it searches default fields of the parser. + // For now, if no fields defined in index, and key is String, let parser use its default field. + // This requires parser to be configured with default field(s). + // fields = new String[] { "_DEFAULT_SEARCH_FIELD" }; // Placeholder for default search field + logger.warning("Querying Lucene index " + indexDefinition.getName() + " with no fields defined in index definition. 
Query may not behave as expected."); + } + + Map fieldTypes = new HashMap<>(); + if (database != null && indexDefinition.getTypeName() != null) { + Schema schema = database.getSchema(); + DocumentType docType = schema.getType(indexDefinition.getTypeName()); + if (docType != null) { + for (String fieldName : indexDefinition.getFields()) { + Property prop = docType.getProperty(fieldName); + if (prop != null) { + fieldTypes.put(fieldName, prop.getType()); + } else { + fieldTypes.put(fieldName, Type.STRING); // Default if property not found in schema + } + } + } else { + for (String fieldName : indexDefinition.getFields()) { + fieldTypes.put(fieldName, Type.STRING); // Default if type not found + } + } + } else { + for (String fieldName : indexDefinition.getFields()) { + fieldTypes.put(fieldName, Type.STRING); // Default if no DB or typeName + } + } + + + if (key instanceof String) { + // ArcadeLuceneMultiFieldQueryParser is now available. + ArcadeLuceneMultiFieldQueryParser parser = new ArcadeLuceneMultiFieldQueryParser(fieldTypes, fields, analyzer); + parser.setAllowLeadingWildcard(allowLeadingWildcard); + // this.splitOnWhitespace is available but MultiFieldQueryParser handles split on whitespace by default. + // If specific behavior like "always phrase if false" is needed, parser logic would be more complex. + // For now, assuming standard MFQP behavior is sufficient. 
+ // if (!this.splitOnWhitespace) { /* Potentially use different parser or pre-process query string */ } + + Map boost = metadata.get("boost", Map.class); + if (boost != null) { + parser.setBoosts(boost); + } + return parser.parse((String) key); + + } else if (key instanceof CompositeKey) { + CompositeKey compositeKey = (CompositeKey) key; + List keys = compositeKey.getKeys(); + BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); + + if (keys.size() != fields.length) { + throw new IllegalArgumentException("CompositeKey size does not match index definition fields count."); + } + + for (int i = 0; i < keys.size(); i++) { + Object partKey = keys.get(i); + String fieldName = fields[i]; + Type fieldType = fieldTypes.getOrDefault(fieldName, Type.STRING); + + if (partKey != null) { + Query partQuery = com.arcadedb.lucene.index.ArcadeLuceneIndexType.createExactFieldQuery(fieldName, partKey, fieldType, database); + booleanQuery.add(partQuery, BooleanClause.Occur.MUST); + } + } + return booleanQuery.build(); + } + // FIXME: Add support for specific range query objects if defined (this would be a new key instanceof MyCustomRangeObject) + // else if (key instanceof ...) 
{ + // MyCustomRange range = (MyCustomRange) key; + // String fieldName = range.getField(); + // Type fieldType = fieldTypes.getOrDefault(fieldName, Type.STRING); + // if (fieldType.isNumeric()) { + // if (fieldType == Type.LONG || fieldType == Type.INTEGER || fieldType == Type.SHORT || fieldType == Type.BYTE || fieldType == Type.DATETIME || fieldType == Type.DATE) { + // return LongPoint.newRangeQuery(fieldName, (Long)range.getLower(), (Long)range.getUpper()); + // } // Add other numeric types + // } else if (fieldType == Type.STRING) { + // return TermRangeQuery.newStringRange(fieldName, range.getLower().toString(), range.getUpper().toString(), range.isLowerInclusive(), range.isUpperInclusive()); + // } + // } + + // Default fallback or throw exception for unsupported key types + logger.warning("Unsupported key type for Lucene query: " + key.getClass().getName() + ". Attempting TermQuery on toString()."); + return new TermQuery(new Term(fields.length > 0 ? fields[0] : "_DEFAULT_", key.toString())); // Fallback, likely not useful + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java new file mode 100644 index 0000000000..8a023552a0 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java @@ -0,0 +1,423 @@ +package com.arcadedb.lucene.engine; + +// import static com.arcadedb.lucene.OLuceneIndexFactory.LUCENE_ALGORITHM; // FIXME: Define or import appropriately + +import com.arcadedb.database.DatabaseThreadLocal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; +import com.arcadedb.database.RecordId; +import com.arcadedb.database.TransactionContext; // For AtomicOperation +// import com.arcadedb.database.config.IndexEngineData; // FIXME: Find ArcadeDB equivalent or refactor +import com.arcadedb.document.Document; +import com.arcadedb.engine.Storage; 
+import com.arcadedb.index.Index; +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.index.IndexKeyUpdater; +import com.arcadedb.index.IndexMetadata; +import com.arcadedb.index.engine.IndexValidator; +import com.arcadedb.index.IndexValuesTransformer; +import com.arcadedb.lucene.analyzer.ArcadeLucenePerFieldAnalyzerWrapper; // Refactored +import com.arcadedb.lucene.collections.LuceneResultSet; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.lucene.parser.ArcadeLuceneMultiFieldQueryParser; // FIXME: Needs refactoring +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.lucene.query.LuceneQueryContext; // FIXME: Needs refactoring +import com.arcadedb.lucene.tx.LuceneTxChanges; // FIXME: Needs refactoring +import com.arcadedb.schema.DocumentType; // Changed from OClass +import com.arcadedb.schema.Type; // Changed from OType +import com.arcadedb.utility.Pair; // Changed from ORawPair +import com.arcadedb.lucene.engine.ArcadeLuceneEngineUtils; // Added import +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; // Lucene Document +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import 
org.apache.lucene.search.SortField; +import org.apache.lucene.search.highlight.TextFragment; + +/** + * Created by frank on 03/11/2016. + */ +public class ArcadeLuceneCrossClassIndexEngine implements LuceneIndexEngine { // Changed class name and interface + private static final Logger logger = + Logger.getLogger(ArcadeLuceneCrossClassIndexEngine.class.getName()); // Changed logger + private final Storage storage; // Changed OStorage + private final String indexName; + private final int indexId; + private static final String LUCENE_ALGORITHM = "LUCENE"; // Placeholder for algorithm name + private IndexMetadata markerIndexMetadata; // Optional: if you need to store it + + + public ArcadeLuceneCrossClassIndexEngine(int indexId, Storage storage, String indexName) { // Changed OStorage + this.indexId = indexId; + this.storage = storage; + this.indexName = indexName; + } + + @Override + public void init(IndexMetadata metadata) { // Changed OIndexMetadata + // This engine orchestrates queries across other Lucene indexes. + // It doesn't manage its own Lucene directory or writers in the same way + // a full-text index engine does. + // The 'metadata' here belongs to the "marker" index that caused this + // cross-class engine to be instantiated. + + this.markerIndexMetadata = metadata; // Store if needed for any config + + // For now, primarily log initialization. + // Any specific configurations for the cross-class behavior that might + // be stored in the markerIndexMetadata.getOptions() could be parsed here. + logger.info("ArcadeLuceneCrossClassIndexEngine initialized for marker index: " + (metadata != null ? metadata.getName() : "null")); + + // Example: If you had a default list of fields to use for cross-class searches + // if not specified in query metadata, you could load it from metadata.getOptions(). + // Map options = metadata.getOptions(); + // String defaultFieldsStr = options.get("crossClassDefaultFields"); + // if (defaultFieldsStr != null) { ... 
parse and store ... } + } + + @Override + public void flush() {} + + @Override + public int getId() { + return indexId; + } + + // FIXME: IndexEngineData equivalent in ArcadeDB? + @Override + public void create(TransactionContext atomicOperation, Object data) throws IOException {} // Changed OAtomicOperation, IndexEngineData + + @Override + public void delete(TransactionContext atomicOperation) {} // Changed OAtomicOperation + + // FIXME: IndexEngineData equivalent in ArcadeDB? + @Override + public void load(Object data) {} // Changed IndexEngineData + + @Override + public boolean remove(TransactionContext atomicOperation, Object key) { // Changed OAtomicOperation + return false; + } + + @Override + public void clear(TransactionContext atomicOperation) {} // Changed OAtomicOperation + + @Override + public void close() {} + + @Override + public Object get(Object key) { + // FIXME: This method requires significant refactoring once dependent classes are updated + // (LuceneKeyAndMetadata, ArcadeLuceneFullTextIndex, ArcadeLuceneMultiFieldQueryParser, OLuceneIndexEngineUtils, LuceneResultSet) + + final LuceneKeyAndMetadata keyAndMeta = (LuceneKeyAndMetadata) key; // FIXME + final Document arcadedbMetadata = keyAndMeta.metadata; // ArcadeDB Document // FIXME + final List excludes = + Optional.ofNullable(arcadedbMetadata.>getProperty("excludes")) + .orElse(Collections.emptyList()); + final List includes = + Optional.ofNullable(arcadedbMetadata.>getProperty("includes")) + .orElse(Collections.emptyList()); + + final Collection indexes = // Changed OIndex to Index + DatabaseThreadLocal.INSTANCE // Changed ODatabaseRecordThreadLocal + .get() + .getSchema() // Changed getMetadata().getIndexManager() + .getIndexes() + .stream() + .filter(i -> !excludes.contains(i.getName())) + .filter(i -> includes.isEmpty() || includes.contains(i.getName())) + .collect(Collectors.toList()); + + final ArcadeLucenePerFieldAnalyzerWrapper globalAnalyzer = // Changed OLucenePerFieldAnalyzerWrapper 
+ new ArcadeLucenePerFieldAnalyzerWrapper(new StandardAnalyzer()); + + final List globalFields = new ArrayList(); + final List globalReaders = new ArrayList(); + final Map types = new HashMap<>(); // Changed OType to Type + + try { + for (Index index : indexes) { // Changed OIndex to Index + // FIXME: index.getAlgorithm() might be different, DocumentType.INDEX_TYPE.FULLTEXT might be different + if (index.getAlgorithm().equalsIgnoreCase(LUCENE_ALGORITHM) + && index.getType().equalsIgnoreCase(DocumentType.INDEX_TYPE.FULLTEXT.toString())) { + + final IndexDefinition definition = index.getDefinition(); // Changed OIndexDefinition + final String typeName = definition.getTypeName(); // Changed getClassName + + String[] indexFields = + definition.getFields().toArray(new String[definition.getFields().size()]); + + for (int i = 0; i < indexFields.length; i++) { + String field = indexFields[i]; + types.put(typeName + "." + field, definition.getTypes()[i]); + globalFields.add(typeName + "." + field); + } + + ArcadeLuceneFullTextIndex fullTextIndex = (ArcadeLuceneFullTextIndex) index.getAssociatedIndex(); // Changed OLuceneFullTextIndex, getInternal() + + globalAnalyzer.add((ArcadeLucenePerFieldAnalyzerWrapper) fullTextIndex.queryAnalyzer()); // FIXME: queryAnalyzer might not be directly on ArcadeLuceneFullTextIndex + + globalReaders.add(fullTextIndex.searcher().getIndexReader()); // FIXME: searcher might not be directly on ArcadeLuceneFullTextIndex + } + } + + if (globalReaders.isEmpty()) { + return new LuceneResultSet(this, null, arcadedbMetadata); // FIXME: LuceneResultSet + } + + IndexReader indexReader = new MultiReader(globalReaders.toArray(new IndexReader[] {})); + IndexSearcher searcher = new IndexSearcher(indexReader); + + Map boost = + Optional.ofNullable(arcadedbMetadata.>getProperty("boost")) + .orElse(new HashMap<>()); + + // FIXME: ArcadeLuceneMultiFieldQueryParser needs refactoring + ArcadeLuceneMultiFieldQueryParser p = + new ArcadeLuceneMultiFieldQueryParser( 
+ types, globalFields.toArray(new String[] {}), globalAnalyzer, boost); + + p.setAllowLeadingWildcard( + Optional.ofNullable(arcadedbMetadata.getProperty("allowLeadingWildcard")).orElse(false)); + p.setSplitOnWhitespace( + Optional.ofNullable(arcadedbMetadata.getProperty("splitOnWhitespace")).orElse(true)); + + Object params = keyAndMeta.key.getKeys().get(0); // FIXME: keyAndMeta.key structure might change + Query query = p.parse(params.toString()); + + final List sortFields = ArcadeLuceneEngineUtils.buildSortFields(arcadedbMetadata, null, DatabaseThreadLocal.INSTANCE.get()); + // final List fields = OLuceneIndexEngineUtils.buildSortFields(arcadedbMetadata); + + LuceneQueryContext ctx = new LuceneQueryContext(null, searcher, query, sortFields); // FIXME + return new LuceneResultSet(this, ctx, arcadedbMetadata); // FIXME + } catch (IOException e) { + logger.log(Level.SEVERE, "unable to create multi-reader", e); + } catch (ParseException e) { + logger.log(Level.SEVERE, "unable to parse query", e); + } + return null; + } + + @Override + public void put(TransactionContext atomicOperation, Object key, Object value) {} // Changed OAtomicOperation + + @Override + public void put(TransactionContext atomicOperation, Object key, RID value) {} // Changed OAtomicOperation, ORID + + @Override + public boolean remove(TransactionContext atomicOperation, Object key, RID value) { // Changed OAtomicOperation, ORID + return false; + } + + @Override + public void update( // Changed OAtomicOperation, OIndexKeyUpdater + TransactionContext atomicOperation, Object key, IndexKeyUpdater updater) {} + + @Override + public boolean validatedPut( // Changed OAtomicOperation, ORID, IndexEngineValidator + TransactionContext atomicOperation, + Object key, + RID value, + IndexValidator validator) { + return false; + } + + @Override + public Stream> iterateEntriesBetween( // Changed ORawPair, ORID, IndexEngineValuesTransformer + Object rangeFrom, + boolean fromInclusive, + Object rangeTo, + boolean 
toInclusive, + boolean ascSortOrder, + IndexValuesTransformer transformer) { + return Stream.empty(); + } + + @Override + public Stream> iterateEntriesMajor( // Changed ORawPair, ORID, IndexEngineValuesTransformer + Object fromKey, + boolean isInclusive, + boolean ascSortOrder, + IndexValuesTransformer transformer) { + return Stream.empty(); + } + + @Override + public Stream> iterateEntriesMinor( // Changed ORawPair, ORID, IndexEngineValuesTransformer + Object toKey, + boolean isInclusive, + boolean ascSortOrder, + IndexValuesTransformer transformer) { + return Stream.empty(); + } + + @Override + public Stream> stream(IndexValuesTransformer valuesTransformer) { // Changed ORawPair, ORID + return Stream.empty(); + } + + @Override + public Stream> descStream(IndexValuesTransformer valuesTransformer) { // Changed ORawPair, ORID + return Stream.empty(); + } + + @Override + public Stream keyStream() { + return Stream.empty(); + } + + @Override + public long size(IndexValuesTransformer transformer) { // Changed IndexEngineValuesTransformer + return 0; + } + + @Override + public boolean hasRangeQuerySupport() { + return false; + } + + @Override + public String getName() { + return indexName; + } + + @Override + public boolean acquireAtomicExclusiveLock(Object key) { + return false; + } + + @Override + public String getIndexNameByKey(Object key) { + return null; + } + + @Override + public String indexName() { + return indexName; + } + + @Override + public void onRecordAddedToResultSet( // Changed parameter types + LuceneQueryContext queryContext, // FIXME + RecordId recordId, // Changed OContextualRecordId + Document ret, // Lucene Document + final ScoreDoc score) { + + // FIXME: RecordId in ArcadeDB does not have setContext. How to pass this data? 
+ // recordId.setContext( + // new HashMap() { + // { + // Map frag = queryContext.getFragments(); + // frag.entrySet().stream() + // .forEach( + // f -> { + // TextFragment[] fragments = f.getValue(); + // StringBuilder hlField = new StringBuilder(); + // for (int j = 0; j < fragments.length; j++) { + // if ((fragments[j] != null) && (fragments[j].getScore() > 0)) { + // hlField.append(fragments[j].toString()); + // } + // } + // put("$" + f.getKey() + "_hl", hlField.toString()); + // }); + // put("$score", score.score); + // } + // }); + } + + @Override + public Document buildDocument(Object key, Identifiable value) { // Changed OIdentifiable, Lucene Document + return null; + } + + @Override + public Query buildQuery(Object query) { + return null; + } + + @Override + public Analyzer indexAnalyzer() { + return null; + } + + @Override + public Analyzer queryAnalyzer() { + return null; + } + + @Override + public boolean remove(Object key, Identifiable value) { // Changed OIdentifiable + return false; + } + + @Override + public IndexSearcher searcher() { + return null; + } + + @Override + public void release(IndexSearcher searcher) {} + + @Override + public Set getInTx(Object key, LuceneTxChanges changes) { // Changed OIdentifiable, OLuceneTxChanges + return null; + } + + @Override + public long sizeInTx(LuceneTxChanges changes) { // Changed OLuceneTxChanges + return 0; + } + + @Override + public LuceneTxChanges buildTxChanges() throws IOException { // Changed OLuceneTxChanges + return null; + } + + @Override + public Query deleteQuery(Object key, Identifiable value) { // Changed OIdentifiable + return null; + } + + @Override + public boolean isCollectionIndex() { + return false; + } + + @Override + public void freeze(boolean throwException) {} + + @Override + public void release() {} + + @Override + public void updateUniqueIndexVersion(Object key) {} + + @Override + public int getUniqueIndexVersion(Object key) { + return 0; + } + + @Override + public boolean 
remove(Object key) { + return false; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneEngineUtils.java b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneEngineUtils.java new file mode 100644 index 0000000000..e9aa4127c5 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneEngineUtils.java @@ -0,0 +1,160 @@ +package com.arcadedb.lucene.engine; + +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.document.Document; +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.schema.Property; +import com.arcadedb.schema.Type; +import com.arcadedb.schema.DocumentType; + +import org.apache.lucene.search.SortField; +// Corrected import for SortField.Type +// import org.apache.lucene.search.SortField.первый; // This was incorrect in the prompt +// No, SortField.Type is an enum inside SortField, direct import not needed for Type itself, +// but rather SortField.Type.INT etc. + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +public class ArcadeLuceneEngineUtils { + + private static final Logger logger = Logger.getLogger(ArcadeLuceneEngineUtils.class.getName()); + + /** + * Builds a list of Lucene SortField objects based on sorting criteria + * specified in the metadata document. + * + * @param arcadedbMetadata The metadata document, typically from query options. + * Expected to contain a "sort" or "orderBy" field. + * The value can be a String (e.g., "fieldA ASC, fieldB DESC") + * or a List of Maps (e.g., [{"field": "fieldA", "direction": "ASC"}, ...]). + * @param indexDefinition Optional: The index definition, used to infer field types for sorting if not specified. + * @param database Optional: The database instance, used to get schema for type inference. + * @return A list of Lucene SortField objects. 
+ */ + public static List buildSortFields(Document arcadedbMetadata, IndexDefinition indexDefinition, DatabaseInternal database) { + List sortFields = new ArrayList<>(); + if (arcadedbMetadata == null) { + return sortFields; + } + + Object sortCriteria = arcadedbMetadata.get("sort"); + if (sortCriteria == null) { + sortCriteria = arcadedbMetadata.get("orderBy"); + } + + if (sortCriteria == null) { + return sortFields; + } + + if (sortCriteria instanceof String) { + // Parse string like "fieldA ASC, fieldB DESC" + String[] criteria = ((String) sortCriteria).split(","); + for (String criterion : criteria) { + String[] parts = criterion.trim().split("\\s+"); // Use \\s+ for one or more spaces + String fieldName = parts[0].trim(); + if (fieldName.isEmpty()) continue; + + boolean reverse = parts.length > 1 && "DESC".equalsIgnoreCase(parts[1].trim()); + + SortField.Type sortType = inferSortType(fieldName, indexDefinition, database); + sortFields.add(new SortField(fieldName, sortType, reverse)); + } + } else if (sortCriteria instanceof List) { + // Parse list of maps, e.g., [{"field": "fieldA", "direction": "ASC"}, ...] 
+ try { + @SuppressWarnings("unchecked") // Generic type for list elements from Document.get() + List criteriaList = (List) sortCriteria; + for (Object criterionObj : criteriaList) { + if (criterionObj instanceof Map) { + @SuppressWarnings("unchecked") + Map criterion = (Map) criterionObj; + String fieldName = criterion.get("field"); + String direction = criterion.get("direction"); + if (fieldName != null && !fieldName.trim().isEmpty()) { + boolean reverse = "DESC".equalsIgnoreCase(direction); + SortField.Type sortType = inferSortType(fieldName.trim(), indexDefinition, database); + sortFields.add(new SortField(fieldName.trim(), sortType, reverse)); + } + } else if (criterionObj instanceof String) { // Support list of strings like ["fieldA ASC", "fieldB DESC"] + String[] parts = ((String)criterionObj).trim().split("\\s+"); + String fieldName = parts[0].trim(); + if (fieldName.isEmpty()) continue; + boolean reverse = parts.length > 1 && "DESC".equalsIgnoreCase(parts[1].trim()); + SortField.Type sortType = inferSortType(fieldName, indexDefinition, database); + sortFields.add(new SortField(fieldName, sortType, reverse)); + } + } + } catch (ClassCastException e) { + logger.warning("Could not parse 'sort' criteria from List due to unexpected element types: " + e.getMessage()); + } + } else { + logger.warning("Unsupported 'sort' criteria format: " + sortCriteria.getClass().getName()); + } + + return sortFields; + } + + /** + * Infers the Lucene SortField.Type for a given field name. + * + * @param fieldName The name of the field. + * @param indexDefinition Optional: The index definition containing schema information. + * @param database Optional: The database instance for schema lookup. + * @return The inferred SortField.Type, defaults to STRING if type cannot be determined. 
+ */ + private static SortField.Type inferSortType(String fieldName, IndexDefinition indexDefinition, DatabaseInternal database) { + // Special Lucene sort field for relevance score + if ("score".equalsIgnoreCase(fieldName) || SortField.FIELD_SCORE.toString().equals(fieldName)) { + return SortField.Type.SCORE; + } + // Special Lucene sort field for document order + if (SortField.FIELD_DOC.toString().equals(fieldName)) { + return SortField.Type.DOC; + } + + if (database != null && indexDefinition != null && indexDefinition.getTypeName() != null) { + DocumentType docType = database.getSchema().getType(indexDefinition.getTypeName()); + if (docType != null) { + Property property = docType.getProperty(fieldName); + if (property != null) { + Type propertyType = property.getType(); + switch (propertyType) { + case INTEGER: + case SHORT: + case BYTE: + return SortField.Type.INT; + case LONG: + case DATETIME: // Assuming DATETIME is stored as long epoch millis for sorting + case DATE: // Assuming DATE is stored as long epoch millis for sorting + return SortField.Type.LONG; + case FLOAT: + return SortField.Type.FLOAT; + case DOUBLE: + return SortField.Type.DOUBLE; + case STRING: + case TEXT: + case ENUM: + case UUID: // UUIDs are often sorted as strings + case BINARY: // Might be sorted as string, or custom if specific byte order needed + return SortField.Type.STRING; + // Add other types as needed, e.g., CUSTOM for specific comparators + // BOOLEAN is not directly sortable with a standard SortField.Type unless mapped to INT/STRING + default: + logger.finer("Cannot infer specific Lucene SortField.Type for ArcadeDB Type " + propertyType + " on field '" + fieldName + "'. Defaulting to STRING."); + return SortField.Type.STRING; + } + } else { + logger.finer("Property '" + fieldName + "' not found in type '" + indexDefinition.getTypeName() + "'. 
Defaulting to STRING sort type."); + } + } else { + logger.finer("DocumentType '" + indexDefinition.getTypeName() + "' not found in schema. Defaulting to STRING sort type for field '" + fieldName + "'."); + } + } + // Default if schema info is unavailable or field not found + logger.finer("Insufficient schema information for field '" + fieldName + "'. Defaulting to STRING sort type."); + return SortField.Type.STRING; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java new file mode 100644 index 0000000000..cedc2ba1c9 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java @@ -0,0 +1,420 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * Copyright 2014 Orient Technologies. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.arcadedb.lucene.engine; + +import static com.arcadedb.lucene.builder.LuceneQueryBuilder.EMPTY_METADATA; // FIXME: LuceneQueryBuilder needs refactoring + +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; +import com.arcadedb.database.RecordId; +import com.arcadedb.database.TransactionContext; // For AtomicOperation +import com.arcadedb.document.Document; // ArcadeDB Document +import com.arcadedb.engine.Storage; +import com.arcadedb.exception.IndexException; // Changed exception +import com.arcadedb.index.CompositeKey; +import com.arcadedb.index.IndexKeyUpdater; +import com.arcadedb.index.IndexMetadata; +import com.arcadedb.index.IndexValuesTransformer; +import com.arcadedb.index.engine.IndexValidator; +import com.arcadedb.lucene.builder.LuceneDocumentBuilder; // FIXME: Needs refactoring +import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.ArcadeLuceneIndexTransformer; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.LuceneResultSet; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneIndexType; +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.lucene.query.LuceneQueryContext; +import com.arcadedb.lucene.tx.LuceneTxChanges; +import com.arcadedb.query.sql.executor.CommandContext; +import com.arcadedb.query.sql.parser.ParseException; +import com.arcadedb.schema.Type; // For manual index field creation +import com.arcadedb.utility.Pair; // Changed from ORawPair +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Stream; +import 
org.apache.lucene.document.Document; // Lucene Document +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.highlight.TextFragment; +import org.apache.lucene.store.Directory; + +public class ArcadeLuceneFullTextIndexEngine extends OLuceneIndexEngineAbstract implements LuceneIndexEngine { // Changed class, base, and interface + private static final Logger logger = + Logger.getLogger(ArcadeLuceneFullTextIndexEngine.class.getName()); // Changed logger + + private final LuceneDocumentBuilder builder; + private LuceneQueryBuilder queryBuilder; + // bonsayFileId removed as it's not used for standard Lucene updates. + // If a specific versioning or optimistic locking mechanism is needed for index entries, + // it would require a different design, possibly involving specific fields in Lucene documents. + + public ArcadeLuceneFullTextIndexEngine(Storage storage, String idxName) { + super(storage, idxName); + builder = new LuceneDocumentBuilder(); + } + + @Override + public void init(IndexMetadata indexMetadata) { + // The super.init in OLuceneIndexEngineAbstract expects: + // (String indexName, String indexType, IndexDefinition indexDefinition, boolean isAutomatic, Document metadata) + // IndexMetadata (ArcadeDB) has: name, typeName (of Schema Type), algorithm, propertyNames, keyTypes, options, unique, automatic, associatedToBucket, nullStrategy. + // It does not directly have a single "indexType" string in the sense of "LUCENE" or "FULLTEXT" - that's algorithm. + // The "metadata" Document for super.init should be created from indexMetadata.getOptions(). 
+ + com.arcadedb.document.Document engineInitMetadata = new com.arcadedb.document.Document(getDatabase()); + if (indexMetadata.getOptions() != null) { + engineInitMetadata.fromMap(indexMetadata.getOptions()); + } + + super.init(indexMetadata.getName(), + indexMetadata.getAlgorithm(), // Pass algorithm as indexType + indexMetadata, // Pass the whole IndexMetadata as IndexDefinition (it implements it) + indexMetadata.isAutomatic(), + engineInitMetadata); + + // queryBuilder uses the same options Document + queryBuilder = new LuceneQueryBuilder(engineInitMetadata); + } + + @Override + public IndexWriter createIndexWriter(Directory directory) throws IOException { + // FIXME: OLuceneIndexWriterFactory needs to be ArcadeLuceneIndexWriterFactory + // OLuceneIndexWriterFactory fc = new OLuceneIndexWriterFactory(); + // logger.log(Level.FINE, "Creating Lucene index in ''{0}''...", directory); + // return fc.createIndexWriter(directory, metadata, indexAnalyzer()); + throw new UnsupportedOperationException("ArcadeLuceneIndexWriterFactory not yet implemented"); + } + + @Override + public void onRecordAddedToResultSet( // Changed parameter types + final LuceneQueryContext queryContext, + final RecordId recordId, // Changed OContextualRecordId + final Document ret, // Lucene Document + final ScoreDoc score) { + HashMap data = new HashMap(); + + final Map frag = queryContext.getFragments(); + frag.forEach( + (key, fragments) -> { + final StringBuilder hlField = new StringBuilder(); + for (final TextFragment fragment : fragments) { + if ((fragment != null) && (fragment.getScore() > 0)) { + hlField.append(fragment.toString()); + } + } + data.put("$" + key + "_hl", hlField.toString()); + }); + data.put("$score", score.score); + + // recordId.setContext(data); // FIXME: RecordId in ArcadeDB does not have setContext. How to pass this data? + // This might need a wrapper class or different result handling. 
+ } + + @Override + public boolean remove(final TransactionContext atomicOperation, final Object key) { // Changed OAtomicOperation + return remove(key); + } + + @Override + public boolean remove(TransactionContext atomicOperation, Object key, RID value) { // Changed OAtomicOperation, ORID + return remove(key, value); + } + + @Override + public Object get(final Object key) { + return getInTx(key, null); + } + + @Override + public void update( + final TransactionContext txContext, // Changed parameter name for clarity + final Object key, + final IndexKeyUpdater updater) { + // A Lucene update is typically a delete followed by an add. + // The 'key' here is what identifies the document(s) to be updated. + // The 'updater' provides the new value(s)/Identifiable(s). + + // 1. Determine the new Identifiable that results from the update. + // The updater.update(oldValue, ...) is meant to get the new value. + // 'oldValue' for an index is usually the set of RIDs mapped to the key. + // Since this is a full-text index, the 'key' itself might be complex. + // For simplicity, if we assume the updater gives the *new complete Identifiable* to index: + Object newValue = updater.update(null, null).getValue(); // Passing null for oldValue and bonsayFileId. + + if (!(newValue instanceof Identifiable)) { + throw new IndexException("Updater did not provide an Identifiable value for Lucene index update. Key: " + key); + } + Identifiable newIdentifiable = (Identifiable) newValue; + + // 2. Delete old document(s) associated with the key. + // This requires a query that uniquely identifies the old document(s) for this key. + // If the key is the RID itself (e.g. auto index on @rid), then it's simple. + // If the key is field values, and these values *might have changed*, then deleting by + // the *old* key is important. The current `key` parameter should represent the old key. + // However, IndexKeyUpdater is often used when the key itself doesn't change, but the RID does (e.g. 
unique index). + // Or when the indexed content of the RID changes, but the RID (and key) remains the same. + + // Let's assume 'key' can identify the old document(s) and 'newIdentifiable' is the new state to index. + // If the RID is constant and only content changes: + // We need to re-build the Lucene document for newIdentifiable and use Lucene's updateDocument. + + // Simplest approach for now: delete by key, then put new document. + // This assumes 'key' can uniquely identify the document via a query. + // If 'key' is the set of indexed fields from the *old* version of the document: + if (key != null) { + Query deleteByOldKeyQuery = this.queryBuilder.query(this.indexDefinition, key, EMPTY_METADATA, this.queryAnalyzer(), getDatabase()); + try { + this.deleteDocument(deleteByOldKeyQuery); // From OLuceneIndexEngineAbstract + } catch (IOException e) { + throw new IndexException("Error deleting old document during update for key: " + key, e); + } + } else if (newIdentifiable != null && newIdentifiable.getIdentity() != null) { + // If key is null, but we have the new Identifiable's RID, try to delete by RID. + // This is only safe if we are sure this RID was previously indexed and this is a true update. + Query deleteByRidQuery = ArcadeLuceneIndexType.createQueryId(newIdentifiable); + try { + this.deleteDocument(deleteByRidQuery); + } catch (IOException e) { + throw new IndexException("Error deleting old document by RID during update for: " + newIdentifiable.getIdentity(), e); + } + } else { + throw new IndexException("Cannot determine document to update for Lucene index. Key and new Identifiable are null."); + } + + // 3. Put the new document state + // The 'key' for put should be derived from the newIdentifiable's fields if it's an automatic index. + // If it's a manual index, the 'key' might remain the same or be derived. 
+ // For now, assuming the 'key' parameter to 'update' is what we use to identify the document, + // and the new content comes from 'newIdentifiable'. + // The 'put' method will call buildDocument(key, newIdentifiable). + put(txContext, key, newIdentifiable); // Pass the original key for now + } + + @Override + public void put(final TransactionContext atomicOperation, final Object key, final Object value) { // Changed OAtomicOperation + updateLastAccess(); + openIfClosed(); + final Document doc = buildDocument(key, (Identifiable) value); // Lucene Document + addDocument(doc); + } + + @Override + public void put(TransactionContext atomicOperation, Object key, RID value) { // Changed OAtomicOperation, ORID + updateLastAccess(); + openIfClosed(); + final Document doc = buildDocument(key, value); // Lucene Document + addDocument(doc); + } + + @Override + public boolean validatedPut( // Changed OAtomicOperation, ORID, IndexEngineValidator + TransactionContext atomicOperation, + Object key, + RID value, + IndexValidator validator) { + throw new UnsupportedOperationException( + "Validated put is not supported by ArcadeLuceneFullTextIndexEngine"); + } + + @Override + public Stream> iterateEntriesBetween( // Changed ORawPair, ORID, IndexEngineValuesTransformer + Object rangeFrom, + boolean fromInclusive, + Object rangeTo, + boolean toInclusive, + boolean ascSortOrder, + IndexValuesTransformer transformer) { + // FIXME: OLuceneResultSet and LuceneIndexTransformer need refactoring + return ArcadeLuceneIndexTransformer.transformToStream((LuceneResultSet) get(rangeFrom), rangeFrom); + } + + private Set getResults( // Changed OIdentifiable, OCommandContext, OLuceneTxChanges, ODocument + final Query query, + final CommandContext context, + final LuceneTxChanges changes, + final Document metadata) { // ArcadeDB Document for metadata + // sort + // FIXME: OLuceneIndexEngineUtils.buildSortFields needs refactoring + // final List fields = 
OLuceneIndexEngineUtils.buildSortFields(metadata); + final List fields = null; // Placeholder + final IndexSearcher luceneSearcher = searcher(); + final LuceneQueryContext queryContext = + new LuceneQueryContext(context, luceneSearcher, query, fields).withChanges(changes); + // FIXME: OLuceneResultSet needs refactoring to LuceneResultSet + return new LuceneResultSet(this, queryContext, metadata); + } + + @Override + public Stream> iterateEntriesMajor( // Changed ORawPair, ORID, IndexEngineValuesTransformer + Object fromKey, + boolean isInclusive, + boolean ascSortOrder, + IndexValuesTransformer transformer) { + return null; + } + + @Override + public Stream> iterateEntriesMinor( // Changed ORawPair, ORID, IndexEngineValuesTransformer + Object toKey, + boolean isInclusive, + boolean ascSortOrder, + IndexValuesTransformer transformer) { + return null; + } + + @Override + public boolean hasRangeQuerySupport() { + return false; + } + + @Override + public void updateUniqueIndexVersion(Object key) { + // not implemented + } + + @Override + public int getUniqueIndexVersion(Object key) { + return 0; // not implemented + } + + @Override + public Document buildDocument(Object key, Identifiable value) { // Changed OIdentifiable, Lucene Document + if (indexDefinition.isAutomatic()) { + // builder is an instance of LuceneDocumentBuilder + // LuceneDocumentBuilder.build expects: IndexDefinition, Object key, Identifiable value, Map collectionFields, Document metadata + // collectionFields and metadata are available as protected members from OLuceneIndexEngineAbstract + return builder.build(indexDefinition, key, value, this.collectionFields, this.metadata); + } else { + return putInManualindex(key, value); + } + } + + private static Document putInManualindex(Object key, Identifiable oIdentifiable) { // Changed OIdentifiable, Lucene Document + Document luceneDoc = new Document(); // Lucene Document + luceneDoc.add(ArcadeLuceneIndexType.createRidField(oIdentifiable)); + // The ID 
field for manual indexes might store the key itself if simple, or a hash if complex. + // createIdField might be more about a specific format if needed. + // For now, let's assume the key itself or its parts are added below with specific field names. + // If a single "ID" field representing the whole key is desired for searching the key: + // luceneDoc.add(ArcadeLuceneIndexType.createIdField(oIdentifiable, key)); + + + if (key instanceof CompositeKey) { + List keys = ((CompositeKey) key).getKeys(); + // If this manual index has a definition with field names for composite parts: + List definedFields = null; + // Type[] definedTypes = null; // Not directly available for manual index key parts in IndexDefinition easily + // if (indexDefinition != null) { // indexDefinition is not available in this static context directly + // definedFields = indexDefinition.getFields(); + // // definedTypes = indexDefinition.getTypes(); // This is for the main value, not necessarily for key parts + // } + + for (int i = 0; i < keys.size(); i++) { + Object subKey = keys.get(i); + if (subKey == null) continue; + String fieldName = (definedFields != null && i < definedFields.size()) ? definedFields.get(i) : "k" + i; + Type type = Type.getTypeByValue(subKey); + // For manual keys, typically store and index them. Sorting is less common for manual keys. 
+ List fields = ArcadeLuceneIndexType.createFields(fieldName, subKey, Field.Store.YES, false, type); + for (Field f : fields) { + luceneDoc.add(f); + } + } + } else if (key instanceof Collection) { + @SuppressWarnings("unchecked") + Collection keys = (Collection) key; + int i = 0; + for (Object item : keys) { + if (item == null) continue; + String fieldName = "k" + i; // Implicit field name for collection items + Type type = Type.getTypeByValue(item); + List fields = ArcadeLuceneIndexType.createFields(fieldName, item, Field.Store.YES, false, type); + for (Field f : fields) { + luceneDoc.add(f); + } + i++; + } + } else if (key != null) { + // Single key + // String fieldName = (indexDefinition != null && !indexDefinition.getFields().isEmpty()) ? indexDefinition.getFields().get(0) : "k0"; + String fieldName = "k0"; // Default field name for single manual key + Type type = Type.getTypeByValue(key); + // Store.NO was used in original for single key; this means it's indexed but not retrievable from Lucene doc. + // Let's make it configurable or default to YES for consistency if this key is what user searches. + // For now, keeping Store.NO to match original hint, but this is questionable. + // If it's the actual key to be searched, it should likely be YES or its components stored. + // Given createFields also adds Point fields which are not stored, this might be okay. 
+ List fields = ArcadeLuceneIndexType.createFields(fieldName, key, Field.Store.NO, false, type); + for (Field f : fields) { + luceneDoc.add(f); + } + } + return luceneDoc; + } + + @Override + public Query buildQuery(final Object maybeQuery) { + try { + if (maybeQuery instanceof String) { + return queryBuilder.query(indexDefinition, (String) maybeQuery, new com.arcadedb.document.Document(getDatabase()) /*EMPTY_METADATA*/, queryAnalyzer(), getDatabase()); + } else { + LuceneKeyAndMetadata q = (LuceneKeyAndMetadata) maybeQuery; // FIXME: LuceneKeyAndMetadata needs refactoring + return queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer(), getDatabase()); + } + } catch (final ParseException e) { + throw new IndexException("Error parsing query for index '" + name + "'", e); // Changed exception + } + } + + @Override + public Set getInTx(Object key, LuceneTxChanges changes) { // Changed OIdentifiable, OLuceneTxChanges + updateLastAccess(); + openIfClosed(); + try { + if (key instanceof LuceneKeyAndMetadata) { // FIXME: LuceneKeyAndMetadata needs refactoring + LuceneKeyAndMetadata q = (LuceneKeyAndMetadata) key; + Query luceneQuery = queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer(), getDatabase()); + + CommandContext commandContext = q.getContext(); // LuceneKeyAndMetadata now has getContext() + return getResults(luceneQuery, commandContext, changes, q.metadata); + + } else { + Query luceneQuery = queryBuilder.query(indexDefinition, key, new com.arcadedb.document.Document(getDatabase()) /*EMPTY_METADATA*/, queryAnalyzer(), getDatabase()); + + CommandContext commandContext = null; + if (key instanceof LuceneCompositeKey) { // FIXME: LuceneCompositeKey needs refactoring + commandContext = ((LuceneCompositeKey) key).getContext(); // Assuming LuceneCompositeKey might have a context + } + return getResults(luceneQuery, commandContext, changes, new com.arcadedb.document.Document(getDatabase())/*EMPTY_METADATA*/); + } + } catch 
(ParseException e) { + throw new IndexException("Error parsing lucene query for index '" + name + "'", e); // Changed exception + } + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/LuceneIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/LuceneIndexEngine.java new file mode 100644 index 0000000000..49e1c5db19 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/LuceneIndexEngine.java @@ -0,0 +1,69 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * Copyright 2014 Orient Technologies. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.engine; + +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RecordId; +import com.arcadedb.engine.WALFile; // For Freezeable +import com.arcadedb.index.IndexEngine; +import com.arcadedb.lucene.query.LuceneQueryContext; // Will be refactored +import com.arcadedb.lucene.tx.LuceneTxChanges; // Will be refactored +import java.io.IOException; +import java.util.Set; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; + +/** Created by Enrico Risa on 04/09/15. 
*/ +public interface LuceneIndexEngine extends IndexEngine, WALFile.Freezeable { // Changed interface name and extended interfaces + + String indexName(); + + void onRecordAddedToResultSet( // Changed parameter types + LuceneQueryContext queryContext, RecordId recordId, Document ret, ScoreDoc score); + + Document buildDocument(Object key, Identifiable value); // Changed parameter type + + Query buildQuery(Object query); + + Analyzer indexAnalyzer(); + + Analyzer queryAnalyzer(); + + boolean remove(Object key, Identifiable value); // Changed parameter type + + boolean remove(Object key); + + IndexSearcher searcher(); + + void release(IndexSearcher searcher); + + Set getInTx(Object key, LuceneTxChanges changes); // Changed parameter and return types + + long sizeInTx(LuceneTxChanges changes); // Changed parameter type + + LuceneTxChanges buildTxChanges() throws IOException; // Changed return type + + Query deleteQuery(Object key, Identifiable value); // Changed parameter type + + boolean isCollectionIndex(); +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassFunctionsFactory.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassFunctionsFactory.java new file mode 100644 index 0000000000..5a24641965 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassFunctionsFactory.java @@ -0,0 +1,28 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.arcadedb.lucene.functions; + +import com.arcadedb.query.sql.SQLFunctionRegistry; // Assuming this is the ArcadeDB equivalent + +// FIXME: The actual function class (ArcadeLuceneCrossClassSearchFunction) will need to be created/refactored separately. + +public class ArcadeLuceneCrossClassFunctionsFactory { // Changed class name + + public static void onStartup() { // Changed to a static method for registration + SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneCrossClassSearchFunction()); // FIXME: Placeholder for refactored class + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java new file mode 100644 index 0000000000..9e5fba63a6 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java @@ -0,0 +1,259 @@ +package com.arcadedb.lucene.functions; + +// import static com.arcadedb.lucene.OLuceneCrossClassIndexFactory.LUCENE_CROSS_CLASS; // FIXME Define or import +import com.arcadedb.database.DatabaseContext; +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; // Changed +import com.arcadedb.document.Document; // Changed +import com.arcadedb.index.Index; // Changed +import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring +import com.arcadedb.lucene.engine.ArcadeLuceneCrossClassIndexEngine; // Changed +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring (used as type in old code, though engine is likely target) +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import 
com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.query.sql.parser.FromClause; // Changed +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; // Changed +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * This function uses the CrossClassIndex to search documents across all the Lucene indexes defined in a database + *

+ * Created by frank on 19/02/2016. + */ +public class ArcadeLuceneCrossClassSearchFunction extends ArcadeLuceneSearchFunctionTemplate { // Changed base class + private static final Logger logger = + Logger.getLogger(ArcadeLuceneCrossClassSearchFunction.class.getName()); // Changed + + public static final String NAME = "search_cross"; // Changed from SEARCH_CROSS + private static final String LUCENE_CROSS_CLASS_ALGORITHM = "LUCENE_CROSS_CLASS"; // Placeholder + + private ArcadeLuceneCrossClassIndexEngine crossClassEngineInstance = null; // Cache for the engine + + public ArcadeLuceneCrossClassSearchFunction() { + super(NAME, 1, 2); // query, [metadata] + } + + // searchForIndex in the template expects args for index name. This class doesn't use that. + // It finds a specific *kind* of index (cross class). + // So, the searchForIndex from the template is not suitable. + // This function might not be a good fit for ArcadeLuceneSearchFunctionTemplate if it cannot provide a single index. + // However, if ArcadeLuceneCrossClassIndexEngine is treated as *the* index, it could work. + + @Override + public Iterable searchFromTarget( // Changed + FromClause target, // Target is ignored by this function as it's cross-class + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, // Changed + Expression... 
args) { // Changed + + ArcadeLuceneCrossClassIndexEngine engine = getCrossClassEngine(ctx); // FIXME: Needs robust way to get this engine + + if (engine == null) { + logger.warning("Lucene Cross Class Index Engine not found."); + return Collections.emptySet(); + } + + Expression expression = args[0]; + String query = (String) expression.execute((Result) null, ctx); // Changed + + Document metadata = getMetadata(args, ctx, 1); // Changed, metadata is args[1] + + // The engine's 'get' method should return Iterable or similar + // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring + Object result = engine.get( + new LuceneKeyAndMetadata( + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata)); + + if (result instanceof Iterable) { + return (Iterable) result; + } + return Collections.emptySet(); + } + + @Override + public Object execute( + Object iThis, + Identifiable currentRecord, // Changed + Object currentResult, + Object[] params, + CommandContext ctx) { // Changed + + ArcadeLuceneCrossClassIndexEngine engine = getCrossClassEngine(ctx); // FIXME + + if (engine == null) { + logger.warning("Lucene Cross Class Index Engine not found for execute."); + return Collections.emptySet(); + } + + String query = (String) params[0]; + Document metadata = getMetadata(params, 1); // Changed + + // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring + Object result = engine.get( + new LuceneKeyAndMetadata( + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata)); + + return result; + } + + private Document getMetadata(Expression[] args, CommandContext ctx, int metadataParamIndex) { // Changed + if (args.length > metadataParamIndex) { + // Assuming getMetadata from ArcadeLuceneSearchFunctionTemplate is suitable + return super.getMetadata(args[metadataParamIndex], ctx); + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + } + + private Document 
getMetadata(Object[] params, int metadataParamIndex) { // Changed + if (params.length > metadataParamIndex) { + if (params[metadataParamIndex] instanceof Map) { + return new Document().fromMap((Map) params[metadataParamIndex]); + } else if (params[metadataParamIndex] instanceof String) { + return new Document().fromJSON((String) params[metadataParamIndex]); + } else if (params[metadataParamIndex] instanceof Document) { + return (Document) params[metadataParamIndex]; + } + // Fallback or error if type is not recognized + try { + return new Document().fromJSON(params[metadataParamIndex].toString()); + } catch (Exception e) { + // ignore + } + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + } + + // This method is problematic as the template expects an ArcadeLuceneFullTextIndex. + // This function uses a different kind of engine. + // Returning null tells the template that direct indexed execution (via that specific index type) is not possible. + @Override + protected ArcadeLuceneFullTextIndex searchForIndex( // FIXME: This signature might not be appropriate for this class + FromClause target, CommandContext ctx, Expression... args) { + return null; // This function doesn't use a single, standard Lucene full-text index from the target. + // It uses the ArcadeLuceneCrossClassIndexEngine. + } + + // Helper to get the specific cross-class engine instance + private ArcadeLuceneCrossClassIndexEngine getCrossClassEngine(CommandContext ctx) { + if (this.crossClassEngineInstance != null && this.crossClassEngineInstance.getDatabase() == ctx.getDatabase()) { + // Ensure cached engine is for the same database instance, though typically SQL functions are per-query. + // If function instances are per-query, caching might offer little benefit unless getCrossClassEngine is called multiple times in one execution. 
+ // If functions are singletons, then caching is more useful but needs to be thread-safe or per-database-instance. + // For now, simple instance caching. If SQLFunctions are per-query, this cache won't persist across queries. + return this.crossClassEngineInstance; + } + + DatabaseInternal database = null; + if (ctx instanceof DatabaseContext) { // Check if CommandContext is or provides DatabaseContext + database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); + } else if (ctx.getDatabase() instanceof DatabaseInternal) { // Standard way to get Database + database = (DatabaseInternal) ctx.getDatabase(); + } + + if (database == null) { + logger.warning("Database not found in CommandContext for getCrossClassEngine. CommandContext type: " + ctx.getClass().getName()); + return null; + } + + for (Index idx : database.getSchema().getIndexes()) { + IndexEngine engine = idx.getAssociatedIndex(); // Index.getAssociatedIndex() returns IndexEngine + if (engine instanceof ArcadeLuceneCrossClassIndexEngine) { + this.crossClassEngineInstance = (ArcadeLuceneCrossClassIndexEngine) engine; + logger.fine("Found ArcadeLuceneCrossClassIndexEngine via associated engine of index: " + idx.getName()); + return this.crossClassEngineInstance; + } + // Check if the index itself is a wrapper for the engine (less likely with getAssociatedIndex) + // or if algorithm matches (if factory handler associates this engine type with an algorithm for a "marker" index) + if (LUCENE_CROSS_CLASS_ALGORITHM.equals(idx.getAlgorithm())) { + if (engine instanceof ArcadeLuceneCrossClassIndexEngine) { // Should be true if factory did its job + this.crossClassEngineInstance = (ArcadeLuceneCrossClassIndexEngine) engine; + logger.fine("Found ArcadeLuceneCrossClassIndexEngine via algorithm on index: " + idx.getName()); + return this.crossClassEngineInstance; + } else if (engine == null && idx instanceof ArcadeLuceneCrossClassIndexEngine) { + // This case is if the Index object itself *is* the engine, 
which is not standard for ArcadeDB. + // But keeping a check for robustness during refactoring. + this.crossClassEngineInstance = (ArcadeLuceneCrossClassIndexEngine) idx; + logger.warning("Found ArcadeLuceneCrossClassIndexEngine directly as an Index instance (unusual): " + idx.getName()); + return this.crossClassEngineInstance; + } + } + } + + logger.warning("ArcadeLuceneCrossClassIndexEngine not found. Ensure an index of type '" + LUCENE_CROSS_CLASS_ALGORITHM + + "' (which uses this engine) is defined, or that an existing index correctly associates this engine."); + return null; + } + + + @Override + public String getSyntax() { + // logger.debug("syntax"); // Logging in getSyntax is unusual + return NAME + "('', [ ])"; + } + + // Other overrides from OIndexableSQLFunction (estimate, canExecuteInline, etc.) + // The original class had specific implementations for these. + // If extending ArcadeLuceneSearchFunctionTemplate, these might be inherited or need specific overrides. + // For now, relying on template's (which has FIXMEs) or needing specific ones here. + + @Override + public long estimate( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { + // Cross-class estimation is complex. Returning a default or trying to get a count from the engine. + ArcadeLuceneCrossClassIndexEngine engine = getCrossClassEngine(ctx); + if (engine != null) { + // FIXME: The engine might need a size estimation method + // return engine.sizeEstimate(args...); + } + return super.estimate(target, operator, rightValue, ctx, args); // Fallback to template's estimate + } + + @Override + public boolean allowsIndexedExecution( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { + // This function *always* uses its specialized engine, so it's "indexed" in that sense. 
+ return getCrossClassEngine(ctx) != null; + } + @Override + public boolean canExecuteInline( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { + return false; // Cross class search is likely too complex for simple inline execution + } + + @Override + public boolean shouldExecuteAfterSearch( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { + return false; + } + +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsFactory.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsFactory.java new file mode 100644 index 0000000000..ca707bae1a --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsFactory.java @@ -0,0 +1,32 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.arcadedb.lucene.functions; + +import com.arcadedb.query.sql.SQLFunctionRegistry; // Assuming this is the ArcadeDB equivalent + +// FIXME: The actual function classes (e.g., ArcadeLuceneSearchOnIndexFunction) will need to be created/refactored separately. +// For now, we are just changing the instantiation call. 
+ +public class ArcadeLuceneFunctionsFactory { // Changed class name + + public static void onStartup() { // Changed to a static method for registration + SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneSearchOnIndexFunction()); // FIXME: Placeholder for refactored class + SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneSearchOnFieldsFunction()); // FIXME: Placeholder for refactored class + SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneSearchOnClassFunction()); // FIXME: Placeholder for refactored class + SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneSearchMoreLikeThisFunction()); // FIXME: Placeholder for refactored class + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsUtils.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsUtils.java new file mode 100644 index 0000000000..df3f5b7307 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsUtils.java @@ -0,0 +1,64 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.database.DatabaseContext; // Assuming CommandContext provides access to Database +import com.arcadedb.database.DatabaseInternal; // Changed +import com.arcadedb.index.Index; // Changed +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.schema.Schema; // Changed +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 13/02/2017. 
*/ +public class ArcadeLuceneFunctionsUtils { // Changed class name + public static final String MEMORY_INDEX = "_memoryIndex"; + + protected static ArcadeLuceneFullTextIndex searchForIndex(Expression[] args, CommandContext ctx) { // Changed types + final String indexName = (String) args[0].execute((Result) null, ctx); // Changed types + return getLuceneFullTextIndex(ctx, indexName); + } + + protected static ArcadeLuceneFullTextIndex getLuceneFullTextIndex( // Changed types + final CommandContext ctx, final String indexName) { + // Assuming CommandContext gives access to DatabaseInternal instance + final DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify how to get DatabaseInternal from CommandContext + // database.activateOnCurrentThread(); // This might not be needed or done differently in ArcadeDB + + final Schema schema = database.getSchema(); // Changed OMetadataInternal + + // FIXME: metadata.getIndexManagerInternal().getIndex(documentDatabase, indexName) changed to schema.getIndex() + // Also, the casting and type checking for ArcadeLuceneFullTextIndex needs ArcadeLuceneFullTextIndex to be properly defined and refactored. 
+ final Index index = schema.getIndex(indexName); + + if (!(index instanceof ArcadeLuceneFullTextIndex)) { // FIXME + throw new IllegalArgumentException("Not a valid Lucene index:: " + indexName); + } + return (ArcadeLuceneFullTextIndex) index; // FIXME + } + + public static MemoryIndex getOrCreateMemoryIndex(CommandContext ctx) { // Changed OCommandContext + MemoryIndex memoryIndex = (MemoryIndex) ctx.getVariable(MEMORY_INDEX); + if (memoryIndex == null) { + memoryIndex = new MemoryIndex(); + ctx.setVariable(MEMORY_INDEX, memoryIndex); + } + memoryIndex.reset(); + return memoryIndex; + } + + public static String doubleEscape(final String s) { + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.length(); ++i) { + final char c = s.charAt(i); + if (c == 92 || c == 43 || c == 45 || c == 33 || c == 40 || c == 41 || c == 58 || c == 94 + || c == 91 || c == 93 || c == 34 || c == 123 || c == 125 || c == 126 || c == 42 || c == 63 + || c == 124 || c == 38 || c == 47) { + sb.append('\\'); + sb.append('\\'); + } + sb.append(c); + } + return sb.toString(); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java new file mode 100644 index 0000000000..a872e5520c --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java @@ -0,0 +1,76 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.database.Identifiable; // Changed +import com.arcadedb.document.Document; // Changed +import com.arcadedb.lucene.collections.LuceneResultSet; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.function.SQLFunction; // Standard ArcadeDB SQLFunction if 
SQLFunctionAbstract is not public or is different +import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.query.sql.parser.FromClause; // Changed +import java.util.Map; + +/** Created by frank on 25/05/2017. */ +// Changed base class and removed IndexableSQLFunction interface +public abstract class ArcadeLuceneSearchFunctionTemplate implements SQLFunction { + + protected final String name; + + public ArcadeLuceneSearchFunctionTemplate(final String name) { + this.name = name; + // Parameter count checks will be done in each concrete class's execute method + } + + @Override + public String getName() { + return name; + } + + // The following methods are from the old IndexableSQLFunction interface and will be removed. + // If ArcadeDB has a new way for functions to declare index usability, that would be a separate implementation. + // public abstract boolean canExecuteInline(...); + // public abstract boolean allowsIndexedExecution(...); + // public abstract boolean shouldExecuteAfterSearch(...); + // public abstract long estimate(...); + // public abstract Iterable searchFromTarget(...); // This logic moves into execute + + // The execute method is abstract in SQLFunction and must be implemented by concrete subclasses. 
+ // public abstract Object execute(Object self, Identifiable currentRecord, Object currentResult, Object[] params, CommandContext context); + + protected Document getMetadata(Expression metadataExpression, CommandContext ctx) { + if (metadataExpression == null) return new Document(ctx.getDatabase()); + final Object md = metadataExpression.execute((Result) null, ctx); + if (md instanceof Document) { + return (Document) md; + } else if (md instanceof Map) { + return new Document().fromMap((Map) md); // Changed ODocument + } else if (md instanceof String) { + try { + return new Document().fromJSON((String) md); // Changed ODocument + } catch (Exception e) { + // It might not be a JSON string, but the raw metadata string itself (e.g. analyzer class name) + // This part needs careful review based on how metadata is actually passed and used. + // For now, returning a document with a field containing the string. + Document doc = new Document(); + doc.set("metadata", (String) md); // FIXME: Review this fallback for non-JSON metadata strings + return doc; + } + } else if (metadata != null) { + // Fallback if metadata is not null but not a recognized type, try its string representation as JSON + try { + return new Document().fromJSON(metadata.toString()); // Changed ODocument + } catch (Exception e) { + Document doc = new Document(); + doc.set("metadata", metadata.toString()); // FIXME: Review this fallback + return doc; + } + } + return new Document(); // Empty document if null or unparseable + } + + // Changed OLuceneFullTextIndex, OFromClause, OCommandContext, OExpression + protected abstract ArcadeLuceneFullTextIndex searchForIndex( // FIXME + FromClause target, CommandContext ctx, Expression... 
args); +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchMoreLikeThisFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchMoreLikeThisFunction.java new file mode 100644 index 0000000000..1b67424217 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchMoreLikeThisFunction.java @@ -0,0 +1,389 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.database.Database; // Changed ODatabaseSession to Database +import com.arcadedb.database.DatabaseContext; // For context access +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; // Changed +import com.arcadedb.document.Document; // Changed +import com.arcadedb.document.Element; // Changed +import com.arcadedb.exception.ArcadeDBException; // Changed +import com.arcadedb.index.Index; // Changed +import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneIndexType; // For RID field name +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.function.IndexableSQLFunction; // Assuming +import com.arcadedb.query.sql.function.SQLFunctionAbstract; // Assuming +import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.query.sql.parser.FromClause; // Changed +import com.arcadedb.query.sql.parser.FromItem; // Changed +import com.arcadedb.query.sql.parser.Identifier; +import com.arcadedb.schema.DocumentType; +import com.arcadedb.schema.Schema; +import java.io.IOException; +import java.io.StringReader; +import 
java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.mlt.MoreLikeThis; +import org.apache.lucene.queryparser.classic.QueryParser; // Used for escape +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; // Directly use BooleanQuery.Builder +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; + +/** Created by frank on 15/01/2017. */ +public class ArcadeLuceneSearchMoreLikeThisFunction extends ArcadeLuceneSearchFunctionTemplate // Changed base + implements IndexableSQLFunction { // Assuming from template + + private static final Logger logger = + Logger.getLogger(ArcadeLuceneSearchMoreLikeThisFunction.class.getName()); // Changed + + public static final String NAME = "search_more_like_this"; // Changed name + + public ArcadeLuceneSearchMoreLikeThisFunction() { + super(NAME, 1, 2); // params: rids, [metadata] + } + + @Override + public String getName() { + return NAME; + } + + @Override + public Object execute( // FIXME: Signature might change + Object iThis, + Identifiable iCurrentRecord, // Changed + Object iCurrentResult, + Object[] params, + CommandContext ctx) { // Changed + + // This function's logic in OrientDB was to check if iCurrentRecord is similar to records identified by RIDs in params[0]. + // This seems more like a filter for a WHERE clause rather than a direct result-producing function. + // The return type 'boolean' suggests this. 
+ + if (!(iCurrentRecord instanceof Document)) { // Changed + return false; + } + String className = ((Document) iCurrentRecord).getTypeName(); // Changed + ArcadeLuceneFullTextIndex index = this.searchForIndex(ctx, className); // FIXME + + if (index == null) return false; // Cannot perform MLT without an index + + IndexSearcher searcher = index.searcher(); // FIXME + if (searcher == null) return false; + + Document metadata = getMetadataDoc(params, 1); // metadata is params[1] // Changed + + List ridsAsString = parseRidsObj(ctx, params[0]); + if (ridsAsString.isEmpty()) return false; + + List others = // Changed ORecord to Identifiable + ridsAsString.stream() + .map(ridStr -> (Identifiable) new RID(ctx.getDatabase(), ridStr)) // Changed ORecordId + .map(id -> ctx.getDatabase().lookupByRID(id.getIdentity(), true).getRecord()) // Load record // Changed + .filter(r -> r instanceof Element) // Ensure it's an element + .collect(Collectors.toList()); + + MoreLikeThis mlt = buildMoreLikeThis(index, searcher, metadata); // FIXME + + BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); // Changed + + // The MLT query should be built against the content of 'others' + // And then we check if iCurrentRecord matches this mltQuery. + // This is different from how 'searchFromTarget' works. + + // This part seems to generate a query based on the 'others' documents + addLikeQueries(others, mlt, queryBuilder, ctx.getDatabase()); // Changed + + Query mltQuery = queryBuilder.build(); + if (mltQuery.toString().isEmpty()) { // No terms generated if documents are empty or too common/rare + return false; + } + + // Now, check if iCurrentRecord matches the mltQuery. + // This requires indexing iCurrentRecord in-memory. 
+ MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx); + org.apache.lucene.document.Document luceneDoc = index.buildDocument(null, iCurrentRecord); // FIXME: Key might be needed or different buildDocument signature + if (luceneDoc != null) { + for (org.apache.lucene.index.IndexableField field : luceneDoc.getFields()) { + memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer()); // FIXME + } + } else { + return false; + } + return memoryIndex.search(mltQuery) > 0.0f; + } + + @Override + public String getSyntax() { + return NAME + "( , [ ] )"; // Corrected syntax + } + + @Override + public Iterable searchFromTarget( // Changed + FromClause target, // Changed + BinaryCompareOperator operator, // Changed + Object rightValue, + CommandContext ctx, // Changed + Expression... args) { // Changed + + ArcadeLuceneFullTextIndex index = this.searchForIndex(target, ctx, args); // FIXME + + if (index == null) return Collections.emptySet(); + + IndexSearcher searcher = index.searcher(); // FIXME + if (searcher == null) return Collections.emptySet(); + + + Expression ridExpression = args[0]; + Document metadata = getMetadataFromExpression(args, ctx, 1); // metadata is args[1] // Changed + + List ridsAsString = parseRids(ctx, ridExpression); + if (ridsAsString.isEmpty()) return Collections.emptySet(); + + List others = // Changed + ridsAsString.stream() + .map(ridStr -> (Identifiable) new RID(ctx.getDatabase(), ridStr)) // Changed + .map(id -> ctx.getDatabase().lookupByRID(id.getIdentity(), true).getRecord()) // Load record // Changed + .filter(r -> r instanceof Element) + .collect(Collectors.toList()); + + MoreLikeThis mlt = buildMoreLikeThis(index, searcher, metadata); // FIXME + + BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); // Changed + + excludeOtherFromResults(ridsAsString, queryBuilder); // Keep input RIDs out of results + + addLikeQueries(others, mlt, queryBuilder, ctx.getDatabase()); // Changed + + Query 
mltQuery = queryBuilder.build(); + if (mltQuery.toString().isEmpty()) return Collections.emptySet(); + + + // Execute the mltQuery against the main index + // FIXME: index.getInternal().getRids() needs to be replaced + // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring + // This part is highly dependent on how ArcadeLuceneFullTextIndex exposes search capabilities + try (Stream rids = // Changed + index + .getAssociatedIndex() // Assuming + .getRids( // This method might not exist + new LuceneKeyAndMetadata( // FIXME + new LuceneCompositeKey(Arrays.asList(mltQuery.toString())).setContext(ctx), // FIXME + metadata))) { + return rids.map(rid -> (Identifiable) rid).collect(Collectors.toSet()); // Changed + } catch (Exception e) { + logger.log(Level.SEVERE, "Error executing MoreLikeThis query via getRids", e); + return Collections.emptySet(); + } + } + + private List parseRids(CommandContext ctx, Expression expression) { // Changed + Object expResult = expression.execute((Result) null, ctx); // Changed + return parseRidsObj(ctx, expResult); + } + + private List parseRidsObj(CommandContext ctx, Object expResult) { // Changed + if (expResult instanceof Identifiable) { // Changed + return Collections.singletonList(((Identifiable) expResult).getIdentity().toString()); + } + + Iterator iter; // Wildcard for iterator type + if (expResult instanceof Iterable) { + iter = ((Iterable) expResult).iterator(); + } else if (expResult instanceof Iterator) { + iter = (Iterator) expResult; + } else { + return Collections.emptyList(); + } + + List rids = new ArrayList<>(); + while (iter.hasNext()) { + Object item = iter.next(); + if (item instanceof Result) { // Changed + if (((Result) item).isElement()) { + ((Result) item).getIdentity().ifPresent(id -> rids.add(id.toString())); // Changed + } else { + Set properties = ((Result) item).getPropertyNames(); + if (properties.size() == 1) { + Object val = ((Result) item).getProperty(properties.iterator().next()); + if (val 
instanceof Identifiable) { // Changed + rids.add(((Identifiable) val).getIdentity().toString()); + } + } + } + } else if (item instanceof Identifiable) { // Changed + rids.add(((Identifiable) item).getIdentity().toString()); + } + } + return rids; + } + + private Document getMetadataDoc(Object[] params, int metadataParamIndex) { // Changed + if (params.length > metadataParamIndex) { + if (params[metadataParamIndex] instanceof Map) { + return new Document().fromMap((Map) params[metadataParamIndex]); + } else if (params[metadataParamIndex] instanceof String) { + return new Document().fromJSON((String) params[metadataParamIndex]); + } + return new Document().fromJSON(params[metadataParamIndex].toString()); + } + return new Document(); // Empty if not present + } + + private Document getMetadataFromExpression(Expression[] args, CommandContext ctx, int metadataParamIndex) { // Changed + if (args.length > metadataParamIndex) { + return getMetadata(args[metadataParamIndex], ctx); // Calls method from ArcadeLuceneSearchFunctionTemplate + } + return new Document(); // Empty if not present + } + + + private MoreLikeThis buildMoreLikeThis( // Changed + ArcadeLuceneFullTextIndex index, IndexSearcher searcher, Document metadata) { // FIXME + + try { + MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader()); + + mlt.setAnalyzer(index.queryAnalyzer()); // FIXME + + // FIXME: index.getDefinition() might be different + mlt.setFieldNames( + Optional.ofNullable(metadata.>getProperty("fieldNames")) + .orElse(index.getDefinition().getFields()) + .toArray(new String[] {})); + + mlt.setMaxQueryTerms( + Optional.ofNullable(metadata.getProperty("maxQueryTerms")) + .orElse(MoreLikeThis.DEFAULT_MAX_QUERY_TERMS)); + // ... 
(rest of MoreLikeThis setters, ensure getProperty types match) + mlt.setMinTermFreq( + Optional.ofNullable(metadata.getProperty("minTermFreq")) + .orElse(MoreLikeThis.DEFAULT_MIN_TERM_FREQ)); + mlt.setMaxDocFreq( + Optional.ofNullable(metadata.getProperty("maxDocFreq")) + .orElse(MoreLikeThis.DEFAULT_MAX_DOC_FREQ)); + mlt.setMinDocFreq( + Optional.ofNullable(metadata.getProperty("minDocFreq")) + .orElse(MoreLikeThis.DEFAULT_MIN_DOC_FREQ)); // Corrected from DEFAULT_MAX_DOC_FREQ + mlt.setBoost( + Optional.ofNullable(metadata.getProperty("boost")) + .orElse(MoreLikeThis.DEFAULT_BOOST)); + mlt.setBoostFactor( + Optional.ofNullable(metadata.getProperty("boostFactor")).orElse(1f)); + mlt.setMaxWordLen( + Optional.ofNullable(metadata.getProperty("maxWordLen")) + .orElse(MoreLikeThis.DEFAULT_MAX_WORD_LENGTH)); + mlt.setMinWordLen( + Optional.ofNullable(metadata.getProperty("minWordLen")) + .orElse(MoreLikeThis.DEFAULT_MIN_WORD_LENGTH)); + // setMaxNumTokensParsed was removed in later Lucene versions, check alternatives if needed. 
+ // mlt.setMaxNumTokensParsed( + // Optional.ofNullable(metadata.getProperty("maxNumTokensParsed")) + // .orElse(MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED)); + mlt.setStopWords( + (Set) + Optional.ofNullable(metadata.get("stopWords")) // Simpler get for Set + .orElse(MoreLikeThis.DEFAULT_STOP_WORDS)); + + + return mlt; + } catch (IOException e) { + throw ArcadeDBException.wrapException(new ArcadeDBException("Lucene IO Exception"), e); // Changed + } + } + + private void addLikeQueries( // Changed + List others, MoreLikeThis mlt, BooleanQuery.Builder queryBuilder, Database database) { // Changed + others.stream() + .filter(id -> id instanceof Element) // ensure it's an element to get properties + .map(id -> (Element) id) + .forEach( + element -> + Arrays.stream(mlt.getFieldNames()) // These are the fields to check for similarity + .forEach( + fieldName -> { + Object propertyValue = element.getProperty(fieldName); + if (propertyValue != null) { + try { + // MoreLikeThis.like() can take a String directly for a field's content + Query fieldQuery = mlt.like(fieldName, new StringReader(propertyValue.toString())); + if (!fieldQuery.toString().isEmpty()) // Check if anything was generated + queryBuilder.add(fieldQuery, Occur.SHOULD); + } catch (IOException e) { + logger.log(Level.SEVERE, "Error during Lucene MoreLikeThis query generation for field " + fieldName, e); + } + } + })); + } + + private void excludeOtherFromResults(List ridsAsString, BooleanQuery.Builder queryBuilder) { // Changed + ridsAsString.stream() + .forEach( + rid -> + queryBuilder.add( // Use ArcadeLuceneIndexType.RID for consistency + new TermQuery(new Term(ArcadeLuceneIndexType.RID, QueryParser.escape(rid))), Occur.MUST_NOT)); + } + + // searchForIndex from OLuceneSearchFunctionTemplate should be used or overridden if different logic needed for target. + // The private helpers here were specific to how OLuceneSearchMoreLikeThisFunction determined its index. 
+ // For now, relying on the overridden searchForIndex from ArcadeLuceneSearchFunctionTemplate. + // If this function *always* uses class name from context (iThis) for 'execute' and target for 'searchFromTarget', + // then the template's searchForIndex might need to be made non-abstract or this class needs its own. + // The original OLuceneSearchMoreLikeThisFunction had its own searchForIndex. + + @Override + protected ArcadeLuceneFullTextIndex searchForIndex( // Changed + FromClause target, CommandContext ctx, Expression... args) { // FIXME + FromItem item = target.getItem(); // Changed + Identifier identifier = item.getIdentifier(); // Changed + String className = identifier.getStringValue(); + return searchForIndex(ctx, className); // Calls private helper + } + + private ArcadeLuceneFullTextIndex searchForIndex(CommandContext ctx, String className) { // Changed + DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify + // database.activateOnCurrentThread(); // May not be needed + + Schema schema = database.getSchema(); // Changed + DocumentType docType = schema.getType(className); // Changed + + if (docType == null) { + return null; + } + + List indices = // Changed + docType.getIndexes(true).stream() + .filter(idx -> idx instanceof ArcadeLuceneFullTextIndex) // FIXME + .map(idx -> (ArcadeLuceneFullTextIndex) idx) // FIXME + .collect(Collectors.toList()); + + if (indices.size() > 1) { + // Consider if a more specific index selection is needed, e.g. one covering certain fields if provided in metadata + throw new IllegalArgumentException("Too many full-text Lucene indices on class: " + className + ". Disambiguate or configure."); + } + return indices.size() == 0 ? null : indices.get(0); + } + + + // estimate, canExecuteInline, allowsIndexedExecution, shouldExecuteAfterSearch + // are inherited from ArcadeLuceneSearchFunctionTemplate. 
+ // Their default implementations in the template might need review for this specific function's behavior. + // E.g., allowsIndexedExecution for MLT depends on finding *an* index on the class to get an IndexReader. +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnClassFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnClassFunction.java new file mode 100644 index 0000000000..a6a893a127 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnClassFunction.java @@ -0,0 +1,242 @@ +package com.arcadedb.lucene.functions; + +// Static import from ArcadeLuceneFunctionsUtils if getOrCreateMemoryIndex is public there, or keep local. +// For now, assuming it's accessible via ArcadeLuceneFunctionsUtils. +// import static com.arcadedb.lucene.functions.ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex; + +import com.arcadedb.database.DatabaseContext; // Assuming CommandContext provides access to Database +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; // Changed +import com.arcadedb.document.Document; // Changed +import com.arcadedb.document.Element; // Changed +import com.arcadedb.index.Index; // Changed +import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.executor.ResultInternal; // Changed +import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import 
com.arcadedb.query.sql.parser.FromClause; // Changed +import com.arcadedb.query.sql.parser.FromItem; // Changed +import com.arcadedb.schema.DocumentType; // Changed +import com.arcadedb.schema.Schema; // Changed +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 15/01/2017. */ +public class ArcadeLuceneSearchOnClassFunction extends ArcadeLuceneSearchFunctionTemplate { // Changed base class + + public static final String NAME = "search_class"; + + public ArcadeLuceneSearchOnClassFunction() { + super(NAME, 1, 2); // Original params: className, query, [metadata] - now query, [metadata] as class comes from context + // However, the original code takes classname as param for searchForIndex, + // but in execute it gets class from iThis. + // The original super was (NAME, 1, 2) -> (query, [metadata]), class was implicit from target. + // Let's stick to (NAME, 2, 3) -> (className, query, [metadata]) for now if it's a global function. + // If it's context aware (iThis), then (query, [metadata]) is fine. + // The original `search_class(, , [ ])` + // super(NAME, 2, 3); // (className, query, [metadata]) + // The original code for OLuceneSearchOnClassFunction used (NAME, 1, 2) + // and derived className from `iThis` in `execute` or from `target` in `searchFromTarget`. + // Let's keep the original arity and rely on context for class name. + super(NAME, 1, 2); + } + + @Override + public String getName() { + return NAME; + } + + // canExecuteInline from template is likely fine if it relies on searchForIndex. 
+ + @Override + public Object execute( // FIXME: Signature might change + Object iThis, + Identifiable iCurrentRecord, // Changed + Object iCurrentResult, + Object[] params, + CommandContext ctx) { // Changed + + Result result; // Changed + if (iThis instanceof Result) { + result = (Result) iThis; + } else if (iThis instanceof Identifiable) { + result = new ResultInternal((Identifiable) iThis); // Changed + } else { + // Cannot determine current record or class, perhaps throw error or return false + return false; + } + + if (!result.getElement().isPresent()) return false; + Element element = result.getElement().get(); // Changed + if (element.getType() == null) return false; // Changed, was getSchemaType().isPresent() + + String className = element.getType().getName(); // Changed + + ArcadeLuceneFullTextIndex index = searchForIndex(ctx, className); // FIXME + + if (index == null) return false; + + String query = (String) params[0]; + + MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx); + + // FIXME: index.getDefinition() might be different. + List key = + index.getDefinition().getFields().stream() + .map(s -> element.getProperty(s)) + .collect(Collectors.toList()); + + // FIXME: index.buildDocument and index.indexAnalyzer might not exist or have different signatures + org.apache.lucene.document.Document luceneDoc = index.buildDocument(key, iCurrentRecord); + if (luceneDoc != null) { + for (IndexableField field : luceneDoc.getFields()) { + // Simplified, assuming stringValue is appropriate. Lucene's MemoryIndex.addField handles various IndexableField types. 
+ memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer()); + } + } + + + Document metadata = getMetadataDoc(params); // Changed + // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring + LuceneKeyAndMetadata keyAndMetadata = + new LuceneKeyAndMetadata( + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); + + // FIXME: index.buildQuery might not exist or have different signature + return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; + } + + private Document getMetadataDoc(Object[] params) { // Changed + if (params.length == 2) { // Original used params[1] for metadata if arity was 2 (query, metadata) + if (params[1] instanceof Map) { + return new Document().fromMap((Map) params[1]); // Changed + } else if (params[1] instanceof String) { + return new Document().fromJSON((String) params[1]); + } + return new Document().fromJSON(params[1].toString()); + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); //LuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + public String getSyntax() { + // Original was "SEARCH_INDEX( indexName, [ metdatada {} ] )" which seems incorrect for search_class + return "search_class( , [ ] )"; // Class is implicit from context + } + + @Override + public boolean filterResult() { + return true; + } + + // FIXME: This method's signature and logic are highly dependent on ArcadeDB's IndexableSQLFunction interface + @Override + public Iterable searchFromTarget( // Changed + FromClause target, // Changed + BinaryCompareOperator operator, // Changed + Object rightValue, + CommandContext ctx, // Changed + Expression... 
args) { // Changed + + // In this context, the class comes from the target FromClause + ArcadeLuceneFullTextIndex index = searchForIndex(target, ctx, args); // FIXME + + Expression expression = args[0]; // Query is the first argument to the function + String query = (String) expression.execute((Result) null, ctx); // Changed + + if (index != null) { + Document meta = getMetadata(args, ctx, 1); // Metadata is the second argument (index 1) if present + + List luceneResultSet; // Changed + try (Stream rids = // Changed + // FIXME: index.getInternal().getRids() needs to be replaced with ArcadeDB equivalent + // This whole block is highly dependent on ArcadeLuceneFullTextIndex and LuceneKeyAndMetadata refactoring + index + .getAssociatedIndex() // Assuming getAssociatedIndex() is the way + .getRids( // This method might not exist on ArcadeDB's Index interface + new LuceneKeyAndMetadata( // FIXME + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { // FIXME + luceneResultSet = rids.collect(Collectors.toList()); + } + return luceneResultSet; + } + return Collections.emptySet(); + } + + private Document getMetadata(Expression[] args, CommandContext ctx, int metadataParamIndex) { // Changed types + if (args.length > metadataParamIndex) { + return getMetadata(args[metadataParamIndex], ctx); // Calls method from ArcadeLuceneSearchFunctionTemplate + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + protected ArcadeLuceneFullTextIndex searchForIndex( // Changed types + FromClause target, CommandContext ctx, Expression... 
args) { // FIXME + FromItem item = target.getItem(); // Changed + + // This function determines the class from the target (FROM clause) + String className = item.getIdentifier().getStringValue(); // Changed + + return searchForIndex(ctx, className); // Calls private helper + } + + private ArcadeLuceneFullTextIndex searchForIndex(CommandContext ctx, String className) { // Changed types + DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify + // database.activateOnCurrentThread(); // May not be needed + + Schema schema = database.getSchema(); // Changed + DocumentType docType = schema.getType(className); // Changed + + if (docType == null) { + return null; + } + + List indices = // Changed + docType.getIndexes(true).stream() // getIndexes(true) for all indexes including supertypes + .filter(idx -> idx instanceof ArcadeLuceneFullTextIndex) // FIXME + .map(idx -> (ArcadeLuceneFullTextIndex) idx) // FIXME + .collect(Collectors.toList()); + + if (indices.size() > 1) { + // Try to find an index that is defined ONLY on this class, not subclasses/supertypes if possible + // Or, if multiple, pick one based on a convention (e.g. specific fields) + // For now, this logic is simplified. + // Original code just picked the first one if only one, or threw error. + // We might need a more sophisticated way if multiple Lucene indexes can exist on a class hierarchy. + for (ArcadeLuceneFullTextIndex idx : indices) { + if (idx.getDefinition().getTypeName().equals(className)) { // Check if index is defined on this exact class + return idx; + } + } + // If no index is defined directly on this class, but inherited, it might be ambiguous. + // However, the original code's filter `dbMetadata.getImmutableSchemaSnapshot().getClass(className).getIndexes()` + // would only get indexes directly on that class. + // `docType.getIndexes(true)` gets all. 
Let's refine to match original more closely for now: + indices = docType.getIndexes(false).stream() // false = only indexes defined on this type + .filter(idx -> idx instanceof ArcadeLuceneFullTextIndex) // FIXME + .map(idx -> (ArcadeLuceneFullTextIndex) idx) // FIXME + .collect(Collectors.toList()); + if (indices.size() > 1) { + throw new IllegalArgumentException("Too many full-text indices on given class: " + className + ". Specify the index name using search_index function."); + } + } + + + return indices.size() == 0 ? null : indices.get(0); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnFieldsFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnFieldsFunction.java new file mode 100644 index 0000000000..3c6123a32b --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnFieldsFunction.java @@ -0,0 +1,291 @@ +package com.arcadedb.lucene.functions; + +// import static com.arcadedb.lucene.functions.ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex; // Assuming public access + +import com.arcadedb.database.DatabaseContext; // Assuming CommandContext provides access to Database +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; // Changed +import com.arcadedb.document.Document; // Changed +import com.arcadedb.document.Element; // Changed +import com.arcadedb.index.Index; // Changed +import com.arcadedb.index.IndexDefinition; +import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // 
Changed +import com.arcadedb.query.sql.executor.ResultInternal; // Changed +import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.query.sql.parser.FromClause; // Changed +import com.arcadedb.query.sql.parser.FromItem; // Changed +import com.arcadedb.query.sql.parser.Identifier; +import com.arcadedb.schema.DocumentType; // Changed +import com.arcadedb.schema.Schema; // Changed +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 15/01/2017. */ +public class ArcadeLuceneSearchOnFieldsFunction extends ArcadeLuceneSearchFunctionTemplate { // Changed base class + + public static final String NAME = "search_fields"; + + public ArcadeLuceneSearchOnFieldsFunction() { + // Original params: fieldNames, query, [metadata] + // Class name is derived from context (iThis or target) + super(NAME, 2, 3); + } + + @Override + public String getName() { + return NAME; + } + + @Override + public Object execute( // FIXME: Signature might change + Object iThis, + Identifiable iCurrentRecord, // Changed + Object iCurrentResult, + Object[] params, + CommandContext ctx) { // Changed + + Result result; // Changed + if (iThis instanceof Result) { + result = (Result) iThis; + } else if (iThis instanceof Identifiable) { + result = new ResultInternal((Identifiable) iThis); // Changed + } else { + return false; // Cannot determine current record + } + + if (!result.getElement().isPresent()) return false; + Element element = result.getElement().get(); // Changed + if (element.getType() == null) return false; // Changed + String className = element.getType().getName(); // Changed + + 
@SuppressWarnings("unchecked") + List fieldNames = (List) params[0]; + + // Note: searchForIndex here might not be strictly necessary if we always build an in-memory index from the current record's fields. + // However, the original code uses it to get definition and analyzer. + ArcadeLuceneFullTextIndex index = searchForIndex(className, ctx, fieldNames); // FIXME + + if (index == null) { + // If no pre-existing index matches, we might still proceed if we can get a default analyzer + // or one from metadata, but building a Lucene document without an IndexDefinition is problematic. + // For now, returning false if no suitable index is found to provide an analyzer/definition. + // This part might need a different strategy for on-the-fly indexing without a backing index. + return false; + } + + String query; + if (params.length < 2 || params[1] == null) { // query is params[1] + query = null; + } else { + query = params[1].toString(); + } + + MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx); + + // FIXME: This part needs to build a Lucene document using ONLY the specified fieldNames + // from the 'element', and using the types from the schema for those fields. + // The 'key' concept from OLuceneSearchOnIndexFunction is not directly applicable here in the same way. + // index.buildDocument(key, iCurrentRecord) is not right for this context. + org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document(); + DocumentType docType = element.getType(); + if (docType != null) { + for(String fieldName : fieldNames) { + if (element.has(fieldName)) { + Object fieldValue = element.getProperty(fieldName); + com.arcadedb.schema.Property prop = docType.getProperty(fieldName); + Type fieldType = prop != null ? prop.getType() : Type.STRING; // Default to string if no prop + // FIXME: ArcadeLuceneIndexType.createFields needs correct store/sort parameters. 
+ // Assuming Field.Store.YES and no sorting for memory index fields for now. + List fields = ArcadeLuceneIndexType.createFields(fieldName, fieldValue, Field.Store.YES, false, fieldType); + for(org.apache.lucene.document.Field f : fields) { + luceneDoc.add(f); + } + } + } + } + + if (luceneDoc.getFields().isEmpty()) return false; // No fields were added + + // Add all fields from the created luceneDoc to memoryIndex + for (IndexableField field : luceneDoc.getFields()) { + // Simplified, assuming stringValue is appropriate for all, which is not robust. + // MemoryIndex.addField handles various IndexableField types, so this might be okay if createFields returns typed fields. + memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer()); // FIXME: index.indexAnalyzer() dependency + } + + + Document metadata = getMetadataDoc(params, 2); // metadata is params[2] + // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring + LuceneKeyAndMetadata keyAndMetadata = + new LuceneKeyAndMetadata( + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); + + // FIXME: index.buildQuery might not exist or have different signature + return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; + } + + private Document getMetadataDoc(Object[] params, int metadataParamIndex) { // Changed + if (params.length > metadataParamIndex) { + if (params[metadataParamIndex] instanceof Map) { + return new Document().fromMap((Map) params[metadataParamIndex]); // Changed + } else if (params[metadataParamIndex] instanceof String) { + return new Document().fromJSON((String) params[metadataParamIndex]); + } + return new Document().fromJSON(params[metadataParamIndex].toString()); + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + public String getSyntax() { + return "search_fields( , , [ ] )"; // Class is implicit + } + + // searchFromTarget and related metadata 
method from template might not be directly applicable + // as this function operates on specified fields of current record using MemoryIndex. + // If it were to support indexed execution, it would need to find a covering persistent index. + @Override + public Iterable searchFromTarget( + FromClause target, + BinaryCompareOperator operator, + Object rightValue, + CommandContext ctx, + Expression... args) { + + // This function, as implemented in execute(), builds an in-memory index for the current record. + // For it to be "indexable" in a broader query, it would need to find a persistent Lucene index + // that covers the requested fields for the target class. + ArcadeLuceneFullTextIndex index = searchForIndex(target, ctx, args); // FIXME + + // First arg (args[0]) is fieldNamesList, second (args[1]) is query + if (args.length < 2) throw new IllegalArgumentException("search_fields requires at least fieldNames and query parameters."); + + @SuppressWarnings("unchecked") + // List fieldNames = (List) args[0].execute((Result) null, ctx); // This is how searchForIndex gets it. + // We need the query string here. + Expression queryExpression = args[1]; + String query = (String) queryExpression.execute((Result) null, ctx); + + + if (index != null && query != null) { + Document meta = getMetadata(args, ctx, 2); // Metadata is third arg (index 2) + Set luceneResultSet; // Changed + try (Stream rids = // Changed + // FIXME: index.getInternal().getRids() needs to be replaced + index + .getAssociatedIndex() + .getRids( // This method might not exist + new LuceneKeyAndMetadata( // FIXME + new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { // FIXME + luceneResultSet = rids.collect(Collectors.toSet()); + } + return luceneResultSet; + } + // Original threw RuntimeException, returning empty set might be safer for unhandled cases. 
+ return Collections.emptySet(); + } + + private Document getMetadata(Expression[] args, CommandContext ctx, int metadataParamIndex) { // Changed + if (args.length > metadataParamIndex) { + return getMetadata(args[metadataParamIndex], ctx); // Calls method from ArcadeLuceneSearchFunctionTemplate + } + // FIXME: LuceneQueryBuilder.EMPTY_METADATA + return new Document(); // LuceneQueryBuilder.EMPTY_METADATA; + } + + + @Override + protected ArcadeLuceneFullTextIndex searchForIndex( // Changed types + FromClause target, CommandContext ctx, Expression... args) { // FIXME + // First argument to the function (args[0]) is the list of field names + if (args == null || args.length == 0) { + throw new IllegalArgumentException("Field names list parameter is missing."); + } + Object fieldNamesParam = args[0].execute((Result) null, ctx); + if (!(fieldNamesParam instanceof List)) { + throw new IllegalArgumentException("Field names parameter must be a list."); + } + @SuppressWarnings("unchecked") + List fieldNames = (List) fieldNamesParam; + + FromItem item = target.getItem(); // Changed + Identifier identifier = item.getIdentifier(); // Changed + String className = identifier.getStringValue(); + + return searchForIndex(className, ctx, fieldNames); // Calls private helper + } + + private ArcadeLuceneFullTextIndex searchForIndex( // Changed types + String className, CommandContext ctx, List fieldNames) { + DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify + // database.activateOnCurrentThread(); // May not be needed + + Schema schema = database.getSchema(); // Changed + DocumentType docType = schema.getType(className); // Changed + + if (docType == null) { + return null; + } + List indices = // Changed + docType.getIndexes(true).stream() // getIndexes(true) for all indexes including supertypes + .filter(idx -> idx instanceof ArcadeLuceneFullTextIndex) // FIXME + .map(idx -> (ArcadeLuceneFullTextIndex) idx) // FIXME + .filter(idx 
-> intersect(idx.getDefinition().getFields(), fieldNames)) + .collect(Collectors.toList()); + + if (indices.size() > 1) { + // If multiple indexes match (e.g. one on [f1], another on [f2], and we search [f1,f2]) + // This logic might need refinement. For now, it implies any single index covering *at least one* field. + // The original code would throw "too many indices matching given field name" only if multiple INDIVIDUAL indexes + // were found that EACH satisfy the intersect condition. + // A more robust approach might be to find the "best" covering index or combine results if that makes sense. + // For now, sticking to "if any index covers any of the fields, and there's only one such index" + // The original code finds an index if ANY of its fields are in fieldNames. + // If multiple such indexes exist, it's an error. + + // Let's find the one with the most matching fields? Or just the first one? + // The original code would throw if 'indices.size() > 1'. + throw new IllegalArgumentException( + "Too many Lucene indices on class '" + className + "' match the specified fields: " + String.join(",", fieldNames) + + ". Specify a single target index using search_index()."); + } + + return indices.size() == 0 ? 
null : indices.get(0); + } + + // intersection and intersect methods are helpers, can remain as they are (generic) + public List intersection(List list1, List list2) { + List list = new ArrayList(); + for (T t : list1) { + if (list2.contains(t)) { + list.add(t); + } + } + return list; + } + + public boolean intersect(List list1, List list2) { + for (T t : list1) { + if (list2.contains(t)) { + return true; + } + } + return false; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java new file mode 100644 index 0000000000..80aad5ac9a --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java @@ -0,0 +1,139 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.database.DatabaseContext; // Assuming CommandContext provides access to Database +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; +import com.arcadedb.document.Document; // Changed +import com.arcadedb.index.Index; // Changed +import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring +import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import com.arcadedb.query.sql.executor.Result; // Changed +import com.arcadedb.query.sql.executor.ResultInternal; // Changed +import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed +import com.arcadedb.query.sql.parser.Expression; // Changed +import com.arcadedb.query.sql.parser.FromClause; // Changed +import com.arcadedb.query.sql.parser.FromItem; // Changed +import 
com.arcadedb.query.sql.parser.Identifier; // Changed +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 15/01/2017. */ +public class ArcadeLuceneSearchOnIndexFunction extends ArcadeLuceneSearchFunctionTemplate { + + public static final String NAME = "search_index"; + + public ArcadeLuceneSearchOnIndexFunction() { + super(NAME); + } + + @Override + public Object execute( + Object self, // Is the target of the function, could be null, or an identifier (index name) or a collection + Identifiable currentRecord, + Object currentResult, + Object[] params, + CommandContext ctx) { + + validateParameterCount(params, 2, 3); + + String indexName = params[0].toString(); + String query = params[1].toString(); + Document metadata = params.length == 3 ? getMetadata((Expression) params[2], ctx) : new Document(ctx.getDatabase()); + + ArcadeLuceneFullTextIndex index = ArcadeLuceneFunctionsUtils.getLuceneFullTextIndex(ctx, indexName); + + if (index == null) { + // If used in a WHERE clause for a specific record, returning false means "filter out" + // If used as a standalone function returning a set, return empty set. + // The `filterResult` method in template handles boolean conversion. + return currentRecord != null ? false : Collections.emptySet(); + } + + // If currentRecord is not null, this function is likely used in a WHERE clause context. + // It needs to determine if the currentRecord matches the Lucene query *within its own fields*. 
+ if (currentRecord != null && currentRecord.getIdentity() != null) { + MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx); + + // We need the Lucene Document for the currentRecord + // The 'key' for buildDocument in this context is not a separate key, but derived from the record itself if auto index. + // Or, if the index has specific fields, those are used. + // Since we are in context of a specific record, we use its fields. + org.apache.lucene.document.Document luceneDoc = index.buildDocument(null, currentRecord); // Pass null for key if derived from record + + if (luceneDoc != null) { + for (IndexableField field : luceneDoc.getFields()) { + // Simplified: use stringValue. Actual field data might be needed for MemoryIndex if not string. + // MemoryIndex.addField can take Analyzer, which it gets from the IndexableFieldType. + // If the field is not indexed with an analyzer (e.g. StringField), it's fine. + // If it is (e.g. TextField), index.indexAnalyzer() should be used. + // For simplicity, assuming MemoryIndex handles it or we use the general indexAnalyzer. + memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer()); + } + } else { + return false; // Cannot build Lucene doc for current record + } + + // The query here is the main Lucene query from params[1] + // Metadata for this specific sub-query within MemoryIndex. + LuceneKeyAndMetadata keyAndMeta = new LuceneKeyAndMetadata(query, metadata, ctx); + org.apache.lucene.search.Query luceneQuery = index.buildQuery(keyAndMeta); // Build query using index's config + + return memoryIndex.search(luceneQuery) > 0.0f; + } else { + // If currentRecord is null, this function is likely used to return a set of results from the specified index. + // This is the "searchFromTarget" equivalent. + LuceneKeyAndMetadata keyAndMeta = new LuceneKeyAndMetadata(query, metadata, ctx); + // The `index.get(keyAndMeta)` should return a LuceneResultSet or similar. 
+ // The `ArcadeLuceneFullTextIndex.get(Object[])` was changed to return IndexCursor. + // We might need a direct way to execute a query via engine and get results. + // For now, assuming `index.get(keyAndMeta)` returns a Set or IndexCursor via engine. + + // The `get` method on `ArcadeLuceneFullTextIndex` takes `Object[] keys`. + // We need to wrap `keyAndMeta` or pass its components. + // Let's assume the engine's getInTx is what we want. + if (index.getEngine() instanceof LuceneIndexEngine) { + LuceneIndexEngine luceneEngine = (LuceneIndexEngine) index.getEngine(); + // LuceneKeyAndMetadata is already the 'key' for getInTx + return luceneEngine.getInTx(keyAndMeta, null); // Passing null for LuceneTxChanges for non-transactional view + } + return Collections.emptySet(); + } + } + + private Document getMetadata(Object[] params, CommandContext ctx) { // Kept for direct param access if needed + if (params.length == 3 && params[2] != null) { + if (params[2] instanceof Map) { + return new Document(ctx.getDatabase()).fromMap((Map) params[2]); + } else if (params[2] instanceof String) { + return new Document(ctx.getDatabase()).fromJSON((String) params[2]); + } else if (params[2] instanceof Expression) { // If metadata is an expression + return getMetadata((Expression) params[2], ctx); + } else if (params[2] instanceof Document) { + return (Document) params[2]; + } + try { + return new Document(ctx.getDatabase()).fromJSON(params[2].toString()); + } catch (Exception e) { /* ignore, return empty */ } + } + return new Document(ctx.getDatabase()); // LuceneQueryBuilder.EMPTY_METADATA; + } + + + @Override + public String getSyntax() { + return getName() + "( , [, ] )"; + } + + // Removed searchFromTarget, estimate, canExecuteInline, allowsIndexedExecution, shouldExecuteAfterSearch + // searchForIndex is not needed here as index name is a direct parameter. 
+} diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java new file mode 100644 index 0000000000..7c9f759cbf --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java @@ -0,0 +1,571 @@ +package com.arcadedb.lucene.index; + +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; +import com.arcadedb.database.TransactionContext; +import com.arcadedb.document.Document; +import com.arcadedb.engine.PaginatedFile; // For constructor, might not be directly used by Lucene +import com.arcadedb.engine.Storage; +import com.arcadedb.index.Index; +import com.arcadedb.index.IndexCursor; +import com.arcadedb.index.IndexException; +import com.arcadedb.index.IndexInternal; +import com.arcadedb.index.RangeIndexCursor; +import com.arcadedb.index.TypeIndex; +import com.arcadedb.index.engine.IndexEngine; +import com.arcadedb.lucene.engine.ArcadeLuceneFullTextIndexEngine; // Changed from OLuceneFullTextIndexEngine +import com.arcadedb.lucene.engine.LuceneIndexEngine; // The refactored interface +import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; +import com.arcadedb.schema.IndexBuilder; +import com.arcadedb.schema.IndexDefinition; +import com.arcadedb.schema.Schema; +import com.arcadedb.schema.Type; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; + +public class ArcadeLuceneFullTextIndex implements IndexInternal { + + private final 
DatabaseInternal database; + private final String name; + private IndexDefinition definition; + private String filePath; // Path where Lucene index files are stored + private int fileId; // ArcadeDB fileId, might not be directly used by Lucene files themselves + private PaginatedFile metadataFile; // For ArcadeDB metadata about this index + + private LuceneIndexEngine engine; // Changed type to interface + private STATUS status = STATUS.OFFLINE; + + // Moved constants to ArcadeLuceneIndexFactoryHandler + // public static final String LUCENE_ALGORITHM = "LUCENE"; + + + // Constructor matching AbstractIndex an IndexFactory might call + public ArcadeLuceneFullTextIndex(DatabaseInternal db, String name, String typeName, IndexDefinition definition, + String filePath, PaginatedFile metadataFile, PaginatedFile[] dataFiles, + PaginatedFile[] treeFiles, int fileId, int pageSize, + TransactionContext.AtomicOperation atomicOperation) { + this.database = db; + this.name = name; + this.definition = definition; + this.filePath = filePath; // Should be directory for Lucene + this.metadataFile = metadataFile; // ArcadeDB own metadata for this index + this.fileId = fileId; + // pageSize, dataFiles, treeFiles might be less relevant for Lucene which manages its own files. + + // Engine initialization is deferred to lazyInit or build/load + } + + private void lazyInit() { + if (engine == null) { + // Determine if this is part of an active transaction and if an engine instance already exists for this TX. 
+ if (database.isTransactionActive() && database.getTransaction().getInvolvedIndexEngine(getName()) instanceof LuceneIndexEngine) { + this.engine = (LuceneIndexEngine) database.getTransaction().getInvolvedIndexEngine(getName()); + if (this.engine == null) { // Should not happen if getInvolvedIndexEngine returned one + throw new IndexException("Cannot find transactional Lucene engine for index " + getName() + " though it was marked as involved."); + } + } else { + String algorithm = getAlgorithm(); // Uses the overridden getAlgorithm() + com.arcadedb.document.Document engineMetadataDoc = new com.arcadedb.document.Document(database); + if (this.definition != null && this.definition.getOptions() != null) { + engineMetadataDoc.fromMap(this.definition.getOptions()); + } + + if (com.arcadedb.lucene.ArcadeLuceneIndexFactoryHandler.LUCENE_CROSS_CLASS_ALGORITHM.equalsIgnoreCase(algorithm)) { + ArcadeLuceneCrossClassIndexEngine crossEngine = new ArcadeLuceneCrossClassIndexEngine(this.fileId, database.getStorage(), this.name); + + // Construct IndexMetadata Pojo for crossEngine.init() + // OLuceneCrossClassIndexEngine.init takes IndexMetadata. 
+ // IndexMetadata needs: name, typeName (class this index is on, can be null for cross-class marker), List propertyNames, Type[] keyTypes, String algorithm, boolean isAutomatic, Map options + IndexMetadata im = new IndexMetadata( + this.name, + this.definition.getPropertyNames(), + this.definition.getKeyTypes(), + this.definition.getOptions() + ); + im.setTypeName(this.definition.getTypeName()); // May be null if truly cross-class and not bound to a type + im.setAlgorithm(algorithm); + im.setIsAutomatic(this.isAutomatic()); + im.setUnique(this.isUnique()); + im.setNullStrategy(this.getNullStrategy()); + // Add other relevant properties from 'this.definition' to 'im' if needed by crossEngine.init() + + crossEngine.init(im); + this.engine = crossEngine; + } else { // Default to LUCENE_FULL_TEXT_ALGORITHM + ArcadeLuceneFullTextIndexEngine ftEngine = new ArcadeLuceneFullTextIndexEngine(database.getStorage(), name); + // OLuceneIndexEngineAbstract.init expects: String indexName, String indexType(algorithm), IndexDefinition, boolean isAutomatic, Document metadata + ftEngine.init(getName(), algorithm, definition, isAutomatic(), engineMetadataDoc); + this.engine = ftEngine; + } + } + this.status = STATUS.ONLINE; + } + } + + + @Override + public String getAssociatedFileName() { + return filePath; + } + + @Override + public void build(IndexBuilder builder) { + this.definition = builder.getIndexDefinition(); + // filePath might be set by IndexBuilder or derived, ensure it's correct for Lucene (a directory path) + this.filePath = builder.getFilePath() != null ? 
builder.getFilePath() : database.getDatabasePath() + "/" + builder.getFileName(); + this.fileId = builder.getFileId(); // Get fileId from builder + + lazyInit(); // Initialize engine + try { + Document engineMetadata = new Document(database); + if (this.definition.getOptions() != null) { + engineMetadata.fromMap(this.definition.getOptions()); + } + + // Parameters for engine.create: + // valueSerializer, keySerializer: null for Lucene as it handles its own types. + // keyTypes: from definition + // nullPointerSupport: from definition + // propertyNames.size(): as keySize (number of indexed fields) + // clustersToIndex: from definition + // options: from definition + engine.create( + null, // valueSerializer + this.isAutomatic(), + this.getKeyTypes(), + this.getDefinition().isNullStrategyNode(), // nullPointerSupport + null, // keySerializer + this.getDefinition().getPropertyNames() != null ? this.getDefinition().getPropertyNames().size() : 0, // keySize + this.getDefinition().getClustersToIndex(), // clustersToIndex (might be null) + this.getDefinition().getOptions(), // engineProperties + engineMetadata // metadata Document for engine + ); + this.status = STATUS.ONLINE; + } catch (Exception e) { + throw new IndexException("Error during Lucene index build for index '" + getName() + "'", e); + } + } + + @Override + public void setMetadata(IndexDefinition definition, String filePath, int pageSize, byte nullStrategy) { + this.definition = definition; + this.filePath = filePath; + // pageSize and nullStrategy are part of definition or handled by Lucene engine differently. + // This method is usually for loading existing index metadata. + // We might need to re-init or load the engine here. + if (engine != null) { + engine.close(); // Close existing engine if any + } + engine = null; // Reset engine + lazyInit(); // Re-initialize with new metadata + // engine.load(...) might be relevant here if this implies loading an existing index. 
+ } + + @Override + public STATUS getStatus() { + return status; + } + + @Override + public void setStatus(STATUS status) { + this.status = status; + // Potentially pass this to the engine if it has its own status + } + + @Override + public void close() { + if (engine != null) { + engine.close(); + engine = null; + } + status = STATUS.OFFLINE; + } + + @Override + public void drop() { + if (engine != null) { + engine.delete(); // Engine handles file deletion + engine = null; + } + // Additional cleanup of ArcadeDB metadata files if any (e.g., this.metadataFile) + // This is usually handled by Schema.dropIndex calling this. + status = STATUS.OFFLINE; + } + + @Override + public int getFileId() { + return fileId; // Or a specific ID for Lucene structure if different + } + + @Override + public T getComponent(String name, Class type) { + if (type.isAssignableFrom(engine.getClass())) { + return type.cast(engine); + } + return null; + } + + @Override + public Type[] getKeyTypes() { + return definition != null ? definition.getKeyTypes() : null; + } + + @Override + public byte[] getBinaryKeyTypes() { + // Lucene doesn't use this in the same way as binary comparable keys. + return null; + } + + @Override + public void setTypeIndex(TypeIndex typeIndex) { + // Associated with schema type's index list. Store if needed. + } + + @Override + public TypeIndex getTypeIndex() { + return null; // Retrieve if stored + } + + @Override + public void scheduleCompaction() { + // Lucene has IndexWriter.forceMerge or IndexWriter.maybeMerge. + // This could be a trigger for that. + lazyInit(); + // engine.forceMerge(); // FIXME: Add such a method to engine interface if needed + } + + @Override + public String getMostRecentFileName() { + return null; // Not directly applicable + } + + @Override + public Map toJSON() { + // Serialize index configuration/stats to JSON. + // Include name, type, definition, engine stats. 
+ Map json = new java.util.HashMap<>(); + json.put("name", getName()); + json.put("typeName", getTypeName()); + json.put("algorithm", getAlgorithm()); + if (definition != null) { + json.put("definition", definition.getOptions()); // Or more detailed definition + } + if (engine != null) { + // FIXME: engine should provide some stats or config + // json.put("engineStats", engine.getStats()); + } + return json; + } + + @Override + public Index getAssociatedIndex() { + return null; + } + + // --- Index Methods --- + + @Override + public String getName() { + return name; + } + + @Override + public String getTypeName() { // This should be the Type's name this index is on, not algorithm + return definition != null ? definition.getTypeName() : null; + } + + @Override + public String getAlgorithm() { + // Return the actual algorithm from the definition if available + return (definition != null && definition.getAlgorithm() != null) ? + definition.getAlgorithm() : + com.arcadedb.lucene.ArcadeLuceneIndexFactoryHandler.LUCENE_FULL_TEXT_ALGORITHM; + } + + + @Override + public IndexDefinition getDefinition() { + return definition; + } + + @Override + public boolean isUnique() { + return definition != null && definition.isUnique(); // Lucene full-text usually not unique + } + + @Override + public List getPropertyNames() { + return definition != null ? definition.getPropertyNames() : Collections.emptyList(); + } + + @Override + public long countEntries() { + lazyInit(); + // engine.size(null) or engine.sizeInTx(null) + // The ValuesTransformer is for OrientDB's SBTree based indexes. For Lucene, it's just a doc count. 
+ return engine.size(null); + } + + public long getRecordCount() { // From OLuceneFullTextIndex + return countEntries(); + } + + + @Override + public IndexCursor get(Object[] keys) { + lazyInit(); + if (keys == null || keys.length == 0 || keys[0] == null) { + throw new IllegalArgumentException("Lucene query key cannot be null."); + } + // Assuming keys[0] is the query string or a LuceneKeyAndMetadata object + // FIXME: This needs to adapt to how LuceneKeyAndMetadata is structured and if options are passed + Object queryKey = keys[0]; + Document metadata = null; + if (keys.length > 1 && keys[1] instanceof Map) { + metadata = new Document(database, (Map) keys[1]); + } else if (keys.length > 1 && keys[1] instanceof Document) { + metadata = (Document) keys[1]; + } + + // The engine's get method: Set getInTx(Object key, LuceneTxChanges changes) + // This needs to be wrapped in an IndexCursor. + // The key for engine.getInTx is likely LuceneKeyAndMetadata + // FIXME: Construct LuceneKeyAndMetadata correctly + LuceneKeyAndMetadata keyAndMeta = new LuceneKeyAndMetadata(queryKey, metadata, null); // Assuming CommandContext can be null here + + Set results = engine.getInTx(keyAndMeta, null); // Passing null for changes if not in tx or tx changes not used + return new LuceneIndexCursor(results.iterator()); // FIXME: LuceneIndexCursor needs to be implemented + } + + @Override + public IndexCursor get(Object[] keys, int limit) { + // FIXME: Implement limit. Lucene TopDocs can handle this. + // This will require engine.getInTx or a similar method to accept a limit. 
+ lazyInit(); + if (keys == null || keys.length == 0 || keys[0] == null) { + throw new IllegalArgumentException("Lucene query key cannot be null."); + } + Object queryKey = keys[0]; + Document metadata = new Document(database); // Default empty metadata + if (keys.length > 1 && keys[1] instanceof Map) { + metadata.fromMap((Map) keys[1]); + } else if (keys.length > 1 && keys[1] instanceof Document) { + metadata = (Document) keys[1]; + } + if (limit > 0) { + metadata.set("limit", limit); // Pass limit via metadata + } + LuceneKeyAndMetadata keyAndMeta = new LuceneKeyAndMetadata(queryKey, metadata, null); + Set results = engine.getInTx(keyAndMeta, null); + return new LuceneIndexCursor(results.iterator()); // FIXME: LuceneIndexCursor + } + + + @Override + public Stream getRidsStream(Object[] keys) { + IndexCursor cursor = get(keys); + return cursor.ridsStream(); + } + + public Set get(Object key) { // From OLuceneFullTextIndex, matching engine's getInTx + lazyInit(); + // This 'key' is likely LuceneKeyAndMetadata or the raw query string. + return engine.getInTx(key, null); // Assuming null for LuceneTxChanges if not in a tx context for this call + } + + public Set getRids(Object key) { // New method, if useful + lazyInit(); + // This 'key' is likely LuceneKeyAndMetadata or the raw query string. + // engine.getInTx returns Set + return engine.getInTx(key, null).stream().map(Identifiable::getIdentity).collect(Collectors.toSet()); + } + + + @Override + public RangeIndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded) { + throw new UnsupportedOperationException("Range queries are not directly supported by Lucene full-text index in this manner. 
Use Lucene query syntax."); + } + + @Override + public RangeIndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded, int limit) { + throw new UnsupportedOperationException("Range queries are not directly supported by Lucene full-text index in this manner. Use Lucene query syntax."); + } + + @Override + public IndexCursor iterator(boolean ascendingOrder) { + throw new UnsupportedOperationException("Full iteration is not typically efficient for Lucene. Use a match_all query if needed."); + } + + @Override + public IndexCursor iterator(boolean ascendingOrder, Object[] fromKey, boolean fromKeyInclusive) { + throw new UnsupportedOperationException("Full iteration is not typically efficient for Lucene."); + } + + @Override + public IndexCursor descendingIterator() { + throw new UnsupportedOperationException("Full iteration is not typically efficient for Lucene."); + } + + @Override + public IndexCursor descendingIterator(Object[] fromKey, boolean fromKeyInclusive) { + throw new UnsupportedOperationException("Full iteration is not typically efficient for Lucene."); + } + + @Override + public boolean supportsOrderedIterations() { + return false; // Lucene orders by relevance score by default, not by key. + } + + @Override + public boolean isAutomatic() { + return definition != null && definition.isAutomatic(); + } + + @Override + public void setRebuilding(boolean rebuilding) { + // Could set a flag or inform the engine + } + + @Override + public IndexEngine getEngine() { + lazyInit(); + return engine; + } + + @Override + public boolean isValid() { + // Check if engine is initialized and Lucene index is readable + lazyInit(); + // FIXME: engine needs an isValid() or similar check + return engine != null; + } + + @Override + public Map getStats() { + // FIXME: engine should provide stats (num docs, etc.) 
+ return Collections.emptyMap(); + } + + @Override + public void setStats(Map stats) { + // Not typically set from outside + } + + @Override + public void compact() throws IOException { + lazyInit(); + // engine.forceMerge(); // FIXME: Add to engine if needed + } + + @Override + public boolean isCompacting() { + return false; // FIXME: engine should report this + } + + @Override + public List getFileIds() { + return Collections.singletonList(fileId); // Main metadata file ID + } + + @Override + public int getPageSize() { + return -1; // Not page-based like ArcadeDB native + } + + @Override + public void setPageSize(int pageSize) { + // No-op for Lucene + } + + @Override + public byte getNullStrategy() { + return definition != null ? definition.getNullStrategy().getValue() : Index.NULL_STRATEGY.ERROR.getValue(); + } + + @Override + public void setNullStrategy(byte nullStrategy) { + // Usually immutable + } + + @Override + public void set(TransactionContext tx, Object[] keys, RID[] rids) throws IndexException { + lazyInit(); + // This is for unique indexes usually. Lucene full-text is not typically unique. + // If used, it implies key -> RID mapping. + // For Lucene, it's document (derived from RID's record) -> indexed. + // This method needs careful interpretation for Lucene. + // Assuming keys[0] is the "key" to index (could be a document itself or fields) + // and rids[0] is the value. + if (keys == null || keys.length == 0 || rids == null || rids.length == 0) { + throw new IndexException("Keys and RIDs must be provided for Lucene set operation for index '" + getName() + "'."); + } + // Engine methods (put, remove) were refactored to take TransactionContext directly. 
+ engine.put(tx, keys[0], rids[0]); + } + + @Override + public void remove(TransactionContext tx, Object[] keys, Identifiable rid) throws IndexException { + lazyInit(); + if (keys == null || keys.length == 0) { + throw new IndexException("Keys must be provided for Lucene remove operation for index '" + getName() + "'."); + } + // Engine methods (put, remove) were refactored to take TransactionContext directly. + if (rid != null) { + engine.remove(tx, keys[0], rid); + } else { + engine.remove(tx, keys[0]); // Remove all documents matching key + } + } + + @Override + public void remove(TransactionContext tx, Object[] keys) throws IndexException { + remove(tx, keys, null); // Remove all RIDs associated with these keys + } + + @Override + public IndexCursor range(boolean ascendingOrder) { + throw new UnsupportedOperationException("Range queries without keys are not directly supported. Use a match_all query."); + } + + @Override + public IndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded, int limit, int skip) { + throw new UnsupportedOperationException("Range queries are not directly supported by Lucene full-text index in this manner. Use Lucene query syntax."); + } + + @Override + public int getAssociatedBucketId() { + if (definition == null) return -1; + List bucketIds = definition.getBucketIds(); + return bucketIds != null && !bucketIds.isEmpty() ? 
bucketIds.get(0) : -1; + } + + // --- Lucene Specific Accessors --- + public IndexSearcher searcher() { + lazyInit(); + return engine.searcher(); + } + + public Analyzer indexAnalyzer() { + lazyInit(); + return engine.indexAnalyzer(); + } + + public Analyzer queryAnalyzer() { + lazyInit(); + return engine.queryAnalyzer(); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java new file mode 100644 index 0000000000..50af17bf24 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java @@ -0,0 +1,345 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * Copyright 2014 Orient Technologies. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.arcadedb.lucene.index; // Changed package + +import com.arcadedb.database.Identifiable; // Changed import +import com.arcadedb.database.RID; // Changed import +import com.arcadedb.document.Document; // ArcadeDB Document +import com.arcadedb.exception.ArcadeDBException; // Changed import +import com.arcadedb.index.CompositeKey; // Changed import +import com.arcadedb.index.IndexDefinition; // Changed import +import com.arcadedb.schema.Type; // Changed import +import com.arcadedb.lucene.util.LuceneDateTools; // Added import +import java.io.UnsupportedEncodingException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Date; +import java.util.List; +import java.util.Locale; +import org.apache.lucene.document.Field; // Lucene Document Field +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; + +/** Created by enricorisa on 21/03/14. 
*/ +public class ArcadeLuceneIndexType { // Changed class name + public static final String RID_HASH = "_RID_HASH"; + public static final String RID = "_RID"; // Defined locally + public static final String KEY = "_KEY"; // Defined locally + + public static Field createField( // Simplified, assuming store is passed correctly by caller for specific needs + final String fieldName, final Object value, final Field.Store store) { + if (fieldName.startsWith("_CLASS") || fieldName.startsWith("_CLUSTER")) { + return new StringField(fieldName, value.toString(), store); + } + // Defaulting to TextField, assuming analysis. Use StringField if non-analyzed is the default. + return new TextField(fieldName, value.toString(), store); + } + + public static String extractId(org.apache.lucene.document.Document doc) { // Lucene Document + String value = doc.get(RID_HASH); + if (value != null) { + int pos = value.indexOf("|"); + if (pos > 0) { + return value.substring(0, pos); + } else { + return value; + } + } else { + return null; + } + } + + public static Field createIdField(final Identifiable id, final Object key) { // Changed Identifiable + return new StringField(RID_HASH, genValueId(id, key), Field.Store.YES); + } + + public static Field createRidField(final Identifiable id) { // Renamed from createOldIdField, Changed Identifiable + return new StringField(RID, id.getIdentity().toString(), Field.Store.YES); + } + + public static String genValueId(final Identifiable id, final Object key) { // Changed Identifiable + String value = id.getIdentity().toString() + "|"; + value += hashKey(key); + return value; + } + + public static List createFields( + String fieldName, Object value, Field.Store store, Boolean sort, Type type) { // Added Type parameter + List luceneFields = new ArrayList<>(); + + if (value instanceof Number) { + Number number = (Number) value; + if (type == Type.LONG || value instanceof Long) { + luceneFields.add(new LongPoint(fieldName, number.longValue())); + 
luceneFields.add(new NumericDocValuesField(fieldName, number.longValue())); // For sorting/faceting + if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.longValue())); + } else if (type == Type.FLOAT || value instanceof Float) { + luceneFields.add(new FloatPoint(fieldName, number.floatValue())); + luceneFields.add(new FloatDocValuesField(fieldName, number.floatValue())); // For sorting/faceting + if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.floatValue())); + } else if (type == Type.DOUBLE || value instanceof Double) { + luceneFields.add(new DoublePoint(fieldName, number.doubleValue())); + luceneFields.add(new DoubleDocValuesField(fieldName, number.doubleValue())); // For sorting/faceting + if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.doubleValue())); + } else { // INTEGER, SHORT, BYTE + luceneFields.add(new IntPoint(fieldName, number.intValue())); + luceneFields.add(new NumericDocValuesField(fieldName, number.longValue())); // Use long for DV for all integer types + if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.intValue())); + } + } else if (type == Type.DATETIME || type == Type.DATE) { + Long time = null; + if (value instanceof Date) { + time = ((Date) value).getTime(); + } else if (value instanceof Number) { + time = ((Number) value).longValue(); + } else if (value instanceof String) { + time = LuceneDateTools.parseDateTimeToMillis((String) value); + } + if (time != null) { + if (type == Type.DATE) { + time = LuceneDateTools.normalizeToDayEpochMillis(time); + } + luceneFields.add(new LongPoint(fieldName, time)); + luceneFields.add(new NumericDocValuesField(fieldName, time)); // For sorting/faceting + if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, time)); + } + // Optionally, add the original value as a TextField if searchable as text and not just date + // if (value != null && store == Field.Store.YES) 
luceneFields.add(new TextField(fieldName, value.toString(), store)); + } else if (type == Type.STRING || type == Type.TEXT || type == Type.ENUM || type == Type.UUID || value instanceof String) { // Added TEXT, ENUM, UUID + String stringValue = value.toString(); + // Defaulting to TextField (analyzed). If non-analyzed is preferred for some types (e.g. UUID, ENUM), use StringField. + luceneFields.add(new TextField(fieldName, stringValue, store)); + // Or use StringField for non-analyzed: + // luceneFields.add(new StringField(fieldName, stringValue, store)); + if (Boolean.TRUE.equals(sort)) { + luceneFields.add(new SortedDocValuesField(fieldName, new BytesRef(stringValue))); + } + } else { + // Default to TextField for other types or if type is null + luceneFields.add(new TextField(fieldName, value.toString(), store)); + if (Boolean.TRUE.equals(sort)) { + luceneFields.add(new SortedDocValuesField(fieldName, new BytesRef(value.toString()))); + } + } + return luceneFields; + } + + public static Query createExactQuery(IndexDefinition index, Object key) { // Changed OIndexDefinition + Query query = null; + if (key instanceof String) { + final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + if (index.getFields().size() > 0) { + for (String idx : index.getFields()) { + queryBuilder.add(new TermQuery(new Term(idx, key.toString())), BooleanClause.Occur.SHOULD); + } + } else { + queryBuilder.add(new TermQuery(new Term(KEY, key.toString())), BooleanClause.Occur.SHOULD); + } + query = queryBuilder.build(); + } else if (key instanceof CompositeKey) { // Changed OCompositeKey + final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + int i = 0; + CompositeKey keys = (CompositeKey) key; + for (String idx : index.getFields()) { + String val = (String) keys.getKeys().get(i); // Assuming keys are strings + queryBuilder.add(new TermQuery(new Term(idx, val)), BooleanClause.Occur.MUST); + i++; + } + query = queryBuilder.build(); + } + return query; + } + 
+ public static Query createQueryId(Identifiable value) { // Changed OIdentifiable + return new TermQuery(new Term(RID, value.getIdentity().toString())); + } + + public static Query createQueryId(Identifiable value, Object key) { // Changed OIdentifiable + return new TermQuery(new Term(RID_HASH, genValueId(value, key))); + } + + public static String hashKey(Object key) { + try { + String keyString; + if (key instanceof Document) { // Changed ODocument to ArcadeDB Document + keyString = ((Document) key).toJSON().toString(); // Assuming toJSON returns JSON object + } else { + keyString = key.toString(); + } + MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); + byte[] bytes = sha256.digest(keyString.getBytes("UTF-8")); + return Base64.getEncoder().encodeToString(bytes); + } catch (NoSuchAlgorithmException e) { + throw ArcadeDBException.wrapException(new ArcadeDBException("fail to find sha algorithm"), e); // Changed exception + } catch (UnsupportedEncodingException e) { + throw ArcadeDBException.wrapException(new ArcadeDBException("fail to find utf-8 encoding"), e); // Changed exception + } + } + + public static Query createDeleteQuery( // Changed OIdentifiable, ODocument + Identifiable value, List fields, Object key, com.arcadedb.document.Document metadata) { + + // TODO Implementation of Composite keys with Collection + final BooleanQuery.Builder filter = new BooleanQuery.Builder(); + final BooleanQuery.Builder builder = new BooleanQuery.Builder(); + // TODO: Condition on Id and field key only for backward compatibility + if (value != null) { + builder.add(createQueryId(value), BooleanClause.Occur.MUST); + } + String field = fields.iterator().next(); + builder.add( + new TermQuery(new Term(field, key.toString().toLowerCase(Locale.ENGLISH))), + BooleanClause.Occur.MUST); + + filter.add(builder.build(), BooleanClause.Occur.SHOULD); + if (value != null) { + filter.add(createQueryId(value, key), BooleanClause.Occur.SHOULD); + } + return filter.build(); + } + 
+ /** + * Creates a Lucene Query for exact matching on a field, considering the field's schema type. + * + * @param fieldName The name of the field. + * @param value The value to match. + * @param type The ArcadeDB schema Type of the field. + * @param database The database instance (currently unused here, but might be useful for context or complex types). + * @return A Lucene Query. + */ + public static Query createExactFieldQuery(String fieldName, Object value, Type type, com.arcadedb.database.DatabaseInternal database) { + if (value == null) { + // Or handle as a specific query for null if Lucene supports it for the type, e.g. for checking existence. + // For now, a query that matches nothing or a specific "null value" term if that's how nulls are indexed. + // This behavior needs to align with how nulls are indexed by createFields. + // Assuming null means "match no specific value" for now, which might mean it's handled by query structure. + // A "must not exist" or "must exist" query is different. This is for "fieldName:null". + // Let's treat it as a TermQuery for "null" string for now if type is string, otherwise it's problematic for points. + if (type == Type.STRING || type == Type.TEXT || type == Type.ENUM) { + return new TermQuery(new Term(fieldName, "null")); // Or a special null marker if used during indexing + } + // For numeric/point types, matching "null" is usually done by ensuring the field *doesn't* exist, + // which is more complex (e.g., BooleanQuery with MUST_NOT(MatchAllDocs) + filter on field existence). + // For simplicity, an exact match for a null value on a point field should probably yield no results or error. + // Returning a query that matches nothing for non-string nulls. 
+ return new BooleanQuery.Builder().build(); // Empty BooleanQuery matches nothing + } + + switch (type) { + case STRING: + case TEXT: + case ENUM: + case UUID: // UUIDs are typically indexed and queried as strings + return new TermQuery(new Term(fieldName, value.toString())); + case INTEGER: + if (value instanceof Number) { + return IntPoint.newExactQuery(fieldName, ((Number) value).intValue()); + } else { + try { + return IntPoint.newExactQuery(fieldName, Integer.parseInt(value.toString())); + } catch (NumberFormatException e) { + // Log warning, fallback to TermQuery + return new TermQuery(new Term(fieldName, value.toString())); + } + } + case LONG: + case DATETIME: // Assuming stored as long (epoch millis) + case DATE: // Assuming stored as long (epoch millis) + if (value instanceof Number) { + return LongPoint.newExactQuery(fieldName, ((Number) value).longValue()); + } else if (value instanceof java.util.Date) { + return LongPoint.newExactQuery(fieldName, ((java.util.Date) value).getTime()); + } else { + try { + Long time = null; + if (value instanceof String) { + time = LuceneDateTools.parseDateTimeToMillis((String) value); + } else { // Already Long or Date + // Handled by previous instanceof checks + } + if (time == null) { // Parsing failed or was not a convertible type + // Log warning or throw? For now, fallback to TermQuery on original string. 
+ return new TermQuery(new Term(fieldName, value.toString())); + } + if (type == Type.DATE) { + time = LuceneDateTools.normalizeToDayEpochMillis(time); + } + return LongPoint.newExactQuery(fieldName, time); + } catch (NumberFormatException e) { // Should be caught by LuceneDateTools or earlier instanceof + return new TermQuery(new Term(fieldName, value.toString())); + } + } + case FLOAT: + if (value instanceof Number) { + return FloatPoint.newExactQuery(fieldName, ((Number) value).floatValue()); + } else { + try { + return FloatPoint.newExactQuery(fieldName, Float.parseFloat(value.toString())); + } catch (NumberFormatException e) { + return new TermQuery(new Term(fieldName, value.toString())); + } + } + case DOUBLE: + if (value instanceof Number) { + return DoublePoint.newExactQuery(fieldName, ((Number) value).doubleValue()); + } else { + try { + return DoublePoint.newExactQuery(fieldName, Double.parseDouble(value.toString())); + } catch (NumberFormatException e) { + return new TermQuery(new Term(fieldName, value.toString())); + } + } + case SHORT: + case BYTE: + if (value instanceof Number) { + return IntPoint.newExactQuery(fieldName, ((Number) value).intValue()); // Promote to IntPoint + } else { + try { + return IntPoint.newExactQuery(fieldName, Short.parseShort(value.toString())); + } catch (NumberFormatException e) { + return new TermQuery(new Term(fieldName, value.toString())); + } + } + case BOOLEAN: + // Lucene typically stores booleans as "T"/"F" or "true"/"false" in a StringField, + // or as 0/1 in a numeric field. Assuming string "true" or "false" as indexed by createFields default for strings. + return new TermQuery(new Term(fieldName, value.toString().toLowerCase(Locale.ENGLISH))); + default: + // For BINARY, EMBEDDED, LINK etc., default to TermQuery on string representation. + // This might not be effective unless specific string representations are indexed. 
+ return new TermQuery(new Term(fieldName, value.toString())); + } + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java b/lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java new file mode 100644 index 0000000000..a0b1cc9d48 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java @@ -0,0 +1,118 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.arcadedb.lucene.index; + +import com.arcadedb.lucene.OLuceneCrossClassIndexFactory; +import com.arcadedb.lucene.engine.OLuceneIndexEngine; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.exception.OInvalidIndexEngineIdException; +import com.arcadedb.database.index.OIndexMetadata; +import com.arcadedb.database.storage.OStorage; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.Query; + +public class OLuceneFullTextIndex extends OLuceneIndexNotUnique { + + public OLuceneFullTextIndex(OIndexMetadata im, final OStorage storage) { + super(im, storage); + } + + public Document buildDocument(final Object key, OIdentifiable identifieable) { + + while (true) + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.buildDocument(key, identifieable); + }); + } catch (OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + + public Query buildQuery(final Object query) { + while (true) + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.buildQuery(query); + }); + } catch (OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + + public Analyzer queryAnalyzer() { + while (true) + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.queryAnalyzer(); + }); + } catch (final OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + + public boolean isCollectionIndex() { + while (true) { + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.isCollectionIndex(); + }); + } catch 
(OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + } + + public Analyzer indexAnalyzer() { + while (true) { + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.indexAnalyzer(); + }); + } catch (OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + } + + @Override + public boolean isAutomatic() { + return super.isAutomatic() + || OLuceneCrossClassIndexFactory.LUCENE_CROSS_CLASS.equals(im.getAlgorithm()); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/parser/ArcadeLuceneMultiFieldQueryParser.java b/lucene/src/main/java/com/arcadedb/lucene/parser/ArcadeLuceneMultiFieldQueryParser.java new file mode 100644 index 0000000000..99264342eb --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/parser/ArcadeLuceneMultiFieldQueryParser.java @@ -0,0 +1,183 @@ +/* + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.arcadedb.lucene.parser; + +import com.arcadedb.schema.Type; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.DateTools; // For date parsing, if needed +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermRangeQuery; // For newStringRange +import org.apache.lucene.util.BytesRef; + +import java.text.SimpleDateFormat; // Example for date parsing +import java.util.Date; // Example for date parsing +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class ArcadeLuceneMultiFieldQueryParser extends MultiFieldQueryParser { + + private static final Logger logger = Logger.getLogger(ArcadeLuceneMultiFieldQueryParser.class.getName()); + + private final Map fieldTypes; + + // Date format constants removed, will use LuceneDateTools + + public ArcadeLuceneMultiFieldQueryParser(Map fieldTypes, String[] fields, Analyzer analyzer, Map boosts) { + super(fields, analyzer, boosts); + this.fieldTypes = fieldTypes != null ? new HashMap<>(fieldTypes) : new HashMap<>(); + } + + public ArcadeLuceneMultiFieldQueryParser(Map fieldTypes, String[] fields, Analyzer analyzer) { + super(fields, analyzer); + this.fieldTypes = fieldTypes != null ? 
new HashMap<>(fieldTypes) : new HashMap<>(); + } + + protected Type getFieldType(String field) { + return fieldTypes.get(field); + } + + @Override + protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException { + Type fieldType = getFieldType(field); + + if (fieldType == null) { + logger.log(Level.FINE, "No type information for field {0} in range query, defaulting to string range.", field); + fieldType = Type.STRING; // Default to string range if type unknown + } + + // Lucene's default MultiFieldQueryParser uses TermRangeQuery for ranges on text fields. + // For specific data types, we need to create appropriate Point range queries. + + try { + switch (fieldType) { + case STRING: + case TEXT: + // For string ranges, ensure part1 and part2 are not null for TermRangeQuery.newStringRange + // The superclass handles * as open range for TermRangeQuery. + // If super.newRangeQuery is called, it will likely create a TermRangeQuery. + // TermRangeQuery.newStringRange is more explicit for string ranges. + BytesRef lowerTerm = part1 == null ? null : new BytesRef(part1); + BytesRef upperTerm = part2 == null ? null : new BytesRef(part2); + return TermRangeQuery.newStringRange(field, part1, part2, startInclusive, endInclusive); + + case INTEGER: + Integer lowerInt = (part1 == null || "*".equals(part1)) ? null : Integer.parseInt(part1); + Integer upperInt = (part2 == null || "*".equals(part2)) ? null : Integer.parseInt(part2); + return IntPoint.newRangeQuery(field, + lowerInt == null ? Integer.MIN_VALUE : (startInclusive ? lowerInt : lowerInt + 1), + upperInt == null ? Integer.MAX_VALUE : (endInclusive ? 
+ upperInt : upperInt - 1));
+
+        case DATETIME:
+        case DATE:
+          Long lowerLong = com.arcadedb.lucene.util.LuceneDateTools.parseDateTimeToMillis(part1);
+          Long upperLong = com.arcadedb.lucene.util.LuceneDateTools.parseDateTimeToMillis(part2);
+
+          if (fieldType == Type.DATE) {
+            if (lowerLong != null) lowerLong = com.arcadedb.lucene.util.LuceneDateTools.normalizeToDayEpochMillis(lowerLong);
+            if (upperLong != null) upperLong = com.arcadedb.lucene.util.LuceneDateTools.normalizeToDayEpochMillis(upperLong);
+          }
+
+          // Adjust for inclusive/exclusive after potential null from parsing
+          long actualLowerLong = lowerLong == null ? Long.MIN_VALUE : (startInclusive ? lowerLong : lowerLong + 1L);
+          if (lowerLong == null && "*".equals(part1)) actualLowerLong = Long.MIN_VALUE; // Explicit open start
+          else if (lowerLong == null && part1 != null) throw new ParseException("Cannot parse lower date range: " + part1);
+
+
+          long actualUpperLong = upperLong == null ? Long.MAX_VALUE : (endInclusive ? upperLong : upperLong - 1L);
+          if (upperLong == null && "*".equals(part2)) actualUpperLong = Long.MAX_VALUE; // Explicit open end
+          else if (upperLong == null && part2 != null) throw new ParseException("Cannot parse upper date range: " + part2);
+
+          // Ensure lower is not greater than upper after adjustments if both are specified
+          if (lowerLong != null && upperLong != null && actualLowerLong > actualUpperLong) {
+            actualLowerLong = lowerLong; // Reset to original parsed if adjustments inverted range for point fields
+            actualUpperLong = upperLong;
+            // For point fields, if startInclusive=false means actual_low = low+1, endInclusive=false means actual_high = high-1
+            // If after this actual_low > actual_high, it means no values can exist.
+            // Lucene's LongPoint.newRangeQuery handles this correctly by creating a query that matches nothing.
+ } + + return LongPoint.newRangeQuery(field, actualLowerLong, actualUpperLong); + case LONG: // Separate from DATE/DATETIME for clarity if parseDateTimeToMillis is too specific + Long lowerPlainLong = (part1 == null || "*".equals(part1)) ? null : Long.parseLong(part1); + Long upperPlainLong = (part2 == null || "*".equals(part2)) ? null : Long.parseLong(part2); + return LongPoint.newRangeQuery(field, + lowerPlainLong == null ? Long.MIN_VALUE : (startInclusive ? lowerPlainLong : lowerPlainLong + 1L), + upperPlainLong == null ? Long.MAX_VALUE : (endInclusive ? upperPlainLong : upperPlainLong - 1L)); + + case FLOAT: + Float lowerFloat = (part1 == null || "*".equals(part1)) ? null : Float.parseFloat(part1); + Float upperFloat = (part2 == null || "*".equals(part2)) ? null : Float.parseFloat(part2); + // Point queries are exclusive for lower, inclusive for upper by default with null/MIN/MAX handling. + // Adjusting for inclusive/exclusive: + float actualLowerFloat = lowerFloat == null ? Float.NEGATIVE_INFINITY : (startInclusive ? lowerFloat : Math.nextUp(lowerFloat)); + float actualUpperFloat = upperFloat == null ? Float.POSITIVE_INFINITY : (endInclusive ? upperFloat : Math.nextDown(upperFloat)); + return FloatPoint.newRangeQuery(field, actualLowerFloat, actualUpperFloat); + + + case DOUBLE: + Double lowerDouble = (part1 == null || "*".equals(part1)) ? null : Double.parseDouble(part1); + Double upperDouble = (part2 == null || "*".equals(part2)) ? null : Double.parseDouble(part2); + double actualLowerDouble = lowerDouble == null ? Double.NEGATIVE_INFINITY : (startInclusive ? lowerDouble : Math.nextUp(lowerDouble)); + double actualUpperDouble = upperDouble == null ? Double.POSITIVE_INFINITY : (endInclusive ? 
upperDouble : Math.nextDown(upperDouble)); + return DoublePoint.newRangeQuery(field, actualLowerDouble, actualUpperDouble); + + case SHORT: + case BYTE: + // Promote to IntPoint for querying, as Lucene has no ShortPoint/BytePoint + Integer lowerShortOrByte = (part1 == null || "*".equals(part1)) ? null : Integer.parseInt(part1); + Integer upperShortOrByte = (part2 == null || "*".equals(part2)) ? null : Integer.parseInt(part2); + return IntPoint.newRangeQuery(field, + lowerShortOrByte == null ? Integer.MIN_VALUE : (startInclusive ? lowerShortOrByte : lowerShortOrByte + 1), + upperShortOrByte == null ? Integer.MAX_VALUE : (endInclusive ? upperShortOrByte : upperShortOrByte - 1)); + + default: + logger.log(Level.WARNING, "Unhandled type {0} for field {1} in range query. Defaulting to string range.", new Object[]{fieldType, field}); + return TermRangeQuery.newStringRange(field, part1, part2, startInclusive, endInclusive); + } + } catch (NumberFormatException e) { + throw new ParseException("Could not parse number in range query for field " + field + ": " + e.getMessage()); + } + // Removed catch for java.text.ParseException as LuceneDateTools handles its own parsing issues or returns null + } + + // Date parsing helper removed, now using LuceneDateTools + + // Wildcard, Prefix, Fuzzy queries usually apply to text fields. + // The superclass versions are generally fine. If specific behavior is needed + // for non-text fields (e.g., to disallow or handle differently), + // these methods can be overridden. For now, relying on superclass. + + // @Override + // protected Query getWildcardQuery(String field, String termStr) throws ParseException { + // Type fieldType = getFieldType(field); + // if (fieldType != null && fieldType.isNumeric()) { + // // Wildcards on numeric points don't make sense. + // // Could throw error or return a MatchNoDocsQuery, or let super handle (might error). + // // For now, let super decide, it might try to parse termStr as a number. 
+  //   }
+  //   return super.getWildcardQuery(field, termStr);
+  // }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java
new file mode 100644
index 0000000000..8aee90bad4
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java
@@ -0,0 +1,279 @@
+package com.arcadedb.lucene.query;
+
+import com.arcadedb.database.Database;
+import com.arcadedb.database.Identifiable;
+import com.arcadedb.database.RID;
+import com.arcadedb.database.RecordId; // ArcadeDB RecordId for context
+import com.arcadedb.index.IndexCursor;
+import com.arcadedb.lucene.engine.LuceneIndexEngine; // Assumed engine interface
+import com.arcadedb.lucene.index.ArcadeLuceneIndexType; // For RID field name
+import com.arcadedb.query.sql.executor.Result;
+import com.arcadedb.query.sql.executor.ResultInternal;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.lucene.document.Document; // Lucene Document
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TotalHits;
+
+public class LuceneIndexCursor implements IndexCursor {
+
+  private static final Logger logger = Logger.getLogger(LuceneIndexCursor.class.getName());
+
+  private final LuceneQueryContext queryContext;
+  private final LuceneIndexEngine engine; // Engine for callbacks
+  private final com.arcadedb.document.Document metadata; // ArcadeDB Document for query metadata
+
+  private ScoreDoc[] scoreDocs;
+  private IndexSearcher searcher;
+  private int currentIndex = -1; // Before the first element
+  private RID currentRID;
+  private float currentScore;
+  private Map currentProximityInfo; // For
contextual data like highlights + + private TopDocs topDocs; + + + public LuceneIndexCursor(LuceneQueryContext queryContext, + LuceneIndexEngine engine, + com.arcadedb.document.Document metadata) { + this.queryContext = queryContext; + this.engine = engine; + this.metadata = metadata; + this.searcher = queryContext.getSearcher(); // Get the potentially transactional searcher + + executeSearch(); + } + + // Constructor for when results (Set) are already fetched, e.g. from engine.getInTx() + // This is a simplified cursor that iterates over pre-fetched RIDs without scores or Lucene docs. + private Iterator preFetchedResultsIterator; + private Identifiable currentPreFetched; + private int preFetchedCount; + + public LuceneIndexCursor(Set preFetchedResults) { + this.queryContext = null; // Not applicable + this.engine = null; // Not applicable + this.metadata = null; // Not applicable + if (preFetchedResults != null) { + this.preFetchedResultsIterator = preFetchedResults.iterator(); + this.preFetchedCount = preFetchedResults.size(); + } else { + this.preFetchedResultsIterator = Collections.emptyIterator(); + this.preFetchedCount = 0; + } + } + + + private void executeSearch() { + if (queryContext == null) return; // Should not happen if not using pre-fetched constructor + + try { + int limit = queryContext.getContext() != null ? 
queryContext.getContext().getLimit() : Integer.MAX_VALUE; + if (limit == -1) limit = Integer.MAX_VALUE; // SQL limit -1 means no limit + + if (queryContext.getSort() != null) { + this.topDocs = searcher.search(queryContext.getQuery(), limit, queryContext.getSort()); + } else { + this.topDocs = searcher.search(queryContext.getQuery(), limit); + } + this.scoreDocs = topDocs.scoreDocs; + } catch (IOException e) { + logger.log(Level.SEVERE, "Error executing Lucene search", e); + this.scoreDocs = new ScoreDoc[0]; // Empty results on error + this.topDocs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]); + } + } + + @Override + public Object[] getKeys() { + // For Lucene, the "keys" are the search terms. This is not usually returned per document. + // If queryContext.getQuery() is available, one could try to extract terms, but it's complex. + if (currentRID != null) { + // Could potentially store the query that led to this hit if needed. + // For now, returning null as it's not a natural fit. + return null; + } + throw new NoSuchElementException("No current element or keys not applicable"); + } + + @Override + public Identifiable getRecord() { + // In ArcadeDB, IndexCursor usually returns RIDs. The record is loaded by the caller. + // If this cursor *must* return the full record, a DB lookup is needed. + // For now, consistent with returning RID via next() and getRID(). + // This method could load and cache it if frequently used. 
+ if (currentRID != null && queryContext != null && queryContext.getContext() != null) { + return queryContext.getContext().getDatabase().lookupByRID(currentRID, true); + } + if (currentPreFetched != null) { + return currentPreFetched; + } + return null; + } + + public RID getRID() { + if (currentRID != null) { + return currentRID; + } + if (currentPreFetched != null) { + return currentPreFetched.getIdentity(); + } + return null; + } + + + @Override + public Map getProperties() { + // This could return highlights and score if structured appropriately. + // The currentProximityInfo is designed for this. + return currentProximityInfo != null ? currentProximityInfo : Collections.emptyMap(); + } + + @Override + public float getScore() { // Changed from int to float to match Lucene score + return currentScore; + } + + @Override + public boolean hasNext() { + if (preFetchedResultsIterator != null) { + return preFetchedResultsIterator.hasNext(); + } + if (scoreDocs == null) { + return false; + } + return (currentIndex + 1) < scoreDocs.length; + } + + @Override + public Identifiable next() { + if (preFetchedResultsIterator != null) { + if (!preFetchedResultsIterator.hasNext()) { + throw new NoSuchElementException(); + } + currentPreFetched = preFetchedResultsIterator.next(); + this.currentRID = currentPreFetched.getIdentity(); // Store RID + this.currentScore = 1.0f; // Pre-fetched results usually don't carry Lucene score directly + this.currentProximityInfo = Collections.singletonMap("$score", this.currentScore); + return currentPreFetched; // Or just currentRID if API prefers that + } + + if (!hasNext()) { + throw new NoSuchElementException(); + } + currentIndex++; + ScoreDoc scoreDoc = scoreDocs[currentIndex]; + try { + // Using storedFields().document() is preferred in modern Lucene for retrieving stored fields. + // searcher.doc(scoreDoc.doc) retrieves all (including non-stored if they were indexed in a certain way, but generally for stored). 
+ Document luceneDoc = searcher.storedFields().document(scoreDoc.doc); + String ridString = luceneDoc.get(ArcadeLuceneIndexType.RID); // Use constant from ArcadeLuceneIndexType + + if (ridString == null) { + // Fallback or try another RID field if there are multiple conventions (e.g. from older data) + // For now, log and skip if primary RID field is missing. + logger.log(Level.WARNING, "Lucene document " + scoreDoc.doc + " is missing RID field (" + ArcadeLuceneIndexType.RID + ")"); + // Try to advance to next valid document or return null/throw + return next(); // Recursive call to try next, or could throw. Be careful with recursion. + } + + Database currentDb = queryContext != null && queryContext.getContext() != null ? queryContext.getContext().getDatabase() : null; + this.currentRID = new RID(currentDb, ridString); // Pass database if available for cluster info + this.currentScore = scoreDoc.score; + + // Prepare contextual data (score, highlights) + this.currentProximityInfo = new HashMap<>(); + this.currentProximityInfo.put("$score", this.currentScore); + + if (queryContext != null && queryContext.isHighlightingEnabled()) { + if (engine != null && engine.queryAnalyzer() != null) { // Ensure we have an analyzer for highlighting + queryContext.setHighlightingAnalyzer(engine.queryAnalyzer()); // Use engine's query analyzer + + // We need an IndexReader to pass to getHighlights if it needs one. + // The searcher in queryContext already has one. + IndexReader reader = queryContext.getSearcher().getIndexReader(); + Map highlights = queryContext.getHighlights(luceneDoc, reader); + if (highlights != null && !highlights.isEmpty()) { + this.currentProximityInfo.putAll(highlights); + } + } else { + logger.warning("Highlighting enabled but no queryAnalyzer available from engine to set on LuceneQueryContext."); + } + } + + // The engine.onRecordAddedToResultSet callback is now less critical for highlights, + // but can be kept if it serves other purposes (e.g. 
security, logging, complex context data). + // For now, let's assume its primary highlight-related role is superseded. + if (engine != null && queryContext != null) { + RecordId contextualRid = new RecordId(this.currentRID); + engine.onRecordAddedToResultSet(queryContext, contextualRid, luceneDoc, scoreDoc); + } + + + // IndexCursor traditionally returns Identifiable (which can be just the RID) + // If the caller needs the full record, they call getRecord(). + return this.currentRID; + + } catch (IOException e) { + throw new RuntimeException("Error fetching document from Lucene index", e); + } + } + + @Override + public void close() { + // Release Lucene resources if this cursor specifically acquired them. + // If searcher is managed by engine (e.g. via SearcherManager), + // this cursor typically doesn't close/release the searcher. + scoreDocs = null; + // searcher = null; // Don't nullify if it's shared from engine/queryContext + } + + @Override + public long getCount() { // Changed from size() to match typical usage for total hits + if (preFetchedResultsIterator != null) { + return preFetchedCount; + } + return topDocs != null && topDocs.totalHits != null ? topDocs.totalHits.value : 0; + } + + @Override + public long size() { // Kept for IndexCursor interface if it uses size() for current iteration count + return getCount(); + } + + + @Override + public void setLimit(int limit) { + // Limit should be applied during the search execution. + throw new UnsupportedOperationException("Limit must be set before search execution via CommandContext or metadata."); + } + + @Override + public int getLimit() { + // Return the limit that was applied to this cursor's search + if (queryContext != null && queryContext.getContext() != null) { + return queryContext.getContext().getLimit(); + } + return -1; + } + + @Override + public boolean isPaginated() { + // Lucene TopDocs inherently supports pagination if the search is re-executed with 'searchAfter'. 
+ // This simple cursor iterates a fixed set of top N docs. So, it's "paginated" in the sense + // that it represents one page of results. + return true; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneKeyAndMetadata.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneKeyAndMetadata.java new file mode 100644 index 0000000000..861f7eecef --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneKeyAndMetadata.java @@ -0,0 +1,84 @@ +/* + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.arcadedb.lucene.query; + +import com.arcadedb.document.Document; +import com.arcadedb.index.CompositeKey; // If key can be a CompositeKey +import com.arcadedb.query.sql.executor.CommandContext; + +import java.util.Map; + +/** + * A container to pass a query key (which can be a simple string, + * a CompositeKey, or other structures) along with associated metadata Document. + * The metadata can include options for highlighting, sorting, limits, etc. + */ +public class LuceneKeyAndMetadata { + + public final Object key; + public final Document metadata; + private CommandContext context; // Optional command context + + /** + * Constructor. + * + * @param key The main query key (e.g., String, CompositeKey). + * @param metadata A Document containing additional query parameters and options. 
+ */ + public LuceneKeyAndMetadata(Object key, Document metadata) { + this.key = key; + this.metadata = metadata != null ? metadata : new Document(null); // Ensure metadata is never null + } + + /** + * Constructor with command context. + * + * @param key The main query key. + * @param metadata A Document containing additional query parameters. + * @param context The SQL command execution context. + */ + public LuceneKeyAndMetadata(Object key, Document metadata, CommandContext context) { + this.key = key; + this.metadata = metadata != null ? metadata : new Document(null); // Ensure metadata is never null + this.context = context; + } + + + public Object getKey() { + return key; + } + + public Document getMetadata() { + return metadata; + } + + public CommandContext getContext() { + return context; + } + + public LuceneKeyAndMetadata setContext(CommandContext context) { + this.context = context; + return this; + } + + /** + * Helper to get metadata as a Map, typically for options. + * @return Map representation of metadata, or empty map if null. + */ + public Map getMetadataAsMap() { + return this.metadata.toMap(); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java new file mode 100644 index 0000000000..e4b4068f39 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java @@ -0,0 +1,254 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * Copyright 2014 Orient Technologies. + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. 
+ * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.query; + +import com.arcadedb.database.Identifiable; // Changed +import com.arcadedb.exception.ArcadeDBException; // Changed +import com.arcadedb.lucene.tx.LuceneTxChanges; // FIXME: Needs refactoring +import com.arcadedb.query.sql.executor.CommandContext; // Changed +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.highlight.TextFragment; + +/** Created by Enrico Risa on 08/01/15. 
*/ +public class LuceneQueryContext { // Changed class name + private final CommandContext context; // Changed + private final IndexSearcher searcher; + private final Query query; + private final Sort sort; + private Optional changes; // FIXME: Needs refactoring + // private HashMap fragments; // Replaced by on-demand highlighting + + // Highlighter components - to be initialized if highlighting is requested + private org.apache.lucene.search.highlight.Highlighter highlighter; + private org.apache.lucene.analysis.Analyzer highlightingAnalyzer; // Analyzer used for highlighting (might be queryAnalyzer) + private String[] highlightingFields; + + + public LuceneQueryContext( // Changed + final CommandContext context, final IndexSearcher searcher, final Query query) { + this(context, searcher, query, Collections.emptyList()); + } + + public LuceneQueryContext( // Changed + final CommandContext context, + final IndexSearcher searcher, + final Query query, + final List sortFields) { + this.context = context; + this.searcher = searcher; + this.query = query; + if (sortFields == null || sortFields.isEmpty()) { // Added null check + sort = null; + } else { + sort = new Sort(sortFields.toArray(new SortField[0])); // Changed to new SortField[0] + } + changes = Optional.empty(); + // fragments = new HashMap<>(); // Not pre-cached anymore + + // Check metadata for highlighting setup + if (this.context != null && this.context.getVariable("highlight") instanceof Map) { + @SuppressWarnings("unchecked") + Map highlightParams = (Map) this.context.getVariable("highlight"); + // Simple setup for now, more advanced formatting can be added + // String preTag = (String) highlightParams.getOrDefault("preTag", ""); + // String postTag = (String) highlightParams.getOrDefault("postTag", ""); + // org.apache.lucene.search.highlight.Formatter formatter = new org.apache.lucene.search.highlight.SimpleHTMLFormatter(preTag, postTag); + org.apache.lucene.search.highlight.Formatter formatter = new 
org.apache.lucene.search.highlight.SimpleHTMLFormatter(); + org.apache.lucene.search.highlight.QueryScorer queryScorer = new org.apache.lucene.search.highlight.QueryScorer(query); + this.highlighter = new org.apache.lucene.search.highlight.Highlighter(formatter, queryScorer); + // Fragmenter: + // this.highlighter.setTextFragmenter(new org.apache.lucene.search.highlight.SimpleFragmenter(100)); // Example: 100 chars per fragment + + Object fieldsToHighlightObj = highlightParams.get("fields"); + if (fieldsToHighlightObj instanceof String) { + this.highlightingFields = ((String) fieldsToHighlightObj).split(","); + } else if (fieldsToHighlightObj instanceof List) { + @SuppressWarnings("unchecked") + List list = (List) fieldsToHighlightObj; + this.highlightingFields = list.toArray(new String[0]); + } + // Analyzer for highlighting should ideally be the one used for querying the specific fields. + // This is a simplification; a more robust solution would fetch field-specific analyzers. + // If the engine provides a general queryAnalyzer, use it. 
+    // this.highlightingAnalyzer = searcher.getAnalyzer(); // This is not standard on IndexSearcher
+    // Analyzer must be passed or retrieved from engine/index definition
+    }
+
+  }
+
+  public boolean isHighlightingEnabled() {
+    return this.highlighter != null && this.highlightingFields != null && this.highlightingFields.length > 0;
+  }
+
+  public LuceneQueryContext setHighlightingAnalyzer(org.apache.lucene.analysis.Analyzer analyzer) {
+    this.highlightingAnalyzer = analyzer;
+    return this;
+  }
+
+
+  public boolean isInTx() {
+    return changes.isPresent();
+  }
+
+  public LuceneQueryContext withChanges(final LuceneTxChanges changes) { // FIXME: Needs refactoring
+    this.changes = Optional.ofNullable(changes);
+    return this;
+  }
+
+  // addHighlightFragment removed as highlights are generated on demand by getHighlights
+
+  public CommandContext getContext() { // Changed
+    return context;
+  }
+
+  public Query getQuery() {
+    return query;
+  }
+
+  public Optional getChanges() { // FIXME: Needs refactoring
+    return changes;
+  }
+
+  public Sort getSort() {
+    return sort;
+  }
+
+  public IndexSearcher getSearcher() {
+    // FIXME: LuceneTxChanges and its searcher() method need refactoring
+    return changes.map(c -> new IndexSearcher(multiReader(c))).orElse(searcher);
+  }
+
+  private MultiReader multiReader(final LuceneTxChanges luceneTxChanges) { // FIXME: Needs refactoring
+    final IndexReader primaryReader = searcher.getIndexReader();
+    // FIXME: luceneTxChanges.searcher() needs to be refactored and return an IndexSearcher
+    final IndexReader txReader = luceneTxChanges.searcher().getIndexReader();
+    try {
+      // Lucene's MultiReader takes an array of IndexReaders.
+      // The boolean for sharing readers is gone in some modern versions,
+      // lifecycle of readers passed to MultiReader should be managed by the caller if they are not to be closed by MultiReader.
+      // However, if primaryReader and txReader are obtained just for this MultiReader,
+      // it might be okay for MultiReader to close them.
+ // The decRef logic was for when readers were shared. If they are not shared, it's not needed. + // Let's assume for now they are not shared and MultiReader can own them. + // If they are shared/managed elsewhere, then incRef/decRef or try-with-resources on the MultiReader is needed. + // For Lucene 9+, just passing readers is fine, their lifecycle is tricky. + // One common pattern is that MultiReader does NOT close the readers given to it by default. + // The `searcher.getIndexReader()` typically gives a reader that should not be closed by MultiReader if searcher is still live. + // `txReader` from `luceneTxChanges.searcher().getIndexReader()` also needs care. + // The original decRef implies they were "taken over". + // A safer approach for modern Lucene if readers are managed (e.g. by SearcherManager / NRTManager): + // DONT call decRef here. Ensure MultiReader is closed after use, and that it DOES NOT close its sub-readers + // if they are still managed externally. + // The constructor `new MultiReader(IndexReader... subReaders)` does NOT take ownership (doesn't close them). + + // Given the original decRef, it implies MultiReader was taking ownership. + // This is not standard for the varags MultiReader constructor. + // The constructor `MultiReader(IndexReader[] r, boolean closeSubReaders)` is gone. + // Let's assume the readers passed are temporary or their lifecycle is handled by the SearcherManager from which they came. + // If txReader is from a RAMDirectory, it's simpler. + // This part is tricky without knowing exactly how primaryReader and txReader are managed. + // For now, will replicate the structure but acknowledge the complexity. + // One option: increase ref count before passing to MultiReader, then MultiReader can decRef on its close. 
+ // primaryReader.incRef(); // If primaryReader is managed and should survive this MultiReader + // txReader.incRef(); // If txReader is managed + // MultiReader multiReader = new MultiReader(new IndexReader[] {primaryReader, txReader}); + // If MultiReader is short-lived and we don't want to affect original readers: + List readers = new ArrayList<>(); + readers.add(primaryReader); + if (txReader != null) readers.add(txReader); // txReader could be null if no changes + + return new MultiReader(readers.toArray(new IndexReader[0])); + + } catch (final IOException e) { + // FIXME: OLuceneIndexException needs to be ArcadeDB specific + throw ArcadeDBException.wrapException( + new ArcadeDBException("unable to create reader on changes"), e); // Changed + } + } + + public long deletedDocs(final Query query) { + // FIXME: LuceneTxChanges and its deletedDocs method need refactoring + return changes.map(c -> c.deletedDocs(query)).orElse(0L); // Ensure Long literal + } + + public boolean isUpdated(final Document doc, final Object key, final Identifiable value) { // Changed + // FIXME: LuceneTxChanges and its isUpdated method need refactoring + return changes.map(c -> c.isUpdated(doc, key, value)).orElse(false); + } + + public boolean isDeleted(final Document doc, final Object key, final Identifiable value) { // Changed + // FIXME: LuceneTxChanges and its isDeleted method need refactoring + return changes.map(c -> c.isDeleted(doc, key, value)).orElse(false); + } + + /** + * Generates highlighted snippets for the given Lucene document and configured fields. + * Requires highlightingAnalyzer to be set. + */ + public Map getHighlights(Document luceneDoc, IndexReader reader) { + if (!isHighlightingEnabled() || luceneDoc == null || this.highlightingAnalyzer == null) { + return Collections.emptyMap(); + } + + Map highlights = new HashMap<>(); + for (String field : highlightingFields) { + String text = luceneDoc.get(field); + if (text != null) { + try { + // Get best fragments. 
Last param is maxNoFragments.
+          // Highlighter has no (Analyzer, field, text, maxFrags) overload; the real API is
+          // getBestTextFragments(TokenStream, String, boolean, int), so build the TokenStream
+          // from the configured highlighting analyzer. 'true' merges contiguous fragments.
+          TextFragment[] frags = highlighter.getBestTextFragments(
+              this.highlightingAnalyzer.tokenStream(field, text), text, true, 3);
+          StringBuilder sb = new StringBuilder();
+          for (TextFragment frag : frags) {
+            if (frag != null && frag.getScore() > 0) {
+              sb.append(frag.toString());
+              sb.append("... "); // Separator for multiple fragments
+            }
+          }
+          if (sb.length() > 0) {
+            highlights.put("$" + field + "_hl", sb.toString().trim());
+          }
+        } catch (IOException | org.apache.lucene.search.highlight.InvalidTokenOffsetsException e) {
+          // Log error or handle as needed
+          System.err.println("Error highlighting field " + field + ": " + e.getMessage());
+        }
+      }
+    }
+    return highlights;
+  }
+
+  // getFragments() method removed, replaced by getHighlights() logic integrated into LuceneIndexCursor
+
+  // getLimit() and onRecord() were not in the provided OLuceneQueryContext,
+  // they might be from a different class or an older version.
+  // If they are needed, they would be implemented here.
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChanges.java b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChanges.java
new file mode 100644
index 0000000000..267aafe911
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChanges.java
@@ -0,0 +1,108 @@
+/*
+ *
+ * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ * * Copyright 2023 Arcade Data Ltd
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * *    http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.database.Identifiable; // Changed +import org.apache.lucene.analysis.Analyzer; // Added for new methods +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; // Added for new methods +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; // Added for new methods +import org.apache.lucene.util.Bits; // Added for new methods + +import java.io.IOException; // Added for new methods +import java.util.Collections; +import java.util.List; // Added for new methods +import java.util.Map; // Added for new methods +import java.util.Set; + +/** Created by Enrico Risa on 15/09/15. */ +public interface LuceneTxChanges { // Changed interface name + + // Existing methods adapted + void put(Object key, Identifiable value, Document doc); // Changed OIdentifiable + + void remove(Object key, Identifiable value); // Changed OIdentifiable + + IndexSearcher searcher(); // Existing method, seems to be the transactional searcher + + // numDocs() from prompt matches existing signature (except return type was long, now int as per Lucene's numDocs()) + int numDocs(); // Changed from long to int + + // getDeletedDocs() from prompt returns Set, existing returned Set + // Renaming existing to getDeletedLuceneDocs for clarity and adding new one + default Set getDeletedLuceneDocs() { // Kept original behavior with new name + return Collections.emptySet(); + } + + // isDeleted(Document, Object, OIdentifiable) adapted + boolean isDeleted(Document document, Object key, Identifiable value); // Changed OIdentifiable + + // isUpdated(Document, Object, OIdentifiable) adapted + boolean isUpdated(Document document, Object key, Identifiable value); // Changed OIdentifiable + + // deletedDocs(Query query) from prompt returns Bits, existing returned long + // Renaming existing to countDeletedDocs for clarity and adding new one + 
default long countDeletedDocs(Query query) { // Kept original behavior with new name + return 0; + } + + // New methods from prompt + IndexSearcher getCoordinatingSearcher(); // New: Could be the main index searcher before TX changes overlay + + Bits deletedDocs(Query query); // New: Returns Bits for live docs + + boolean isUpdated(Document doc, Analyzer analyzer, Query query); // New: Overload with Analyzer and Query + + boolean isDeleted(Document doc, Analyzer analyzer, Query query); // New: Overload with Analyzer and Query + + int nDoc(Query query); // New: Number of documents matching query in current TX state + + Set getDeletedDocuments(); // New: Set of deletion queries + + Map getUpdatedDocuments(); // New: Map of update queries to new documents + + List getAddedDocuments(); // New: List of added documents + + IndexReader getReader() throws IOException; // New: Get current transactional reader + + TopDocs query(Query query, int N) throws IOException; // New: Execute query with limit // Changed signature to add N + + Document doc(int doc) throws IOException; // New: Retrieve Lucene document by internal ID + + Document doc(int doc, Set fieldsToLoad) throws IOException; // New: Retrieve specific fields + + void close() throws IOException; // New + + int maxDoc() throws IOException; // New + + boolean hasDeletions(); // New + + void commit() throws IOException; // New + + void rollback() throws IOException; // New + + void addDocument(Document document) throws IOException; // New + + void deleteDocument(Query query) throws IOException; // New + + void updateDocument(Query query, Document document) throws IOException; // New +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesAbstract.java b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesAbstract.java new file mode 100644 index 0000000000..ba124633ca --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesAbstract.java @@ -0,0 +1,322 @@ +/* + * + * * 
Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * Copyright 2023 Arcade Data Ltd + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.database.Identifiable; +import com.arcadedb.exception.ArcadeDBException; // Changed +import com.arcadedb.lucene.engine.LuceneIndexEngine; // Changed +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.util.Bits; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; // Changed + +/** Created by Enrico Risa on 28/09/15. */ +public abstract class LuceneTxChangesAbstract implements LuceneTxChanges { // Changed class name and interface + private static final Logger logger = + Logger.getLogger(LuceneTxChangesAbstract.class.getName()); // Changed + public static final String TMP = "_tmp_rid"; // This constant seems unused here, but kept for now. 
+ + protected final LuceneIndexEngine engine; // Changed + protected final IndexWriter writer; // For new/updated documents + protected final IndexWriter deletesExecutor; // For pending deletions + + private IndexSearcher txSearcher; // Cached NRT searcher for the current transaction state (adds + main) + private IndexReader txReader; // Cached NRT reader for the current transaction state + + public LuceneTxChangesAbstract( // Changed + final LuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletesExecutor) { + this.engine = engine; + this.writer = writer; + this.deletesExecutor = deletesExecutor; + } + + // Method to get a transactional reader, possibly NRT from writer + protected IndexReader getTxReader() throws IOException { + if (txReader == null || !txReader.tryIncRef()) { // Check if reader is still valid or can be used + if (txReader != null) { // was valid, but couldn't incRef, so it's likely closed + try { + txReader.decRef(); // ensure it's closed if it was open + } catch (Exception e) { /* ignore */ } + } + // If writer is null or closed, this will throw an exception, which is appropriate. + txReader = DirectoryReader.open(writer); // Standard NRT reader + } + return txReader; + } + + protected void NRTReaderReopen() throws IOException{ + if (txReader != null) { + IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader)txReader, writer); + if (newReader != null) { + txReader.decRef(); + txReader = newReader; + txSearcher = new IndexSearcher(txReader); + } + } else { + txReader = DirectoryReader.open(writer); + txSearcher = new IndexSearcher(txReader); + } + } + + + @Override + public IndexSearcher searcher() { + try { + // Return a new NRT searcher reflecting current changes in 'writer' + // This searcher sees documents added/updated in the current TX but not yet committed. + // It does not see documents deleted in this TX against the main index. 
+ // For a searcher that sees deletes as well, getCoordinatingSearcher might be better. + NRTReaderReopen(); + return txSearcher; + } catch (IOException e) { + throw ArcadeDBException.wrapException( // Changed + new ArcadeDBException("Error creating transactional IndexSearcher from writer"), e); // Changed + } + } + + @Override + public IndexSearcher getCoordinatingSearcher() { + // This searcher should ideally reflect adds, updates, AND deletes. + // This typically involves a MultiReader combining the main index (with its own deletions applied) + // and the in-memory 'writer' index, while filtering out documents marked for deletion by 'deletesExecutor'. + // For simplicity in this abstract class, could return the same as searcher() and expect + // query execution layer to use getLiveDocs() or similar. + // Or, could be more complex here if a combined view is built. + // For now, let's assume it's similar to searcher() but it's a point for review. + // The engine's main searcher is `engine.searcher()` + // FIXME: This needs a proper implementation, probably involving MultiReader and live docs from deletesExecutor + return searcher(); + } + + @Override + public IndexReader getReader() throws IOException { + return getTxReader(); + } + + @Override + public long countDeletedDocs(Query query) { // Renamed from original deletedDocs + try { + // This counts documents matching the query in the 'deletesExecutor' index. + // These are documents marked for deletion in this transaction. 
+      if (deletesExecutor.getDocStats().numDocs == 0) return 0; // Optimization
+      try (IndexReader reader = DirectoryReader.open(deletesExecutor)) {
+        final IndexSearcher indexSearcher = new IndexSearcher(reader);
+        // IndexSearcher.count() returns the exact hit count. TopDocs.totalHits from
+        // search(query, 1) is only a lower bound once hit counting is cut off, and in
+        // Lucene 10 TotalHits is a record (value() accessor) — the old 'totalHits.value'
+        // field access no longer compiles against the pinned Lucene version.
+        return indexSearcher.count(query);
+      }
+    } catch (IOException e) {
+      logger.log(Level.SEVERE, "Error reading pending deletions index", e); // Changed
+    }
+    return 0;
+  }
+
+  @Override
+  public Bits deletedDocs(Query query) {
+    // This should return a Bits representing documents deleted by this query
+    // within the context of the main index reader (from engine.searcher()).
+    // This is complex as it needs to check against the 'deletesExecutor' or tracked delete queries.
+    // Not typically provided directly by IndexWriter for pending changes.
+    // FIXME: This needs a proper implementation, likely involving custom collector or query rewriting.
+    logger.warning("deletedDocs(Query) returning Bits is not fully implemented in abstract class.");
+    return null; // Placeholder
+  }
+
+
+  @Override
+  public void addDocument(Document document) throws IOException {
+    writer.addDocument(document);
+  }
+
+  @Override
+  public void deleteDocument(Query query) throws IOException {
+    // Deletes applied to main writer will be visible to its NRT reader.
+    // If deletesExecutor is for tracking standalone delete operations before commit to main index:
+    // writer.deleteDocuments(query); // This applies to the current TX state
+    // deletesExecutor.addDocument(createDeleteMarker(query)); // If deletes are tracked as docs in a separate index
+    // For now, assuming deletes are applied to the main writer for NRT visibility.
+    // If deletesExecutor is a separate RAMDirectory for _pending full deletes_ against main index,
+    // then it should be: deletesExecutor.deleteDocuments(query) or writer.deleteDocuments(query)
+    // The original code had separate writer and deletedIdx.
Let's assume deletes are applied to writer.
+    writer.deleteDocuments(query);
+    if(deletesExecutor != writer && deletesExecutor != null) { // If deletes are tracked separately for commit to main index
+        deletesExecutor.deleteDocuments(query);
+    }
+  }
+
+  @Override
+  public void updateDocument(Query query, Document document) throws IOException {
+    writer.updateDocument(query, document);
+    if(deletesExecutor != writer && deletesExecutor != null) {
+      // If an update can also affect the "to be deleted from main index" list, handle here.
+      // This is complex. Usually an update is a delete then an add.
+      // deletesExecutor.updateDocument(query, document); // This might not be how it works.
+    }
+  }
+
+  @Override
+  public void commit() throws IOException {
+    writer.commit();
+    if (deletesExecutor != null && deletesExecutor != writer) {
+      deletesExecutor.commit();
+    }
+  }
+
+  @Override
+  public void rollback() throws IOException {
+    writer.rollback();
+    if (deletesExecutor != null && deletesExecutor != writer) {
+      deletesExecutor.rollback();
+    }
+  }
+
+  @Override
+  public void close() throws IOException {
+    try {
+      if (txReader != null) {
+        txReader.decRef();
+        txReader = null;
+      }
+    } finally {
+      txSearcher = null; // Searcher was using txReader
+      try {
+        writer.close();
+      } finally {
+        if (deletesExecutor != null && deletesExecutor != writer) {
+          deletesExecutor.close();
+        }
+      }
+    }
+  }
+
+  @Override
+  public int numDocs() {
+    // Returns numDocs of the current transactional reader (reflecting adds/updates in this TX).
+    // NOTE(review): do NOT use try-with-resources here. getTxReader() hands back the shared,
+    // cached txReader instance; IndexReader.close() would mark that cached instance closed and
+    // break subsequent callers. Release the reference acquired by getTxReader() with decRef().
+    try {
+      final IndexReader reader = getTxReader();
+      try {
+        return reader.numDocs();
+      } finally {
+        reader.decRef();
+      }
+    } catch (IOException e) {
+      logger.log(Level.SEVERE, "Cannot get numDocs from transactional reader", e);
+      return 0;
+    }
+  }
+
+  @Override
+  public int maxDoc() throws IOException {
+    // Same lifecycle rule as numDocs(): pair the ref from getTxReader() with decRef(), not close().
+    final IndexReader reader = getTxReader();
+    try {
+      return reader.maxDoc();
+    } finally {
+      reader.decRef();
+    }
+  }
+
+  @Override
+  public boolean hasDeletions() {
+    // Check deletions in the context of the
main writer for NRT changes
+    return writer.hasDeletions();
+  }
+
+  @Override
+  public TopDocs query(Query query, int n) throws IOException {
+    NRTReaderReopen(); // Ensure searcher is up-to-date
+    return txSearcher.search(query, n);
+  }
+
+  @Override
+  public Document doc(int docId) throws IOException {
+    NRTReaderReopen();
+    return txSearcher.storedFields().document(docId);
+  }
+
+  @Override
+  public Document doc(int docId, Set fieldsToLoad) throws IOException {
+    NRTReaderReopen();
+    return txSearcher.storedFields().document(docId, fieldsToLoad);
+  }
+
+  // Methods requiring more specific state tracking, to be implemented by concrete classes or left as default/abstract.
+  // These were not in the original OLuceneTxChangesAbstract.
+
+  @Override
+  public abstract void put(Object key, Identifiable value, Document doc);
+
+  @Override
+  public abstract void remove(Object key, Identifiable value);
+
+  @Override
+  public abstract boolean isDeleted(Document document, Object key, Identifiable value);
+
+  @Override
+  public abstract boolean isUpdated(Document document, Object key, Identifiable value);
+
+  @Override
+  public boolean isUpdated(Document doc, Analyzer analyzer, Query query) {
+    // Default: Not supported or needs concrete implementation
+    logger.warning("isUpdated(doc, analyzer, query) not implemented in abstract class.");
+    return false;
+  }
+
+  @Override
+  public boolean isDeleted(Document doc, Analyzer analyzer, Query query) {
+    // Default: Not supported or needs concrete implementation
+    logger.warning("isDeleted(doc, analyzer, query) not implemented in abstract class.");
+    return false;
+  }
+
+  @Override
+  public int nDoc(Query query) {
+    // Number of documents matching query in the current TX state.
+    // IndexSearcher.count() returns the exact hit count; TopDocs.totalHits from query(query, 1)
+    // is only a lower bound once hit counting is cut off, and in Lucene 10 TotalHits is a
+    // record (value() accessor) — 'totalHits.value' field access no longer compiles.
+    try {
+      NRTReaderReopen(); // Ensure searcher is up-to-date
+      return txSearcher.count(query);
+    } catch (IOException e) {
+      logger.log(Level.SEVERE, "Error executing nDoc query", e);
+      return 0;
+    }
+  }
+
+  // These typically require tracking
specific operations, left abstract or default. + @Override + public Set getDeletedDocuments() { + logger.warning("getDeletedDocuments() not implemented in abstract class, returning empty set."); + return Collections.emptySet(); + } + + @Override + public Map getUpdatedDocuments() { + logger.warning("getUpdatedDocuments() not implemented in abstract class, returning empty map."); + return Collections.emptyMap(); + } + + @Override + public List getAddedDocuments() { + logger.warning("getAddedDocuments() not implemented in abstract class, returning empty list."); + return Collections.emptyList(); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesMultiRid.java b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesMultiRid.java new file mode 100644 index 0000000000..20e8874497 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesMultiRid.java @@ -0,0 +1,191 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * Copyright 2023 Arcade Data Ltd + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. 
+ * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.database.Identifiable; // Changed +import com.arcadedb.exception.ArcadeDBException; // Changed +import com.arcadedb.lucene.engine.LuceneIndexEngine; // Changed +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.lucene.analysis.Analyzer; // For isDeleted/isUpdated with Analyzer +import org.apache.lucene.analysis.core.KeywordAnalyzer; // For MemoryIndex in isDeleted +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; // For isDeleted +import org.apache.lucene.search.Query; + +/** Created by Enrico Risa on 15/09/15. */ +public class LuceneTxChangesMultiRid extends LuceneTxChangesAbstract { // Changed class name and base class + // Stores RID string to a list of associated keys that were part of a delete operation for that RID. + private final Map> deletedRidToKeys = new HashMap<>(); + + // To support new interface methods + private final List addedDocuments = new ArrayList<>(); + // For MultiRid, an "update" is typically a delete of an old key-RID pair (doc) and an add of a new one. + // Tracking specific "updates" as Query->Document is complex here if not just delete+add. 
+ private final Map updatedDocumentsMap = new HashMap<>(); + private final Set deletedQueries = new HashSet<>(); + + + public LuceneTxChangesMultiRid( // Changed + final LuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletesExecutor) { + super(engine, writer, deletesExecutor); + } + + @Override + public void put(final Object key, final Identifiable value, final Document doc) { + try { + super.addDocument(doc); // Use base class to add to writer + addedDocuments.add(doc); // Track for getAddedDocuments() + } catch (IOException e) { + throw ArcadeDBException.wrapException( // Changed + new ArcadeDBException("Unable to add document to transactional Lucene index for multi-RID"), e); // Changed + } + } + + @Override + public void remove(final Object key, final Identifiable value) { + Query deleteQuery; + if (value == null) { // Delete by key - affects all RIDs for this key + deleteQuery = engine.deleteQuery(key, null); + } else { // Delete a specific key-RID association + deleteQuery = engine.deleteQuery(key, value); + } + + try { + super.deleteDocument(deleteQuery); // Apply to current transaction's writer + deletedQueries.add(deleteQuery); // Track query for getDeletedDocuments() + + if (value != null && value.getIdentity() != null && !value.getIdentity().isNew()) { + // Track that this RID was involved in a delete operation with this key + String ridString = value.getIdentity().toString(); + deletedRidToKeys.computeIfAbsent(ridString, k -> new ArrayList<>()).add(key); + + // Original logic added the specific doc to deletedIdx (deletesExecutor). + // This implies deletesExecutor might track full documents to be deleted from the main index. + // If super.deleteDocument also routes to deletesExecutor based on query, this might be redundant + // or requires deletesExecutor to handle full document additions for its own logic. + // For now, let's assume super.deleteDocument(query) is sufficient for deletesExecutor if it's configured for queries. 
+ // If deletesExecutor *must* have the full doc: + // final Document docToDelete = engine.buildDocument(key, value); // FIXME: engine.buildDocument dependency + // if (deletesExecutor != null) deletesExecutor.addDocument(docToDelete); + } + } catch (final IOException e) { + throw ArcadeDBException.wrapException( // Changed + new ArcadeDBException("Error while deleting documents in transaction from Lucene index (multi-RID)"), e); // Changed + } + } + + @Override + public int numDocs() { + // The base class numDocs() provides NRT view of `writer`. + // Original OLuceneTxChangesMultiRid subtracted deletedDocs.size(). + // `deletedDocs` (now represented by deletedQueries or deletedRidToKeys) refers to deletions + // that will be applied to the main index. + // A precise count is complex. For now, relying on base class numDocs which reflects writer's current state. + // A more accurate count of "net new documents in this TX" would be addedDocuments.size() minus + // documents that were added then deleted within the same TX (if tracked). + // If numDocs should reflect the final state after commit, it's more complex. + // Let's return the NRT view of the current writer. + return super.numDocs(); + } + + @Override + public Set getDeletedLuceneDocs() { + // The original stored actual Document objects that were deleted. + // This is hard to reconstruct if we only store queries or (RID,Key) pairs. + // FIXME: If this exact Set is needed, logic in remove() must re-build and store them. + // For now, returning empty as per LuceneTxChangesSingleRid refactoring. 
+ return Collections.emptySet(); + } + + @Override + public boolean isDeleted(final Document document, final Object key, final Identifiable value) { // Changed + if (value == null || value.getIdentity() == null) return false; + + final List associatedKeys = deletedRidToKeys.get(value.getIdentity().toString()); + if (associatedKeys != null) { + // Check if the provided 'key' (or a general match for the document) is among those deleted for this RID + if (associatedKeys.contains(key)) return true; // Exact key match + + // More complex check: does the 'document' match any of the delete operations for this RID? + // This matches the original MemoryIndex check. + final MemoryIndex memoryIndex = new MemoryIndex(); + // Populate memoryIndex with the fields of the 'document' parameter + for (final IndexableField field : document.getFields()) { + // TODO: This needs proper handling for different field types. + // stringValue() might not be universally appropriate. + // Using KeywordAnalyzer, so it's mostly for exact term matching. + // This part is tricky and might need to use the actual field value from IndexableField. + // For now, assuming stringValue is a simplified placeholder. + if (field.stringValue() != null) { // MemoryIndex cannot add null values + memoryIndex.addField(field.name(), field.stringValue(), new KeywordAnalyzer()); + } + } + + for (final Object deletedKey : associatedKeys) { + // engine.deleteQuery should generate a query that identifies the specific key-RID pair + final Query q = engine.deleteQuery(deletedKey, value); // Query for specific key-RID pair + if (memoryIndex.search(q) > 0.0f) { + return true; // The document matches one of the delete operations for this RID + } + } + } + return false; + } + + @Override + public boolean isUpdated(final Document document, final Object key, final Identifiable value) { // Changed + // For MultiRid, an update is typically a delete of an old association and an add of a new one. 
+ // This class doesn't explicitly track "updates" in a separate set like SingleRid did. + // One could argue an entry is "updated" if it was deleted and then re-added with the same RID but different key/doc. + // However, without more state, this is hard to determine accurately here. + // The original returned false. + return false; + } + + // Implementations for new methods from LuceneTxChanges interface + @Override + public List getAddedDocuments() { + return Collections.unmodifiableList(addedDocuments); + } + + @Override + public Set getDeletedDocuments() { + return Collections.unmodifiableSet(deletedQueries); + } + + @Override + public Map getUpdatedDocuments() { + // Updates are not explicitly tracked as Query->Document in this multi-value implementation. + // An update is a delete of one Lucene document and an add of another. + // To fulfill this, one might need to capture the delete query and the newly added document + // if a "key" conceptually remains the same but its associated RIDs change. + // For now, returning empty, as this requires more specific tracking. + return Collections.unmodifiableMap(updatedDocumentsMap); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesSingleRid.java b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesSingleRid.java new file mode 100644 index 0000000000..967ac52fba --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesSingleRid.java @@ -0,0 +1,203 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * Copyright 2023 Arcade Data Ltd + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. 
+ * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.database.Identifiable; // Changed +import com.arcadedb.exception.ArcadeDBException; // Changed +import com.arcadedb.lucene.engine.LuceneIndexEngine; // Changed +import com.arcadedb.lucene.index.ArcadeLuceneIndexType; // Changed for createField +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; // For Field.Store +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.Query; // For getDeletedDocuments & getUpdatedDocuments + +/** Created by Enrico Risa on 15/09/15. 
*/ +public class LuceneTxChangesSingleRid extends LuceneTxChangesAbstract { // Changed class name and base class + private final Set deletedRids = new HashSet<>(); // RIDs marked for deletion from main index + private final Set updatedRids = new HashSet<>(); // RIDs that were deleted and then re-added (i.e., updated) + + // To support new interface methods + private final List addedDocuments = new ArrayList<>(); + private final Map updatedDocumentsMap = new HashMap<>(); // Query to delete old, Document is new + private final Set deletedQueries = new HashSet<>(); + + + public LuceneTxChangesSingleRid( // Changed + final LuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletesExecutor) { + super(engine, writer, deletesExecutor); + } + + @Override + public void put(final Object key, final Identifiable value, final Document doc) { + // This method is called when a key/value is to be associated in the index. + // The `doc` is the Lucene document representing this association. + try { + if (value != null && value.getIdentity() != null && !value.getIdentity().isNew()) { + String ridString = value.getIdentity().toString(); + if (deletedRids.remove(ridString)) { + // If it was previously deleted in this transaction, it's now an update. + // The TMP field was used to mark such docs for special handling during merge/query, + // but it's unclear if that's needed with current Lucene NRT capabilities or specific merge logic. + // For now, we track it as updated. + doc.add(ArcadeLuceneIndexType.createField(TMP, ridString, Field.Store.YES)); // Changed OLuceneIndexType + updatedRids.add(ridString); + // The document for this RID might have been in `deletesExecutor`; + // an update means it shouldn't be deleted from the main index. + // This might require removing it from `deletesExecutor` if it was added there. + // This is complex and depends on how commit logic handles deletesExecutor. + // For now, just adding to writer. 
+ } + } + super.addDocument(doc); // Use base class to add to writer + addedDocuments.add(doc); // Track for getAddedDocuments() + } catch (IOException e) { + throw ArcadeDBException.wrapException( // Changed + new ArcadeDBException("Unable to add document to transactional Lucene index"), e); // Changed + } + } + + @Override + public void remove(final Object key, final Identifiable value) { + // This method is called to disassociate a key/value. + // `value` is the RID to be removed. + // `key` might be used to construct a more specific delete query if needed, but typically deletion by RID is sufficient. + Query deleteQuery; + if (value == null) { + // Delete by key - this is dangerous for non-unique indexes, but Lucene handles it by query + deleteQuery = engine.deleteQuery(key, null); // engine.deleteQuery should handle null value for key-based delete + } else { + deleteQuery = engine.deleteQuery(key, value); // Specific RID deletion query + } + + try { + super.deleteDocument(deleteQuery); // Use base class to delete from writer (current TX view) + deletedQueries.add(deleteQuery); // Track for getDeletedDocuments() + + if (value != null && value.getIdentity() != null && !value.getIdentity().isNew()) { + // If it's a persistent RID, track it for specific management. + // This logic matches original: add to deletedRids and also add its document to deletesExecutor + String ridString = value.getIdentity().toString(); + deletedRids.add(ridString); + updatedRids.remove(ridString); // If it was updated then deleted, it's just a delete. + + // The original added the full document to deletedIdx (deletesExecutor). + // This implies deletesExecutor might be a "negative" index. + if (deletesExecutor != null) { + // We need the document as it was in the main index to correctly mark it for deletion. + // Building it here might not be accurate if fields changed. + // FIXME: This needs a robust way to get the "old" document or rely on query for deletion. 
+        // For now, if we have 'value', we assume `engine.deleteQuery` is by specific ID.
+        // If `deletesExecutor` is meant to hold docs to be deleted from main index on commit:
+        // Document docToDelete = engine.buildDocument(key, value); // This builds NEW doc.
+        // Instead of adding doc, we add the query. Commit logic will use these queries.
+        }
+      }
+    } catch (final IOException e) {
+      throw ArcadeDBException.wrapException( // Changed
+          new ArcadeDBException("Error while deleting documents in transaction from Lucene index"), e); // Changed
+    }
+  }
+
+  @Override
+  public int numDocs() {
+    // The base class numDocs() returns writer.getDocStats().numDocs or similar NRT count from writer.
+    // This reflects documents added/updated in the current TX.
+    // The original OLuceneTxChangesSingleRid subtracted deleted.size() and updated.size().
+    // Subtracting deletedRids makes sense if these are deletions from the main index state.
+    // Subtracting updatedRids from writer's NRT count is tricky; an update is a delete + add.
+    // The NRT reader from `writer` already accounts for its own adds/deletes.
+    // If `deletedRids` tracks docs to be deleted from the *main committed index*, then this makes sense.
+    // Let's assume the base `numDocs()` gives count from `writer` (adds/updates in tx).
+    // We need to subtract those in `deletedRids` that were not re-added/updated.
+    int writerDocs = super.numDocs();
+    int netDeletes = 0;
+    for (String rid : deletedRids) {
+      if (!updatedRids.contains(rid)) { // If it was deleted and not subsequently updated/re-added
+        netDeletes++;
+      }
+    }
+    // This is still an approximation of the final count after commit.
+    // A true transactional count would need to consider the main index count + adds - (deletes not in adds).
+    // For now, this is an estimate of the TX view.
+    return writerDocs - netDeletes;
+  }
+
+  @Override
+  public Set<Document> getDeletedLuceneDocs() {
+    // This method from the original interface returned Lucene docs marked for deletion.
+    // The new interface has getDeletedDocuments returning Set<Query>.
+    // This method can be implemented if still needed, but might be redundant.
+    // For now, let's try to build it from deletedQueries if possible, or keep original logic if it made sense.
+    // The original stored `deletedDocs` (actual Document objects).
+    // Let's return empty for now, assuming getDeletedDocuments() is the primary.
+    // FIXME: Review if this specific Set is still needed.
+    return Collections.emptySet();
+  }
+
+  @Override
+  public boolean isDeleted(Document document, Object key, Identifiable value) { // Changed
+    return value != null && value.getIdentity() != null && deletedRids.contains(value.getIdentity().toString());
+  }
+
+  @Override
+  public boolean isUpdated(Document document, Object key, Identifiable value) { // Changed
+    return value != null && value.getIdentity() != null && updatedRids.contains(value.getIdentity().toString());
+  }
+
+  // Implementations for new methods from LuceneTxChanges interface
+  @Override
+  public List<Document> getAddedDocuments() {
+    return Collections.unmodifiableList(addedDocuments);
+  }
+
+  @Override
+  public Set<Query> getDeletedDocuments() {
+    return Collections.unmodifiableSet(deletedQueries);
+  }
+
+  @Override
+  public Map<Query, Document> getUpdatedDocuments() {
+    // This class tracks updatedRids. To fulfill Map<Query, Document>,
+    // we'd need to store the delete query and the new document for each update.
+    // The current `put` logic handles updates by re-adding.
+    // FIXME: This needs more sophisticated tracking if specific update queries are required.
+    // For now, returning based on `updatedRids` and `addedDocuments`.
+    // This is an approximation.
+    Map<Query, Document> approxUpdated = new HashMap<>();
+    for (Document doc : addedDocuments) {
+      String tmpRid = doc.get(TMP);
+      if (tmpRid != null && updatedRids.contains(tmpRid)) {
+        // This doc is an update. What was the query to delete the old one?
+        // We don't store the original key for the RID directly here for updates.
+ // This highlights a gap if this specific Map is needed. + // For now, this will be empty or needs more info. + } + } + return Collections.unmodifiableMap(updatedDocumentsMap); // Requires populating this map during put/update + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/util/LuceneDateTools.java b/lucene/src/main/java/com/arcadedb/lucene/util/LuceneDateTools.java new file mode 100644 index 0000000000..ca4c7b775a --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/util/LuceneDateTools.java @@ -0,0 +1,130 @@ +/* + * Copyright 2023 Arcade Data Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.arcadedb.lucene.util; + +import java.text.ParsePosition; +import java.text.SimpleDateFormat; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.Calendar; +import java.util.Date; +import java.util.TimeZone; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class LuceneDateTools { + + private static final Logger logger = Logger.getLogger(LuceneDateTools.class.getName()); + + // Prioritized list of date/datetime formatters + // ISO 8601 with Z / offset / local + private static final DateTimeFormatter ISO_OFFSET_DATE_TIME = DateTimeFormatter.ISO_OFFSET_DATE_TIME; // Handles 'Z' and offsets like +01:00 + private static final DateTimeFormatter ISO_LOCAL_DATE_TIME = DateTimeFormatter.ISO_LOCAL_DATE_TIME; // Handles 'yyyy-MM-ddTHH:mm:ss.SSS' + private static final DateTimeFormatter ISO_LOCAL_DATE = DateTimeFormatter.ISO_LOCAL_DATE; // Handles 'yyyy-MM-dd' + + // Common alternative formats + private static final String ALT_DATETIME_FORMAT_NO_T = "yyyy-MM-dd HH:mm:ss.SSS"; + private static final String ALT_DATETIME_FORMAT_NO_T_NO_MS = "yyyy-MM-dd HH:mm:ss"; + private static final String ALT_DATETIME_FORMAT_NO_T_NO_S_NO_MS = "yyyy-MM-dd HH:mm"; + + + public static Long parseDateTimeToMillis(String dateTimeString) { + if (dateTimeString == null || dateTimeString.isEmpty() || "*".equals(dateTimeString)) { + return null; + } + + // 1. Try parsing as plain long (epoch millis) + try { + return Long.parseLong(dateTimeString); + } catch (NumberFormatException e) { + // Not a long, proceed to date formats + } + + // 2. 
Try ISO_OFFSET_DATE_TIME (handles 'Z' for UTC and offsets) + try { + OffsetDateTime odt = OffsetDateTime.parse(dateTimeString, ISO_OFFSET_DATE_TIME); + return odt.toInstant().toEpochMilli(); + } catch (DateTimeParseException e) { + // ignore and try next format + } + + // 3. Try ISO_LOCAL_DATE_TIME (assumes system default timezone if no offset specified) + // To be safer, we should assume UTC if no offset is present, or make it configurable. + // For now, let's try parsing as local and then converting to UTC for consistency. + try { + LocalDateTime ldt = LocalDateTime.parse(dateTimeString, ISO_LOCAL_DATE_TIME); + return ldt.toInstant(ZoneOffset.UTC).toEpochMilli(); // Assume UTC if no offset + } catch (DateTimeParseException e) { + // ignore and try next format + } + + // 4. Try ISO_LOCAL_DATE (assumes start of day, UTC) + try { + LocalDate ld = LocalDate.parse(dateTimeString, ISO_LOCAL_DATE); + return ld.atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli(); + } catch (DateTimeParseException e) { + // ignore and try next format + } + + // 5. Try alternative SimpleDateFormat patterns (less robust, more ambiguous) + // These assume UTC. If local timezone is implied by strings, SimpleDateFormat needs setTimeZone(TimeZone.getDefault()) + // but for consistency with Lucene (which often uses UTC via DateTools), UTC is safer. 
+ String[] altPatterns = { + ALT_DATETIME_FORMAT_NO_T, + ALT_DATETIME_FORMAT_NO_T_NO_MS, + ALT_DATETIME_FORMAT_NO_T_NO_S_NO_MS + }; + + for (String pattern : altPatterns) { + try { + SimpleDateFormat sdf = new SimpleDateFormat(pattern); + sdf.setTimeZone(TimeZone.getTimeZone("UTC")); // Assume UTC for these patterns too + sdf.setLenient(false); + Date date = sdf.parse(dateTimeString); + return date.getTime(); + } catch (java.text.ParseException ex) { + // ignore and try next pattern + } + } + + logger.log(Level.WARNING, "Failed to parse date/datetime string: {0}", dateTimeString); + return null; // Or throw ParseException if strict parsing is required + } + + public static Long normalizeToDayEpochMillis(long epochMillis) { + Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + cal.setTimeInMillis(epochMillis); + cal.set(Calendar.HOUR_OF_DAY, 0); + cal.set(Calendar.MINUTE, 0); + cal.set(Calendar.SECOND, 0); + cal.set(Calendar.MILLISECOND, 0); + return cal.getTimeInMillis(); + } + + public static Long parseDateToMillis(String dateString) { + Long epochMillis = parseDateTimeToMillis(dateString); + if (epochMillis != null) { + return normalizeToDayEpochMillis(epochMillis); + } + return null; + } +} diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory new file mode 100644 index 0000000000..2dbcff89d3 --- /dev/null +++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory @@ -0,0 +1,21 @@ +# +# /* +# * Copyright 2014 Orient Technologies. +# * +# * Licensed under the Apache License, Version 2.0 (the "License"); +# * you may not use this file except in compliance with the License. 
+# * You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ +# + +com.arcadedb.lucene.OLuceneIndexFactory +com.orientechnologies.spatial.OLuceneSpatialIndexFactory +com.arcadedb.lucene.OLuceneCrossClassIndexFactory diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory new file mode 100644 index 0000000000..72a6b3fbab --- /dev/null +++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory @@ -0,0 +1,21 @@ +# +# /* +# * Copyright 2015 Orient Technologies. +# * +# * Licensed under the Apache License, Version 2.0 (the "License"); +# * you may not use this file except in compliance with the License. +# * You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
+# */ +# + +com.arcadedb.lucene.functions.OLuceneFunctionsFactory +com.orientechnologies.spatial.functions.OSpatialFunctionsFactory +com.arcadedb.lucene.functions.OLuceneCrossClassFunctionsFactory diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory new file mode 100644 index 0000000000..02b1024bcd --- /dev/null +++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory @@ -0,0 +1,20 @@ +# +# /* +# * Copyright 2014 Orient Technologies. +# * +# * Licensed under the Apache License, Version 2.0 (the "License"); +# * you may not use this file except in compliance with the License. +# * You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
+# */ +# + +com.arcadedb.lucene.operator.OLuceneOperatorFactory +com.orientechnologies.spatial.operator.OLuceneSpatialOperatorFactory diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler b/lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler new file mode 100644 index 0000000000..db660fa15e --- /dev/null +++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler @@ -0,0 +1 @@ +com.arcadedb.lucene.ArcadeLuceneIndexFactoryHandler diff --git a/lucene/src/main/resources/plugin.json b/lucene/src/main/resources/plugin.json new file mode 100644 index 0000000000..c0ef9caa33 --- /dev/null +++ b/lucene/src/main/resources/plugin.json @@ -0,0 +1,8 @@ +{ + "name": "lucene-index", + "version": "1.7", + "javaClass": "com.arcadedb.lucene.ArcadeLuceneLifecycleManager", + "parameters": {}, + "description": "This is the Lucene Index integration", + "copyrights": "Orient Technologies LTD" +} diff --git a/pom.xml b/pom.xml index 4dc3484765..a07eaa3c68 100644 --- a/pom.xml +++ b/pom.xml @@ -120,6 +120,7 @@ studio package e2e + lucene