stopwords) {
+ try {
+ final Class<?> classAnalyzer = Class.forName(analyzerFQN);
+ final Constructor<?> constructor = classAnalyzer.getDeclaredConstructor(CharArraySet.class);
+ return (Analyzer) constructor.newInstance(new CharArraySet(stopwords, true));
+ } catch (final ClassNotFoundException e) {
+ throw new IndexException("Analyzer: " + analyzerFQN + " not found", e);
+ } catch (final NoSuchMethodException e) {
+ throw new IndexException("Couldn't instantiate analyzer: public constructor not found", e);
+ } catch (final Exception e) {
+ logger.log(
+ Level.SEVERE, "Error on getting analyzer for Lucene index (continuing with StandardAnalyzer)", e);
+ return new StandardAnalyzer();
+ }
+ }
+
/**
 * Distinguishes the analyzer used at indexing time from the one used at query time.
 * The lowercase form (e.g. {@code "index"}) is the conventional configuration key.
 */
public enum AnalyzerKind {
  INDEX,
  QUERY;

  // Cached lowercase form of the constant name; Locale.ENGLISH keeps the
  // result stable regardless of the JVM default locale (e.g. Turkish "I").
  private final String lowerCaseName = name().toLowerCase(Locale.ENGLISH);

  @Override
  public String toString() {
    return lowerCaseName;
  }
}
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLucenePerFieldAnalyzerWrapper.java b/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLucenePerFieldAnalyzerWrapper.java
new file mode 100644
index 0000000000..ba140f59eb
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/analyzer/ArcadeLucenePerFieldAnalyzerWrapper.java
@@ -0,0 +1,89 @@
+package com.arcadedb.lucene.analyzer;
+
+import static com.arcadedb.lucene.engine.OLuceneIndexEngineAbstract.RID; // FIXME: This might need to be ArcadeDB specific constant if RID definition changes
+
+import com.arcadedb.lucene.index.ArcadeLuceneIndexType; // FIXME: Ensure this is the correct refactored class for OLuceneIndexType
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
+
+/**
+ * Created by frank on 10/12/15.
+ *
+ * Doesn't allow to wrap components or readers. Thread local resources can be delegated to the
+ * delegate analyzer, but not allocated on this analyzer (limit memory consumption). Uses a per
+ * field reuse strategy.
+ */
+public class ArcadeLucenePerFieldAnalyzerWrapper extends DelegatingAnalyzerWrapper {
+ private final Analyzer defaultDelegateAnalyzer;
+ private final Map fieldAnalyzers;
+
+ /**
+ * Constructs with default analyzer.
+ *
+ * @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use
+ * the one provided here.
+ */
+ public ArcadeLucenePerFieldAnalyzerWrapper(final Analyzer defaultAnalyzer) {
+ this(defaultAnalyzer, new HashMap<>());
+ }
+
+ /**
+ * Constructs with default analyzer and a map of analyzers to use for specific fields.
+ *
+ * @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use
+ * the one provided here.
+ * @param fieldAnalyzers a Map (String field name to the Analyzer) to be used for those fields
+ */
+ public ArcadeLucenePerFieldAnalyzerWrapper(
+ final Analyzer defaultAnalyzer, final Map fieldAnalyzers) {
+ super(PER_FIELD_REUSE_STRATEGY);
+ this.defaultDelegateAnalyzer = defaultAnalyzer;
+ this.fieldAnalyzers = new HashMap<>();
+
+ this.fieldAnalyzers.putAll(fieldAnalyzers);
+
+ this.fieldAnalyzers.put(RID, new KeywordAnalyzer());
+ this.fieldAnalyzers.put(ArcadeLuceneIndexType.RID_HASH, new KeywordAnalyzer());
+ this.fieldAnalyzers.put("_CLASS", new KeywordAnalyzer());
+ this.fieldAnalyzers.put("_CLUSTER", new KeywordAnalyzer());
+ this.fieldAnalyzers.put("_JSON", new KeywordAnalyzer());
+ }
+
+ @Override
+ protected Analyzer getWrappedAnalyzer(final String fieldName) {
+ final Analyzer analyzer = fieldAnalyzers.get(fieldName);
+ return (analyzer != null) ? analyzer : defaultDelegateAnalyzer;
+ }
+
+ @Override
+ public String toString() {
+ return "ArcadeLucenePerFieldAnalyzerWrapper(" // Updated class name in toString
+ + fieldAnalyzers
+ + ", default="
+ + defaultDelegateAnalyzer
+ + ")";
+ }
+
+ public ArcadeLucenePerFieldAnalyzerWrapper add(final String field, final Analyzer analyzer) {
+ fieldAnalyzers.put(field, analyzer);
+ return this;
+ }
+
+ public ArcadeLucenePerFieldAnalyzerWrapper add(final ArcadeLucenePerFieldAnalyzerWrapper wrapper) { // Changed parameter type
+ fieldAnalyzers.putAll(wrapper.getAnalyzers());
+ return this;
+ }
+
+ public ArcadeLucenePerFieldAnalyzerWrapper remove(final String field) {
+ fieldAnalyzers.remove(field);
+ return this;
+ }
+
+ protected Map getAnalyzers() {
+ return fieldAnalyzers;
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java
new file mode 100644
index 0000000000..b6fd7d7ebb
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneDocumentBuilder.java
@@ -0,0 +1,390 @@
+/*
+ * Copyright 2023 Arcade Data Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.arcadedb.lucene.builder;
+
+import com.arcadedb.database.Database;
+import com.arcadedb.database.DatabaseInternal;
+import com.arcadedb.database.Identifiable;
+import com.arcadedb.document.Document; // ArcadeDB Document
+import com.arcadedb.index.IndexDefinition;
+import com.arcadedb.lucene.index.ArcadeLuceneIndexType;
+import com.arcadedb.schema.DocumentType;
+import com.arcadedb.schema.Property;
+import com.arcadedb.schema.Type;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.apache.lucene.document.Field; // Lucene Field
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+
+public class LuceneDocumentBuilder {
+
+ private static final Logger logger = Logger.getLogger(LuceneDocumentBuilder.class.getName());
+
+ public org.apache.lucene.document.Document build(IndexDefinition indexDefinition,
+ Object key, // The key used for indexing (can be composite)
+ Identifiable identifiableValue, // The record to index
+ Map collectionFields, // Info about collection fields (if needed, from old engine)
+ com.arcadedb.document.Document metadata) { // Query/index time metadata
+
+ org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
+
+ // Add RID field
+ if (identifiableValue != null && identifiableValue.getIdentity() != null) {
+ luceneDoc.add(ArcadeLuceneIndexType.createRidField(identifiableValue));
+ }
+
+ // Add KEY field(s) if the key is provided and the index is not on specific fields (manual index style)
+ // For automatic indexes, the key is usually derived from the document's fields.
+ if (key != null && (indexDefinition.getFields() == null || indexDefinition.getFields().isEmpty())) {
+ // This logic is more for manual indexes where 'key' is the value being indexed.
+ // For automatic indexes on document fields, this 'key' might be redundant or handled differently.
+ // Assuming KEY field stores the string representation of the key for now.
+ luceneDoc.add(new StringField(ArcadeLuceneIndexType.KEY, key.toString(), Field.Store.YES));
+ }
+
+
+ if (identifiableValue instanceof com.arcadedb.document.Document) {
+ com.arcadedb.document.Document record = (com.arcadedb.document.Document) identifiableValue;
+ DatabaseInternal db = record.getDatabase();
+ DocumentType recordType = record.getType();
+
+ List fieldsToIndex = indexDefinition.getFields();
+ if (fieldsToIndex == null || fieldsToIndex.isEmpty()) {
+ // If no specific fields defined for index (e.g. manual index),
+ // and we already added KEY, then we might be done for primary content for this key.
+ // However, if the 'value' (record) itself should have its fields indexed,
+ // then fieldsToIndex should probably default to all fields of the record.
+ // This part depends on the semantics of "automatic" vs "manual" Lucene indexes.
+ // For now, if no fields are in definition, we assume only KEY and RID are added.
+ } else {
+ for (String fieldName : fieldsToIndex) {
+ if (!record.has(fieldName)) {
+ continue;
+ }
+ Object fieldValue = record.get(fieldName);
+ if (fieldValue == null) {
+ continue;
+ }
+
+ Property property = recordType != null ? recordType.getProperty(fieldName) : null;
+ Type fieldType = property != null ? property.getType() : Type.STRING; // Default to STRING if no schema type
+
+ // Determine if field should be stored and sorted based on index definition options or metadata
+ boolean storeField = isToStore(indexDefinition, fieldName, metadata);
+ boolean sortField = isToSort(indexDefinition, fieldName, metadata);
+
+ // Get schema type of the field, and for collections/maps, the linked type
+ Type linkedType = (property != null) ? property.getOfType() : null;
+
+ indexValue(luceneDoc, fieldName, fieldValue, fieldType, linkedType,
+ storeField, sortField, 1, indexDefinition, metadata, db);
+ }
+ }
+ } else if (identifiableValue != null) {
+ // If the value is an Identifiable but not a Document (e.g. just an RID for a manual index key)
+ // and fields are defined in the index, this implies we should load the document
+ // and then process its fields. This case should ideally be handled by the caller
+ // by passing the actual Document record.
+ // If only key and RID are indexed for non-Document identifiables, current logic is okay.
+ }
+
+
+ // Add _CLASS field if type is available
+ String typeName = indexDefinition.getTypeName();
+ if (typeName != null && !typeName.isEmpty()) {
+ luceneDoc.add(new StringField("_CLASS", typeName, Field.Store.YES)); // Non-analyzed
+ }
+
+ // Log usage of collectionFields if it's passed but not deeply integrated yet
+ if (collectionFields != null && !collectionFields.isEmpty()) {
+ // The `collectionFields` map (from OrientDB's engine) indicated if a field was a collection of simple types.
+ // This information might be used to guide specific tokenization or if ArcadeLuceneIndexType.createFields
+ // needs more hints for collections of scalars vs. collections of embeddeds, though getType and getOfType should cover most cases.
+ // For now, just logging its presence.
+ logger.finer("Received 'collectionFields' map, but its specific nuanced behavior is not fully implemented beyond standard collection handling: " + collectionFields);
+ }
+
+ return luceneDoc;
+ }
+
+ /**
+ * Determines if a field should be stored in the Lucene index based on index definition options.
+ * Convention:
+ * - "storeFields": "*" or "ALL" means store all.
+ * - "storeFields": "fieldA,fieldB" means store only these.
+ * - "dontStoreFields": "fieldC,fieldD" means do not store these (takes precedence).
+ * - "store.": "true" or "false" for field-specific setting.
+ * Defaults to Field.Store.NO if not specified otherwise for full-text search efficiency.
+ */
+ private boolean isToStore(IndexDefinition indexDefinition, String fieldName, com.arcadedb.document.Document metadata) {
+ Map options = indexDefinition.getOptions();
+ // Query-time metadata can override index-time options
+ if (metadata != null) {
+ Object fieldSpecificStoreMeta = metadata.get("store." + fieldName);
+ if (fieldSpecificStoreMeta != null) return Boolean.parseBoolean(fieldSpecificStoreMeta.toString());
+
+ List queryStoredFields = metadata.get("storedFields"); // Assuming list of strings
+ if (queryStoredFields != null) {
+ if (queryStoredFields.contains(fieldName)) return true;
+ if (queryStoredFields.contains("*") || queryStoredFields.contains("ALL")) return true;
+ }
+ List queryDontStoreFields = metadata.get("dontStoreFields");
+ if (queryDontStoreFields != null && queryDontStoreFields.contains(fieldName)) return false;
+ }
+
+ // Index definition options
+ if (options != null) {
+ String fieldSpecificStoreOpt = options.get("store." + fieldName);
+ if (fieldSpecificStoreOpt != null) return Boolean.parseBoolean(fieldSpecificStoreOpt);
+
+ String dontStoreFieldsOpt = options.get("dontStoreFields");
+ if (dontStoreFieldsOpt != null) {
+ List dontStoreList = Arrays.asList(dontStoreFieldsOpt.toLowerCase().split("\\s*,\\s*"));
+ if (dontStoreList.contains(fieldName.toLowerCase())) return false;
+ }
+
+ String storeFieldsOpt = options.get("storeFields");
+ if (storeFieldsOpt != null) {
+ if ("*".equals(storeFieldsOpt) || "ALL".equalsIgnoreCase(storeFieldsOpt)) return true;
+ List storeList = Arrays.asList(storeFieldsOpt.toLowerCase().split("\\s*,\\s*"));
+ if (storeList.contains(fieldName.toLowerCase())) return true;
+ // If storeFields is specified but doesn't list this field, and no "*" or "ALL", assume don't store (unless dontStoreFields also doesn't list it).
+ // This means explicit list in storeFields acts as a whitelist if present.
+ return false;
+ }
+ }
+ // Default if no specific rules found: DO NOT STORE fields unless specified.
+ return false;
+ }
+
+ /**
+ * Determines if a field should have DocValues for sorting.
+ * Convention:
+ * - "storeFields": "*" or "ALL" means store all.
+ * - "storeFields": "fieldA,fieldB" means store only these.
+ * - "dontStoreFields": "fieldC,fieldD" means do not store these (takes precedence).
+ * - "store.": "true" or "false" for field-specific setting.
+ * Defaults to Field.Store.NO if not specified otherwise for full-text search efficiency.
+ */
+ private boolean isToStore(IndexDefinition indexDefinition, String fieldName, com.arcadedb.document.Document metadata) {
+ Map options = indexDefinition.getOptions();
+ // Query-time metadata can override index-time options
+ if (metadata != null) {
+ Object fieldSpecificStoreMeta = metadata.get("store." + fieldName);
+ if (fieldSpecificStoreMeta != null) return Boolean.parseBoolean(fieldSpecificStoreMeta.toString());
+
+ List queryStoredFields = metadata.get("storedFields"); // Assuming list of strings
+ if (queryStoredFields != null) {
+ if (queryStoredFields.contains(fieldName)) return true;
+ if (queryStoredFields.contains("*") || queryStoredFields.contains("ALL")) return true;
+ }
+ List queryDontStoreFields = metadata.get("dontStoreFields");
+ if (queryDontStoreFields != null && queryDontStoreFields.contains(fieldName)) return false;
+ }
+
+ // Index definition options
+ if (options != null) {
+ String fieldSpecificStoreOpt = options.get("store." + fieldName);
+ if (fieldSpecificStoreOpt != null) return Boolean.parseBoolean(fieldSpecificStoreOpt);
+
+ String dontStoreFieldsOpt = options.get("dontStoreFields");
+ if (dontStoreFieldsOpt != null) {
+ List dontStoreList = Arrays.asList(dontStoreFieldsOpt.toLowerCase().split("\\s*,\\s*"));
+ if (dontStoreList.contains(fieldName.toLowerCase())) return false;
+ }
+
+ String storeFieldsOpt = options.get("storeFields");
+ if (storeFieldsOpt != null) {
+ if ("*".equals(storeFieldsOpt) || "ALL".equalsIgnoreCase(storeFieldsOpt)) return true;
+ List storeList = Arrays.asList(storeFieldsOpt.toLowerCase().split("\\s*,\\s*"));
+ if (storeList.contains(fieldName.toLowerCase())) return true;
+ // If storeFields is specified but doesn't list this field, and no "*" or "ALL", assume don't store (unless dontStoreFields also doesn't list it).
+ // This means explicit list in storeFields acts as a whitelist if present.
+ return false;
+ }
+ }
+ // Default if no specific rules found: DO NOT STORE fields unless specified.
+ return false;
+ }
+
+ /**
+ * Determines if a field should have DocValues for sorting.
+ * Convention:
+ * - "sortableFields": "*" or "ALL" (less common for global sortability).
+ * - "sortableFields": "fieldA,fieldB".
+ * - "sort.": "true" or "false".
+ * Defaults to false.
+ */
+ private boolean isToSort(IndexDefinition indexDefinition, String fieldName, com.arcadedb.document.Document metadata) {
+ Map options = indexDefinition.getOptions();
+ // Query-time metadata can override index-time options
+ if (metadata != null) {
+ Object fieldSpecificSortMeta = metadata.get("sort." + fieldName);
+ if (fieldSpecificSortMeta != null) return Boolean.parseBoolean(fieldSpecificSortMeta.toString());
+
+ List querySortableFields = metadata.get("sortableFields"); // Assuming list of strings
+ if (querySortableFields != null) {
+ if (querySortableFields.contains("*") || querySortableFields.contains("ALL")) return true;
+ if (querySortableFields.contains(fieldName)) return true;
+ }
+ }
+
+ // Index definition options
+ if (options != null) {
+ String fieldSpecificSortOpt = options.get("sort." + fieldName);
+ if (fieldSpecificSortOpt != null) return Boolean.parseBoolean(fieldSpecificSortOpt);
+
+ String sortableFieldsOpt = options.get("sortableFields");
+ if (sortableFieldsOpt != null) {
+ if ("*".equals(sortableFieldsOpt) || "ALL".equalsIgnoreCase(sortableFieldsOpt)) return true;
+ List sortList = Arrays.asList(sortableFieldsOpt.toLowerCase().split("\\s*,\\s*"));
+ if (sortList.contains(fieldName.toLowerCase())) return true;
+ // If sortableFields is specified but doesn't list this field, and no "*" or "ALL", assume not sortable.
+ return false;
+ }
+ }
+ return false; // Default to not sortable
+ }
+
+ private void indexValue(org.apache.lucene.document.Document luceneDoc, String fieldName, Object fieldValue,
+ Type fieldType, Type linkedType, boolean storeField, boolean sortField,
+ int currentDepth, IndexDefinition rootIndexDefinition,
+ com.arcadedb.document.Document rootMetadata, DatabaseInternal database) {
+
+ int maxDepth = getMaxDepth(rootIndexDefinition, fieldName);
+ if (currentDepth > maxDepth) {
+ logger.finer("Max indexing depth ("+ maxDepth +") reached for field: " + fieldName);
+ return;
+ }
+
+ if (fieldValue instanceof Collection && (fieldType == Type.EMBEDDEDLIST || fieldType == Type.EMBEDDEDSET || fieldType == Type.LIST)) {
+ Collection> collection = (Collection>) fieldValue;
+ Type actualLinkedType = linkedType;
+ if (actualLinkedType == null && !collection.isEmpty()) {
+ Object firstElement = collection.iterator().next();
+ if (firstElement instanceof Document) actualLinkedType = Type.EMBEDDED;
+ else if (firstElement != null) actualLinkedType = Type.getTypeByValue(firstElement);
+ }
+
+ if (actualLinkedType != null && actualLinkedType != Type.EMBEDDED && actualLinkedType != Type.EMBEDDEDMAP) { // Scalar list/set
+ for (Object item : collection) {
+ if (item != null) {
+ List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item,
+ storeField ? Field.Store.YES : Field.Store.NO, sortField, actualLinkedType);
+ for (Field f : itemFields) luceneDoc.add(f);
+ }
+ }
+ } else if (actualLinkedType == Type.EMBEDDED || (actualLinkedType == null && collection.iterator().hasNext() && collection.iterator().next() instanceof Document)){ // EMBEDDEDLIST/SET of Documents
+ for (Object item : collection) {
+ if (item instanceof Document) {
+ indexEmbeddedContent(luceneDoc, fieldName, (Document) item, currentDepth, rootIndexDefinition, rootMetadata, database);
+ } else if (item != null) { // Non-document item in what was expected to be an embedded list
+ logger.finer("Item in embedded list for field '" + fieldName + "' is not a Document, indexing toString(): " + item.getClass());
+ List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item.toString(), storeField ? Field.Store.YES : Field.Store.NO, false, Type.STRING);
+ for (Field f : itemFields) luceneDoc.add(f);
+ }
+ }
+ } else {
+ logger.finer("Collection field '" + fieldName + "' contains unhandled linked type: " + actualLinkedType + " or collection is empty/mixed.");
+ // Optionally index toString() for each item as a fallback
+ for (Object item : collection) {
+ if (item != null) {
+ List itemFields = ArcadeLuceneIndexType.createFields(fieldName, item.toString(), storeField ? Field.Store.YES : Field.Store.NO, false, Type.STRING);
+ for (Field f : itemFields) luceneDoc.add(f);
+ }
+ }
+ }
+ } else if (fieldValue instanceof Map && fieldType == Type.EMBEDDEDMAP) {
+ indexEmbeddedContent(luceneDoc, fieldName, (Map) fieldValue, currentDepth, rootIndexDefinition, rootMetadata, database);
+ } else if (fieldValue instanceof Document && fieldType == Type.EMBEDDED) {
+ indexEmbeddedContent(luceneDoc, fieldName, (Document) fieldValue, currentDepth, rootIndexDefinition, rootMetadata, database);
+ } else { // Scalar field or unhandled complex type treated as scalar
+ List luceneFields = ArcadeLuceneIndexType.createFields(fieldName, fieldValue,
+ storeField ? Field.Store.YES : Field.Store.NO, sortField, fieldType);
+ for (Field f : luceneFields) luceneDoc.add(f);
+ }
+ }
+
+ private void indexEmbeddedContent(org.apache.lucene.document.Document luceneDoc, String baseFieldName,
+ Object embeddedObject, int currentDepth,
+ IndexDefinition rootIndexDefinition, com.arcadedb.document.Document rootMetadata,
+ DatabaseInternal database) {
+ if (embeddedObject instanceof Document) {
+ Document embeddedDoc = (Document) embeddedObject;
+ DocumentType embeddedSchemaType = embeddedDoc.getType();
+
+ for (String innerFieldName : embeddedDoc.getPropertyNames()) {
+ Object innerFieldValue = embeddedDoc.get(innerFieldName);
+ if (innerFieldValue == null) continue;
+
+ String prefixedFieldName = baseFieldName + "." + innerFieldName;
+ // TODO: Add options to include/exclude specific embedded fields `rootIndexDefinition.getOptions().get("includeEmbedded." + prefixedFieldName)`
+
+ Property innerProperty = (embeddedSchemaType != null) ? embeddedSchemaType.getProperty(innerFieldName) : null;
+ Type innerFieldType = (innerProperty != null) ? innerProperty.getType() : Type.getTypeByValue(innerFieldValue);
+ Type innerLinkedType = (innerProperty != null) ? innerProperty.getOfType() : null;
+
+ boolean storeField = isToStore(rootIndexDefinition, prefixedFieldName, rootMetadata);
+ boolean sortField = isToSort(rootIndexDefinition, prefixedFieldName, rootMetadata);
+
+ indexValue(luceneDoc, prefixedFieldName, innerFieldValue, innerFieldType, innerLinkedType,
+ storeField, sortField, currentDepth + 1, rootIndexDefinition, rootMetadata, database);
+ }
+ } else if (embeddedObject instanceof Map) {
+ @SuppressWarnings("unchecked")
+ Map map = (Map) embeddedObject;
+ for (Map.Entry entry : map.entrySet()) {
+ String mapKey = entry.getKey();
+ Object mapValue = entry.getValue();
+ if (mapValue == null) continue;
+
+ String prefixedFieldName = baseFieldName + "." + mapKey;
+ // TODO: Add options to include/exclude specific embedded fields
+
+ Type valueType = Type.getTypeByValue(mapValue); // Infer type from map value
+ // For maps, linkedType is generally not applicable unless map values are consistently typed Documents.
+
+ boolean storeField = isToStore(rootIndexDefinition, prefixedFieldName, rootMetadata);
+ boolean sortField = isToSort(rootIndexDefinition, prefixedFieldName, rootMetadata);
+
+ // Here, we treat map values. If a map value is another Document/Map/Collection, it will be handled by recursive call.
+ indexValue(luceneDoc, prefixedFieldName, mapValue, valueType, null, // Pass null for linkedType for map values for now
+ storeField, sortField, currentDepth + 1, rootIndexDefinition, rootMetadata, database);
+ }
+ }
+ // Collections within embedded content are handled by the recursive call to indexValue
+ }
+
+ private int getMaxDepth(IndexDefinition indexDefinition, String fieldName) {
+ Map options = indexDefinition.getOptions();
+ if (options != null) {
+ String specificDepth = options.get("embeddedIndexingDepth." + fieldName);
+ if (specificDepth != null) {
+ try { return Integer.parseInt(specificDepth); } catch (NumberFormatException e) { /* ignore */ }
+ }
+ String globalDepth = options.get("embeddedIndexingDepth");
+ if (globalDepth != null) {
+ try { return Integer.parseInt(globalDepth); } catch (NumberFormatException e) { /* ignore */ }
+ }
+ }
+ return 1; // Default depth if not specified
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneQueryBuilder.java b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneQueryBuilder.java
new file mode 100644
index 0000000000..294647b19d
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/builder/LuceneQueryBuilder.java
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2023 Arcade Data Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.arcadedb.lucene.builder;
+
+import com.arcadedb.database.Database;
+import com.arcadedb.database.DatabaseInternal; // Required for schema access
+import com.arcadedb.document.Document; // ArcadeDB Document
+import com.arcadedb.index.CompositeKey;
+import com.arcadedb.index.IndexDefinition;
+import com.arcadedb.lucene.parser.ArcadeLuceneMultiFieldQueryParser; // FIXME: Needs refactoring
+import com.arcadedb.schema.DocumentType;
+import com.arcadedb.schema.Property;
+import com.arcadedb.schema.Schema;
+import com.arcadedb.schema.Type;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeQuery; // For string ranges, newStringRange
+import org.apache.lucene.index.Term;
+// Import Point field range queries
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.FloatPoint;
+import org.apache.lucene.document.DoublePoint;
+
+
+public class LuceneQueryBuilder {
+
+ private static final Logger logger = Logger.getLogger(LuceneQueryBuilder.class.getName());
+ public static final Document EMPTY_METADATA = new Document(null); // Assuming Document can be db-less for this constant
+
+ private final boolean allowLeadingWildcard;
+ private final boolean splitOnWhitespace;
+
+ public LuceneQueryBuilder(Document metadata) {
+ if (metadata == null) {
+ metadata = EMPTY_METADATA;
+ }
+ this.allowLeadingWildcard = Boolean.TRUE.equals(metadata.get("allowLeadingWildcard"));
+ // Lucene's StandardQueryParser and MultiFieldQueryParser split on whitespace by default.
+ // This setting in OrientDB was more about how the string was fed *to* the parser or if specific syntax implied no split.
+ // For now, assuming default Lucene behavior is mostly fine. If specific "phrase" vs "term" logic is needed from splitOnWhitespace,
+ // it would affect how the query string is constructed or which parser is used.
+ this.splitOnWhitespace = Boolean.TRUE.equals(metadata.get("splitOnWhitespace")); // Default true
+ }
+
+ public Query query(IndexDefinition indexDefinition, Object key, Document metadata, Analyzer analyzer, DatabaseInternal database) throws ParseException {
+ if (key == null) {
+ throw new IllegalArgumentException("Query key cannot be null");
+ }
+ if (metadata == null) {
+ metadata = EMPTY_METADATA;
+ }
+
+ String[] fields = indexDefinition.getFields().toArray(new String[0]);
+ if (fields.length == 0) {
+ // Default to a common field if not specified, e.g. "_all" or a convention
+ // This case needs clarification based on how schema-less Lucene indexes were handled.
+ // For now, let's assume if no fields, it might be a special query type or error.
+ // Or, if key is string, it searches default fields of the parser.
+ // For now, if no fields defined in index, and key is String, let parser use its default field.
+ // This requires parser to be configured with default field(s).
+ // fields = new String[] { "_DEFAULT_SEARCH_FIELD" }; // Placeholder for default search field
+ logger.warning("Querying Lucene index " + indexDefinition.getName() + " with no fields defined in index definition. Query may not behave as expected.");
+ }
+
+ Map fieldTypes = new HashMap<>();
+ if (database != null && indexDefinition.getTypeName() != null) {
+ Schema schema = database.getSchema();
+ DocumentType docType = schema.getType(indexDefinition.getTypeName());
+ if (docType != null) {
+ for (String fieldName : indexDefinition.getFields()) {
+ Property prop = docType.getProperty(fieldName);
+ if (prop != null) {
+ fieldTypes.put(fieldName, prop.getType());
+ } else {
+ fieldTypes.put(fieldName, Type.STRING); // Default if property not found in schema
+ }
+ }
+ } else {
+ for (String fieldName : indexDefinition.getFields()) {
+ fieldTypes.put(fieldName, Type.STRING); // Default if type not found
+ }
+ }
+ } else {
+ for (String fieldName : indexDefinition.getFields()) {
+ fieldTypes.put(fieldName, Type.STRING); // Default if no DB or typeName
+ }
+ }
+
+
+ if (key instanceof String) {
+ // ArcadeLuceneMultiFieldQueryParser is now available.
+ ArcadeLuceneMultiFieldQueryParser parser = new ArcadeLuceneMultiFieldQueryParser(fieldTypes, fields, analyzer);
+ parser.setAllowLeadingWildcard(allowLeadingWildcard);
+ // this.splitOnWhitespace is available but MultiFieldQueryParser handles split on whitespace by default.
+ // If specific behavior like "always phrase if false" is needed, parser logic would be more complex.
+ // For now, assuming standard MFQP behavior is sufficient.
+ // if (!this.splitOnWhitespace) { /* Potentially use different parser or pre-process query string */ }
+
+ Map boost = metadata.get("boost", Map.class);
+ if (boost != null) {
+ parser.setBoosts(boost);
+ }
+ return parser.parse((String) key);
+
+ } else if (key instanceof CompositeKey) {
+ CompositeKey compositeKey = (CompositeKey) key;
+ List keys = compositeKey.getKeys();
+ BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
+
+ if (keys.size() != fields.length) {
+ throw new IllegalArgumentException("CompositeKey size does not match index definition fields count.");
+ }
+
+ for (int i = 0; i < keys.size(); i++) {
+ Object partKey = keys.get(i);
+ String fieldName = fields[i];
+ Type fieldType = fieldTypes.getOrDefault(fieldName, Type.STRING);
+
+ if (partKey != null) {
+ Query partQuery = com.arcadedb.lucene.index.ArcadeLuceneIndexType.createExactFieldQuery(fieldName, partKey, fieldType, database);
+ booleanQuery.add(partQuery, BooleanClause.Occur.MUST);
+ }
+ }
+ return booleanQuery.build();
+ }
+ // FIXME: Add support for specific range query objects if defined (this would be a new key instanceof MyCustomRangeObject)
+ // else if (key instanceof ...) {
+ // MyCustomRange range = (MyCustomRange) key;
+ // String fieldName = range.getField();
+ // Type fieldType = fieldTypes.getOrDefault(fieldName, Type.STRING);
+ // if (fieldType.isNumeric()) {
+ // if (fieldType == Type.LONG || fieldType == Type.INTEGER || fieldType == Type.SHORT || fieldType == Type.BYTE || fieldType == Type.DATETIME || fieldType == Type.DATE) {
+ // return LongPoint.newRangeQuery(fieldName, (Long)range.getLower(), (Long)range.getUpper());
+ // } // Add other numeric types
+ // } else if (fieldType == Type.STRING) {
+ // return TermRangeQuery.newStringRange(fieldName, range.getLower().toString(), range.getUpper().toString(), range.isLowerInclusive(), range.isUpperInclusive());
+ // }
+ // }
+
+ // Default fallback or throw exception for unsupported key types
+ logger.warning("Unsupported key type for Lucene query: " + key.getClass().getName() + ". Attempting TermQuery on toString().");
+ return new TermQuery(new Term(fields.length > 0 ? fields[0] : "_DEFAULT_", key.toString())); // Fallback, likely not useful
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java
new file mode 100644
index 0000000000..8a023552a0
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneCrossClassIndexEngine.java
@@ -0,0 +1,423 @@
+package com.arcadedb.lucene.engine;
+
+// import static com.arcadedb.lucene.OLuceneIndexFactory.LUCENE_ALGORITHM; // FIXME: Define or import appropriately
+
+import com.arcadedb.database.DatabaseThreadLocal;
+import com.arcadedb.database.Identifiable;
+import com.arcadedb.database.RID;
+import com.arcadedb.database.RecordId;
+import com.arcadedb.database.TransactionContext; // For AtomicOperation
+// import com.arcadedb.database.config.IndexEngineData; // FIXME: Find ArcadeDB equivalent or refactor
+import com.arcadedb.document.Document;
+import com.arcadedb.engine.Storage;
+import com.arcadedb.index.Index;
+import com.arcadedb.index.IndexDefinition;
+import com.arcadedb.index.IndexKeyUpdater;
+import com.arcadedb.index.IndexMetadata;
+import com.arcadedb.index.engine.IndexValidator;
+import com.arcadedb.index.IndexValuesTransformer;
+import com.arcadedb.lucene.analyzer.ArcadeLucenePerFieldAnalyzerWrapper; // Refactored
+import com.arcadedb.lucene.collections.LuceneResultSet; // FIXME: Needs refactoring
+import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring
+import com.arcadedb.lucene.parser.ArcadeLuceneMultiFieldQueryParser; // FIXME: Needs refactoring
+import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring
+import com.arcadedb.lucene.query.LuceneQueryContext; // FIXME: Needs refactoring
+import com.arcadedb.lucene.tx.LuceneTxChanges; // FIXME: Needs refactoring
+import com.arcadedb.schema.DocumentType; // Changed from OClass
+import com.arcadedb.schema.Type; // Changed from OType
+import com.arcadedb.utility.Pair; // Changed from ORawPair
+import com.arcadedb.lucene.engine.ArcadeLuceneEngineUtils; // Added import
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document; // Lucene Document
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.highlight.TextFragment;
+
+/**
+ * Created by frank on 03/11/2016.
+ */
+public class ArcadeLuceneCrossClassIndexEngine implements LuceneIndexEngine { // Changed class name and interface
+ private static final Logger logger =
+ Logger.getLogger(ArcadeLuceneCrossClassIndexEngine.class.getName()); // Changed logger
+ private final Storage storage; // Changed OStorage
+ private final String indexName;
+ private final int indexId;
+ private static final String LUCENE_ALGORITHM = "LUCENE"; // Placeholder for algorithm name
+ private IndexMetadata markerIndexMetadata; // Optional: if you need to store it
+
+
+ public ArcadeLuceneCrossClassIndexEngine(int indexId, Storage storage, String indexName) { // Changed OStorage
+ this.indexId = indexId;
+ this.storage = storage;
+ this.indexName = indexName;
+ }
+
+ @Override
+ public void init(IndexMetadata metadata) { // Changed OIndexMetadata
+ // This engine orchestrates queries across other Lucene indexes.
+ // It doesn't manage its own Lucene directory or writers in the same way
+ // a full-text index engine does.
+ // The 'metadata' here belongs to the "marker" index that caused this
+ // cross-class engine to be instantiated.
+
+ this.markerIndexMetadata = metadata; // Store if needed for any config
+
+ // For now, primarily log initialization.
+ // Any specific configurations for the cross-class behavior that might
+ // be stored in the markerIndexMetadata.getOptions() could be parsed here.
+ logger.info("ArcadeLuceneCrossClassIndexEngine initialized for marker index: " + (metadata != null ? metadata.getName() : "null"));
+
+ // Example: If you had a default list of fields to use for cross-class searches
+ // if not specified in query metadata, you could load it from metadata.getOptions().
+ // Map options = metadata.getOptions();
+ // String defaultFieldsStr = options.get("crossClassDefaultFields");
+ // if (defaultFieldsStr != null) { ... parse and store ... }
+ }
+
+ @Override
+ public void flush() {}
+
+ @Override
+ public int getId() {
+ return indexId;
+ }
+
+ // FIXME: IndexEngineData equivalent in ArcadeDB?
+ @Override
+ public void create(TransactionContext atomicOperation, Object data) throws IOException {} // Changed OAtomicOperation, IndexEngineData
+
+ @Override
+ public void delete(TransactionContext atomicOperation) {} // Changed OAtomicOperation
+
+ // FIXME: IndexEngineData equivalent in ArcadeDB?
+ @Override
+ public void load(Object data) {} // Changed IndexEngineData
+
+ @Override
+ public boolean remove(TransactionContext atomicOperation, Object key) { // Changed OAtomicOperation
+ return false;
+ }
+
+ @Override
+ public void clear(TransactionContext atomicOperation) {} // Changed OAtomicOperation
+
+ @Override
+ public void close() {}
+
+ @Override
+ public Object get(Object key) {
+ // FIXME: This method requires significant refactoring once dependent classes are updated
+ // (LuceneKeyAndMetadata, ArcadeLuceneFullTextIndex, ArcadeLuceneMultiFieldQueryParser, OLuceneIndexEngineUtils, LuceneResultSet)
+
+ final LuceneKeyAndMetadata keyAndMeta = (LuceneKeyAndMetadata) key; // FIXME
+ final Document arcadedbMetadata = keyAndMeta.metadata; // ArcadeDB Document // FIXME
+ final List excludes =
+ Optional.ofNullable(arcadedbMetadata.>getProperty("excludes"))
+ .orElse(Collections.emptyList());
+ final List includes =
+ Optional.ofNullable(arcadedbMetadata.>getProperty("includes"))
+ .orElse(Collections.emptyList());
+
+ final Collection extends Index> indexes = // Changed OIndex to Index
+ DatabaseThreadLocal.INSTANCE // Changed ODatabaseRecordThreadLocal
+ .get()
+ .getSchema() // Changed getMetadata().getIndexManager()
+ .getIndexes()
+ .stream()
+ .filter(i -> !excludes.contains(i.getName()))
+ .filter(i -> includes.isEmpty() || includes.contains(i.getName()))
+ .collect(Collectors.toList());
+
+ final ArcadeLucenePerFieldAnalyzerWrapper globalAnalyzer = // Changed OLucenePerFieldAnalyzerWrapper
+ new ArcadeLucenePerFieldAnalyzerWrapper(new StandardAnalyzer());
+
+ final List globalFields = new ArrayList();
+ final List globalReaders = new ArrayList();
+ final Map types = new HashMap<>(); // Changed OType to Type
+
+ try {
+ for (Index index : indexes) { // Changed OIndex to Index
+ // FIXME: index.getAlgorithm() might be different, DocumentType.INDEX_TYPE.FULLTEXT might be different
+ if (index.getAlgorithm().equalsIgnoreCase(LUCENE_ALGORITHM)
+ && index.getType().equalsIgnoreCase(DocumentType.INDEX_TYPE.FULLTEXT.toString())) {
+
+ final IndexDefinition definition = index.getDefinition(); // Changed OIndexDefinition
+ final String typeName = definition.getTypeName(); // Changed getClassName
+
+ String[] indexFields =
+ definition.getFields().toArray(new String[definition.getFields().size()]);
+
+ for (int i = 0; i < indexFields.length; i++) {
+ String field = indexFields[i];
+ types.put(typeName + "." + field, definition.getTypes()[i]);
+ globalFields.add(typeName + "." + field);
+ }
+
+ ArcadeLuceneFullTextIndex fullTextIndex = (ArcadeLuceneFullTextIndex) index.getAssociatedIndex(); // Changed OLuceneFullTextIndex, getInternal()
+
+ globalAnalyzer.add((ArcadeLucenePerFieldAnalyzerWrapper) fullTextIndex.queryAnalyzer()); // FIXME: queryAnalyzer might not be directly on ArcadeLuceneFullTextIndex
+
+ globalReaders.add(fullTextIndex.searcher().getIndexReader()); // FIXME: searcher might not be directly on ArcadeLuceneFullTextIndex
+ }
+ }
+
+ if (globalReaders.isEmpty()) {
+ return new LuceneResultSet(this, null, arcadedbMetadata); // FIXME: LuceneResultSet
+ }
+
+ IndexReader indexReader = new MultiReader(globalReaders.toArray(new IndexReader[] {}));
+ IndexSearcher searcher = new IndexSearcher(indexReader);
+
+ Map boost =
+ Optional.ofNullable(arcadedbMetadata.>getProperty("boost"))
+ .orElse(new HashMap<>());
+
+ // FIXME: ArcadeLuceneMultiFieldQueryParser needs refactoring
+ ArcadeLuceneMultiFieldQueryParser p =
+ new ArcadeLuceneMultiFieldQueryParser(
+ types, globalFields.toArray(new String[] {}), globalAnalyzer, boost);
+
+ p.setAllowLeadingWildcard(
+ Optional.ofNullable(arcadedbMetadata.getProperty("allowLeadingWildcard")).orElse(false));
+ p.setSplitOnWhitespace(
+ Optional.ofNullable(arcadedbMetadata.getProperty("splitOnWhitespace")).orElse(true));
+
+ Object params = keyAndMeta.key.getKeys().get(0); // FIXME: keyAndMeta.key structure might change
+ Query query = p.parse(params.toString());
+
+ final List sortFields = ArcadeLuceneEngineUtils.buildSortFields(arcadedbMetadata, null, DatabaseThreadLocal.INSTANCE.get());
+ // final List fields = OLuceneIndexEngineUtils.buildSortFields(arcadedbMetadata);
+
+ LuceneQueryContext ctx = new LuceneQueryContext(null, searcher, query, sortFields); // FIXME
+ return new LuceneResultSet(this, ctx, arcadedbMetadata); // FIXME
+ } catch (IOException e) {
+ logger.log(Level.SEVERE, "unable to create multi-reader", e);
+ } catch (ParseException e) {
+ logger.log(Level.SEVERE, "unable to parse query", e);
+ }
+ return null;
+ }
+
+ @Override
+ public void put(TransactionContext atomicOperation, Object key, Object value) {} // Changed OAtomicOperation
+
+ @Override
+ public void put(TransactionContext atomicOperation, Object key, RID value) {} // Changed OAtomicOperation, ORID
+
+ @Override
+ public boolean remove(TransactionContext atomicOperation, Object key, RID value) { // Changed OAtomicOperation, ORID
+ return false;
+ }
+
+ @Override
+ public void update( // Changed OAtomicOperation, OIndexKeyUpdater
+ TransactionContext atomicOperation, Object key, IndexKeyUpdater updater) {}
+
+ @Override
+ public boolean validatedPut( // Changed OAtomicOperation, ORID, IndexEngineValidator
+ TransactionContext atomicOperation,
+ Object key,
+ RID value,
+ IndexValidator validator) {
+ return false;
+ }
+
+ @Override
+ public Stream> iterateEntriesBetween( // Changed ORawPair, ORID, IndexEngineValuesTransformer
+ Object rangeFrom,
+ boolean fromInclusive,
+ Object rangeTo,
+ boolean toInclusive,
+ boolean ascSortOrder,
+ IndexValuesTransformer transformer) {
+ return Stream.empty();
+ }
+
+ @Override
+ public Stream> iterateEntriesMajor( // Changed ORawPair, ORID, IndexEngineValuesTransformer
+ Object fromKey,
+ boolean isInclusive,
+ boolean ascSortOrder,
+ IndexValuesTransformer transformer) {
+ return Stream.empty();
+ }
+
+ @Override
+ public Stream> iterateEntriesMinor( // Changed ORawPair, ORID, IndexEngineValuesTransformer
+ Object toKey,
+ boolean isInclusive,
+ boolean ascSortOrder,
+ IndexValuesTransformer transformer) {
+ return Stream.empty();
+ }
+
+ @Override
+ public Stream> stream(IndexValuesTransformer valuesTransformer) { // Changed ORawPair, ORID
+ return Stream.empty();
+ }
+
+ @Override
+ public Stream> descStream(IndexValuesTransformer valuesTransformer) { // Changed ORawPair, ORID
+ return Stream.empty();
+ }
+
+ @Override
+ public Stream keyStream() {
+ return Stream.empty();
+ }
+
+ @Override
+ public long size(IndexValuesTransformer transformer) { // Changed IndexEngineValuesTransformer
+ return 0;
+ }
+
+ @Override
+ public boolean hasRangeQuerySupport() {
+ return false;
+ }
+
+ @Override
+ public String getName() {
+ return indexName;
+ }
+
+ @Override
+ public boolean acquireAtomicExclusiveLock(Object key) {
+ return false;
+ }
+
+ @Override
+ public String getIndexNameByKey(Object key) {
+ return null;
+ }
+
+ @Override
+ public String indexName() {
+ return indexName;
+ }
+
+ @Override
+ public void onRecordAddedToResultSet( // Changed parameter types
+ LuceneQueryContext queryContext, // FIXME
+ RecordId recordId, // Changed OContextualRecordId
+ Document ret, // Lucene Document
+ final ScoreDoc score) {
+
+ // FIXME: RecordId in ArcadeDB does not have setContext. How to pass this data?
+ // recordId.setContext(
+ // new HashMap() {
+ // {
+ // Map frag = queryContext.getFragments();
+ // frag.entrySet().stream()
+ // .forEach(
+ // f -> {
+ // TextFragment[] fragments = f.getValue();
+ // StringBuilder hlField = new StringBuilder();
+ // for (int j = 0; j < fragments.length; j++) {
+ // if ((fragments[j] != null) && (fragments[j].getScore() > 0)) {
+ // hlField.append(fragments[j].toString());
+ // }
+ // }
+ // put("$" + f.getKey() + "_hl", hlField.toString());
+ // });
+ // put("$score", score.score);
+ // }
+ // });
+ }
+
+ @Override
+ public Document buildDocument(Object key, Identifiable value) { // Changed OIdentifiable, Lucene Document
+ return null;
+ }
+
+ @Override
+ public Query buildQuery(Object query) {
+ return null;
+ }
+
+ @Override
+ public Analyzer indexAnalyzer() {
+ return null;
+ }
+
+ @Override
+ public Analyzer queryAnalyzer() {
+ return null;
+ }
+
+ @Override
+ public boolean remove(Object key, Identifiable value) { // Changed OIdentifiable
+ return false;
+ }
+
+ @Override
+ public IndexSearcher searcher() {
+ return null;
+ }
+
+ @Override
+ public void release(IndexSearcher searcher) {}
+
+ @Override
+ public Set getInTx(Object key, LuceneTxChanges changes) { // Changed OIdentifiable, OLuceneTxChanges
+ return null;
+ }
+
+ @Override
+ public long sizeInTx(LuceneTxChanges changes) { // Changed OLuceneTxChanges
+ return 0;
+ }
+
+ @Override
+ public LuceneTxChanges buildTxChanges() throws IOException { // Changed OLuceneTxChanges
+ return null;
+ }
+
+ @Override
+ public Query deleteQuery(Object key, Identifiable value) { // Changed OIdentifiable
+ return null;
+ }
+
+ @Override
+ public boolean isCollectionIndex() {
+ return false;
+ }
+
+ @Override
+ public void freeze(boolean throwException) {}
+
+ @Override
+ public void release() {}
+
+ @Override
+ public void updateUniqueIndexVersion(Object key) {}
+
+ @Override
+ public int getUniqueIndexVersion(Object key) {
+ return 0;
+ }
+
+ @Override
+ public boolean remove(Object key) {
+ return false;
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneEngineUtils.java b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneEngineUtils.java
new file mode 100644
index 0000000000..e9aa4127c5
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneEngineUtils.java
@@ -0,0 +1,160 @@
+package com.arcadedb.lucene.engine;
+
+import com.arcadedb.database.DatabaseInternal;
+import com.arcadedb.document.Document;
+import com.arcadedb.index.IndexDefinition;
+import com.arcadedb.schema.Property;
+import com.arcadedb.schema.Type;
+import com.arcadedb.schema.DocumentType;
+
+import org.apache.lucene.search.SortField;
+// Note: SortField.Type is a nested enum of SortField, so importing SortField is
+// sufficient to reference SortField.Type.INT, SortField.Type.STRING, etc. —
+// no separate import is needed.
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+public class ArcadeLuceneEngineUtils {
+
+ private static final Logger logger = Logger.getLogger(ArcadeLuceneEngineUtils.class.getName());
+
+ /**
+ * Builds a list of Lucene SortField objects based on sorting criteria
+ * specified in the metadata document.
+ *
+ * @param arcadedbMetadata The metadata document, typically from query options.
+ * Expected to contain a "sort" or "orderBy" field.
+ * The value can be a String (e.g., "fieldA ASC, fieldB DESC")
+ * or a List of Maps (e.g., [{"field": "fieldA", "direction": "ASC"}, ...]).
+ * @param indexDefinition Optional: The index definition, used to infer field types for sorting if not specified.
+ * @param database Optional: The database instance, used to get schema for type inference.
+ * @return A list of Lucene SortField objects.
+ */
+ public static List buildSortFields(Document arcadedbMetadata, IndexDefinition indexDefinition, DatabaseInternal database) {
+ List sortFields = new ArrayList<>();
+ if (arcadedbMetadata == null) {
+ return sortFields;
+ }
+
+ Object sortCriteria = arcadedbMetadata.get("sort");
+ if (sortCriteria == null) {
+ sortCriteria = arcadedbMetadata.get("orderBy");
+ }
+
+ if (sortCriteria == null) {
+ return sortFields;
+ }
+
+ if (sortCriteria instanceof String) {
+ // Parse string like "fieldA ASC, fieldB DESC"
+ String[] criteria = ((String) sortCriteria).split(",");
+ for (String criterion : criteria) {
+ String[] parts = criterion.trim().split("\\s+"); // Use \\s+ for one or more spaces
+ String fieldName = parts[0].trim();
+ if (fieldName.isEmpty()) continue;
+
+ boolean reverse = parts.length > 1 && "DESC".equalsIgnoreCase(parts[1].trim());
+
+ SortField.Type sortType = inferSortType(fieldName, indexDefinition, database);
+ sortFields.add(new SortField(fieldName, sortType, reverse));
+ }
+ } else if (sortCriteria instanceof List) {
+ // Parse list of maps, e.g., [{"field": "fieldA", "direction": "ASC"}, ...]
+ try {
+ @SuppressWarnings("unchecked") // Generic type for list elements from Document.get()
+ List criteriaList = (List) sortCriteria;
+ for (Object criterionObj : criteriaList) {
+ if (criterionObj instanceof Map) {
+ @SuppressWarnings("unchecked")
+ Map criterion = (Map) criterionObj;
+ String fieldName = criterion.get("field");
+ String direction = criterion.get("direction");
+ if (fieldName != null && !fieldName.trim().isEmpty()) {
+ boolean reverse = "DESC".equalsIgnoreCase(direction);
+ SortField.Type sortType = inferSortType(fieldName.trim(), indexDefinition, database);
+ sortFields.add(new SortField(fieldName.trim(), sortType, reverse));
+ }
+ } else if (criterionObj instanceof String) { // Support list of strings like ["fieldA ASC", "fieldB DESC"]
+ String[] parts = ((String)criterionObj).trim().split("\\s+");
+ String fieldName = parts[0].trim();
+ if (fieldName.isEmpty()) continue;
+ boolean reverse = parts.length > 1 && "DESC".equalsIgnoreCase(parts[1].trim());
+ SortField.Type sortType = inferSortType(fieldName, indexDefinition, database);
+ sortFields.add(new SortField(fieldName, sortType, reverse));
+ }
+ }
+ } catch (ClassCastException e) {
+ logger.warning("Could not parse 'sort' criteria from List due to unexpected element types: " + e.getMessage());
+ }
+ } else {
+ logger.warning("Unsupported 'sort' criteria format: " + sortCriteria.getClass().getName());
+ }
+
+ return sortFields;
+ }
+
+ /**
+ * Infers the Lucene SortField.Type for a given field name.
+ *
+ * @param fieldName The name of the field.
+ * @param indexDefinition Optional: The index definition containing schema information.
+ * @param database Optional: The database instance for schema lookup.
+ * @return The inferred SortField.Type, defaults to STRING if type cannot be determined.
+ */
+ private static SortField.Type inferSortType(String fieldName, IndexDefinition indexDefinition, DatabaseInternal database) {
+ // Special Lucene sort field for relevance score
+ if ("score".equalsIgnoreCase(fieldName) || SortField.FIELD_SCORE.toString().equals(fieldName)) {
+ return SortField.Type.SCORE;
+ }
+ // Special Lucene sort field for document order
+ if (SortField.FIELD_DOC.toString().equals(fieldName)) {
+ return SortField.Type.DOC;
+ }
+
+ if (database != null && indexDefinition != null && indexDefinition.getTypeName() != null) {
+ DocumentType docType = database.getSchema().getType(indexDefinition.getTypeName());
+ if (docType != null) {
+ Property property = docType.getProperty(fieldName);
+ if (property != null) {
+ Type propertyType = property.getType();
+ switch (propertyType) {
+ case INTEGER:
+ case SHORT:
+ case BYTE:
+ return SortField.Type.INT;
+ case LONG:
+ case DATETIME: // Assuming DATETIME is stored as long epoch millis for sorting
+ case DATE: // Assuming DATE is stored as long epoch millis for sorting
+ return SortField.Type.LONG;
+ case FLOAT:
+ return SortField.Type.FLOAT;
+ case DOUBLE:
+ return SortField.Type.DOUBLE;
+ case STRING:
+ case TEXT:
+ case ENUM:
+ case UUID: // UUIDs are often sorted as strings
+ case BINARY: // Might be sorted as string, or custom if specific byte order needed
+ return SortField.Type.STRING;
+ // Add other types as needed, e.g., CUSTOM for specific comparators
+ // BOOLEAN is not directly sortable with a standard SortField.Type unless mapped to INT/STRING
+ default:
+ logger.finer("Cannot infer specific Lucene SortField.Type for ArcadeDB Type " + propertyType + " on field '" + fieldName + "'. Defaulting to STRING.");
+ return SortField.Type.STRING;
+ }
+ } else {
+ logger.finer("Property '" + fieldName + "' not found in type '" + indexDefinition.getTypeName() + "'. Defaulting to STRING sort type.");
+ }
+ } else {
+ logger.finer("DocumentType '" + indexDefinition.getTypeName() + "' not found in schema. Defaulting to STRING sort type for field '" + fieldName + "'.");
+ }
+ }
+ // Default if schema info is unavailable or field not found
+ logger.finer("Insufficient schema information for field '" + fieldName + "'. Defaulting to STRING sort type.");
+ return SortField.Type.STRING;
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java
new file mode 100644
index 0000000000..cedc2ba1c9
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/engine/ArcadeLuceneFullTextIndexEngine.java
@@ -0,0 +1,420 @@
+/*
+ * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ * Copyright 2014 Orient Technologies.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.arcadedb.lucene.engine;
+
+import static com.arcadedb.lucene.builder.LuceneQueryBuilder.EMPTY_METADATA; // FIXME: LuceneQueryBuilder needs refactoring
+
+import com.arcadedb.database.Identifiable;
+import com.arcadedb.database.RID;
+import com.arcadedb.database.RecordId;
+import com.arcadedb.database.TransactionContext; // For AtomicOperation
+import com.arcadedb.document.Document; // ArcadeDB Document
+import com.arcadedb.engine.Storage;
+import com.arcadedb.exception.IndexException; // Changed exception
+import com.arcadedb.index.CompositeKey;
+import com.arcadedb.index.IndexKeyUpdater;
+import com.arcadedb.index.IndexMetadata;
+import com.arcadedb.index.IndexValuesTransformer;
+import com.arcadedb.index.engine.IndexValidator;
+import com.arcadedb.lucene.builder.LuceneDocumentBuilder; // FIXME: Needs refactoring
+import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring
+import com.arcadedb.lucene.collections.ArcadeLuceneIndexTransformer; // FIXME: Needs refactoring
+import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring
+import com.arcadedb.lucene.collections.LuceneResultSet; // FIXME: Needs refactoring
+import com.arcadedb.lucene.index.ArcadeLuceneIndexType;
+import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring
+import com.arcadedb.lucene.query.LuceneQueryContext;
+import com.arcadedb.lucene.tx.LuceneTxChanges;
+import com.arcadedb.query.sql.executor.CommandContext;
+import com.arcadedb.query.sql.parser.ParseException;
+import com.arcadedb.schema.Type; // For manual index field creation
+import com.arcadedb.utility.Pair; // Changed from ORawPair
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.stream.Stream;
+import org.apache.lucene.document.Document; // Lucene Document
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.highlight.TextFragment;
+import org.apache.lucene.store.Directory;
+
+public class ArcadeLuceneFullTextIndexEngine extends OLuceneIndexEngineAbstract implements LuceneIndexEngine { // Changed class, base, and interface
+ private static final Logger logger =
+ Logger.getLogger(ArcadeLuceneFullTextIndexEngine.class.getName()); // Changed logger
+
+ private final LuceneDocumentBuilder builder;
+ private LuceneQueryBuilder queryBuilder;
+ // bonsayFileId removed as it's not used for standard Lucene updates.
+ // If a specific versioning or optimistic locking mechanism is needed for index entries,
+ // it would require a different design, possibly involving specific fields in Lucene documents.
+
+ // Builds the full-text engine for the given storage and index name; the Lucene
+ // document builder is created once here and reused for every indexed record.
+ public ArcadeLuceneFullTextIndexEngine(Storage storage, String idxName) {
+ super(storage, idxName);
+ builder = new LuceneDocumentBuilder();
+ }
+
+ // Initializes the abstract engine from ArcadeDB's IndexMetadata: the algorithm
+ // name stands in for the legacy "indexType" string, the IndexMetadata itself is
+ // passed as the index definition, and the options map is converted into a
+ // Document used both by super.init and by the query builder.
+ @Override
+ public void init(IndexMetadata indexMetadata) {
+ // The super.init in OLuceneIndexEngineAbstract expects:
+ // (String indexName, String indexType, IndexDefinition indexDefinition, boolean isAutomatic, Document metadata)
+ // IndexMetadata (ArcadeDB) has: name, typeName (of Schema Type), algorithm, propertyNames, keyTypes, options, unique, automatic, associatedToBucket, nullStrategy.
+ // It does not directly have a single "indexType" string in the sense of "LUCENE" or "FULLTEXT" - that's algorithm.
+ // The "metadata" Document for super.init should be created from indexMetadata.getOptions().
+
+ com.arcadedb.document.Document engineInitMetadata = new com.arcadedb.document.Document(getDatabase());
+ if (indexMetadata.getOptions() != null) {
+ engineInitMetadata.fromMap(indexMetadata.getOptions());
+ }
+
+ super.init(indexMetadata.getName(),
+ indexMetadata.getAlgorithm(), // Pass algorithm as indexType
+ indexMetadata, // Pass the whole IndexMetadata as IndexDefinition (it implements it)
+ indexMetadata.isAutomatic(),
+ engineInitMetadata);
+
+ // queryBuilder uses the same options Document
+ queryBuilder = new LuceneQueryBuilder(engineInitMetadata);
+ }
+
+ // Factory hook invoked by the abstract engine when (re)opening the index.
+ // NOTE(review): currently always throws — any code path that opens a writer
+ // (e.g. the first put) will fail until the writer factory is ported.
+ @Override
+ public IndexWriter createIndexWriter(Directory directory) throws IOException {
+ // FIXME: OLuceneIndexWriterFactory needs to be ArcadeLuceneIndexWriterFactory
+ // OLuceneIndexWriterFactory fc = new OLuceneIndexWriterFactory();
+ // logger.log(Level.FINE, "Creating Lucene index in ''{0}''...", directory);
+ // return fc.createIndexWriter(directory, metadata, indexAnalyzer());
+ throw new UnsupportedOperationException("ArcadeLuceneIndexWriterFactory not yet implemented");
+ }
+
+ @Override
+ public void onRecordAddedToResultSet( // Changed parameter types
+ final LuceneQueryContext queryContext,
+ final RecordId recordId, // Changed OContextualRecordId
+ final Document ret, // Lucene Document
+ final ScoreDoc score) {
+ HashMap data = new HashMap();
+
+ final Map frag = queryContext.getFragments();
+ frag.forEach(
+ (key, fragments) -> {
+ final StringBuilder hlField = new StringBuilder();
+ for (final TextFragment fragment : fragments) {
+ if ((fragment != null) && (fragment.getScore() > 0)) {
+ hlField.append(fragment.toString());
+ }
+ }
+ data.put("$" + key + "_hl", hlField.toString());
+ });
+ data.put("$score", score.score);
+
+ // recordId.setContext(data); // FIXME: RecordId in ArcadeDB does not have setContext. How to pass this data?
+ // This might need a wrapper class or different result handling.
+ }
+
+ // Transactional removal delegates to the non-transactional overload; the
+ // atomic-operation handle is not needed by the Lucene writer.
+ @Override
+ public boolean remove(final TransactionContext atomicOperation, final Object key) { // Changed OAtomicOperation
+ return remove(key);
+ }
+
+ // Transactional (key, rid) removal delegates to the non-transactional overload.
+ @Override
+ public boolean remove(TransactionContext atomicOperation, Object key, RID value) { // Changed OAtomicOperation, ORID
+ return remove(key, value);
+ }
+
+ // A non-transactional read is served as a transactional read with no pending
+ // changes (null LuceneTxChanges).
+ @Override
+ public Object get(final Object key) {
+ return getInTx(key, null);
+ }
+
+ // Implements update as delete-then-add. NOTE(review): updater.update(null, null)
+ // assumes the updater ignores the old value and the (removed) bonsai file id —
+ // confirm against IndexKeyUpdater's contract. Also note the operation is not
+ // atomic: a failure between the delete and the put loses the index entry.
+ @Override
+ public void update(
+ final TransactionContext txContext, // Changed parameter name for clarity
+ final Object key,
+ final IndexKeyUpdater updater) {
+ // A Lucene update is typically a delete followed by an add.
+ // The 'key' here is what identifies the document(s) to be updated.
+ // The 'updater' provides the new value(s)/Identifiable(s).
+
+ // 1. Determine the new Identifiable that results from the update.
+ // The updater.update(oldValue, ...) is meant to get the new value.
+ // 'oldValue' for an index is usually the set of RIDs mapped to the key.
+ // Since this is a full-text index, the 'key' itself might be complex.
+ // For simplicity, if we assume the updater gives the *new complete Identifiable* to index:
+ Object newValue = updater.update(null, null).getValue(); // Passing null for oldValue and bonsayFileId.
+
+ if (!(newValue instanceof Identifiable)) {
+ throw new IndexException("Updater did not provide an Identifiable value for Lucene index update. Key: " + key);
+ }
+ Identifiable newIdentifiable = (Identifiable) newValue;
+
+ // 2. Delete old document(s) associated with the key.
+ // This requires a query that uniquely identifies the old document(s) for this key.
+ // If the key is the RID itself (e.g. auto index on @rid), then it's simple.
+ // If the key is field values, and these values *might have changed*, then deleting by
+ // the *old* key is important. The current `key` parameter should represent the old key.
+ // However, IndexKeyUpdater is often used when the key itself doesn't change, but the RID does (e.g. unique index).
+ // Or when the indexed content of the RID changes, but the RID (and key) remains the same.
+
+ // Let's assume 'key' can identify the old document(s) and 'newIdentifiable' is the new state to index.
+ // If the RID is constant and only content changes:
+ // We need to re-build the Lucene document for newIdentifiable and use Lucene's updateDocument.
+
+ // Simplest approach for now: delete by key, then put new document.
+ // This assumes 'key' can uniquely identify the document via a query.
+ // If 'key' is the set of indexed fields from the *old* version of the document:
+ if (key != null) {
+ Query deleteByOldKeyQuery = this.queryBuilder.query(this.indexDefinition, key, EMPTY_METADATA, this.queryAnalyzer(), getDatabase());
+ try {
+ this.deleteDocument(deleteByOldKeyQuery); // From OLuceneIndexEngineAbstract
+ } catch (IOException e) {
+ throw new IndexException("Error deleting old document during update for key: " + key, e);
+ }
+ } else if (newIdentifiable != null && newIdentifiable.getIdentity() != null) {
+ // If key is null, but we have the new Identifiable's RID, try to delete by RID.
+ // This is only safe if we are sure this RID was previously indexed and this is a true update.
+ Query deleteByRidQuery = ArcadeLuceneIndexType.createQueryId(newIdentifiable);
+ try {
+ this.deleteDocument(deleteByRidQuery);
+ } catch (IOException e) {
+ throw new IndexException("Error deleting old document by RID during update for: " + newIdentifiable.getIdentity(), e);
+ }
+ } else {
+ throw new IndexException("Cannot determine document to update for Lucene index. Key and new Identifiable are null.");
+ }
+
+ // 3. Put the new document state
+ // The 'key' for put should be derived from the newIdentifiable's fields if it's an automatic index.
+ // If it's a manual index, the 'key' might remain the same or be derived.
+ // For now, assuming the 'key' parameter to 'update' is what we use to identify the document,
+ // and the new content comes from 'newIdentifiable'.
+ // The 'put' method will call buildDocument(key, newIdentifiable).
+ put(txContext, key, newIdentifiable); // Pass the original key for now
+ }
+
+ @Override
+ public void put(final TransactionContext atomicOperation, final Object key, final Object value) { // Changed OAtomicOperation
+ updateLastAccess();
+ openIfClosed();
+ final Document doc = buildDocument(key, (Identifiable) value); // Lucene Document
+ addDocument(doc);
+ }
+
+ @Override
+ public void put(TransactionContext atomicOperation, Object key, RID value) { // Changed OAtomicOperation, ORID
+ updateLastAccess();
+ openIfClosed();
+ final Document doc = buildDocument(key, value); // Lucene Document
+ addDocument(doc);
+ }
+
+ @Override
+ public boolean validatedPut( // Changed OAtomicOperation, ORID, IndexEngineValidator
+ TransactionContext atomicOperation,
+ Object key,
+ RID value,
+ IndexValidator validator) {
+ throw new UnsupportedOperationException(
+ "Validated put is not supported by ArcadeLuceneFullTextIndexEngine");
+ }
+
+ @Override
+ public Stream> iterateEntriesBetween( // Changed ORawPair, ORID, IndexEngineValuesTransformer
+ Object rangeFrom,
+ boolean fromInclusive,
+ Object rangeTo,
+ boolean toInclusive,
+ boolean ascSortOrder,
+ IndexValuesTransformer transformer) {
+ // FIXME: OLuceneResultSet and LuceneIndexTransformer need refactoring
+ return ArcadeLuceneIndexTransformer.transformToStream((LuceneResultSet) get(rangeFrom), rangeFrom);
+ }
+
+ private Set getResults( // Changed OIdentifiable, OCommandContext, OLuceneTxChanges, ODocument
+ final Query query,
+ final CommandContext context,
+ final LuceneTxChanges changes,
+ final Document metadata) { // ArcadeDB Document for metadata
+ // sort
+ // FIXME: OLuceneIndexEngineUtils.buildSortFields needs refactoring
+ // final List fields = OLuceneIndexEngineUtils.buildSortFields(metadata);
+ final List fields = null; // Placeholder
+ final IndexSearcher luceneSearcher = searcher();
+ final LuceneQueryContext queryContext =
+ new LuceneQueryContext(context, luceneSearcher, query, fields).withChanges(changes);
+ // FIXME: OLuceneResultSet needs refactoring to LuceneResultSet
+ return new LuceneResultSet(this, queryContext, metadata);
+ }
+
+  @Override
+  public Stream> iterateEntriesMajor( // Changed ORawPair, ORID, IndexEngineValuesTransformer
+      Object fromKey,
+      boolean isInclusive,
+      boolean ascSortOrder,
+      IndexValuesTransformer transformer) {
+    return Stream.empty(); // range iteration unsupported here; empty stream avoids caller NPEs
+  }
+
+  @Override
+  public Stream> iterateEntriesMinor( // Changed ORawPair, ORID, IndexEngineValuesTransformer
+      Object toKey,
+      boolean isInclusive,
+      boolean ascSortOrder,
+      IndexValuesTransformer transformer) {
+    return Stream.empty(); // range iteration unsupported here; empty stream avoids caller NPEs
+  }
+
+ @Override
+ public boolean hasRangeQuerySupport() {
+ return false;
+ }
+
+ @Override
+ public void updateUniqueIndexVersion(Object key) {
+ // not implemented
+ }
+
+ @Override
+ public int getUniqueIndexVersion(Object key) {
+ return 0; // not implemented
+ }
+
+ @Override
+ public Document buildDocument(Object key, Identifiable value) { // Changed OIdentifiable, Lucene Document
+ if (indexDefinition.isAutomatic()) {
+ // builder is an instance of LuceneDocumentBuilder
+ // LuceneDocumentBuilder.build expects: IndexDefinition, Object key, Identifiable value, Map collectionFields, Document metadata
+ // collectionFields and metadata are available as protected members from OLuceneIndexEngineAbstract
+ return builder.build(indexDefinition, key, value, this.collectionFields, this.metadata);
+ } else {
+ return putInManualindex(key, value);
+ }
+ }
+
+ private static Document putInManualindex(Object key, Identifiable oIdentifiable) { // Changed OIdentifiable, Lucene Document
+ Document luceneDoc = new Document(); // Lucene Document
+ luceneDoc.add(ArcadeLuceneIndexType.createRidField(oIdentifiable));
+ // The ID field for manual indexes might store the key itself if simple, or a hash if complex.
+ // createIdField might be more about a specific format if needed.
+ // For now, let's assume the key itself or its parts are added below with specific field names.
+ // If a single "ID" field representing the whole key is desired for searching the key:
+ // luceneDoc.add(ArcadeLuceneIndexType.createIdField(oIdentifiable, key));
+
+
+ if (key instanceof CompositeKey) {
+ List keys = ((CompositeKey) key).getKeys();
+ // If this manual index has a definition with field names for composite parts:
+ List definedFields = null;
+ // Type[] definedTypes = null; // Not directly available for manual index key parts in IndexDefinition easily
+ // if (indexDefinition != null) { // indexDefinition is not available in this static context directly
+ // definedFields = indexDefinition.getFields();
+ // // definedTypes = indexDefinition.getTypes(); // This is for the main value, not necessarily for key parts
+ // }
+
+ for (int i = 0; i < keys.size(); i++) {
+ Object subKey = keys.get(i);
+ if (subKey == null) continue;
+ String fieldName = (definedFields != null && i < definedFields.size()) ? definedFields.get(i) : "k" + i;
+ Type type = Type.getTypeByValue(subKey);
+ // For manual keys, typically store and index them. Sorting is less common for manual keys.
+ List fields = ArcadeLuceneIndexType.createFields(fieldName, subKey, Field.Store.YES, false, type);
+ for (Field f : fields) {
+ luceneDoc.add(f);
+ }
+ }
+ } else if (key instanceof Collection) {
+ @SuppressWarnings("unchecked")
+ Collection keys = (Collection) key;
+ int i = 0;
+ for (Object item : keys) {
+ if (item == null) continue;
+ String fieldName = "k" + i; // Implicit field name for collection items
+ Type type = Type.getTypeByValue(item);
+ List fields = ArcadeLuceneIndexType.createFields(fieldName, item, Field.Store.YES, false, type);
+ for (Field f : fields) {
+ luceneDoc.add(f);
+ }
+ i++;
+ }
+ } else if (key != null) {
+ // Single key
+ // String fieldName = (indexDefinition != null && !indexDefinition.getFields().isEmpty()) ? indexDefinition.getFields().get(0) : "k0";
+ String fieldName = "k0"; // Default field name for single manual key
+ Type type = Type.getTypeByValue(key);
+ // Store.NO was used in original for single key; this means it's indexed but not retrievable from Lucene doc.
+ // Let's make it configurable or default to YES for consistency if this key is what user searches.
+ // For now, keeping Store.NO to match original hint, but this is questionable.
+ // If it's the actual key to be searched, it should likely be YES or its components stored.
+ // Given createFields also adds Point fields which are not stored, this might be okay.
+ List fields = ArcadeLuceneIndexType.createFields(fieldName, key, Field.Store.NO, false, type);
+ for (Field f : fields) {
+ luceneDoc.add(f);
+ }
+ }
+ return luceneDoc;
+ }
+
+ @Override
+ public Query buildQuery(final Object maybeQuery) {
+ try {
+ if (maybeQuery instanceof String) {
+ return queryBuilder.query(indexDefinition, (String) maybeQuery, new com.arcadedb.document.Document(getDatabase()) /*EMPTY_METADATA*/, queryAnalyzer(), getDatabase());
+ } else {
+ LuceneKeyAndMetadata q = (LuceneKeyAndMetadata) maybeQuery; // FIXME: LuceneKeyAndMetadata needs refactoring
+ return queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer(), getDatabase());
+ }
+ } catch (final ParseException e) {
+ throw new IndexException("Error parsing query for index '" + name + "'", e); // Changed exception
+ }
+ }
+
+ @Override
+ public Set getInTx(Object key, LuceneTxChanges changes) { // Changed OIdentifiable, OLuceneTxChanges
+ updateLastAccess();
+ openIfClosed();
+ try {
+ if (key instanceof LuceneKeyAndMetadata) { // FIXME: LuceneKeyAndMetadata needs refactoring
+ LuceneKeyAndMetadata q = (LuceneKeyAndMetadata) key;
+ Query luceneQuery = queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer(), getDatabase());
+
+ CommandContext commandContext = q.getContext(); // LuceneKeyAndMetadata now has getContext()
+ return getResults(luceneQuery, commandContext, changes, q.metadata);
+
+ } else {
+ Query luceneQuery = queryBuilder.query(indexDefinition, key, new com.arcadedb.document.Document(getDatabase()) /*EMPTY_METADATA*/, queryAnalyzer(), getDatabase());
+
+ CommandContext commandContext = null;
+ if (key instanceof LuceneCompositeKey) { // FIXME: LuceneCompositeKey needs refactoring
+ commandContext = ((LuceneCompositeKey) key).getContext(); // Assuming LuceneCompositeKey might have a context
+ }
+ return getResults(luceneQuery, commandContext, changes, new com.arcadedb.document.Document(getDatabase())/*EMPTY_METADATA*/);
+ }
+ } catch (ParseException e) {
+ throw new IndexException("Error parsing lucene query for index '" + name + "'", e); // Changed exception
+ }
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/LuceneIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/LuceneIndexEngine.java
new file mode 100644
index 0000000000..49e1c5db19
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/engine/LuceneIndexEngine.java
@@ -0,0 +1,69 @@
+/*
+ *
+ * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ * * Copyright 2014 Orient Technologies.
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+
+package com.arcadedb.lucene.engine;
+
+import com.arcadedb.database.Identifiable;
+import com.arcadedb.database.RecordId;
+import com.arcadedb.engine.WALFile; // For Freezeable
+import com.arcadedb.index.IndexEngine;
+import com.arcadedb.lucene.query.LuceneQueryContext; // Will be refactored
+import com.arcadedb.lucene.tx.LuceneTxChanges; // Will be refactored
+import java.io.IOException;
+import java.util.Set;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+
+/** Created by Enrico Risa on 04/09/15. */
+public interface LuceneIndexEngine extends IndexEngine, WALFile.Freezeable { // Changed interface name and extended interfaces
+
+ String indexName();
+
+ void onRecordAddedToResultSet( // Changed parameter types
+ LuceneQueryContext queryContext, RecordId recordId, Document ret, ScoreDoc score);
+
+ Document buildDocument(Object key, Identifiable value); // Changed parameter type
+
+ Query buildQuery(Object query);
+
+ Analyzer indexAnalyzer();
+
+ Analyzer queryAnalyzer();
+
+ boolean remove(Object key, Identifiable value); // Changed parameter type
+
+ boolean remove(Object key);
+
+ IndexSearcher searcher();
+
+ void release(IndexSearcher searcher);
+
+ Set getInTx(Object key, LuceneTxChanges changes); // Changed parameter and return types
+
+ long sizeInTx(LuceneTxChanges changes); // Changed parameter type
+
+ LuceneTxChanges buildTxChanges() throws IOException; // Changed return type
+
+ Query deleteQuery(Object key, Identifiable value); // Changed parameter type
+
+ boolean isCollectionIndex();
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassFunctionsFactory.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassFunctionsFactory.java
new file mode 100644
index 0000000000..5a24641965
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassFunctionsFactory.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ * Copyright 2023 Arcade Data Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.arcadedb.lucene.functions;
+
+import com.arcadedb.query.sql.SQLFunctionRegistry; // Assuming this is the ArcadeDB equivalent
+
+// FIXME: The actual function class (ArcadeLuceneCrossClassSearchFunction) will need to be created/refactored separately.
+
+public class ArcadeLuceneCrossClassFunctionsFactory { // Changed class name
+
+ public static void onStartup() { // Changed to a static method for registration
+ SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneCrossClassSearchFunction()); // FIXME: Placeholder for refactored class
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java
new file mode 100644
index 0000000000..9e5fba63a6
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneCrossClassSearchFunction.java
@@ -0,0 +1,259 @@
+package com.arcadedb.lucene.functions;
+
+// import static com.arcadedb.lucene.OLuceneCrossClassIndexFactory.LUCENE_CROSS_CLASS; // FIXME Define or import
+import com.arcadedb.database.DatabaseContext;
+import com.arcadedb.database.DatabaseInternal;
+import com.arcadedb.database.Identifiable;
+import com.arcadedb.database.RID; // Changed
+import com.arcadedb.document.Document; // Changed
+import com.arcadedb.index.Index; // Changed
+import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring
+import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring
+import com.arcadedb.lucene.engine.ArcadeLuceneCrossClassIndexEngine; // Changed
+import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring (used as type in old code, though engine is likely target)
+import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring
+import com.arcadedb.query.sql.executor.CommandContext; // Changed
+import com.arcadedb.query.sql.executor.Result; // Changed
+import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed
+import com.arcadedb.query.sql.parser.Expression; // Changed
+import com.arcadedb.query.sql.parser.FromClause; // Changed
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger; // Changed
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * This function uses the CrossClassIndex to search documents across all the Lucene indexes defined in a database
+ *
+ * Created by frank on 19/02/2016.
+ */
+public class ArcadeLuceneCrossClassSearchFunction extends ArcadeLuceneSearchFunctionTemplate { // Changed base class
+ private static final Logger logger =
+ Logger.getLogger(ArcadeLuceneCrossClassSearchFunction.class.getName()); // Changed
+
+ public static final String NAME = "search_cross"; // Changed from SEARCH_CROSS
+ private static final String LUCENE_CROSS_CLASS_ALGORITHM = "LUCENE_CROSS_CLASS"; // Placeholder
+
+ private ArcadeLuceneCrossClassIndexEngine crossClassEngineInstance = null; // Cache for the engine
+
+ public ArcadeLuceneCrossClassSearchFunction() {
+ super(NAME, 1, 2); // query, [metadata]
+ }
+
+ // searchForIndex in the template expects args for index name. This class doesn't use that.
+ // It finds a specific *kind* of index (cross class).
+ // So, the searchForIndex from the template is not suitable.
+ // This function might not be a good fit for ArcadeLuceneSearchFunctionTemplate if it cannot provide a single index.
+ // However, if ArcadeLuceneCrossClassIndexEngine is treated as *the* index, it could work.
+
+ @Override
+ public Iterable searchFromTarget( // Changed
+ FromClause target, // Target is ignored by this function as it's cross-class
+ BinaryCompareOperator operator,
+ Object rightValue,
+ CommandContext ctx, // Changed
+ Expression... args) { // Changed
+
+ ArcadeLuceneCrossClassIndexEngine engine = getCrossClassEngine(ctx); // FIXME: Needs robust way to get this engine
+
+ if (engine == null) {
+ logger.warning("Lucene Cross Class Index Engine not found.");
+ return Collections.emptySet();
+ }
+
+ Expression expression = args[0];
+ String query = (String) expression.execute((Result) null, ctx); // Changed
+
+ Document metadata = getMetadata(args, ctx, 1); // Changed, metadata is args[1]
+
+ // The engine's 'get' method should return Iterable or similar
+ // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring
+ Object result = engine.get(
+ new LuceneKeyAndMetadata(
+ new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata));
+
+ if (result instanceof Iterable) {
+ return (Iterable) result;
+ }
+ return Collections.emptySet();
+ }
+
+ @Override
+ public Object execute(
+ Object iThis,
+ Identifiable currentRecord, // Changed
+ Object currentResult,
+ Object[] params,
+ CommandContext ctx) { // Changed
+
+ ArcadeLuceneCrossClassIndexEngine engine = getCrossClassEngine(ctx); // FIXME
+
+ if (engine == null) {
+ logger.warning("Lucene Cross Class Index Engine not found for execute.");
+ return Collections.emptySet();
+ }
+
+ String query = (String) params[0];
+ Document metadata = getMetadata(params, 1); // Changed
+
+ // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring
+ Object result = engine.get(
+ new LuceneKeyAndMetadata(
+ new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata));
+
+ return result;
+ }
+
+ private Document getMetadata(Expression[] args, CommandContext ctx, int metadataParamIndex) { // Changed
+ if (args.length > metadataParamIndex) {
+ // Assuming getMetadata from ArcadeLuceneSearchFunctionTemplate is suitable
+ return super.getMetadata(args[metadataParamIndex], ctx);
+ }
+ // FIXME: LuceneQueryBuilder.EMPTY_METADATA
+ return new Document(); // LuceneQueryBuilder.EMPTY_METADATA;
+ }
+
+  private Document getMetadata(Object[] params, int metadataParamIndex) { // Changed
+    if (params.length > metadataParamIndex) {
+      if (params[metadataParamIndex] instanceof Map) {
+        return new Document().fromMap((Map) params[metadataParamIndex]);
+      } else if (params[metadataParamIndex] instanceof String) {
+        return new Document().fromJSON((String) params[metadataParamIndex]);
+      } else if (params[metadataParamIndex] instanceof Document) {
+        return (Document) params[metadataParamIndex];
+      }
+      // Fallback: best-effort JSON parse of the raw value's string form.
+      try {
+        return new Document().fromJSON(params[metadataParamIndex].toString());
+      } catch (Exception e) {
+        logger.fine("Unparsable metadata parameter, falling back to empty metadata: " + e.getMessage());
+      }
+    }
+    // FIXME: LuceneQueryBuilder.EMPTY_METADATA
+    return new Document(); // LuceneQueryBuilder.EMPTY_METADATA;
+  }
+
+ // This method is problematic as the template expects an ArcadeLuceneFullTextIndex.
+ // This function uses a different kind of engine.
+ // Returning null tells the template that direct indexed execution (via that specific index type) is not possible.
+ @Override
+ protected ArcadeLuceneFullTextIndex searchForIndex( // FIXME: This signature might not be appropriate for this class
+ FromClause target, CommandContext ctx, Expression... args) {
+ return null; // This function doesn't use a single, standard Lucene full-text index from the target.
+ // It uses the ArcadeLuceneCrossClassIndexEngine.
+ }
+
+ // Helper to get the specific cross-class engine instance
+ private ArcadeLuceneCrossClassIndexEngine getCrossClassEngine(CommandContext ctx) {
+ if (this.crossClassEngineInstance != null && this.crossClassEngineInstance.getDatabase() == ctx.getDatabase()) {
+ // Ensure cached engine is for the same database instance, though typically SQL functions are per-query.
+ // If function instances are per-query, caching might offer little benefit unless getCrossClassEngine is called multiple times in one execution.
+ // If functions are singletons, then caching is more useful but needs to be thread-safe or per-database-instance.
+ // For now, simple instance caching. If SQLFunctions are per-query, this cache won't persist across queries.
+ return this.crossClassEngineInstance;
+ }
+
+ DatabaseInternal database = null;
+ if (ctx instanceof DatabaseContext) { // Check if CommandContext is or provides DatabaseContext
+ database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase();
+ } else if (ctx.getDatabase() instanceof DatabaseInternal) { // Standard way to get Database
+ database = (DatabaseInternal) ctx.getDatabase();
+ }
+
+ if (database == null) {
+ logger.warning("Database not found in CommandContext for getCrossClassEngine. CommandContext type: " + ctx.getClass().getName());
+ return null;
+ }
+
+ for (Index idx : database.getSchema().getIndexes()) {
+ IndexEngine engine = idx.getAssociatedIndex(); // Index.getAssociatedIndex() returns IndexEngine
+ if (engine instanceof ArcadeLuceneCrossClassIndexEngine) {
+ this.crossClassEngineInstance = (ArcadeLuceneCrossClassIndexEngine) engine;
+ logger.fine("Found ArcadeLuceneCrossClassIndexEngine via associated engine of index: " + idx.getName());
+ return this.crossClassEngineInstance;
+ }
+ // Check if the index itself is a wrapper for the engine (less likely with getAssociatedIndex)
+ // or if algorithm matches (if factory handler associates this engine type with an algorithm for a "marker" index)
+ if (LUCENE_CROSS_CLASS_ALGORITHM.equals(idx.getAlgorithm())) {
+ if (engine instanceof ArcadeLuceneCrossClassIndexEngine) { // Should be true if factory did its job
+ this.crossClassEngineInstance = (ArcadeLuceneCrossClassIndexEngine) engine;
+ logger.fine("Found ArcadeLuceneCrossClassIndexEngine via algorithm on index: " + idx.getName());
+ return this.crossClassEngineInstance;
+ } else if (engine == null && idx instanceof ArcadeLuceneCrossClassIndexEngine) {
+ // This case is if the Index object itself *is* the engine, which is not standard for ArcadeDB.
+ // But keeping a check for robustness during refactoring.
+ this.crossClassEngineInstance = (ArcadeLuceneCrossClassIndexEngine) idx;
+ logger.warning("Found ArcadeLuceneCrossClassIndexEngine directly as an Index instance (unusual): " + idx.getName());
+ return this.crossClassEngineInstance;
+ }
+ }
+ }
+
+ logger.warning("ArcadeLuceneCrossClassIndexEngine not found. Ensure an index of type '" + LUCENE_CROSS_CLASS_ALGORITHM +
+ "' (which uses this engine) is defined, or that an existing index correctly associates this engine.");
+ return null;
+ }
+
+
+  @Override
+  public String getSyntax() {
+    // Usage string shown in SQL function help; query is required, metadata optional.
+    return NAME + "('<query>', [<metadata>])";
+  }
+
+ // Other overrides from OIndexableSQLFunction (estimate, canExecuteInline, etc.)
+ // The original class had specific implementations for these.
+ // If extending ArcadeLuceneSearchFunctionTemplate, these might be inherited or need specific overrides.
+ // For now, relying on template's (which has FIXMEs) or needing specific ones here.
+
+ @Override
+ public long estimate(
+ FromClause target,
+ BinaryCompareOperator operator,
+ Object rightValue,
+ CommandContext ctx,
+ Expression... args) {
+ // Cross-class estimation is complex. Returning a default or trying to get a count from the engine.
+ ArcadeLuceneCrossClassIndexEngine engine = getCrossClassEngine(ctx);
+ if (engine != null) {
+ // FIXME: The engine might need a size estimation method
+ // return engine.sizeEstimate(args...);
+ }
+ return super.estimate(target, operator, rightValue, ctx, args); // Fallback to template's estimate
+ }
+
+ @Override
+ public boolean allowsIndexedExecution(
+ FromClause target,
+ BinaryCompareOperator operator,
+ Object rightValue,
+ CommandContext ctx,
+ Expression... args) {
+ // This function *always* uses its specialized engine, so it's "indexed" in that sense.
+ return getCrossClassEngine(ctx) != null;
+ }
+ @Override
+ public boolean canExecuteInline(
+ FromClause target,
+ BinaryCompareOperator operator,
+ Object rightValue,
+ CommandContext ctx,
+ Expression... args) {
+ return false; // Cross class search is likely too complex for simple inline execution
+ }
+
+ @Override
+ public boolean shouldExecuteAfterSearch(
+ FromClause target,
+ BinaryCompareOperator operator,
+ Object rightValue,
+ CommandContext ctx,
+ Expression... args) {
+ return false;
+ }
+
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsFactory.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsFactory.java
new file mode 100644
index 0000000000..ca707bae1a
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsFactory.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ * Copyright 2023 Arcade Data Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.arcadedb.lucene.functions;
+
+import com.arcadedb.query.sql.SQLFunctionRegistry; // Assuming this is the ArcadeDB equivalent
+
+// FIXME: The actual function classes (e.g., ArcadeLuceneSearchOnIndexFunction) will need to be created/refactored separately.
+// For now, we are just changing the instantiation call.
+
+public class ArcadeLuceneFunctionsFactory { // Changed class name
+
+ public static void onStartup() { // Changed to a static method for registration
+ SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneSearchOnIndexFunction()); // FIXME: Placeholder for refactored class
+ SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneSearchOnFieldsFunction()); // FIXME: Placeholder for refactored class
+ SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneSearchOnClassFunction()); // FIXME: Placeholder for refactored class
+ SQLFunctionRegistry.INSTANCE.register(new ArcadeLuceneSearchMoreLikeThisFunction()); // FIXME: Placeholder for refactored class
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsUtils.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsUtils.java
new file mode 100644
index 0000000000..df3f5b7307
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneFunctionsUtils.java
@@ -0,0 +1,64 @@
+package com.arcadedb.lucene.functions;
+
+import com.arcadedb.database.DatabaseContext; // Assuming CommandContext provides access to Database
+import com.arcadedb.database.DatabaseInternal; // Changed
+import com.arcadedb.index.Index; // Changed
+import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring
+import com.arcadedb.query.sql.executor.CommandContext; // Changed
+import com.arcadedb.query.sql.executor.Result; // Changed
+import com.arcadedb.query.sql.parser.Expression; // Changed
+import com.arcadedb.schema.Schema; // Changed
+import org.apache.lucene.index.memory.MemoryIndex;
+
+/** Created by frank on 13/02/2017. */
+public class ArcadeLuceneFunctionsUtils { // Changed class name
+ public static final String MEMORY_INDEX = "_memoryIndex";
+
+ protected static ArcadeLuceneFullTextIndex searchForIndex(Expression[] args, CommandContext ctx) { // Changed types
+ final String indexName = (String) args[0].execute((Result) null, ctx); // Changed types
+ return getLuceneFullTextIndex(ctx, indexName);
+ }
+
+ protected static ArcadeLuceneFullTextIndex getLuceneFullTextIndex( // Changed types
+ final CommandContext ctx, final String indexName) {
+ // Assuming CommandContext gives access to DatabaseInternal instance
+ final DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify how to get DatabaseInternal from CommandContext
+ // database.activateOnCurrentThread(); // This might not be needed or done differently in ArcadeDB
+
+ final Schema schema = database.getSchema(); // Changed OMetadataInternal
+
+ // FIXME: metadata.getIndexManagerInternal().getIndex(documentDatabase, indexName) changed to schema.getIndex()
+ // Also, the casting and type checking for ArcadeLuceneFullTextIndex needs ArcadeLuceneFullTextIndex to be properly defined and refactored.
+ final Index index = schema.getIndex(indexName);
+
+ if (!(index instanceof ArcadeLuceneFullTextIndex)) { // FIXME
+ throw new IllegalArgumentException("Not a valid Lucene index:: " + indexName);
+ }
+ return (ArcadeLuceneFullTextIndex) index; // FIXME
+ }
+
+ public static MemoryIndex getOrCreateMemoryIndex(CommandContext ctx) { // Changed OCommandContext
+ MemoryIndex memoryIndex = (MemoryIndex) ctx.getVariable(MEMORY_INDEX);
+ if (memoryIndex == null) {
+ memoryIndex = new MemoryIndex();
+ ctx.setVariable(MEMORY_INDEX, memoryIndex);
+ }
+ memoryIndex.reset();
+ return memoryIndex;
+ }
+
+  public static String doubleEscape(final String s) {
+    // Prefixes every Lucene query-syntax special character with a doubled
+    // backslash so it survives one level of parser unescaping.
+    final StringBuilder sb = new StringBuilder(s.length() * 2);
+    for (int i = 0; i < s.length(); ++i) {
+      final char c = s.charAt(i);
+      if ("\\+-!():^[]\"{}~*?|&/".indexOf(c) >= 0) {
+        sb.append('\\');
+        sb.append('\\');
+      }
+      sb.append(c);
+    }
+    return sb.toString();
+  }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java
new file mode 100644
index 0000000000..a872e5520c
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchFunctionTemplate.java
@@ -0,0 +1,76 @@
+package com.arcadedb.lucene.functions;
+
+import com.arcadedb.database.Identifiable; // Changed
+import com.arcadedb.document.Document; // Changed
+import com.arcadedb.lucene.collections.LuceneResultSet; // FIXME: Needs refactoring
+import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring
+import com.arcadedb.query.sql.executor.CommandContext; // Changed
+import com.arcadedb.query.sql.executor.Result; // Changed
+import com.arcadedb.query.sql.function.SQLFunction; // Standard ArcadeDB SQLFunction if SQLFunctionAbstract is not public or is different
+import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed
+import com.arcadedb.query.sql.parser.Expression; // Changed
+import com.arcadedb.query.sql.parser.FromClause; // Changed
+import java.util.Map;
+
+/** Created by frank on 25/05/2017. */
+// Changed base class and removed IndexableSQLFunction interface
+public abstract class ArcadeLuceneSearchFunctionTemplate implements SQLFunction {
+
+ protected final String name;
+
+ /**
+  * @param name the SQL function name this instance is registered under
+  */
+ public ArcadeLuceneSearchFunctionTemplate(final String name) {
+ this.name = name;
+ // Parameter count checks will be done in each concrete class's execute method
+ }
+
+ /** Returns the SQL function name supplied at construction time. */
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ // The following methods are from the old IndexableSQLFunction interface and will be removed.
+ // If ArcadeDB has a new way for functions to declare index usability, that would be a separate implementation.
+ // public abstract boolean canExecuteInline(...);
+ // public abstract boolean allowsIndexedExecution(...);
+ // public abstract boolean shouldExecuteAfterSearch(...);
+ // public abstract long estimate(...);
+ // public abstract Iterable searchFromTarget(...); // This logic moves into execute
+
+ // The execute method is abstract in SQLFunction and must be implemented by concrete subclasses.
+ // public abstract Object execute(Object self, Identifiable currentRecord, Object currentResult, Object[] params, CommandContext context);
+
+ /**
+  * Evaluates the optional metadata argument of a search function and normalizes it to a
+  * {@link Document}: a Document is returned as-is, a Map or JSON string is converted, a
+  * non-JSON string is wrapped under a "metadata" field, and any other non-null value falls
+  * back to its string form. Returns an empty Document when the expression is null or the
+  * value is unparseable.
+  * NOTE(review): the null-expression branch uses new Document(ctx.getDatabase()) while the
+  * other branches use the no-arg constructor — confirm which form ArcadeDB requires.
+  */
+ protected Document getMetadata(Expression metadataExpression, CommandContext ctx) {
+ if (metadataExpression == null) return new Document(ctx.getDatabase());
+ final Object md = metadataExpression.execute((Result) null, ctx);
+ if (md instanceof Document) {
+ return (Document) md;
+ } else if (md instanceof Map) {
+ return new Document().fromMap((Map) md); // Changed ODocument
+ } else if (md instanceof String) {
+ try {
+ return new Document().fromJSON((String) md); // Changed ODocument
+ } catch (Exception e) {
+ // It might not be a JSON string, but the raw metadata string itself (e.g. analyzer class name)
+ // This part needs careful review based on how metadata is actually passed and used.
+ // For now, returning a document with a field containing the string.
+ Document doc = new Document();
+ doc.set("metadata", (String) md); // FIXME: Review this fallback for non-JSON metadata strings
+ return doc;
+ }
+ } else if (md != null) { // BUGFIX: previously referenced "metadata", an undefined symbol
+ // Fallback if md is not null but not a recognized type: try its string representation as JSON
+ try {
+ return new Document().fromJSON(md.toString()); // Changed ODocument
+ } catch (Exception e) {
+ Document doc = new Document();
+ doc.set("metadata", md.toString()); // FIXME: Review this fallback
+ return doc;
+ }
+ }
+ return new Document(); // Empty document if null or unparseable
+ }
+
+ // Changed OLuceneFullTextIndex, OFromClause, OCommandContext, OExpression
+ /**
+  * Resolves the Lucene full-text index to use for the given query target; implementations
+  * return null when no applicable index exists.
+  */
+ protected abstract ArcadeLuceneFullTextIndex searchForIndex( // FIXME
+ FromClause target, CommandContext ctx, Expression... args);
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchMoreLikeThisFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchMoreLikeThisFunction.java
new file mode 100644
index 0000000000..1b67424217
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchMoreLikeThisFunction.java
@@ -0,0 +1,389 @@
+package com.arcadedb.lucene.functions;
+
+import com.arcadedb.database.Database; // Changed ODatabaseSession to Database
+import com.arcadedb.database.DatabaseContext; // For context access
+import com.arcadedb.database.DatabaseInternal;
+import com.arcadedb.database.Identifiable;
+import com.arcadedb.database.RID; // Changed
+import com.arcadedb.document.Document; // Changed
+import com.arcadedb.document.Element; // Changed
+import com.arcadedb.exception.ArcadeDBException; // Changed
+import com.arcadedb.index.Index; // Changed
+import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring
+import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring
+import com.arcadedb.lucene.index.ArcadeLuceneIndexType; // For RID field name
+import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring
+import com.arcadedb.query.sql.executor.CommandContext; // Changed
+import com.arcadedb.query.sql.executor.Result; // Changed
+import com.arcadedb.query.sql.function.IndexableSQLFunction; // Assuming
+import com.arcadedb.query.sql.function.SQLFunctionAbstract; // Assuming
+import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed
+import com.arcadedb.query.sql.parser.Expression; // Changed
+import com.arcadedb.query.sql.parser.FromClause; // Changed
+import com.arcadedb.query.sql.parser.FromItem; // Changed
+import com.arcadedb.query.sql.parser.Identifier;
+import com.arcadedb.schema.DocumentType;
+import com.arcadedb.schema.Schema;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.mlt.MoreLikeThis;
+import org.apache.lucene.queryparser.classic.QueryParser; // Used for escape
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery; // Directly use BooleanQuery.Builder
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+/** Created by frank on 15/01/2017. */
+public class ArcadeLuceneSearchMoreLikeThisFunction extends ArcadeLuceneSearchFunctionTemplate // Changed base
+ implements IndexableSQLFunction { // Assuming from template
+
+ private static final Logger logger =
+ Logger.getLogger(ArcadeLuceneSearchMoreLikeThisFunction.class.getName()); // Changed
+
+ public static final String NAME = "search_more_like_this"; // Changed name
+
+ /** Registers the function under {@link #NAME}; arity (rids [, metadata]) is checked in execute(). */
+ public ArcadeLuceneSearchMoreLikeThisFunction() {
+ // BUGFIX: the template constructor only accepts the function name; the previous
+ // super(NAME, 1, 2) call did not match any constructor and could not compile.
+ super(NAME); // params: rids, [metadata]
+ }
+
+ /** Returns the registered function name ("search_more_like_this"). */
+ @Override
+ public String getName() {
+ return NAME;
+ }
+
+ /**
+  * Filter-style execution: returns true when the current record is "more like" the records
+  * identified by the RIDs in params[0], optionally tuned by a metadata document in params[1].
+  * The current record is indexed into a per-command MemoryIndex and scored against a
+  * MoreLikeThis query built from the referenced seed records.
+  */
+ @Override
+ public Object execute( // FIXME: Signature might change
+ Object iThis,
+ Identifiable iCurrentRecord, // Changed
+ Object iCurrentResult,
+ Object[] params,
+ CommandContext ctx) { // Changed
+
+ // Guard: the rid-collection argument is mandatory.
+ if (params == null || params.length == 0) return false;
+
+ if (!(iCurrentRecord instanceof Document)) { // Changed
+ return false;
+ }
+ String className = ((Document) iCurrentRecord).getTypeName(); // Changed
+ ArcadeLuceneFullTextIndex index = this.searchForIndex(ctx, className); // FIXME
+
+ if (index == null) return false; // Cannot perform MLT without an index
+
+ IndexSearcher searcher = index.searcher(); // FIXME
+ if (searcher == null) return false;
+
+ Document metadata = getMetadataDoc(params, 1); // metadata is params[1] // Changed
+
+ List<String> ridsAsString = parseRidsObj(ctx, params[0]); // FIX: restored element type lost in conversion
+ if (ridsAsString.isEmpty()) return false;
+
+ List<Identifiable> others = // Changed ORecord to Identifiable
+ ridsAsString.stream()
+ .map(ridStr -> (Identifiable) new RID(ctx.getDatabase(), ridStr)) // Changed ORecordId
+ .map(id -> (Identifiable) ctx.getDatabase().lookupByRID(id.getIdentity(), true).getRecord()) // Load record // Changed
+ .filter(r -> r instanceof Element) // Ensure it's an element
+ .collect(Collectors.toList());
+
+ MoreLikeThis mlt = buildMoreLikeThis(index, searcher, metadata); // FIXME
+
+ BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); // Changed
+
+ // Build the MLT query from the content of 'others'; we then test whether the current
+ // record matches it. This is a per-record filter, unlike searchFromTarget().
+ addLikeQueries(others, mlt, queryBuilder, ctx.getDatabase()); // Changed
+
+ Query mltQuery = queryBuilder.build();
+ if (mltQuery.toString().isEmpty()) { // No terms generated if documents are empty or too common/rare
+ return false;
+ }
+
+ // Index the current record in-memory and score it against the MLT query.
+ // NOTE: MemoryIndex is fully qualified because this file does not import it.
+ org.apache.lucene.index.memory.MemoryIndex memoryIndex =
+ ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx);
+ org.apache.lucene.document.Document luceneDoc = index.buildDocument(null, iCurrentRecord); // FIXME: Key might be needed or different buildDocument signature
+ if (luceneDoc == null) {
+ return false;
+ }
+ for (org.apache.lucene.index.IndexableField field : luceneDoc.getFields()) {
+ memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer()); // FIXME
+ }
+ return memoryIndex.search(mltQuery) > 0.0f;
+ }
+
+ /** SQL help string; the angle-bracket placeholders were stripped by an earlier conversion. */
+ @Override
+ public String getSyntax() {
+ return NAME + "( <rid-collection> [, <metadata> ] )";
+ }
+
+ /**
+  * Target-search execution: builds a MoreLikeThis query from the seed records identified by
+  * the RIDs in args[0] (optionally tuned by metadata in args[1]), excludes the seeds
+  * themselves, and runs the query against the class's Lucene index, returning the matching
+  * identities. Returns an empty set when no index, searcher, seeds, or query terms exist.
+  * NOTE(review): args[0] is accessed without a length check — confirm the SQL layer
+  * guarantees at least one argument.
+  * NOTE(review): the raw List/Stream declarations below lost their type parameters in the
+  * conversion (likely List<String>, List<Identifiable>, Stream<RID>) — restore and verify.
+  */
+ @Override
+ public Iterable searchFromTarget( // Changed
+ FromClause target, // Changed
+ BinaryCompareOperator operator, // Changed
+ Object rightValue,
+ CommandContext ctx, // Changed
+ Expression... args) { // Changed
+
+ ArcadeLuceneFullTextIndex index = this.searchForIndex(target, ctx, args); // FIXME
+
+ if (index == null) return Collections.emptySet();
+
+ IndexSearcher searcher = index.searcher(); // FIXME
+ if (searcher == null) return Collections.emptySet();
+
+
+ Expression ridExpression = args[0];
+ Document metadata = getMetadataFromExpression(args, ctx, 1); // metadata is args[1] // Changed
+
+ List ridsAsString = parseRids(ctx, ridExpression);
+ if (ridsAsString.isEmpty()) return Collections.emptySet();
+
+ // Load every seed record; non-Element results are dropped.
+ List others = // Changed
+ ridsAsString.stream()
+ .map(ridStr -> (Identifiable) new RID(ctx.getDatabase(), ridStr)) // Changed
+ .map(id -> ctx.getDatabase().lookupByRID(id.getIdentity(), true).getRecord()) // Load record // Changed
+ .filter(r -> r instanceof Element)
+ .collect(Collectors.toList());
+
+ MoreLikeThis mlt = buildMoreLikeThis(index, searcher, metadata); // FIXME
+
+ BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); // Changed
+
+ excludeOtherFromResults(ridsAsString, queryBuilder); // Keep input RIDs out of results
+
+ addLikeQueries(others, mlt, queryBuilder, ctx.getDatabase()); // Changed
+
+ Query mltQuery = queryBuilder.build();
+ if (mltQuery.toString().isEmpty()) return Collections.emptySet();
+
+
+ // Execute the mltQuery against the main index
+ // FIXME: index.getInternal().getRids() needs to be replaced
+ // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring
+ // This part is highly dependent on how ArcadeLuceneFullTextIndex exposes search capabilities
+ try (Stream rids = // Changed
+ index
+ .getAssociatedIndex() // Assuming
+ .getRids( // This method might not exist
+ new LuceneKeyAndMetadata( // FIXME
+ new LuceneCompositeKey(Arrays.asList(mltQuery.toString())).setContext(ctx), // FIXME
+ metadata))) {
+ return rids.map(rid -> (Identifiable) rid).collect(Collectors.toSet()); // Changed
+ } catch (Exception e) {
+ logger.log(Level.SEVERE, "Error executing MoreLikeThis query via getRids", e);
+ return Collections.emptySet();
+ }
+ }
+
+ /** Evaluates the expression and extracts RID strings from whatever value it yields. */
+ private List parseRids(CommandContext ctx, Expression expression) { // Changed
+ return parseRidsObj(ctx, expression.execute((Result) null, ctx));
+ }
+
+ private List parseRidsObj(CommandContext ctx, Object expResult) { // Changed
+ if (expResult instanceof Identifiable) { // Changed
+ return Collections.singletonList(((Identifiable) expResult).getIdentity().toString());
+ }
+
+ Iterator> iter; // Wildcard for iterator type
+ if (expResult instanceof Iterable) {
+ iter = ((Iterable>) expResult).iterator();
+ } else if (expResult instanceof Iterator) {
+ iter = (Iterator>) expResult;
+ } else {
+ return Collections.emptyList();
+ }
+
+ List rids = new ArrayList<>();
+ while (iter.hasNext()) {
+ Object item = iter.next();
+ if (item instanceof Result) { // Changed
+ if (((Result) item).isElement()) {
+ ((Result) item).getIdentity().ifPresent(id -> rids.add(id.toString())); // Changed
+ } else {
+ Set properties = ((Result) item).getPropertyNames();
+ if (properties.size() == 1) {
+ Object val = ((Result) item).getProperty(properties.iterator().next());
+ if (val instanceof Identifiable) { // Changed
+ rids.add(((Identifiable) val).getIdentity().toString());
+ }
+ }
+ }
+ } else if (item instanceof Identifiable) { // Changed
+ rids.add(((Identifiable) item).getIdentity().toString());
+ }
+ }
+ return rids;
+ }
+
+ /**
+  * Extracts the optional metadata parameter (a Map, a JSON string, or anything with a JSON
+  * toString) into a Document; returns an empty Document when the parameter is absent or null.
+  */
+ private Document getMetadataDoc(Object[] params, int metadataParamIndex) { // Changed
+ if (params.length > metadataParamIndex && params[metadataParamIndex] != null) { // null-guard added: avoid NPE on toString()
+ if (params[metadataParamIndex] instanceof Map) {
+ return new Document().fromMap((Map) params[metadataParamIndex]);
+ } else if (params[metadataParamIndex] instanceof String) {
+ return new Document().fromJSON((String) params[metadataParamIndex]);
+ }
+ return new Document().fromJSON(params[metadataParamIndex].toString());
+ }
+ return new Document(); // Empty if not present
+ }
+
+ /** Delegates to the template's getMetadata() when a metadata expression argument is present. */
+ private Document getMetadataFromExpression(Expression[] args, CommandContext ctx, int metadataParamIndex) { // Changed
+ if (args.length > metadataParamIndex) {
+ return getMetadata(args[metadataParamIndex], ctx); // Calls method from ArcadeLuceneSearchFunctionTemplate
+ }
+ return new Document(); // Empty if not present
+ }
+
+
+ /**
+  * Builds a MoreLikeThis helper over the index's reader, configured from the optional
+  * metadata document; unset options fall back to Lucene's MoreLikeThis defaults, and the
+  * field list falls back to the index definition's fields.
+  */
+ private MoreLikeThis buildMoreLikeThis( // Changed
+ ArcadeLuceneFullTextIndex index, IndexSearcher searcher, Document metadata) { // FIXME
+
+ try {
+ MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader());
+
+ mlt.setAnalyzer(index.queryAnalyzer()); // FIXME
+
+ // FIXME: index.getDefinition() might be different
+ // FIX: restored the explicit type witnesses lost in conversion ("metadata.>getProperty")
+ mlt.setFieldNames(
+ Optional.ofNullable(metadata.<List<String>>getProperty("fieldNames"))
+ .orElse(index.getDefinition().getFields())
+ .toArray(new String[] {}));
+
+ mlt.setMaxQueryTerms(
+ Optional.ofNullable(metadata.<Integer>getProperty("maxQueryTerms"))
+ .orElse(MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
+ mlt.setMinTermFreq(
+ Optional.ofNullable(metadata.<Integer>getProperty("minTermFreq"))
+ .orElse(MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
+ mlt.setMaxDocFreq(
+ Optional.ofNullable(metadata.<Integer>getProperty("maxDocFreq"))
+ .orElse(MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
+ mlt.setMinDocFreq(
+ Optional.ofNullable(metadata.<Integer>getProperty("minDocFreq"))
+ .orElse(MoreLikeThis.DEFAULT_MIN_DOC_FREQ)); // Corrected from DEFAULT_MAX_DOC_FREQ
+ mlt.setBoost(
+ Optional.ofNullable(metadata.<Boolean>getProperty("boost"))
+ .orElse(MoreLikeThis.DEFAULT_BOOST));
+ mlt.setBoostFactor(
+ Optional.ofNullable(metadata.<Float>getProperty("boostFactor")).orElse(1f));
+ mlt.setMaxWordLen(
+ Optional.ofNullable(metadata.<Integer>getProperty("maxWordLen"))
+ .orElse(MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
+ mlt.setMinWordLen(
+ Optional.ofNullable(metadata.<Integer>getProperty("minWordLen"))
+ .orElse(MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
+ // setMaxNumTokensParsed was removed in later Lucene versions, check alternatives if needed.
+ mlt.setStopWords(
+ (Set<?>) // FIX: restored wildcard lost in conversion ("(Set>)")
+ Optional.ofNullable(metadata.get("stopWords")) // Simpler get for Set
+ .orElse(MoreLikeThis.DEFAULT_STOP_WORDS));
+
+
+ return mlt;
+ } catch (IOException e) {
+ throw ArcadeDBException.wrapException(new ArcadeDBException("Lucene IO Exception"), e); // Changed
+ }
+ }
+
+ /**
+  * For each seed element, generates a per-field MoreLikeThis query from the element's own
+  * property text and ORs it (SHOULD) into the boolean query builder. Fields without a value
+  * and fields yielding no MLT terms are skipped.
+  */
+ private void addLikeQueries( // Changed
+ List<Identifiable> others, MoreLikeThis mlt, BooleanQuery.Builder queryBuilder, Database database) { // FIX: restored element type lost in conversion
+ others.stream()
+ .filter(id -> id instanceof Element) // ensure it's an element to get properties
+ .map(id -> (Element) id)
+ .forEach(
+ element ->
+ Arrays.stream(mlt.getFieldNames()) // These are the fields to check for similarity
+ .forEach(
+ fieldName -> {
+ Object propertyValue = element.getProperty(fieldName);
+ if (propertyValue != null) {
+ try {
+ // MoreLikeThis.like() accepts Readers with the field's content
+ Query fieldQuery = mlt.like(fieldName, new StringReader(propertyValue.toString()));
+ if (!fieldQuery.toString().isEmpty()) // Check if anything was generated
+ queryBuilder.add(fieldQuery, Occur.SHOULD);
+ } catch (IOException e) {
+ logger.log(Level.SEVERE, "Error during Lucene MoreLikeThis query generation for field " + fieldName, e);
+ }
+ }
+ }));
+ }
+
+ /** Excludes the seed RIDs from the result set by adding a MUST_NOT term clause per RID. */
+ private void excludeOtherFromResults(List<String> ridsAsString, BooleanQuery.Builder queryBuilder) { // FIX: restored element type lost in conversion
+ ridsAsString.forEach( // idiom: Iterable.forEach instead of stream().forEach
+ rid ->
+ queryBuilder.add( // Use ArcadeLuceneIndexType.RID for consistency
+ new TermQuery(new Term(ArcadeLuceneIndexType.RID, QueryParser.escape(rid))), Occur.MUST_NOT));
+ }
+
+ // searchForIndex from OLuceneSearchFunctionTemplate should be used or overridden if different logic needed for target.
+ // The private helpers here were specific to how OLuceneSearchMoreLikeThisFunction determined its index.
+ // For now, relying on the overridden searchForIndex from ArcadeLuceneSearchFunctionTemplate.
+ // If this function *always* uses class name from context (iThis) for 'execute' and target for 'searchFromTarget',
+ // then the template's searchForIndex might need to be made non-abstract or this class needs its own.
+ // The original OLuceneSearchMoreLikeThisFunction had its own searchForIndex.
+
+ @Override
+ protected ArcadeLuceneFullTextIndex searchForIndex( // Changed
+ FromClause target, CommandContext ctx, Expression... args) { // FIXME
+ FromItem item = target.getItem(); // Changed
+ Identifier identifier = item.getIdentifier(); // Changed
+ String className = identifier.getStringValue();
+ return searchForIndex(ctx, className); // Calls private helper
+ }
+
+ /**
+  * Finds the single Lucene full-text index declared on the given class (including inherited
+  * ones); returns null when none exists and throws when the choice is ambiguous.
+  */
+ private ArcadeLuceneFullTextIndex searchForIndex(CommandContext ctx, String className) { // Changed
+ DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify
+ // database.activateOnCurrentThread(); // May not be needed
+
+ Schema schema = database.getSchema(); // Changed
+ DocumentType docType = schema.getType(className); // Changed
+
+ if (docType == null) {
+ return null;
+ }
+
+ List<ArcadeLuceneFullTextIndex> indices = // FIX: restored element type lost in conversion
+ docType.getIndexes(true).stream()
+ .filter(idx -> idx instanceof ArcadeLuceneFullTextIndex) // FIXME
+ .map(idx -> (ArcadeLuceneFullTextIndex) idx) // FIXME
+ .collect(Collectors.toList());
+
+ if (indices.size() > 1) {
+ // Consider if a more specific index selection is needed, e.g. one covering certain fields if provided in metadata
+ throw new IllegalArgumentException("Too many full-text Lucene indices on class: " + className + ". Disambiguate or configure.");
+ }
+ return indices.isEmpty() ? null : indices.get(0);
+ }
+
+
+ // estimate, canExecuteInline, allowsIndexedExecution, shouldExecuteAfterSearch
+ // are inherited from ArcadeLuceneSearchFunctionTemplate.
+ // Their default implementations in the template might need review for this specific function's behavior.
+ // E.g., allowsIndexedExecution for MLT depends on finding *an* index on the class to get an IndexReader.
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnClassFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnClassFunction.java
new file mode 100644
index 0000000000..a6a893a127
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnClassFunction.java
@@ -0,0 +1,242 @@
+package com.arcadedb.lucene.functions;
+
+// Static import from ArcadeLuceneFunctionsUtils if getOrCreateMemoryIndex is public there, or keep local.
+// For now, assuming it's accessible via ArcadeLuceneFunctionsUtils.
+// import static com.arcadedb.lucene.functions.ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex;
+
+import com.arcadedb.database.DatabaseContext; // Assuming CommandContext provides access to Database
+import com.arcadedb.database.DatabaseInternal;
+import com.arcadedb.database.Identifiable;
+import com.arcadedb.database.RID; // Changed
+import com.arcadedb.document.Document; // Changed
+import com.arcadedb.document.Element; // Changed
+import com.arcadedb.index.Index; // Changed
+import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring
+import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring
+import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring
+import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring
+import com.arcadedb.query.sql.executor.CommandContext; // Changed
+import com.arcadedb.query.sql.executor.Result; // Changed
+import com.arcadedb.query.sql.executor.ResultInternal; // Changed
+import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed
+import com.arcadedb.query.sql.parser.Expression; // Changed
+import com.arcadedb.query.sql.parser.FromClause; // Changed
+import com.arcadedb.query.sql.parser.FromItem; // Changed
+import com.arcadedb.schema.DocumentType; // Changed
+import com.arcadedb.schema.Schema; // Changed
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.memory.MemoryIndex;
+
+/** Created by frank on 15/01/2017. */
+public class ArcadeLuceneSearchOnClassFunction extends ArcadeLuceneSearchFunctionTemplate { // Changed base class
+
+ public static final String NAME = "search_class";
+
+ /**
+  * Registers the function under {@link #NAME}. The target class is implicit (from the
+  * current record in execute(), from the FROM clause in searchFromTarget()), so the explicit
+  * arguments are just (query [, metadata]) — min 1, max 2 parameters, mirroring the original
+  * OrientDB search_class function.
+  */
+ public ArcadeLuceneSearchOnClassFunction() {
+ // BUGFIX: the body previously contained TWO super(...) calls (illegal in Java) and passed
+ // an arity the template constructor does not accept; the template takes only the name.
+ // Parameter-count validation happens in execute(), as documented on the template.
+ super(NAME);
+ }
+
+ /** Returns the registered function name ("search_class"). */
+ @Override
+ public String getName() {
+ return NAME;
+ }
+
+ // canExecuteInline from template is likely fine if it relies on searchForIndex.
+
+ /**
+  * Filter-style execution: indexes the current record's indexed fields into a per-command
+  * MemoryIndex and returns true when it matches the Lucene query in params[0] (optionally
+  * tuned by a metadata document in params[1]) built against the class's full-text index.
+  */
+ @Override
+ public Object execute( // FIXME: Signature might change
+ Object iThis,
+ Identifiable iCurrentRecord, // Changed
+ Object iCurrentResult,
+ Object[] params,
+ CommandContext ctx) { // Changed
+
+ // Guard: the query argument is mandatory.
+ if (params == null || params.length == 0) return false;
+
+ Result result; // Changed
+ if (iThis instanceof Result) {
+ result = (Result) iThis;
+ } else if (iThis instanceof Identifiable) {
+ result = new ResultInternal((Identifiable) iThis); // Changed
+ } else {
+ // Cannot determine current record or class
+ return false;
+ }
+
+ if (!result.getElement().isPresent()) return false;
+ Element element = result.getElement().get(); // Changed
+ if (element.getType() == null) return false; // Changed, was getSchemaType().isPresent()
+
+ String className = element.getType().getName(); // Changed
+
+ ArcadeLuceneFullTextIndex index = searchForIndex(ctx, className); // FIXME
+
+ if (index == null) return false;
+
+ String query = (String) params[0];
+
+ MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx);
+
+ // FIXME: index.getDefinition() might be different.
+ List<Object> key = // FIX: restored element type lost in conversion
+ index.getDefinition().getFields().stream()
+ .map(s -> element.getProperty(s))
+ .collect(Collectors.toList());
+
+ // FIXME: index.buildDocument and index.indexAnalyzer might not exist or have different signatures
+ org.apache.lucene.document.Document luceneDoc = index.buildDocument(key, iCurrentRecord);
+ if (luceneDoc != null) {
+ for (IndexableField field : luceneDoc.getFields()) {
+ // Simplified, assuming stringValue is appropriate. Lucene's MemoryIndex.addField handles various IndexableField types.
+ memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer());
+ }
+ }
+
+ Document metadata = getMetadataDoc(params); // Changed
+ // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring
+ LuceneKeyAndMetadata keyAndMetadata =
+ new LuceneKeyAndMetadata(
+ new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata);
+
+ // FIXME: index.buildQuery might not exist or have different signature
+ return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f;
+ }
+
+ /** Extracts the optional metadata parameter (params[1]) into a Document; empty when absent or null. */
+ private Document getMetadataDoc(Object[] params) { // Changed
+ if (params.length == 2 && params[1] != null) { // null-guard added: avoid NPE on toString()
+ if (params[1] instanceof Map) {
+ return new Document().fromMap((Map) params[1]); // Changed
+ } else if (params[1] instanceof String) {
+ return new Document().fromJSON((String) params[1]);
+ }
+ return new Document().fromJSON(params[1].toString());
+ }
+ // FIXME: LuceneQueryBuilder.EMPTY_METADATA
+ return new Document(); //LuceneQueryBuilder.EMPTY_METADATA;
+ }
+
+ /** SQL help string; the class is implicit from the query target, so only query and metadata appear. */
+ @Override
+ public String getSyntax() {
+ // The placeholders were stripped by an earlier conversion; restored here.
+ return "search_class( <query> [, <metadata> ] )";
+ }
+
+ /** This function is evaluated per-record as a filter (see execute()), so results must be filtered. */
+ @Override
+ public boolean filterResult() {
+ return true;
+ }
+
+ /**
+  * Target-search execution: resolves the Lucene index from the FROM clause's class, runs the
+  * query in args[0] (optionally tuned by metadata in args[1]) against it, and returns the
+  * matching identities; empty set when no index exists.
+  * NOTE(review): args[0] is read without a length check, and the raw List/Stream
+  * declarations below lost their type parameters in the conversion (likely
+  * List<Identifiable> and Stream<RID>) — restore and verify.
+  */
+ // FIXME: This method's signature and logic are highly dependent on ArcadeDB's IndexableSQLFunction interface
+ @Override
+ public Iterable searchFromTarget( // Changed
+ FromClause target, // Changed
+ BinaryCompareOperator operator, // Changed
+ Object rightValue,
+ CommandContext ctx, // Changed
+ Expression... args) { // Changed
+
+ // In this context, the class comes from the target FromClause
+ ArcadeLuceneFullTextIndex index = searchForIndex(target, ctx, args); // FIXME
+
+ Expression expression = args[0]; // Query is the first argument to the function
+ String query = (String) expression.execute((Result) null, ctx); // Changed
+
+ if (index != null) {
+ Document meta = getMetadata(args, ctx, 1); // Metadata is the second argument (index 1) if present
+
+ List luceneResultSet; // Changed
+ try (Stream rids = // Changed
+ // FIXME: index.getInternal().getRids() needs to be replaced with ArcadeDB equivalent
+ // This whole block is highly dependent on ArcadeLuceneFullTextIndex and LuceneKeyAndMetadata refactoring
+ index
+ .getAssociatedIndex() // Assuming getAssociatedIndex() is the way
+ .getRids( // This method might not exist on ArcadeDB's Index interface
+ new LuceneKeyAndMetadata( // FIXME
+ new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { // FIXME
+ luceneResultSet = rids.collect(Collectors.toList());
+ }
+ return luceneResultSet;
+ }
+ return Collections.emptySet();
+ }
+
+ /** Returns metadata from the optional expression argument, or an empty Document when absent. */
+ private Document getMetadata(Expression[] args, CommandContext ctx, int metadataParamIndex) { // Changed types
+ if (args.length > metadataParamIndex) {
+ return getMetadata(args[metadataParamIndex], ctx); // Calls method from ArcadeLuceneSearchFunctionTemplate
+ }
+ // FIXME: LuceneQueryBuilder.EMPTY_METADATA
+ return new Document(); // LuceneQueryBuilder.EMPTY_METADATA;
+ }
+
+ @Override
+ protected ArcadeLuceneFullTextIndex searchForIndex( // Changed types
+ FromClause target, CommandContext ctx, Expression... args) { // FIXME
+ FromItem item = target.getItem(); // Changed
+
+ // This function determines the class from the target (FROM clause)
+ String className = item.getIdentifier().getStringValue(); // Changed
+
+ return searchForIndex(ctx, className); // Calls private helper
+ }
+
+ /**
+  * Finds the Lucene full-text index to use for the given class. Prefers an index declared
+  * directly on the class; when several inherited candidates exist, falls back to the indexes
+  * defined on this exact type (matching the original OrientDB behavior, which only looked at
+  * the class's own indexes) and fails if that is still ambiguous. Returns null when none.
+  */
+ private ArcadeLuceneFullTextIndex searchForIndex(CommandContext ctx, String className) { // Changed types
+ DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify
+ // database.activateOnCurrentThread(); // May not be needed
+
+ Schema schema = database.getSchema(); // Changed
+ DocumentType docType = schema.getType(className); // Changed
+
+ if (docType == null) {
+ return null;
+ }
+
+ List<ArcadeLuceneFullTextIndex> indices = // FIX: restored element type lost in conversion
+ docType.getIndexes(true).stream() // getIndexes(true) for all indexes including supertypes
+ .filter(idx -> idx instanceof ArcadeLuceneFullTextIndex) // FIXME
+ .map(idx -> (ArcadeLuceneFullTextIndex) idx) // FIXME
+ .collect(Collectors.toList());
+
+ if (indices.size() > 1) {
+ // Prefer an index defined on this exact class over inherited ones.
+ for (ArcadeLuceneFullTextIndex idx : indices) {
+ if (idx.getDefinition().getTypeName().equals(className)) { // Check if index is defined on this exact class
+ return idx;
+ }
+ }
+ // No direct match: restrict to indexes declared on this type only and fail if still ambiguous.
+ indices = docType.getIndexes(false).stream() // false = only indexes defined on this type
+ .filter(idx -> idx instanceof ArcadeLuceneFullTextIndex) // FIXME
+ .map(idx -> (ArcadeLuceneFullTextIndex) idx) // FIXME
+ .collect(Collectors.toList());
+ if (indices.size() > 1) {
+ throw new IllegalArgumentException("Too many full-text indices on given class: " + className + ". Specify the index name using search_index function.");
+ }
+ }
+
+ return indices.isEmpty() ? null : indices.get(0);
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnFieldsFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnFieldsFunction.java
new file mode 100644
index 0000000000..3c6123a32b
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnFieldsFunction.java
@@ -0,0 +1,291 @@
+package com.arcadedb.lucene.functions;
+
+// import static com.arcadedb.lucene.functions.ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex; // Assuming public access
+
+import com.arcadedb.database.DatabaseContext; // Assuming CommandContext provides access to Database
+import com.arcadedb.database.DatabaseInternal;
+import com.arcadedb.database.Identifiable;
+import com.arcadedb.database.RID; // Changed
+import com.arcadedb.document.Document; // Changed
+import com.arcadedb.document.Element; // Changed
+import com.arcadedb.index.Index; // Changed
+import com.arcadedb.index.IndexDefinition;
+import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring
+import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring
+import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring
+import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring
+import com.arcadedb.query.sql.executor.CommandContext; // Changed
+import com.arcadedb.query.sql.executor.Result; // Changed
+import com.arcadedb.query.sql.executor.ResultInternal; // Changed
+import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed
+import com.arcadedb.query.sql.parser.Expression; // Changed
+import com.arcadedb.query.sql.parser.FromClause; // Changed
+import com.arcadedb.query.sql.parser.FromItem; // Changed
+import com.arcadedb.query.sql.parser.Identifier;
+import com.arcadedb.schema.DocumentType; // Changed
+import com.arcadedb.schema.Schema; // Changed
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.memory.MemoryIndex;
+
+/** Created by frank on 15/01/2017. */
+public class ArcadeLuceneSearchOnFieldsFunction extends ArcadeLuceneSearchFunctionTemplate { // Changed base class
+
+ public static final String NAME = "search_fields";
+
+ public ArcadeLuceneSearchOnFieldsFunction() {
+ // Original params: fieldNames, query, [metadata]
+ // Class name is derived from context (iThis or target)
+ super(NAME, 2, 3);
+ }
+
+ @Override
+ public String getName() {
+ return NAME;
+ }
+
+ @Override
+ public Object execute( // FIXME: Signature might change
+ Object iThis,
+ Identifiable iCurrentRecord, // Changed
+ Object iCurrentResult,
+ Object[] params,
+ CommandContext ctx) { // Changed
+
+ Result result; // Changed
+ if (iThis instanceof Result) {
+ result = (Result) iThis;
+ } else if (iThis instanceof Identifiable) {
+ result = new ResultInternal((Identifiable) iThis); // Changed
+ } else {
+ return false; // Cannot determine current record
+ }
+
+ if (!result.getElement().isPresent()) return false;
+ Element element = result.getElement().get(); // Changed
+ if (element.getType() == null) return false; // Changed
+ String className = element.getType().getName(); // Changed
+
+ @SuppressWarnings("unchecked")
+ List<String> fieldNames = (List<String>) params[0];
+
+ // Note: searchForIndex here might not be strictly necessary if we always build an in-memory index from the current record's fields.
+ // However, the original code uses it to get definition and analyzer.
+ ArcadeLuceneFullTextIndex index = searchForIndex(className, ctx, fieldNames); // FIXME
+
+ if (index == null) {
+ // If no pre-existing index matches, we might still proceed if we can get a default analyzer
+ // or one from metadata, but building a Lucene document without an IndexDefinition is problematic.
+ // For now, returning false if no suitable index is found to provide an analyzer/definition.
+ // This part might need a different strategy for on-the-fly indexing without a backing index.
+ return false;
+ }
+
+ String query;
+ if (params.length < 2 || params[1] == null) { // query is params[1]
+ query = null;
+ } else {
+ query = params[1].toString();
+ }
+
+ MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx);
+
+ // FIXME: This part needs to build a Lucene document using ONLY the specified fieldNames
+ // from the 'element', and using the types from the schema for those fields.
+ // The 'key' concept from OLuceneSearchOnIndexFunction is not directly applicable here in the same way.
+ // index.buildDocument(key, iCurrentRecord) is not right for this context.
+ org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
+ DocumentType docType = element.getType();
+ if (docType != null) {
+ for(String fieldName : fieldNames) {
+ if (element.has(fieldName)) {
+ Object fieldValue = element.getProperty(fieldName);
+ com.arcadedb.schema.Property prop = docType.getProperty(fieldName);
+ com.arcadedb.schema.Type fieldType = prop != null ? prop.getType() : com.arcadedb.schema.Type.STRING; // Default to string if no prop
+ // FIXME: ArcadeLuceneIndexType.createFields needs correct store/sort parameters.
+ // Assuming Field.Store.YES and no sorting for memory index fields for now.
+ List<org.apache.lucene.document.Field> fields = ArcadeLuceneIndexType.createFields(fieldName, fieldValue, org.apache.lucene.document.Field.Store.YES, false, fieldType);
+ for(org.apache.lucene.document.Field f : fields) {
+ luceneDoc.add(f);
+ }
+ }
+ }
+ }
+
+ if (luceneDoc.getFields().isEmpty()) return false; // No fields were added
+
+ // Add all fields from the created luceneDoc to memoryIndex
+ for (IndexableField field : luceneDoc.getFields()) {
+ // Simplified, assuming stringValue is appropriate for all, which is not robust.
+ // MemoryIndex.addField handles various IndexableField types, so this might be okay if createFields returns typed fields.
+ memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer()); // FIXME: index.indexAnalyzer() dependency
+ }
+
+
+ Document metadata = getMetadataDoc(params, 2); // metadata is params[2]
+ // FIXME: LuceneCompositeKey and LuceneKeyAndMetadata need refactoring
+ LuceneKeyAndMetadata keyAndMetadata =
+ new LuceneKeyAndMetadata(
+ new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata);
+
+ // FIXME: index.buildQuery might not exist or have different signature
+ return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f;
+ }
+
+ private Document getMetadataDoc(Object[] params, int metadataParamIndex) { // Changed
+ if (params.length > metadataParamIndex) {
+ if (params[metadataParamIndex] instanceof Map) {
+ return new Document().fromMap((Map<String, Object>) params[metadataParamIndex]); // Changed
+ } else if (params[metadataParamIndex] instanceof String) {
+ return new Document().fromJSON((String) params[metadataParamIndex]);
+ }
+ return new Document().fromJSON(params[metadataParamIndex].toString());
+ }
+ // FIXME: LuceneQueryBuilder.EMPTY_METADATA
+ return new Document(); // LuceneQueryBuilder.EMPTY_METADATA;
+ }
+
+ @Override
+ public String getSyntax() {
+ return "search_fields( , , [ ] )"; // Class is implicit
+ }
+
+ // searchFromTarget and related metadata method from template might not be directly applicable
+ // as this function operates on specified fields of current record using MemoryIndex.
+ // If it were to support indexed execution, it would need to find a covering persistent index.
+ @Override
+ public Iterable<Identifiable> searchFromTarget(
+ FromClause target,
+ BinaryCompareOperator operator,
+ Object rightValue,
+ CommandContext ctx,
+ Expression... args) {
+
+ // This function, as implemented in execute(), builds an in-memory index for the current record.
+ // For it to be "indexable" in a broader query, it would need to find a persistent Lucene index
+ // that covers the requested fields for the target class.
+ ArcadeLuceneFullTextIndex index = searchForIndex(target, ctx, args); // FIXME
+
+ // First arg (args[0]) is fieldNamesList, second (args[1]) is query
+ if (args.length < 2) throw new IllegalArgumentException("search_fields requires at least fieldNames and query parameters.");
+
+ @SuppressWarnings("unchecked")
+ // List fieldNames = (List) args[0].execute((Result) null, ctx); // This is how searchForIndex gets it.
+ // We need the query string here.
+ Expression queryExpression = args[1];
+ String query = (String) queryExpression.execute((Result) null, ctx);
+
+
+ if (index != null && query != null) {
+ Document meta = getMetadata(args, ctx, 2); // Metadata is third arg (index 2)
+ Set<Identifiable> luceneResultSet; // Changed
+ try (Stream<RID> rids = // Changed
+ // FIXME: index.getInternal().getRids() needs to be replaced
+ index
+ .getAssociatedIndex()
+ .getRids( // This method might not exist
+ new LuceneKeyAndMetadata( // FIXME
+ new LuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { // FIXME
+ luceneResultSet = rids.collect(Collectors.toSet());
+ }
+ return luceneResultSet;
+ }
+ // Original threw RuntimeException, returning empty set might be safer for unhandled cases.
+ return Collections.emptySet();
+ }
+
+ private Document getMetadata(Expression[] args, CommandContext ctx, int metadataParamIndex) { // Changed
+ if (args.length > metadataParamIndex) {
+ return getMetadata(args[metadataParamIndex], ctx); // Calls method from ArcadeLuceneSearchFunctionTemplate
+ }
+ // FIXME: LuceneQueryBuilder.EMPTY_METADATA
+ return new Document(); // LuceneQueryBuilder.EMPTY_METADATA;
+ }
+
+
+ @Override
+ protected ArcadeLuceneFullTextIndex searchForIndex( // Changed types
+ FromClause target, CommandContext ctx, Expression... args) { // FIXME
+ // First argument to the function (args[0]) is the list of field names
+ if (args == null || args.length == 0) {
+ throw new IllegalArgumentException("Field names list parameter is missing.");
+ }
+ Object fieldNamesParam = args[0].execute((Result) null, ctx);
+ if (!(fieldNamesParam instanceof List)) {
+ throw new IllegalArgumentException("Field names parameter must be a list.");
+ }
+ @SuppressWarnings("unchecked")
+ List<String> fieldNames = (List<String>) fieldNamesParam;
+
+ FromItem item = target.getItem(); // Changed
+ Identifier identifier = item.getIdentifier(); // Changed
+ String className = identifier.getStringValue();
+
+ return searchForIndex(className, ctx, fieldNames); // Calls private helper
+ }
+
+ private ArcadeLuceneFullTextIndex searchForIndex( // Changed types
+ String className, CommandContext ctx, List<String> fieldNames) {
+ DatabaseInternal database = (DatabaseInternal) ((DatabaseContext) ctx).getDatabase(); // FIXME: Verify
+ // database.activateOnCurrentThread(); // May not be needed
+
+ Schema schema = database.getSchema(); // Changed
+ DocumentType docType = schema.getType(className); // Changed
+
+ if (docType == null) {
+ return null;
+ }
+ List<ArcadeLuceneFullTextIndex> indices = // Changed
+ docType.getIndexes(true).stream() // getIndexes(true) for all indexes including supertypes
+ .filter(idx -> idx instanceof ArcadeLuceneFullTextIndex) // FIXME
+ .map(idx -> (ArcadeLuceneFullTextIndex) idx) // FIXME
+ .filter(idx -> intersect(idx.getDefinition().getFields(), fieldNames))
+ .collect(Collectors.toList());
+
+ if (indices.size() > 1) {
+ // If multiple indexes match (e.g. one on [f1], another on [f2], and we search [f1,f2])
+ // This logic might need refinement. For now, it implies any single index covering *at least one* field.
+ // The original code would throw "too many indices matching given field name" only if multiple INDIVIDUAL indexes
+ // were found that EACH satisfy the intersect condition.
+ // A more robust approach might be to find the "best" covering index or combine results if that makes sense.
+ // For now, sticking to "if any index covers any of the fields, and there's only one such index"
+ // The original code finds an index if ANY of its fields are in fieldNames.
+ // If multiple such indexes exist, it's an error.
+
+ // Let's find the one with the most matching fields? Or just the first one?
+ // The original code would throw if 'indices.size() > 1'.
+ throw new IllegalArgumentException(
+ "Too many Lucene indices on class '" + className + "' match the specified fields: " + String.join(",", fieldNames)
+ + ". Specify a single target index using search_index().");
+ }
+
+ return indices.size() == 0 ? null : indices.get(0);
+ }
+
+ // intersection and intersect methods are helpers, can remain as they are (generic)
+ public <T> List<T> intersection(List<T> list1, List<T> list2) {
+ List<T> list = new ArrayList<>();
+ for (T t : list1) {
+ if (list2.contains(t)) {
+ list.add(t);
+ }
+ }
+ return list;
+ }
+
+ public <T> boolean intersect(List<T> list1, List<T> list2) {
+ for (T t : list1) {
+ if (list2.contains(t)) {
+ return true;
+ }
+ }
+ return false;
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java
new file mode 100644
index 0000000000..80aad5ac9a
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/functions/ArcadeLuceneSearchOnIndexFunction.java
@@ -0,0 +1,139 @@
+package com.arcadedb.lucene.functions;
+
+import com.arcadedb.database.DatabaseContext; // Assuming CommandContext provides access to Database
+import com.arcadedb.database.DatabaseInternal;
+import com.arcadedb.database.Identifiable;
+import com.arcadedb.database.RID;
+import com.arcadedb.document.Document; // Changed
+import com.arcadedb.index.Index; // Changed
+import com.arcadedb.lucene.builder.LuceneQueryBuilder; // FIXME: Needs refactoring
+import com.arcadedb.lucene.collections.LuceneCompositeKey; // FIXME: Needs refactoring
+import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; // FIXME: Needs refactoring
+import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring
+import com.arcadedb.query.sql.executor.CommandContext; // Changed
+import com.arcadedb.query.sql.executor.Result; // Changed
+import com.arcadedb.query.sql.executor.ResultInternal; // Changed
+import com.arcadedb.query.sql.parser.BinaryCompareOperator; // Changed
+import com.arcadedb.query.sql.parser.Expression; // Changed
+import com.arcadedb.query.sql.parser.FromClause; // Changed
+import com.arcadedb.query.sql.parser.FromItem; // Changed
+import com.arcadedb.query.sql.parser.Identifier; // Changed
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.memory.MemoryIndex;
+
+/** Created by frank on 15/01/2017. */
+public class ArcadeLuceneSearchOnIndexFunction extends ArcadeLuceneSearchFunctionTemplate {
+
+ public static final String NAME = "search_index";
+
+ public ArcadeLuceneSearchOnIndexFunction() {
+ super(NAME);
+ }
+
+ @Override
+ public Object execute(
+ Object self, // Is the target of the function, could be null, or an identifier (index name) or a collection
+ Identifiable currentRecord,
+ Object currentResult,
+ Object[] params,
+ CommandContext ctx) {
+
+ validateParameterCount(params, 2, 3);
+
+ String indexName = params[0].toString();
+ String query = params[1].toString();
+ Document metadata = params.length == 3 ? getMetadata((Expression) params[2], ctx) : new Document(ctx.getDatabase());
+
+ ArcadeLuceneFullTextIndex index = ArcadeLuceneFunctionsUtils.getLuceneFullTextIndex(ctx, indexName);
+
+ if (index == null) {
+ // If used in a WHERE clause for a specific record, returning false means "filter out"
+ // If used as a standalone function returning a set, return empty set.
+ // The `filterResult` method in template handles boolean conversion.
+ return currentRecord != null ? false : Collections.emptySet();
+ }
+
+ // If currentRecord is not null, this function is likely used in a WHERE clause context.
+ // It needs to determine if the currentRecord matches the Lucene query *within its own fields*.
+ if (currentRecord != null && currentRecord.getIdentity() != null) {
+ MemoryIndex memoryIndex = ArcadeLuceneFunctionsUtils.getOrCreateMemoryIndex(ctx);
+
+ // We need the Lucene Document for the currentRecord
+ // The 'key' for buildDocument in this context is not a separate key, but derived from the record itself if auto index.
+ // Or, if the index has specific fields, those are used.
+ // Since we are in context of a specific record, we use its fields.
+ org.apache.lucene.document.Document luceneDoc = index.buildDocument(null, currentRecord); // Pass null for key if derived from record
+
+ if (luceneDoc != null) {
+ for (IndexableField field : luceneDoc.getFields()) {
+ // Simplified: use stringValue. Actual field data might be needed for MemoryIndex if not string.
+ // MemoryIndex.addField can take Analyzer, which it gets from the IndexableFieldType.
+ // If the field is not indexed with an analyzer (e.g. StringField), it's fine.
+ // If it is (e.g. TextField), index.indexAnalyzer() should be used.
+ // For simplicity, assuming MemoryIndex handles it or we use the general indexAnalyzer.
+ memoryIndex.addField(field.name(), field.stringValue(), index.indexAnalyzer());
+ }
+ } else {
+ return false; // Cannot build Lucene doc for current record
+ }
+
+ // The query here is the main Lucene query from params[1]
+ // Metadata for this specific sub-query within MemoryIndex.
+ LuceneKeyAndMetadata keyAndMeta = new LuceneKeyAndMetadata(query, metadata, ctx);
+ org.apache.lucene.search.Query luceneQuery = index.buildQuery(keyAndMeta); // Build query using index's config
+
+ return memoryIndex.search(luceneQuery) > 0.0f;
+ } else {
+ // If currentRecord is null, this function is likely used to return a set of results from the specified index.
+ // This is the "searchFromTarget" equivalent.
+ LuceneKeyAndMetadata keyAndMeta = new LuceneKeyAndMetadata(query, metadata, ctx);
+ // The `index.get(keyAndMeta)` should return a LuceneResultSet or similar.
+ // The `ArcadeLuceneFullTextIndex.get(Object[])` was changed to return IndexCursor.
+ // We might need a direct way to execute a query via engine and get results.
+ // For now, assuming `index.get(keyAndMeta)` returns a Set or IndexCursor via engine.
+
+ // The `get` method on `ArcadeLuceneFullTextIndex` takes `Object[] keys`.
+ // We need to wrap `keyAndMeta` or pass its components.
+ // Let's assume the engine's getInTx is what we want.
+ if (index.getEngine() instanceof LuceneIndexEngine) {
+ LuceneIndexEngine luceneEngine = (LuceneIndexEngine) index.getEngine();
+ // LuceneKeyAndMetadata is already the 'key' for getInTx
+ return luceneEngine.getInTx(keyAndMeta, null); // Passing null for LuceneTxChanges for non-transactional view
+ }
+ return Collections.emptySet();
+ }
+ }
+
+ private Document getMetadata(Object[] params, CommandContext ctx) { // Kept for direct param access if needed
+ if (params.length == 3 && params[2] != null) {
+ if (params[2] instanceof Map) {
+ return new Document(ctx.getDatabase()).fromMap((Map<String, Object>) params[2]);
+ } else if (params[2] instanceof String) {
+ return new Document(ctx.getDatabase()).fromJSON((String) params[2]);
+ } else if (params[2] instanceof Expression) { // If metadata is an expression
+ return getMetadata((Expression) params[2], ctx);
+ } else if (params[2] instanceof Document) {
+ return (Document) params[2];
+ }
+ try {
+ return new Document(ctx.getDatabase()).fromJSON(params[2].toString());
+ } catch (Exception e) { /* ignore, return empty */ }
+ }
+ return new Document(ctx.getDatabase()); // LuceneQueryBuilder.EMPTY_METADATA;
+ }
+
+
+ @Override
+ public String getSyntax() {
+ return getName() + "( , [, ] )";
+ }
+
+ // Removed searchFromTarget, estimate, canExecuteInline, allowsIndexedExecution, shouldExecuteAfterSearch
+ // searchForIndex is not needed here as index name is a direct parameter.
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java
new file mode 100644
index 0000000000..7c9f759cbf
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java
@@ -0,0 +1,571 @@
+package com.arcadedb.lucene.index;
+
+import com.arcadedb.database.DatabaseInternal;
+import com.arcadedb.database.Identifiable;
+import com.arcadedb.database.RID;
+import com.arcadedb.database.TransactionContext;
+import com.arcadedb.document.Document;
+import com.arcadedb.engine.PaginatedFile; // For constructor, might not be directly used by Lucene
+import com.arcadedb.engine.Storage;
+import com.arcadedb.index.Index;
+import com.arcadedb.index.IndexCursor;
+import com.arcadedb.index.IndexException;
+import com.arcadedb.index.IndexInternal;
+import com.arcadedb.index.RangeIndexCursor;
+import com.arcadedb.index.TypeIndex;
+import com.arcadedb.index.engine.IndexEngine;
+import com.arcadedb.lucene.engine.ArcadeLuceneFullTextIndexEngine; // Changed from OLuceneFullTextIndexEngine
+import com.arcadedb.lucene.engine.LuceneIndexEngine; // The refactored interface
+import com.arcadedb.lucene.query.LuceneKeyAndMetadata; // FIXME: Needs refactoring
+import com.arcadedb.query.sql.executor.CommandContext;
+import com.arcadedb.schema.IndexBuilder;
+import com.arcadedb.schema.IndexDefinition;
+import com.arcadedb.schema.Schema;
+import com.arcadedb.schema.Type;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+
+public class ArcadeLuceneFullTextIndex implements IndexInternal {
+
+ private final DatabaseInternal database;
+ private final String name;
+ private IndexDefinition definition;
+ private String filePath; // Path where Lucene index files are stored
+ private int fileId; // ArcadeDB fileId, might not be directly used by Lucene files themselves
+ private PaginatedFile metadataFile; // For ArcadeDB metadata about this index
+
+ private LuceneIndexEngine engine; // Changed type to interface
+ private STATUS status = STATUS.OFFLINE;
+
+ // Moved constants to ArcadeLuceneIndexFactoryHandler
+ // public static final String LUCENE_ALGORITHM = "LUCENE";
+
+
+ // Constructor matching AbstractIndex an IndexFactory might call
+ public ArcadeLuceneFullTextIndex(DatabaseInternal db, String name, String typeName, IndexDefinition definition,
+ String filePath, PaginatedFile metadataFile, PaginatedFile[] dataFiles,
+ PaginatedFile[] treeFiles, int fileId, int pageSize,
+ TransactionContext.AtomicOperation atomicOperation) {
+ this.database = db;
+ this.name = name;
+ this.definition = definition;
+ this.filePath = filePath; // Should be directory for Lucene
+ this.metadataFile = metadataFile; // ArcadeDB own metadata for this index
+ this.fileId = fileId;
+ // pageSize, dataFiles, treeFiles might be less relevant for Lucene which manages its own files.
+
+ // Engine initialization is deferred to lazyInit or build/load
+ }
+
+ private void lazyInit() {
+ if (engine == null) {
+ // Determine if this is part of an active transaction and if an engine instance already exists for this TX.
+ if (database.isTransactionActive() && database.getTransaction().getInvolvedIndexEngine(getName()) instanceof LuceneIndexEngine) {
+ this.engine = (LuceneIndexEngine) database.getTransaction().getInvolvedIndexEngine(getName());
+ if (this.engine == null) { // Should not happen if getInvolvedIndexEngine returned one
+ throw new IndexException("Cannot find transactional Lucene engine for index " + getName() + " though it was marked as involved.");
+ }
+ } else {
+ String algorithm = getAlgorithm(); // Uses the overridden getAlgorithm()
+ com.arcadedb.document.Document engineMetadataDoc = new com.arcadedb.document.Document(database);
+ if (this.definition != null && this.definition.getOptions() != null) {
+ engineMetadataDoc.fromMap(this.definition.getOptions());
+ }
+
+ if (com.arcadedb.lucene.ArcadeLuceneIndexFactoryHandler.LUCENE_CROSS_CLASS_ALGORITHM.equalsIgnoreCase(algorithm)) {
+ ArcadeLuceneCrossClassIndexEngine crossEngine = new ArcadeLuceneCrossClassIndexEngine(this.fileId, database.getStorage(), this.name);
+
+ // Construct IndexMetadata Pojo for crossEngine.init()
+ // OLuceneCrossClassIndexEngine.init takes IndexMetadata.
+ // IndexMetadata needs: name, typeName (class this index is on, can be null for cross-class marker), List propertyNames, Type[] keyTypes, String algorithm, boolean isAutomatic, Map options
+ IndexMetadata im = new IndexMetadata(
+ this.name,
+ this.definition.getPropertyNames(),
+ this.definition.getKeyTypes(),
+ this.definition.getOptions()
+ );
+ im.setTypeName(this.definition.getTypeName()); // May be null if truly cross-class and not bound to a type
+ im.setAlgorithm(algorithm);
+ im.setIsAutomatic(this.isAutomatic());
+ im.setUnique(this.isUnique());
+ im.setNullStrategy(this.getNullStrategy());
+ // Add other relevant properties from 'this.definition' to 'im' if needed by crossEngine.init()
+
+ crossEngine.init(im);
+ this.engine = crossEngine;
+ } else { // Default to LUCENE_FULL_TEXT_ALGORITHM
+ ArcadeLuceneFullTextIndexEngine ftEngine = new ArcadeLuceneFullTextIndexEngine(database.getStorage(), name);
+ // OLuceneIndexEngineAbstract.init expects: String indexName, String indexType(algorithm), IndexDefinition, boolean isAutomatic, Document metadata
+ ftEngine.init(getName(), algorithm, definition, isAutomatic(), engineMetadataDoc);
+ this.engine = ftEngine;
+ }
+ }
+ this.status = STATUS.ONLINE;
+ }
+ }
+
+
+ @Override
+ public String getAssociatedFileName() {
+ return filePath;
+ }
+
+ @Override
+ public void build(IndexBuilder builder) {
+ this.definition = builder.getIndexDefinition();
+ // filePath might be set by IndexBuilder or derived, ensure it's correct for Lucene (a directory path)
+ this.filePath = builder.getFilePath() != null ? builder.getFilePath() : database.getDatabasePath() + "/" + builder.getFileName();
+ this.fileId = builder.getFileId(); // Get fileId from builder
+
+ lazyInit(); // Initialize engine
+ try {
+ Document engineMetadata = new Document(database);
+ if (this.definition.getOptions() != null) {
+ engineMetadata.fromMap(this.definition.getOptions());
+ }
+
+ // Parameters for engine.create:
+ // valueSerializer, keySerializer: null for Lucene as it handles its own types.
+ // keyTypes: from definition
+ // nullPointerSupport: from definition
+ // propertyNames.size(): as keySize (number of indexed fields)
+ // clustersToIndex: from definition
+ // options: from definition
+ engine.create(
+ null, // valueSerializer
+ this.isAutomatic(),
+ this.getKeyTypes(),
+ this.getDefinition().isNullStrategyNode(), // nullPointerSupport
+ null, // keySerializer
+ this.getDefinition().getPropertyNames() != null ? this.getDefinition().getPropertyNames().size() : 0, // keySize
+ this.getDefinition().getClustersToIndex(), // clustersToIndex (might be null)
+ this.getDefinition().getOptions(), // engineProperties
+ engineMetadata // metadata Document for engine
+ );
+ this.status = STATUS.ONLINE;
+ } catch (Exception e) {
+ throw new IndexException("Error during Lucene index build for index '" + getName() + "'", e);
+ }
+ }
+
+ @Override
+ public void setMetadata(IndexDefinition definition, String filePath, int pageSize, byte nullStrategy) {
+ this.definition = definition;
+ this.filePath = filePath;
+ // pageSize and nullStrategy are part of definition or handled by Lucene engine differently.
+ // This method is usually for loading existing index metadata.
+ // We might need to re-init or load the engine here.
+ if (engine != null) {
+ engine.close(); // Close existing engine if any
+ }
+ engine = null; // Reset engine
+ lazyInit(); // Re-initialize with new metadata
+ // engine.load(...) might be relevant here if this implies loading an existing index.
+ }
+
+ @Override
+ public STATUS getStatus() {
+ return status;
+ }
+
+ @Override
+ public void setStatus(STATUS status) {
+ this.status = status;
+ // Potentially pass this to the engine if it has its own status
+ }
+
+ @Override
+ public void close() {
+ if (engine != null) {
+ engine.close();
+ engine = null;
+ }
+ status = STATUS.OFFLINE;
+ }
+
+ @Override
+ public void drop() {
+ if (engine != null) {
+ engine.delete(); // Engine handles file deletion
+ engine = null;
+ }
+ // Additional cleanup of ArcadeDB metadata files if any (e.g., this.metadataFile)
+ // This is usually handled by Schema.dropIndex calling this.
+ status = STATUS.OFFLINE;
+ }
+
+ @Override
+ public int getFileId() {
+ return fileId; // Or a specific ID for Lucene structure if different
+ }
+
+ @Override
+ public <T> T getComponent(String name, Class<T> type) {
+ if (type.isAssignableFrom(engine.getClass())) {
+ return type.cast(engine);
+ }
+ return null;
+ }
+
+ @Override
+ public Type[] getKeyTypes() {
+ return definition != null ? definition.getKeyTypes() : null;
+ }
+
+ @Override
+ public byte[] getBinaryKeyTypes() {
+ // Lucene doesn't use this in the same way as binary comparable keys.
+ return null;
+ }
+
+ @Override
+ public void setTypeIndex(TypeIndex typeIndex) {
+ // Associated with schema type's index list. Store if needed.
+ }
+
+ @Override
+ public TypeIndex getTypeIndex() {
+ return null; // Retrieve if stored
+ }
+
+ @Override
+ public void scheduleCompaction() {
+ // Lucene has IndexWriter.forceMerge or IndexWriter.maybeMerge.
+ // This could be a trigger for that.
+ lazyInit();
+ // engine.forceMerge(); // FIXME: Add such a method to engine interface if needed
+ }
+
+ @Override
+ public String getMostRecentFileName() {
+ return null; // Not directly applicable
+ }
+
+ @Override
+ public Map<String, Object> toJSON() {
+ // Serialize index configuration/stats to JSON.
+ // Include name, type, definition, engine stats.
+ Map<String, Object> json = new java.util.HashMap<>();
+ json.put("name", getName());
+ json.put("typeName", getTypeName());
+ json.put("algorithm", getAlgorithm());
+ if (definition != null) {
+ json.put("definition", definition.getOptions()); // Or more detailed definition
+ }
+ if (engine != null) {
+ // FIXME: engine should provide some stats or config
+ // json.put("engineStats", engine.getStats());
+ }
+ return json;
+ }
+
+ @Override
+ public Index getAssociatedIndex() {
+ return null;
+ }
+
+ // --- Index Methods ---
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ @Override
+ public String getTypeName() { // This should be the Type's name this index is on, not algorithm
+ return definition != null ? definition.getTypeName() : null;
+ }
+
+ @Override
+ public String getAlgorithm() {
+ // Return the actual algorithm from the definition if available
+ return (definition != null && definition.getAlgorithm() != null) ?
+ definition.getAlgorithm() :
+ com.arcadedb.lucene.ArcadeLuceneIndexFactoryHandler.LUCENE_FULL_TEXT_ALGORITHM;
+ }
+
+
+ @Override
+ public IndexDefinition getDefinition() {
+ return definition;
+ }
+
+ @Override
+ public boolean isUnique() {
+ return definition != null && definition.isUnique(); // Lucene full-text usually not unique
+ }
+
+ @Override
+ public List<String> getPropertyNames() {
+ return definition != null ? definition.getPropertyNames() : Collections.emptyList();
+ }
+
+ @Override
+ public long countEntries() {
+ lazyInit();
+ // engine.size(null) or engine.sizeInTx(null)
+ // The ValuesTransformer is for OrientDB's SBTree based indexes. For Lucene, it's just a doc count.
+ return engine.size(null);
+ }
+
+ public long getRecordCount() { // From OLuceneFullTextIndex
+ return countEntries();
+ }
+
+
+ @Override
+ public IndexCursor get(Object[] keys) {
+ lazyInit();
+ if (keys == null || keys.length == 0 || keys[0] == null) {
+ throw new IllegalArgumentException("Lucene query key cannot be null.");
+ }
+ // Assuming keys[0] is the query string or a LuceneKeyAndMetadata object
+ // FIXME: This needs to adapt to how LuceneKeyAndMetadata is structured and if options are passed
+ Object queryKey = keys[0];
+ Document metadata = null;
+ if (keys.length > 1 && keys[1] instanceof Map) {
+ metadata = new Document(database, (Map<String, Object>) keys[1]);
+ } else if (keys.length > 1 && keys[1] instanceof Document) {
+ metadata = (Document) keys[1];
+ }
+
+ // The engine's get method: Set getInTx(Object key, LuceneTxChanges changes)
+ // This needs to be wrapped in an IndexCursor.
+ // The key for engine.getInTx is likely LuceneKeyAndMetadata
+ // FIXME: Construct LuceneKeyAndMetadata correctly
+ LuceneKeyAndMetadata keyAndMeta = new LuceneKeyAndMetadata(queryKey, metadata, null); // Assuming CommandContext can be null here
+
+ Set results = engine.getInTx(keyAndMeta, null); // Passing null for changes if not in tx or tx changes not used
+ return new LuceneIndexCursor(results.iterator()); // FIXME: LuceneIndexCursor needs to be implemented
+ }
+
+ @Override
+ public IndexCursor get(Object[] keys, int limit) {
+ // FIXME: Implement limit. Lucene TopDocs can handle this.
+ // This will require engine.getInTx or a similar method to accept a limit.
+ lazyInit();
+ if (keys == null || keys.length == 0 || keys[0] == null) {
+ throw new IllegalArgumentException("Lucene query key cannot be null.");
+ }
+ Object queryKey = keys[0];
+ Document metadata = new Document(database); // Default empty metadata
+ if (keys.length > 1 && keys[1] instanceof Map) {
+ metadata.fromMap((Map) keys[1]);
+ } else if (keys.length > 1 && keys[1] instanceof Document) {
+ metadata = (Document) keys[1];
+ }
+ if (limit > 0) {
+ // NOTE(review): when keys[1] was a caller-supplied Document, this writes "limit" into the
+ // caller's object — confirm this side effect is acceptable or copy the document first.
+ metadata.set("limit", limit); // Pass limit via metadata
+ }
+ LuceneKeyAndMetadata keyAndMeta = new LuceneKeyAndMetadata(queryKey, metadata, null);
+ Set results = engine.getInTx(keyAndMeta, null);
+ return new LuceneIndexCursor(results.iterator()); // FIXME: LuceneIndexCursor
+ }
+
+
+ // Convenience wrapper: runs the query and exposes matching RIDs as a stream.
+ @Override
+ public Stream getRidsStream(Object[] keys) {
+ IndexCursor cursor = get(keys);
+ return cursor.ridsStream();
+ }
+
+ public Set get(Object key) { // From OLuceneFullTextIndex, matching engine's getInTx
+ lazyInit();
+ // This 'key' is likely LuceneKeyAndMetadata or the raw query string.
+ return engine.getInTx(key, null); // Assuming null for LuceneTxChanges if not in a tx context for this call
+ }
+
+ // Maps the engine's Identifiable results down to bare RIDs.
+ public Set getRids(Object key) { // New method, if useful
+ lazyInit();
+ // This 'key' is likely LuceneKeyAndMetadata or the raw query string.
+ // engine.getInTx returns Set
+ return engine.getInTx(key, null).stream().map(Identifiable::getIdentity).collect(Collectors.toSet());
+ }
+
+
+ // Ordered/range traversal is unsupported: Lucene ranks results by relevance, not by key order.
+ @Override
+ public RangeIndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded) {
+ throw new UnsupportedOperationException("Range queries are not directly supported by Lucene full-text index in this manner. Use Lucene query syntax.");
+ }
+
+ @Override
+ public RangeIndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded, int limit) {
+ throw new UnsupportedOperationException("Range queries are not directly supported by Lucene full-text index in this manner. Use Lucene query syntax.");
+ }
+
+ @Override
+ public IndexCursor iterator(boolean ascendingOrder) {
+ throw new UnsupportedOperationException("Full iteration is not typically efficient for Lucene. Use a match_all query if needed.");
+ }
+
+ @Override
+ public IndexCursor iterator(boolean ascendingOrder, Object[] fromKey, boolean fromKeyInclusive) {
+ throw new UnsupportedOperationException("Full iteration is not typically efficient for Lucene.");
+ }
+
+ @Override
+ public IndexCursor descendingIterator() {
+ throw new UnsupportedOperationException("Full iteration is not typically efficient for Lucene.");
+ }
+
+ @Override
+ public IndexCursor descendingIterator(Object[] fromKey, boolean fromKeyInclusive) {
+ throw new UnsupportedOperationException("Full iteration is not typically efficient for Lucene.");
+ }
+
+ @Override
+ public boolean supportsOrderedIterations() {
+ return false; // Lucene orders by relevance score by default, not by key.
+ }
+
+ @Override
+ public boolean isAutomatic() {
+ // Automatic indexes are maintained by the database on record changes.
+ return definition != null && definition.isAutomatic();
+ }
+
+ @Override
+ public void setRebuilding(boolean rebuilding) {
+ // Could set a flag or inform the engine; currently a no-op.
+ }
+
+ @Override
+ public IndexEngine getEngine() {
+ lazyInit();
+ return engine;
+ }
+
+ @Override
+ public boolean isValid() {
+ // An index is valid when its engine can be initialized. Initialization failures mean the
+ // underlying Lucene storage is unusable, so report invalid instead of propagating the error.
+ try {
+ lazyInit();
+ } catch (final Exception e) {
+ return false;
+ }
+ // FIXME: engine needs an isValid() or similar deeper check (e.g. index readability)
+ return engine != null;
+ }
+
+ @Override
+ public Map getStats() {
+ // FIXME: engine should provide stats (num docs, etc.)
+ return Collections.emptyMap();
+ }
+
+ @Override
+ public void setStats(Map stats) {
+ // Stats are computed by the engine; external updates are ignored.
+ }
+
+ @Override
+ public void compact() throws IOException {
+ lazyInit();
+ // engine.forceMerge(); // FIXME: Add to engine if needed
+ }
+
+ @Override
+ public boolean isCompacting() {
+ return false; // FIXME: engine should report this
+ }
+
+ @Override
+ public List getFileIds() {
+ return Collections.singletonList(fileId); // Main metadata file ID
+ }
+
+ @Override
+ public int getPageSize() {
+ return -1; // Not page-based like ArcadeDB native
+ }
+
+ @Override
+ public void setPageSize(int pageSize) {
+ // No-op for Lucene
+ }
+
+ @Override
+ public byte getNullStrategy() {
+ return definition != null ? definition.getNullStrategy().getValue() : Index.NULL_STRATEGY.ERROR.getValue();
+ }
+
+ @Override
+ public void setNullStrategy(byte nullStrategy) {
+ // Usually immutable
+ }
+
+ /**
+ * Indexes the given key under every provided RID. For Lucene this means each RID's record is
+ * associated with the key content; the index is not key-unique.
+ *
+ * @throws IndexException when keys or rids are missing
+ */
+ @Override
+ public void set(TransactionContext tx, Object[] keys, RID[] rids) throws IndexException {
+ lazyInit();
+ if (keys == null || keys.length == 0 || rids == null || rids.length == 0) {
+ throw new IndexException("Keys and RIDs must be provided for Lucene set operation for index '" + getName() + "'.");
+ }
+ // Engine methods (put, remove) were refactored to take TransactionContext directly.
+ // Index every provided RID: the previous implementation silently dropped rids[1..n].
+ for (final RID rid : rids) {
+ engine.put(tx, keys[0], rid);
+ }
+ }
+
+ // Removes entries for keys[0]; when rid is given only that record's entry is removed,
+ // otherwise all documents matching the key are removed.
+ @Override
+ public void remove(TransactionContext tx, Object[] keys, Identifiable rid) throws IndexException {
+ lazyInit();
+ if (keys == null || keys.length == 0) {
+ throw new IndexException("Keys must be provided for Lucene remove operation for index '" + getName() + "'.");
+ }
+ // Engine methods (put, remove) were refactored to take TransactionContext directly.
+ if (rid != null) {
+ engine.remove(tx, keys[0], rid);
+ } else {
+ engine.remove(tx, keys[0]); // Remove all documents matching key
+ }
+ }
+
+ @Override
+ public void remove(TransactionContext tx, Object[] keys) throws IndexException {
+ remove(tx, keys, null); // Remove all RIDs associated with these keys
+ }
+
+ // See the keyed range() overloads above: key-ordered traversal does not apply to Lucene.
+ @Override
+ public IndexCursor range(boolean ascendingOrder) {
+ throw new UnsupportedOperationException("Range queries without keys are not directly supported. Use a match_all query.");
+ }
+
+ @Override
+ public IndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded, int limit, int skip) {
+ throw new UnsupportedOperationException("Range queries are not directly supported by Lucene full-text index in this manner. Use Lucene query syntax.");
+ }
+
+ @Override
+ public int getAssociatedBucketId() {
+ // Returns the first bucket from the definition, or -1 when unknown.
+ if (definition == null) return -1;
+ List bucketIds = definition.getBucketIds();
+ return bucketIds != null && !bucketIds.isEmpty() ? bucketIds.get(0) : -1;
+ }
+
+ // --- Lucene Specific Accessors ---
+
+ // Direct access to the engine's IndexSearcher for callers that build raw Lucene queries.
+ public IndexSearcher searcher() {
+ lazyInit();
+ return engine.searcher();
+ }
+
+ // Analyzer used at indexing time.
+ public Analyzer indexAnalyzer() {
+ lazyInit();
+ return engine.indexAnalyzer();
+ }
+
+ // Analyzer used at query-parsing time (may differ from the index analyzer).
+ public Analyzer queryAnalyzer() {
+ lazyInit();
+ return engine.queryAnalyzer();
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java
new file mode 100644
index 0000000000..50af17bf24
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneIndexType.java
@@ -0,0 +1,345 @@
+/*
+ * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ * Copyright 2014 Orient Technologies.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.arcadedb.lucene.index; // Changed package
+
+import com.arcadedb.database.Identifiable; // Changed import
+import com.arcadedb.database.RID; // Changed import
+import com.arcadedb.document.Document; // ArcadeDB Document
+import com.arcadedb.exception.ArcadeDBException; // Changed import
+import com.arcadedb.index.CompositeKey; // Changed import
+import com.arcadedb.index.IndexDefinition; // Changed import
+import com.arcadedb.schema.Type; // Changed import
+import com.arcadedb.lucene.util.LuceneDateTools; // Added import
+import java.io.UnsupportedEncodingException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+import org.apache.lucene.document.Field; // Lucene Document Field
+import org.apache.lucene.document.DoubleDocValuesField;
+import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.FloatDocValuesField;
+import org.apache.lucene.document.FloatPoint;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Static helpers that translate ArcadeDB keys/values into Lucene fields, terms and queries.
+ * Created by enricorisa on 21/03/14.
+ */
+public class ArcadeLuceneIndexType { // Changed class name
+ public static final String RID_HASH = "_RID_HASH";
+ public static final String RID = "_RID"; // Defined locally
+ public static final String KEY = "_KEY"; // Defined locally
+
+ /** Creates a single field; class/cluster metadata fields stay non-analyzed (StringField). */
+ public static Field createField( // Simplified, assuming store is passed correctly by caller for specific needs
+ final String fieldName, final Object value, final Field.Store store) {
+ if (fieldName.startsWith("_CLASS") || fieldName.startsWith("_CLUSTER")) {
+ return new StringField(fieldName, value.toString(), store);
+ }
+ // Defaulting to TextField, assuming analysis. Use StringField if non-analyzed is the default.
+ return new TextField(fieldName, value.toString(), store);
+ }
+
+ /** Extracts the RID portion (text before '|') from the stored RID_HASH field, or null when absent. */
+ public static String extractId(org.apache.lucene.document.Document doc) { // Lucene Document
+ final String value = doc.get(RID_HASH);
+ if (value == null) {
+ return null;
+ }
+ final int pos = value.indexOf('|');
+ return pos > 0 ? value.substring(0, pos) : value;
+ }
+
+ public static Field createIdField(final Identifiable id, final Object key) { // Changed Identifiable
+ return new StringField(RID_HASH, genValueId(id, key), Field.Store.YES);
+ }
+
+ public static Field createRidField(final Identifiable id) { // Renamed from createOldIdField, Changed Identifiable
+ return new StringField(RID, id.getIdentity().toString(), Field.Store.YES);
+ }
+
+ /** Builds "&lt;rid&gt;|&lt;sha256(key)&gt;", unique per (record, key) pair. */
+ public static String genValueId(final Identifiable id, final Object key) { // Changed Identifiable
+ return id.getIdentity().toString() + "|" + hashKey(key);
+ }
+
+ /**
+ * Creates the Lucene fields for one value: Point + DocValues (+ optional StoredField) for
+ * numbers and dates, analyzed TextField (+ optional SortedDocValuesField) for strings.
+ */
+ public static List createFields(
+ String fieldName, Object value, Field.Store store, Boolean sort, Type type) { // Added Type parameter
+ List luceneFields = new ArrayList<>();
+
+ // Date types are checked BEFORE the generic Number branch: a DATE/DATETIME supplied as a
+ // Number (epoch millis) must be normalized as a date, not indexed as a plain numeric field.
+ if (type == Type.DATETIME || type == Type.DATE) {
+ Long time = null;
+ if (value instanceof Date) {
+ time = ((Date) value).getTime();
+ } else if (value instanceof Number) {
+ time = ((Number) value).longValue();
+ } else if (value instanceof String) {
+ time = LuceneDateTools.parseDateTimeToMillis((String) value);
+ }
+ if (time != null) {
+ if (type == Type.DATE) {
+ time = LuceneDateTools.normalizeToDayEpochMillis(time); // DATE has day granularity
+ }
+ luceneFields.add(new LongPoint(fieldName, time));
+ luceneFields.add(new NumericDocValuesField(fieldName, time)); // For sorting/faceting
+ if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, time));
+ }
+ // Optionally, add the original value as a TextField if searchable as text and not just date
+ // if (value != null && store == Field.Store.YES) luceneFields.add(new TextField(fieldName, value.toString(), store));
+ } else if (value instanceof Number) {
+ Number number = (Number) value;
+ if (type == Type.LONG || value instanceof Long) {
+ luceneFields.add(new LongPoint(fieldName, number.longValue()));
+ luceneFields.add(new NumericDocValuesField(fieldName, number.longValue())); // For sorting/faceting
+ if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.longValue()));
+ } else if (type == Type.FLOAT || value instanceof Float) {
+ luceneFields.add(new FloatPoint(fieldName, number.floatValue()));
+ luceneFields.add(new FloatDocValuesField(fieldName, number.floatValue())); // For sorting/faceting
+ if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.floatValue()));
+ } else if (type == Type.DOUBLE || value instanceof Double) {
+ luceneFields.add(new DoublePoint(fieldName, number.doubleValue()));
+ luceneFields.add(new DoubleDocValuesField(fieldName, number.doubleValue())); // For sorting/faceting
+ if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.doubleValue()));
+ } else { // INTEGER, SHORT, BYTE
+ luceneFields.add(new IntPoint(fieldName, number.intValue()));
+ luceneFields.add(new NumericDocValuesField(fieldName, number.longValue())); // Use long for DV for all integer types
+ if (store == Field.Store.YES) luceneFields.add(new StoredField(fieldName, number.intValue()));
+ }
+ } else if (type == Type.STRING || type == Type.TEXT || type == Type.ENUM || type == Type.UUID || value instanceof String) { // Added TEXT, ENUM, UUID
+ String stringValue = value.toString();
+ // Defaulting to TextField (analyzed). If non-analyzed is preferred for some types (e.g. UUID, ENUM), use StringField.
+ luceneFields.add(new TextField(fieldName, stringValue, store));
+ if (Boolean.TRUE.equals(sort)) {
+ luceneFields.add(new SortedDocValuesField(fieldName, new BytesRef(stringValue)));
+ }
+ } else {
+ // Default to TextField for other types or if type is null
+ luceneFields.add(new TextField(fieldName, value.toString(), store));
+ if (Boolean.TRUE.equals(sort)) {
+ luceneFields.add(new SortedDocValuesField(fieldName, new BytesRef(value.toString())));
+ }
+ }
+ return luceneFields;
+ }
+
+ /**
+ * Builds an exact-match query: SHOULD across all indexed fields for a plain string key,
+ * MUST per field for a composite key. Returns null for unsupported key types.
+ */
+ public static Query createExactQuery(IndexDefinition index, Object key) { // Changed OIndexDefinition
+ Query query = null;
+ if (key instanceof String) {
+ final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
+ if (!index.getFields().isEmpty()) {
+ for (String idx : index.getFields()) {
+ queryBuilder.add(new TermQuery(new Term(idx, key.toString())), BooleanClause.Occur.SHOULD);
+ }
+ } else {
+ queryBuilder.add(new TermQuery(new Term(KEY, key.toString())), BooleanClause.Occur.SHOULD);
+ }
+ query = queryBuilder.build();
+ } else if (key instanceof CompositeKey) { // Changed OCompositeKey
+ final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
+ int i = 0;
+ CompositeKey keys = (CompositeKey) key;
+ for (String idx : index.getFields()) {
+ String val = (String) keys.getKeys().get(i); // Assuming keys are strings
+ queryBuilder.add(new TermQuery(new Term(idx, val)), BooleanClause.Occur.MUST);
+ i++;
+ }
+ query = queryBuilder.build();
+ }
+ return query;
+ }
+
+ public static Query createQueryId(Identifiable value) { // Changed OIdentifiable
+ return new TermQuery(new Term(RID, value.getIdentity().toString()));
+ }
+
+ public static Query createQueryId(Identifiable value, Object key) { // Changed OIdentifiable
+ return new TermQuery(new Term(RID_HASH, genValueId(value, key)));
+ }
+
+ /** Hashes the key (SHA-256, Base64) so arbitrary/composite keys fit into a single Lucene term. */
+ public static String hashKey(Object key) {
+ try {
+ String keyString;
+ if (key instanceof Document) { // Changed ODocument to ArcadeDB Document
+ keyString = ((Document) key).toJSON().toString(); // Assuming toJSON returns JSON object
+ } else {
+ keyString = key.toString();
+ }
+ MessageDigest sha256 = MessageDigest.getInstance("SHA-256");
+ // Explicit Charset constant: never throws UnsupportedEncodingException and does not
+ // depend on the platform default encoding.
+ byte[] bytes = sha256.digest(keyString.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+ return Base64.getEncoder().encodeToString(bytes);
+ } catch (NoSuchAlgorithmException e) {
+ throw ArcadeDBException.wrapException(new ArcadeDBException("fail to find sha algorithm"), e); // Changed exception
+ }
+ }
+
+ /** Builds the delete query matching either (rid AND first-field term) or the exact (rid, key) hash. */
+ public static Query createDeleteQuery( // Changed OIdentifiable, ODocument
+ Identifiable value, List fields, Object key, com.arcadedb.document.Document metadata) {
+
+ // TODO Implementation of Composite keys with Collection
+ final BooleanQuery.Builder filter = new BooleanQuery.Builder();
+ final BooleanQuery.Builder builder = new BooleanQuery.Builder();
+ // TODO: Condition on Id and field key only for backward compatibility
+ if (value != null) {
+ builder.add(createQueryId(value), BooleanClause.Occur.MUST);
+ }
+ // NOTE(review): assumes 'fields' is non-empty; an empty list would throw NoSuchElementException.
+ String field = fields.iterator().next();
+ builder.add(
+ new TermQuery(new Term(field, key.toString().toLowerCase(Locale.ENGLISH))),
+ BooleanClause.Occur.MUST);
+
+ filter.add(builder.build(), BooleanClause.Occur.SHOULD);
+ if (value != null) {
+ filter.add(createQueryId(value, key), BooleanClause.Occur.SHOULD);
+ }
+ return filter.build();
+ }
+
+ /**
+ * Creates a Lucene Query for exact matching on a field, considering the field's schema type.
+ *
+ * @param fieldName The name of the field.
+ * @param value The value to match.
+ * @param type The ArcadeDB schema Type of the field.
+ * @param database The database instance (currently unused here, but might be useful for context or complex types).
+ * @return A Lucene Query.
+ */
+ public static Query createExactFieldQuery(String fieldName, Object value, Type type, com.arcadedb.database.DatabaseInternal database) {
+ if (value == null) {
+ // Null handling must mirror how nulls were indexed by createFields: match the "null"
+ // marker term for text-like types, match nothing for point-based types.
+ if (type == Type.STRING || type == Type.TEXT || type == Type.ENUM) {
+ return new TermQuery(new Term(fieldName, "null")); // Or a special null marker if used during indexing
+ }
+ // For numeric/point types, matching "null" would require a field-existence query;
+ // return an empty BooleanQuery (matches nothing) instead.
+ return new BooleanQuery.Builder().build(); // Empty BooleanQuery matches nothing
+ }
+
+ switch (type) {
+ case STRING:
+ case TEXT:
+ case ENUM:
+ case UUID: // UUIDs are typically indexed and queried as strings
+ return new TermQuery(new Term(fieldName, value.toString()));
+ case INTEGER:
+ if (value instanceof Number) {
+ return IntPoint.newExactQuery(fieldName, ((Number) value).intValue());
+ } else {
+ try {
+ return IntPoint.newExactQuery(fieldName, Integer.parseInt(value.toString()));
+ } catch (NumberFormatException e) {
+ // Unparsable number: fall back to a term query on the raw string
+ return new TermQuery(new Term(fieldName, value.toString()));
+ }
+ }
+ case LONG:
+ case DATETIME: // Assuming stored as long (epoch millis)
+ case DATE: // Assuming stored as long (epoch millis)
+ if (value instanceof Number) {
+ return LongPoint.newExactQuery(fieldName, ((Number) value).longValue());
+ } else if (value instanceof java.util.Date) {
+ return LongPoint.newExactQuery(fieldName, ((java.util.Date) value).getTime());
+ } else {
+ try {
+ Long time = null;
+ if (value instanceof String) {
+ time = LuceneDateTools.parseDateTimeToMillis((String) value);
+ }
+ if (time == null) { // Parsing failed or was not a convertible type
+ // Fall back to a term query on the original string representation.
+ return new TermQuery(new Term(fieldName, value.toString()));
+ }
+ if (type == Type.DATE) {
+ time = LuceneDateTools.normalizeToDayEpochMillis(time); // DATE has day granularity
+ }
+ return LongPoint.newExactQuery(fieldName, time);
+ } catch (NumberFormatException e) { // Should be caught by LuceneDateTools or earlier instanceof
+ return new TermQuery(new Term(fieldName, value.toString()));
+ }
+ }
+ case FLOAT:
+ if (value instanceof Number) {
+ return FloatPoint.newExactQuery(fieldName, ((Number) value).floatValue());
+ } else {
+ try {
+ return FloatPoint.newExactQuery(fieldName, Float.parseFloat(value.toString()));
+ } catch (NumberFormatException e) {
+ return new TermQuery(new Term(fieldName, value.toString()));
+ }
+ }
+ case DOUBLE:
+ if (value instanceof Number) {
+ return DoublePoint.newExactQuery(fieldName, ((Number) value).doubleValue());
+ } else {
+ try {
+ return DoublePoint.newExactQuery(fieldName, Double.parseDouble(value.toString()));
+ } catch (NumberFormatException e) {
+ return new TermQuery(new Term(fieldName, value.toString()));
+ }
+ }
+ case SHORT:
+ case BYTE:
+ if (value instanceof Number) {
+ return IntPoint.newExactQuery(fieldName, ((Number) value).intValue()); // Promote to IntPoint
+ } else {
+ try {
+ return IntPoint.newExactQuery(fieldName, Short.parseShort(value.toString()));
+ } catch (NumberFormatException e) {
+ return new TermQuery(new Term(fieldName, value.toString()));
+ }
+ }
+ case BOOLEAN:
+ // Lucene typically stores booleans as "T"/"F" or "true"/"false" in a StringField,
+ // or as 0/1 in a numeric field. Assuming string "true" or "false" as indexed by createFields default for strings.
+ return new TermQuery(new Term(fieldName, value.toString().toLowerCase(Locale.ENGLISH)));
+ default:
+ // For BINARY, EMBEDDED, LINK etc., default to TermQuery on string representation.
+ // This might not be effective unless specific string representations are indexed.
+ return new TermQuery(new Term(fieldName, value.toString()));
+ }
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java b/lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java
new file mode 100644
index 0000000000..a0b1cc9d48
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.arcadedb.lucene.index;
+
+import com.arcadedb.lucene.OLuceneCrossClassIndexFactory;
+import com.arcadedb.lucene.engine.OLuceneIndexEngine;
+import com.arcadedb.database.OIdentifiable;
+import com.arcadedb.database.exception.OInvalidIndexEngineIdException;
+import com.arcadedb.database.index.OIndexMetadata;
+import com.arcadedb.database.storage.OStorage;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.search.Query;
+
+/**
+ * Full-text flavour of the Lucene index. Every accessor delegates to the Lucene index engine via
+ * storage.callIndexEngine and retries on OInvalidIndexEngineIdException: the engine id can be
+ * invalidated concurrently, and doReloadIndexEngine() refreshes it before the next attempt.
+ */
+public class OLuceneFullTextIndex extends OLuceneIndexNotUnique {
+
+ public OLuceneFullTextIndex(OIndexMetadata im, final OStorage storage) {
+ super(im, storage);
+ }
+
+ // Builds the Lucene Document for (key, record) via the engine. Retries on stale engine id.
+ public Document buildDocument(final Object key, OIdentifiable identifieable) {
+
+ while (true)
+ try {
+ return storage.callIndexEngine(
+ false,
+ indexId,
+ engine -> {
+ OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine;
+ return indexEngine.buildDocument(key, identifieable);
+ });
+ } catch (OInvalidIndexEngineIdException e) {
+ doReloadIndexEngine();
+ }
+ }
+
+ // Parses/builds the Lucene Query for the given raw query object. Retries on stale engine id.
+ public Query buildQuery(final Object query) {
+ while (true)
+ try {
+ return storage.callIndexEngine(
+ false,
+ indexId,
+ engine -> {
+ OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine;
+ return indexEngine.buildQuery(query);
+ });
+ } catch (OInvalidIndexEngineIdException e) {
+ doReloadIndexEngine();
+ }
+ }
+
+ // Analyzer used at query-parsing time.
+ public Analyzer queryAnalyzer() {
+ while (true)
+ try {
+ return storage.callIndexEngine(
+ false,
+ indexId,
+ engine -> {
+ OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine;
+ return indexEngine.queryAnalyzer();
+ });
+ } catch (final OInvalidIndexEngineIdException e) {
+ doReloadIndexEngine();
+ }
+ }
+
+ // True when the index covers a collection-valued property.
+ public boolean isCollectionIndex() {
+ while (true) {
+ try {
+ return storage.callIndexEngine(
+ false,
+ indexId,
+ engine -> {
+ OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine;
+ return indexEngine.isCollectionIndex();
+ });
+ } catch (OInvalidIndexEngineIdException e) {
+ doReloadIndexEngine();
+ }
+ }
+ }
+
+ // Analyzer used at indexing time (may differ from the query analyzer).
+ public Analyzer indexAnalyzer() {
+ while (true) {
+ try {
+ return storage.callIndexEngine(
+ false,
+ indexId,
+ engine -> {
+ OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine;
+ return indexEngine.indexAnalyzer();
+ });
+ } catch (OInvalidIndexEngineIdException e) {
+ doReloadIndexEngine();
+ }
+ }
+ }
+
+ @Override
+ public boolean isAutomatic() {
+ // Cross-class Lucene indexes are always treated as automatic, regardless of metadata.
+ return super.isAutomatic()
+ || OLuceneCrossClassIndexFactory.LUCENE_CROSS_CLASS.equals(im.getAlgorithm());
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/parser/ArcadeLuceneMultiFieldQueryParser.java b/lucene/src/main/java/com/arcadedb/lucene/parser/ArcadeLuceneMultiFieldQueryParser.java
new file mode 100644
index 0000000000..99264342eb
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/parser/ArcadeLuceneMultiFieldQueryParser.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright 2023 Arcade Data Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.arcadedb.lucene.parser;
+
+import com.arcadedb.schema.Type;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.DateTools; // For date parsing, if needed
+import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.FloatPoint;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermRangeQuery; // For newStringRange
+import org.apache.lucene.util.BytesRef;
+
+import java.text.SimpleDateFormat; // Example for date parsing
+import java.util.Date; // Example for date parsing
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+public class ArcadeLuceneMultiFieldQueryParser extends MultiFieldQueryParser {
+
+ private static final Logger logger = Logger.getLogger(ArcadeLuceneMultiFieldQueryParser.class.getName());
+
+ private final Map fieldTypes;
+
+ // Date format constants removed, will use LuceneDateTools
+
+ /** Builds a type-aware multi-field parser with optional per-field boosts. */
+ public ArcadeLuceneMultiFieldQueryParser(Map fieldTypes, String[] fields, Analyzer analyzer, Map boosts) {
+ super(fields, analyzer, boosts);
+ // Defensive copy; a null map is tolerated so callers without type info still work.
+ this.fieldTypes = fieldTypes != null ? new HashMap<>(fieldTypes) : new HashMap<>();
+ }
+
+ /** Same as above without boosts. */
+ public ArcadeLuceneMultiFieldQueryParser(Map fieldTypes, String[] fields, Analyzer analyzer) {
+ super(fields, analyzer);
+ this.fieldTypes = fieldTypes != null ? new HashMap<>(fieldTypes) : new HashMap<>();
+ }
+
+ protected Type getFieldType(String field) {
+ // May return null when the field has no declared schema type.
+ return fieldTypes.get(field);
+ }
+
+ @Override
+ protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
+ Type fieldType = getFieldType(field);
+
+ if (fieldType == null) {
+ logger.log(Level.FINE, "No type information for field {0} in range query, defaulting to string range.", field);
+ fieldType = Type.STRING; // Default to string range if type unknown
+ }
+
+ // Lucene's default MultiFieldQueryParser uses TermRangeQuery for ranges on text fields.
+ // For specific data types, we need to create appropriate Point range queries.
+
+ try {
+ switch (fieldType) {
+ case STRING:
+ case TEXT:
+ // For string ranges, ensure part1 and part2 are not null for TermRangeQuery.newStringRange
+ // The superclass handles * as open range for TermRangeQuery.
+ // If super.newRangeQuery is called, it will likely create a TermRangeQuery.
+ // TermRangeQuery.newStringRange is more explicit for string ranges.
+ // NOTE: newStringRange takes the String bounds directly (null = open-ended),
+ // so no BytesRef conversion is needed here.
+ return TermRangeQuery.newStringRange(field, part1, part2, startInclusive, endInclusive);
+
+ case INTEGER:
+ Integer lowerInt = (part1 == null || "*".equals(part1)) ? null : Integer.parseInt(part1);
+ Integer upperInt = (part2 == null || "*".equals(part2)) ? null : Integer.parseInt(part2);
+ return IntPoint.newRangeQuery(field,
+ lowerInt == null ? Integer.MIN_VALUE : (startInclusive ? lowerInt : lowerInt + 1),
+ upperInt == null ? Integer.MAX_VALUE : (endInclusive ? upperInt : upperInt - 1));
+
+ // NOTE: plain LONG is handled in its own case below (Long.parseLong, not date parsing).
+ case DATETIME:
+ case DATE:
+ Long lowerLong = com.arcadedb.lucene.util.LuceneDateTools.parseDateTimeToMillis(part1);
+ Long upperLong = com.arcadedb.lucene.util.LuceneDateTools.parseDateTimeToMillis(part2);
+
+ if (fieldType == Type.DATE) {
+ if (lowerLong != null) lowerLong = com.arcadedb.lucene.util.LuceneDateTools.normalizeToDayEpochMillis(lowerLong);
+ if (upperLong != null) upperLong = com.arcadedb.lucene.util.LuceneDateTools.normalizeToDayEpochMillis(upperLong);
+ }
+
+ // Adjust for inclusive/exclusive after potential null from parsing
+ long actualLowerLong = lowerLong == null ? Long.MIN_VALUE : (startInclusive ? lowerLong : lowerLong + 1L);
+ if (lowerLong == null && "*".equals(part1)) actualLowerLong = Long.MIN_VALUE; // Explicit open start
+ else if (lowerLong == null && part1 != null) throw new ParseException("Cannot parse lower date range: " + part1);
+
+
+ long actualUpperLong = upperLong == null ? Long.MAX_VALUE : (endInclusive ? upperLong : upperLong - 1L);
+ if (upperLong == null && "*".equals(part2)) actualUpperLong = Long.MAX_VALUE; // Explicit open end
+ else if (upperLong == null && part2 != null) throw new ParseException("Cannot parse upper date range: " + part2);
+
+ // Ensure lower is not greater than upper after adjustments if both are specified
+ if (lowerLong != null && upperLong != null && actualLowerLong > actualUpperLong) {
+ actualLowerLong = lowerLong; // Reset to original parsed if adjustments inverted range for point fields
+ actualUpperLong = upperLong;
+ // For point fields, if startInclusive=false means actual_low = low+1, endInclusive=false means actual_high = high-1
+ // If after this actual_low > actual_high, it means no values can exist.
+ // Lucene's LongPoint.newRangeQuery handles this correctly by creating a query that matches nothing.
+ }
+
+ return LongPoint.newRangeQuery(field, actualLowerLong, actualUpperLong);
+ case LONG: // Plain long values: parsed numerically, unlike DATE/DATETIME above
+ Long lowerPlainLong = (part1 == null || "*".equals(part1)) ? null : Long.parseLong(part1);
+ Long upperPlainLong = (part2 == null || "*".equals(part2)) ? null : Long.parseLong(part2);
+ return LongPoint.newRangeQuery(field,
+ lowerPlainLong == null ? Long.MIN_VALUE : (startInclusive ? lowerPlainLong : lowerPlainLong + 1L),
+ upperPlainLong == null ? Long.MAX_VALUE : (endInclusive ? upperPlainLong : upperPlainLong - 1L));
+
+ case FLOAT:
+ Float lowerFloat = (part1 == null || "*".equals(part1)) ? null : Float.parseFloat(part1);
+ Float upperFloat = (part2 == null || "*".equals(part2)) ? null : Float.parseFloat(part2);
+ // Point queries are exclusive for lower, inclusive for upper by default with null/MIN/MAX handling.
+ // Adjusting for inclusive/exclusive:
+ float actualLowerFloat = lowerFloat == null ? Float.NEGATIVE_INFINITY : (startInclusive ? lowerFloat : Math.nextUp(lowerFloat));
+ float actualUpperFloat = upperFloat == null ? Float.POSITIVE_INFINITY : (endInclusive ? upperFloat : Math.nextDown(upperFloat));
+ return FloatPoint.newRangeQuery(field, actualLowerFloat, actualUpperFloat);
+
+
+ case DOUBLE:
+ Double lowerDouble = (part1 == null || "*".equals(part1)) ? null : Double.parseDouble(part1);
+ Double upperDouble = (part2 == null || "*".equals(part2)) ? null : Double.parseDouble(part2);
+ double actualLowerDouble = lowerDouble == null ? Double.NEGATIVE_INFINITY : (startInclusive ? lowerDouble : Math.nextUp(lowerDouble));
+ double actualUpperDouble = upperDouble == null ? Double.POSITIVE_INFINITY : (endInclusive ? upperDouble : Math.nextDown(upperDouble));
+ return DoublePoint.newRangeQuery(field, actualLowerDouble, actualUpperDouble);
+
+ case SHORT:
+ case BYTE:
+ // Promote to IntPoint for querying, as Lucene has no ShortPoint/BytePoint
+ Integer lowerShortOrByte = (part1 == null || "*".equals(part1)) ? null : Integer.parseInt(part1);
+ Integer upperShortOrByte = (part2 == null || "*".equals(part2)) ? null : Integer.parseInt(part2);
+ return IntPoint.newRangeQuery(field,
+ lowerShortOrByte == null ? Integer.MIN_VALUE : (startInclusive ? lowerShortOrByte : lowerShortOrByte + 1),
+ upperShortOrByte == null ? Integer.MAX_VALUE : (endInclusive ? upperShortOrByte : upperShortOrByte - 1));
+
+ default:
+ logger.log(Level.WARNING, "Unhandled type {0} for field {1} in range query. Defaulting to string range.", new Object[]{fieldType, field});
+ return TermRangeQuery.newStringRange(field, part1, part2, startInclusive, endInclusive);
+ }
+ } catch (NumberFormatException e) {
+ throw (ParseException) new ParseException("Could not parse number in range query for field " + field + ": " + e.getMessage()).initCause(e);
+ }
+ // Removed catch for java.text.ParseException as LuceneDateTools handles its own parsing issues or returns null
+ }
+
+ // Date parsing helper removed, now using LuceneDateTools
+
+ // Wildcard, Prefix, Fuzzy queries usually apply to text fields.
+ // The superclass versions are generally fine. If specific behavior is needed
+ // for non-text fields (e.g., to disallow or handle differently),
+ // these methods can be overridden. For now, relying on superclass.
+
+ // @Override
+ // protected Query getWildcardQuery(String field, String termStr) throws ParseException {
+ // Type fieldType = getFieldType(field);
+ // if (fieldType != null && fieldType.isNumeric()) {
+ // // Wildcards on numeric points don't make sense.
+ // // Could throw error or return a MatchNoDocsQuery, or let super handle (might error).
+ // // For now, let super decide, it might try to parse termStr as a number.
+ // }
+ // return super.getWildcardQuery(field, termStr);
+ // }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java
new file mode 100644
index 0000000000..8aee90bad4
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java
@@ -0,0 +1,279 @@
+ package com.arcadedb.lucene.query;
+
+ import com.arcadedb.database.Database;
+ import com.arcadedb.database.Identifiable;
+ import com.arcadedb.database.RID;
+ import com.arcadedb.database.RecordId; // ArcadeDB RecordId for context
+ import com.arcadedb.index.IndexCursor;
+ import com.arcadedb.lucene.engine.LuceneIndexEngine; // Assumed engine interface
+ import com.arcadedb.lucene.index.ArcadeLuceneIndexType; // For RID field name
+ import com.arcadedb.query.sql.executor.Result;
+ import com.arcadedb.query.sql.executor.ResultInternal;
+
+ import java.io.IOException;
+ import java.util.Collections;
+ import java.util.HashMap; // Required: next() builds currentProximityInfo via new HashMap<>()
+ import java.util.Iterator;
+ import java.util.Map;
+ import java.util.NoSuchElementException;
+ import java.util.Set;
+ import java.util.logging.Level;
+ import java.util.logging.Logger;
+
+ import org.apache.lucene.document.Document; // Lucene Document
+ import org.apache.lucene.index.IndexReader; // Required: next() fetches the reader for highlighting
+ import org.apache.lucene.search.IndexSearcher;
+ import org.apache.lucene.search.ScoreDoc;
+ import org.apache.lucene.search.TopDocs;
+ import org.apache.lucene.search.TotalHits;
+
+public class LuceneIndexCursor implements IndexCursor {
+
+ private static final Logger logger = Logger.getLogger(LuceneIndexCursor.class.getName());
+
+ private final LuceneQueryContext queryContext;
+ private final LuceneIndexEngine engine; // Engine for callbacks
+ private final com.arcadedb.document.Document metadata; // ArcadeDB Document for query metadata
+
+ private ScoreDoc[] scoreDocs;
+ private IndexSearcher searcher;
+ private int currentIndex = -1; // Before the first element
+ private RID currentRID;
+ private float currentScore;
+ private Map currentProximityInfo; // For contextual data like highlights
+
+ private TopDocs topDocs;
+
+
+ public LuceneIndexCursor(LuceneQueryContext queryContext,
+ LuceneIndexEngine engine,
+ com.arcadedb.document.Document metadata) {
+ this.queryContext = queryContext;
+ this.engine = engine;
+ this.metadata = metadata;
+ this.searcher = queryContext.getSearcher(); // Get the potentially transactional searcher
+
+ executeSearch();
+ }
+
+ // Constructor for when results (Set) are already fetched, e.g. from engine.getInTx()
+ // This is a simplified cursor that iterates over pre-fetched RIDs without scores or Lucene docs.
+ private Iterator preFetchedResultsIterator;
+ private Identifiable currentPreFetched;
+ private int preFetchedCount;
+
+ public LuceneIndexCursor(Set preFetchedResults) {
+ this.queryContext = null; // Not applicable
+ this.engine = null; // Not applicable
+ this.metadata = null; // Not applicable
+ if (preFetchedResults != null) {
+ this.preFetchedResultsIterator = preFetchedResults.iterator();
+ this.preFetchedCount = preFetchedResults.size();
+ } else {
+ this.preFetchedResultsIterator = Collections.emptyIterator();
+ this.preFetchedCount = 0;
+ }
+ }
+
+
+ private void executeSearch() {
+ if (queryContext == null) return; // Should not happen if not using pre-fetched constructor
+
+ try {
+ int limit = queryContext.getContext() != null ? queryContext.getContext().getLimit() : Integer.MAX_VALUE;
+ if (limit == -1) limit = Integer.MAX_VALUE; // SQL limit -1 means no limit
+
+ if (queryContext.getSort() != null) {
+ this.topDocs = searcher.search(queryContext.getQuery(), limit, queryContext.getSort());
+ } else {
+ this.topDocs = searcher.search(queryContext.getQuery(), limit);
+ }
+ this.scoreDocs = topDocs.scoreDocs;
+ } catch (IOException e) {
+ logger.log(Level.SEVERE, "Error executing Lucene search", e);
+ this.scoreDocs = new ScoreDoc[0]; // Empty results on error
+ this.topDocs = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
+ }
+ }
+
+ @Override
+ public Object[] getKeys() {
+ // For Lucene, the "keys" are the search terms. This is not usually returned per document.
+ // If queryContext.getQuery() is available, one could try to extract terms, but it's complex.
+ if (currentRID != null) {
+ // Could potentially store the query that led to this hit if needed.
+ // For now, returning null as it's not a natural fit.
+ return null;
+ }
+ throw new NoSuchElementException("No current element or keys not applicable");
+ }
+
+ @Override
+ public Identifiable getRecord() {
+ // In ArcadeDB, IndexCursor usually returns RIDs. The record is loaded by the caller.
+ // If this cursor *must* return the full record, a DB lookup is needed.
+ // For now, consistent with returning RID via next() and getRID().
+ // This method could load and cache it if frequently used.
+ if (currentRID != null && queryContext != null && queryContext.getContext() != null) {
+ return queryContext.getContext().getDatabase().lookupByRID(currentRID, true);
+ }
+ if (currentPreFetched != null) {
+ return currentPreFetched;
+ }
+ return null;
+ }
+
+ public RID getRID() {
+ if (currentRID != null) {
+ return currentRID;
+ }
+ if (currentPreFetched != null) {
+ return currentPreFetched.getIdentity();
+ }
+ return null;
+ }
+
+
+ @Override
+ public Map getProperties() {
+ // This could return highlights and score if structured appropriately.
+ // The currentProximityInfo is designed for this.
+ return currentProximityInfo != null ? currentProximityInfo : Collections.emptyMap();
+ }
+
+ @Override
+ public float getScore() { // Changed from int to float to match Lucene score
+ return currentScore;
+ }
+
+ @Override
+ public boolean hasNext() {
+ if (preFetchedResultsIterator != null) {
+ return preFetchedResultsIterator.hasNext();
+ }
+ if (scoreDocs == null) {
+ return false;
+ }
+ return (currentIndex + 1) < scoreDocs.length;
+ }
+
+ @Override
+ public Identifiable next() {
+ if (preFetchedResultsIterator != null) {
+ if (!preFetchedResultsIterator.hasNext()) {
+ throw new NoSuchElementException();
+ }
+ currentPreFetched = preFetchedResultsIterator.next();
+ this.currentRID = currentPreFetched.getIdentity(); // Store RID
+ this.currentScore = 1.0f; // Pre-fetched results usually don't carry Lucene score directly
+ this.currentProximityInfo = Collections.singletonMap("$score", this.currentScore);
+ return currentPreFetched; // Or just currentRID if API prefers that
+ }
+
+ if (!hasNext()) {
+ throw new NoSuchElementException();
+ }
+ currentIndex++;
+ ScoreDoc scoreDoc = scoreDocs[currentIndex];
+ try {
+ // Using storedFields().document() is preferred in modern Lucene for retrieving stored fields.
+ // searcher.doc(scoreDoc.doc) retrieves all (including non-stored if they were indexed in a certain way, but generally for stored).
+ Document luceneDoc = searcher.storedFields().document(scoreDoc.doc);
+ String ridString = luceneDoc.get(ArcadeLuceneIndexType.RID); // Use constant from ArcadeLuceneIndexType
+
+ if (ridString == null) {
+ // Fallback or try another RID field if there are multiple conventions (e.g. from older data)
+ // For now, log and skip if primary RID field is missing.
+ logger.log(Level.WARNING, "Lucene document " + scoreDoc.doc + " is missing RID field (" + ArcadeLuceneIndexType.RID + ")");
+ // Try to advance to next valid document or return null/throw
+ return next(); // Recursive call to try next, or could throw. Be careful with recursion.
+ }
+
+ Database currentDb = queryContext != null && queryContext.getContext() != null ? queryContext.getContext().getDatabase() : null;
+ this.currentRID = new RID(currentDb, ridString); // Pass database if available for cluster info
+ this.currentScore = scoreDoc.score;
+
+ // Prepare contextual data (score, highlights)
+ this.currentProximityInfo = new HashMap<>();
+ this.currentProximityInfo.put("$score", this.currentScore);
+
+ if (queryContext != null && queryContext.isHighlightingEnabled()) {
+ if (engine != null && engine.queryAnalyzer() != null) { // Ensure we have an analyzer for highlighting
+ queryContext.setHighlightingAnalyzer(engine.queryAnalyzer()); // Use engine's query analyzer
+
+ // We need an IndexReader to pass to getHighlights if it needs one.
+ // The searcher in queryContext already has one.
+ IndexReader reader = queryContext.getSearcher().getIndexReader();
+ Map highlights = queryContext.getHighlights(luceneDoc, reader);
+ if (highlights != null && !highlights.isEmpty()) {
+ this.currentProximityInfo.putAll(highlights);
+ }
+ } else {
+ logger.warning("Highlighting enabled but no queryAnalyzer available from engine to set on LuceneQueryContext.");
+ }
+ }
+
+ // The engine.onRecordAddedToResultSet callback is now less critical for highlights,
+ // but can be kept if it serves other purposes (e.g. security, logging, complex context data).
+ // For now, let's assume its primary highlight-related role is superseded.
+ if (engine != null && queryContext != null) {
+ RecordId contextualRid = new RecordId(this.currentRID);
+ engine.onRecordAddedToResultSet(queryContext, contextualRid, luceneDoc, scoreDoc);
+ }
+
+
+ // IndexCursor traditionally returns Identifiable (which can be just the RID)
+ // If the caller needs the full record, they call getRecord().
+ return this.currentRID;
+
+ } catch (IOException e) {
+ throw new RuntimeException("Error fetching document from Lucene index", e);
+ }
+ }
+
+ @Override
+ public void close() {
+ // Release Lucene resources if this cursor specifically acquired them.
+ // If searcher is managed by engine (e.g. via SearcherManager),
+ // this cursor typically doesn't close/release the searcher.
+ scoreDocs = null;
+ // searcher = null; // Don't nullify if it's shared from engine/queryContext
+ }
+
+ @Override
+ public long getCount() { // Changed from size() to match typical usage for total hits
+ if (preFetchedResultsIterator != null) {
+ return preFetchedCount;
+ }
+ return topDocs != null && topDocs.totalHits != null ? topDocs.totalHits.value : 0;
+ }
+
+ @Override
+ public long size() { // Kept for IndexCursor interface if it uses size() for current iteration count
+ return getCount();
+ }
+
+
+ @Override
+ public void setLimit(int limit) {
+ // Limit should be applied during the search execution.
+ throw new UnsupportedOperationException("Limit must be set before search execution via CommandContext or metadata.");
+ }
+
+ @Override
+ public int getLimit() {
+ // Return the limit that was applied to this cursor's search
+ if (queryContext != null && queryContext.getContext() != null) {
+ return queryContext.getContext().getLimit();
+ }
+ return -1;
+ }
+
+ @Override
+ public boolean isPaginated() {
+ // Lucene TopDocs inherently supports pagination if the search is re-executed with 'searchAfter'.
+ // This simple cursor iterates a fixed set of top N docs. So, it's "paginated" in the sense
+ // that it represents one page of results.
+ return true;
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneKeyAndMetadata.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneKeyAndMetadata.java
new file mode 100644
index 0000000000..861f7eecef
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneKeyAndMetadata.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2023 Arcade Data Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.arcadedb.lucene.query;
+
+import com.arcadedb.document.Document;
+import com.arcadedb.index.CompositeKey; // If key can be a CompositeKey
+import com.arcadedb.query.sql.executor.CommandContext;
+
+import java.util.Map;
+
+/**
+ * A container to pass a query key (which can be a simple string,
+ * a CompositeKey, or other structures) along with associated metadata Document.
+ * The metadata can include options for highlighting, sorting, limits, etc.
+ */
+public class LuceneKeyAndMetadata {
+
+ public final Object key;
+ public final Document metadata;
+ private CommandContext context; // Optional command context
+
+ /**
+ * Constructor.
+ *
+ * @param key The main query key (e.g., String, CompositeKey).
+ * @param metadata A Document containing additional query parameters and options.
+ */
+ public LuceneKeyAndMetadata(Object key, Document metadata) {
+ this.key = key;
+ this.metadata = metadata != null ? metadata : new Document(null); // Ensure metadata is never null
+ }
+
+ /**
+ * Constructor with command context.
+ *
+ * @param key The main query key.
+ * @param metadata A Document containing additional query parameters.
+ * @param context The SQL command execution context.
+ */
+ public LuceneKeyAndMetadata(Object key, Document metadata, CommandContext context) {
+ this.key = key;
+ this.metadata = metadata != null ? metadata : new Document(null); // Ensure metadata is never null
+ this.context = context;
+ }
+
+
+ public Object getKey() {
+ return key;
+ }
+
+ public Document getMetadata() {
+ return metadata;
+ }
+
+ public CommandContext getContext() {
+ return context;
+ }
+
+ public LuceneKeyAndMetadata setContext(CommandContext context) {
+ this.context = context;
+ return this;
+ }
+
+ /**
+ * Helper to get metadata as a Map, typically for options.
+ * @return Map representation of metadata, or empty map if null.
+ */
+ public Map getMetadataAsMap() {
+ return this.metadata.toMap();
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java
new file mode 100644
index 0000000000..e4b4068f39
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneQueryContext.java
@@ -0,0 +1,254 @@
+/*
+ *
+ * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ * * Copyright 2014 Orient Technologies.
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+
+package com.arcadedb.lucene.query;
+
+import com.arcadedb.database.Identifiable; // Changed
+import com.arcadedb.exception.ArcadeDBException; // Changed
+import com.arcadedb.lucene.tx.LuceneTxChanges; // FIXME: Needs refactoring
+import com.arcadedb.query.sql.executor.CommandContext; // Changed
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.highlight.TextFragment;
+
+/** Created by Enrico Risa on 08/01/15. */
+public class LuceneQueryContext { // Changed class name
+ private final CommandContext context; // Changed
+ private final IndexSearcher searcher;
+ private final Query query;
+ private final Sort sort;
+ private Optional changes; // FIXME: Needs refactoring
+ // private HashMap fragments; // Replaced by on-demand highlighting
+
+ // Highlighter components - to be initialized if highlighting is requested
+ private org.apache.lucene.search.highlight.Highlighter highlighter;
+ private org.apache.lucene.analysis.Analyzer highlightingAnalyzer; // Analyzer used for highlighting (might be queryAnalyzer)
+ private String[] highlightingFields;
+
+
+ public LuceneQueryContext( // Changed
+ final CommandContext context, final IndexSearcher searcher, final Query query) {
+ this(context, searcher, query, Collections.emptyList());
+ }
+
+ public LuceneQueryContext( // Changed
+ final CommandContext context,
+ final IndexSearcher searcher,
+ final Query query,
+ final List sortFields) {
+ this.context = context;
+ this.searcher = searcher;
+ this.query = query;
+ if (sortFields == null || sortFields.isEmpty()) { // Added null check
+ sort = null;
+ } else {
+ sort = new Sort(sortFields.toArray(new SortField[0])); // Changed to new SortField[0]
+ }
+ changes = Optional.empty();
+ // fragments = new HashMap<>(); // Not pre-cached anymore
+
+ // Check metadata for highlighting setup
+ if (this.context != null && this.context.getVariable("highlight") instanceof Map) {
+ @SuppressWarnings("unchecked")
+ Map highlightParams = (Map) this.context.getVariable("highlight");
+ // Simple setup for now, more advanced formatting can be added
+ // String preTag = (String) highlightParams.getOrDefault("preTag", "");
+ // String postTag = (String) highlightParams.getOrDefault("postTag", " ");
+ // org.apache.lucene.search.highlight.Formatter formatter = new org.apache.lucene.search.highlight.SimpleHTMLFormatter(preTag, postTag);
+ org.apache.lucene.search.highlight.Formatter formatter = new org.apache.lucene.search.highlight.SimpleHTMLFormatter();
+ org.apache.lucene.search.highlight.QueryScorer queryScorer = new org.apache.lucene.search.highlight.QueryScorer(query);
+ this.highlighter = new org.apache.lucene.search.highlight.Highlighter(formatter, queryScorer);
+ // Fragmenter:
+ // this.highlighter.setTextFragmenter(new org.apache.lucene.search.highlight.SimpleFragmenter(100)); // Example: 100 chars per fragment
+
+ Object fieldsToHighlightObj = highlightParams.get("fields");
+ if (fieldsToHighlightObj instanceof String) {
+ this.highlightingFields = ((String) fieldsToHighlightObj).split(",");
+ } else if (fieldsToHighlightObj instanceof List) {
+ @SuppressWarnings("unchecked")
+ List list = (List) fieldsToHighlightObj;
+ this.highlightingFields = list.toArray(new String[0]);
+ }
+ // Analyzer for highlighting should ideally be the one used for querying the specific fields.
+ // This is a simplification; a more robust solution would fetch field-specific analyzers.
+ // If the engine provides a general queryAnalyzer, use it.
+ // this.highlightingAnalyzer = searcher.getAnalyzer(); // This is not standard on IndexSearcher
+ // Analyzer must be passed or retrieved from engine/index definition
+ }
+
+ }
+
+ public boolean isHighlightingEnabled() {
+ return this.highlighter != null && this.highlightingFields != null && this.highlightingFields.length > 0;
+ }
+
+ // Fully-qualified parameter type: this file never imports org.apache.lucene.analysis.Analyzer
+ // (the highlightingAnalyzer field above uses the FQN too), so the bare name would not resolve.
+ public LuceneQueryContext setHighlightingAnalyzer(org.apache.lucene.analysis.Analyzer analyzer) {
+ this.highlightingAnalyzer = analyzer;
+ return this;
+ }
+
+
+ public boolean isInTx() {
+ return changes.isPresent();
+ }
+
+ public LuceneQueryContext withChanges(final LuceneTxChanges changes) { // FIXME: Needs refactoring
+ this.changes = Optional.ofNullable(changes);
+ return this;
+ }
+
+ // addHighlightFragment removed as highlights are generated on demand by getHighlights
+
+ public CommandContext getContext() { // Changed
+ return context;
+ }
+
+ public Query getQuery() {
+ return query;
+ }
+
+ public Optional getChanges() { // FIXME: Needs refactoring
+ return changes;
+ }
+
+ public Sort getSort() {
+ return sort;
+ }
+
+ public IndexSearcher getSearcher() {
+ // FIXME: LuceneTxChanges and its searcher() method need refactoring
+ return changes.map(c -> new IndexSearcher(multiReader(c))).orElse(searcher);
+ }
+
+ private MultiReader multiReader(final LuceneTxChanges luceneTxChanges) { // FIXME: Needs refactoring
+ final IndexReader primaryReader = searcher.getIndexReader();
+ // FIXME: luceneTxChanges.searcher() needs to be refactored and return an IndexSearcher
+ final IndexReader txReader = luceneTxChanges.searcher().getIndexReader();
+ try {
+ // Lucene's MultiReader takes an array of IndexReaders.
+ // The boolean for sharing readers is gone in some modern versions,
+ // lifecycle of readers passed to MultiReader should be managed by the caller if they are not to be closed by MultiReader.
+ // However, if primaryReader and txReader are obtained just for this MultiReader,
+ // it might be okay for MultiReader to close them.
+ // The decRef logic was for when readers were shared. If they are not shared, it's not needed.
+ // Let's assume for now they are not shared and MultiReader can own them.
+ // If they are shared/managed elsewhere, then incRef/decRef or try-with-resources on the MultiReader is needed.
+ // For Lucene 9+, just passing readers is fine, their lifecycle is tricky.
+ // One common pattern is that MultiReader does NOT close the readers given to it by default.
+ // The `searcher.getIndexReader()` typically gives a reader that should not be closed by MultiReader if searcher is still live.
+ // `txReader` from `luceneTxChanges.searcher().getIndexReader()` also needs care.
+ // The original decRef implies they were "taken over".
+ // A safer approach for modern Lucene if readers are managed (e.g. by SearcherManager / NRTManager):
+ // DONT call decRef here. Ensure MultiReader is closed after use, and that it DOES NOT close its sub-readers
+ // if they are still managed externally.
+ // The constructor `new MultiReader(IndexReader... subReaders)` does NOT take ownership (doesn't close them).
+
+ // Given the original decRef, it implies MultiReader was taking ownership.
+ // This is not standard for the varags MultiReader constructor.
+ // The constructor `MultiReader(IndexReader[] r, boolean closeSubReaders)` is gone.
+ // Let's assume the readers passed are temporary or their lifecycle is handled by the SearcherManager from which they came.
+ // If txReader is from a RAMDirectory, it's simpler.
+ // This part is tricky without knowing exactly how primaryReader and txReader are managed.
+ // For now, will replicate the structure but acknowledge the complexity.
+ // One option: increase ref count before passing to MultiReader, then MultiReader can decRef on its close.
+ // primaryReader.incRef(); // If primaryReader is managed and should survive this MultiReader
+ // txReader.incRef(); // If txReader is managed
+ // MultiReader multiReader = new MultiReader(new IndexReader[] {primaryReader, txReader});
+ // If MultiReader is short-lived and we don't want to affect original readers:
+ List readers = new ArrayList<>();
+ readers.add(primaryReader);
+ if (txReader != null) readers.add(txReader); // txReader could be null if no changes
+
+ return new MultiReader(readers.toArray(new IndexReader[0]));
+
+ } catch (final IOException e) {
+ // FIXME: OLuceneIndexException needs to be ArcadeDB specific
+ throw ArcadeDBException.wrapException(
+ new ArcadeDBException("unable to create reader on changes"), e); // Changed
+ }
+ }
+
+ public long deletedDocs(final Query query) {
+ // FIXME: LuceneTxChanges and its deletedDocs method need refactoring
+ return changes.map(c -> c.deletedDocs(query)).orElse(0L); // Ensure Long literal
+ }
+
+ public boolean isUpdated(final Document doc, final Object key, final Identifiable value) { // Changed
+ // FIXME: LuceneTxChanges and its isUpdated method need refactoring
+ return changes.map(c -> c.isUpdated(doc, key, value)).orElse(false);
+ }
+
+ public boolean isDeleted(final Document doc, final Object key, final Identifiable value) { // Changed
+ // FIXME: LuceneTxChanges and its isDeleted method need refactoring
+ return changes.map(c -> c.isDeleted(doc, key, value)).orElse(false);
+ }
+
+ /**
+ * Generates highlighted snippets for the given Lucene document and configured fields.
+ * Requires highlightingAnalyzer to be set.
+ */
+ public Map getHighlights(Document luceneDoc, IndexReader reader) {
+ if (!isHighlightingEnabled() || luceneDoc == null || this.highlightingAnalyzer == null) {
+ return Collections.emptyMap();
+ }
+
+ Map highlights = new HashMap<>();
+ for (String field : highlightingFields) {
+ String text = luceneDoc.get(field);
+ if (text != null) {
+ try {
+ // getBestTextFragments takes a TokenStream of the field text (not an Analyzer); last param is maxNoFragments.
+ TextFragment[] frags = highlighter.getBestTextFragments(this.highlightingAnalyzer.tokenStream(field, text), text, false, 3);
+ StringBuilder sb = new StringBuilder();
+ for (TextFragment frag : frags) {
+ if (frag != null && frag.getScore() > 0) {
+ sb.append(frag.toString());
+ sb.append("... "); // Separator for multiple fragments
+ }
+ }
+ if (sb.length() > 0) {
+ highlights.put("$" + field + "_hl", sb.toString().trim());
+ }
+ } catch (IOException | org.apache.lucene.search.highlight.InvalidTokenOffsetsException e) {
+ // Log error or handle as needed
+ System.err.println("Error highlighting field " + field + ": " + e.getMessage());
+ }
+ }
+ }
+ return highlights;
+ }
+
+ // getFragments() method removed, replaced by getHighlights() logic integrated into LuceneIndexCursor
+
+ // getLimit() and onRecord() were not in the provided OLuceneQueryContext,
+ // they might be from a different class or an older version.
+ // If they are needed, they would be implemented here.
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChanges.java b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChanges.java
new file mode 100644
index 0000000000..267aafe911
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChanges.java
@@ -0,0 +1,108 @@
+/*
+ *
+ * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ * * Copyright 2023 Arcade Data Ltd
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+
+package com.arcadedb.lucene.tx;
+
+import com.arcadedb.database.Identifiable; // Changed
+import org.apache.lucene.analysis.Analyzer; // Added for new methods
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader; // Added for new methods
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopDocs; // Added for new methods
+import org.apache.lucene.util.Bits; // Added for new methods
+
+import java.io.IOException; // Added for new methods
+import java.util.Collections;
+import java.util.List; // Added for new methods
+import java.util.Map; // Added for new methods
+import java.util.Set;
+
+/** Created by Enrico Risa on 15/09/15. */
+public interface LuceneTxChanges { // Changed interface name
+
+ // Existing methods adapted
+ void put(Object key, Identifiable value, Document doc); // Changed OIdentifiable
+
+ void remove(Object key, Identifiable value); // Changed OIdentifiable
+
+ IndexSearcher searcher(); // Existing method, seems to be the transactional searcher
+
+ // numDocs() from prompt matches existing signature (except return type was long, now int as per Lucene's numDocs())
+ int numDocs(); // Changed from long to int
+
+ // getDeletedDocs() from prompt returns Set<Query>, existing returned Set<Document>
+ // Renaming existing to getDeletedLuceneDocs for clarity and adding new one
+ default Set<Document> getDeletedLuceneDocs() { // Kept original behavior with new name
+ return Collections.emptySet();
+ }
+
+ // isDeleted(Document, Object, OIdentifiable) adapted
+ boolean isDeleted(Document document, Object key, Identifiable value); // Changed OIdentifiable
+
+ // isUpdated(Document, Object, OIdentifiable) adapted
+ boolean isUpdated(Document document, Object key, Identifiable value); // Changed OIdentifiable
+
+ // deletedDocs(Query query) from prompt returns Bits, existing returned long
+ // Renaming existing to countDeletedDocs for clarity and adding new one
+ default long countDeletedDocs(Query query) { // Kept original behavior with new name
+ return 0;
+ }
+
+ // New methods from prompt
+ IndexSearcher getCoordinatingSearcher(); // New: Could be the main index searcher before TX changes overlay
+
+ Bits deletedDocs(Query query); // New: Returns Bits for live docs
+
+ boolean isUpdated(Document doc, Analyzer analyzer, Query query); // New: Overload with Analyzer and Query
+
+ boolean isDeleted(Document doc, Analyzer analyzer, Query query); // New: Overload with Analyzer and Query
+
+ int nDoc(Query query); // New: Number of documents matching query in current TX state
+
+ Set<Query> getDeletedDocuments(); // New: Set of deletion queries
+
+ Map<Query, Document> getUpdatedDocuments(); // New: Map of update queries to new documents
+
+ List<Document> getAddedDocuments(); // New: List of added documents
+
+ IndexReader getReader() throws IOException; // New: Get current transactional reader
+
+ TopDocs query(Query query, int N) throws IOException; // New: Execute query with limit // Changed signature to add N
+
+ Document doc(int doc) throws IOException; // New: Retrieve Lucene document by internal ID
+
+ Document doc(int doc, Set<String> fieldsToLoad) throws IOException; // New: Retrieve specific fields
+
+ void close() throws IOException; // New
+
+ int maxDoc() throws IOException; // New
+
+ boolean hasDeletions(); // New
+
+ void commit() throws IOException; // New
+
+ void rollback() throws IOException; // New
+
+ void addDocument(Document document) throws IOException; // New
+
+ void deleteDocument(Query query) throws IOException; // New
+
+ void updateDocument(Query query, Document document) throws IOException; // New
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesAbstract.java b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesAbstract.java
new file mode 100644
index 0000000000..ba124633ca
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesAbstract.java
@@ -0,0 +1,322 @@
+/*
+ *
+ * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ * * Copyright 2023 Arcade Data Ltd
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+
+package com.arcadedb.lucene.tx;
+
+import com.arcadedb.database.Identifiable;
+import com.arcadedb.exception.ArcadeDBException; // Changed
+import com.arcadedb.lucene.engine.LuceneIndexEngine; // Changed
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.Bits;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger; // Changed
+
+/** Created by Enrico Risa on 28/09/15. */
+public abstract class LuceneTxChangesAbstract implements LuceneTxChanges { // Changed class name and interface
+ private static final Logger logger =
+ Logger.getLogger(LuceneTxChangesAbstract.class.getName()); // Changed
+ public static final String TMP = "_tmp_rid"; // This constant seems unused here, but kept for now.
+
+ protected final LuceneIndexEngine engine; // Changed
+ protected final IndexWriter writer; // For new/updated documents
+ protected final IndexWriter deletesExecutor; // For pending deletions
+
+ private IndexSearcher txSearcher; // Cached NRT searcher for the current transaction state (adds + main)
+ private IndexReader txReader; // Cached NRT reader for the current transaction state
+
+ public LuceneTxChangesAbstract( // Changed
+ final LuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletesExecutor) {
+ this.engine = engine;
+ this.writer = writer;
+ this.deletesExecutor = deletesExecutor;
+ }
+
+ // Method to get a transactional reader, possibly NRT from writer
+ protected IndexReader getTxReader() throws IOException {
+ if (txReader == null || !txReader.tryIncRef()) { // Check if reader is still valid or can be used
+ if (txReader != null) { // was valid, but couldn't incRef, so it's likely closed
+ try {
+ txReader.decRef(); // ensure it's closed if it was open
+ } catch (Exception e) { /* ignore */ }
+ }
+ // If writer is null or closed, this will throw an exception, which is appropriate.
+ txReader = DirectoryReader.open(writer); // Standard NRT reader
+ }
+ return txReader;
+ }
+
+ protected void NRTReaderReopen() throws IOException{
+ if (txReader != null) {
+ IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader)txReader, writer);
+ if (newReader != null) {
+ txReader.decRef();
+ txReader = newReader;
+ txSearcher = new IndexSearcher(txReader);
+ }
+ } else {
+ txReader = DirectoryReader.open(writer);
+ txSearcher = new IndexSearcher(txReader);
+ }
+ }
+
+
+ @Override
+ public IndexSearcher searcher() {
+ try {
+ // Return a new NRT searcher reflecting current changes in 'writer'
+ // This searcher sees documents added/updated in the current TX but not yet committed.
+ // It does not see documents deleted in this TX against the main index.
+ // For a searcher that sees deletes as well, getCoordinatingSearcher might be better.
+ NRTReaderReopen();
+ return txSearcher;
+ } catch (IOException e) {
+ throw ArcadeDBException.wrapException( // Changed
+ new ArcadeDBException("Error creating transactional IndexSearcher from writer"), e); // Changed
+ }
+ }
+
+ @Override
+ public IndexSearcher getCoordinatingSearcher() {
+ // This searcher should ideally reflect adds, updates, AND deletes.
+ // This typically involves a MultiReader combining the main index (with its own deletions applied)
+ // and the in-memory 'writer' index, while filtering out documents marked for deletion by 'deletesExecutor'.
+ // For simplicity in this abstract class, could return the same as searcher() and expect
+ // query execution layer to use getLiveDocs() or similar.
+ // Or, could be more complex here if a combined view is built.
+ // For now, let's assume it's similar to searcher() but it's a point for review.
+ // The engine's main searcher is `engine.searcher()`
+ // FIXME: This needs a proper implementation, probably involving MultiReader and live docs from deletesExecutor
+ return searcher();
+ }
+
+ @Override
+ public IndexReader getReader() throws IOException {
+ return getTxReader();
+ }
+
+ @Override
+ public long countDeletedDocs(Query query) { // Renamed from original deletedDocs
+ try {
+ // This counts documents matching the query in the 'deletesExecutor' index.
+ // These are documents marked for deletion in this transaction.
+ if (deletesExecutor.getDocStats().numDocs == 0) return 0; // Optimization
+ try (IndexReader reader = DirectoryReader.open(deletesExecutor)) {
+ final IndexSearcher indexSearcher = new IndexSearcher(reader);
+ final TopDocs search = indexSearcher.search(query, 1); // We only need totalHits
+ return search.totalHits.value;
+ }
+ } catch (IOException e) {
+ logger.log(Level.SEVERE, "Error reading pending deletions index", e); // Changed
+ }
+ return 0;
+ }
+
+ @Override
+ public Bits deletedDocs(Query query) {
+ // This should return a Bits representing documents deleted by this query
+ // within the context of the main index reader (from engine.searcher()).
+ // This is complex as it needs to check against the 'deletesExecutor' or tracked delete queries.
+ // Not typically provided directly by IndexWriter for pending changes.
+ // FIXME: This needs a proper implementation, likely involving custom collector or query rewriting.
+ logger.warning("deletedDocs(Query) returning Bits is not fully implemented in abstract class.");
+ return null; // Placeholder
+ }
+
+
+ @Override
+ public void addDocument(Document document) throws IOException {
+ writer.addDocument(document);
+ }
+
+ @Override
+ public void deleteDocument(Query query) throws IOException {
+ // Deletes applied to main writer will be visible to its NRT reader.
+ // If deletesExecutor is for tracking standalone delete operations before commit to main index:
+ // writer.deleteDocuments(query); // This applies to the current TX state
+ // deletesExecutor.addDocument(createDeleteMarker(query)); // If deletes are tracked as docs in a separate index
+ // For now, assuming deletes are applied to the main writer for NRT visibility.
+ // If deletesExecutor is a separate RAMDirectory for _pending full deletes_ against main index,
+ // then it should be: deletesExecutor.deleteDocuments(query) or writer.deleteDocuments(query)
+ // The original code had separate writer and deletedIdx. Let's assume deletes are applied to writer.
+ writer.deleteDocuments(query);
+ if(deletesExecutor != writer && deletesExecutor != null) { // If deletes are tracked separately for commit to main index
+ deletesExecutor.deleteDocuments(query);
+ }
+ }
+
+ @Override
+ public void updateDocument(Query query, Document document) throws IOException {
+ writer.updateDocument(query, document);
+ if(deletesExecutor != writer && deletesExecutor != null) {
+ // If an update can also affect the "to be deleted from main index" list, handle here.
+ // This is complex. Usually an update is a delete then an add.
+ // deletesExecutor.updateDocument(query, document); // This might not be how it works.
+ }
+ }
+
+ @Override
+ public void commit() throws IOException {
+ writer.commit();
+ if (deletesExecutor != null && deletesExecutor != writer) {
+ deletesExecutor.commit();
+ }
+ }
+
+ @Override
+ public void rollback() throws IOException {
+ writer.rollback();
+ if (deletesExecutor != null && deletesExecutor != writer) {
+ deletesExecutor.rollback();
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ try {
+ if (txReader != null) {
+ txReader.decRef();
+ txReader = null;
+ }
+ } finally {
+ txSearcher = null; // Searcher was using txReader
+ try {
+ writer.close();
+ } finally {
+ if (deletesExecutor != null && deletesExecutor != writer) {
+ deletesExecutor.close();
+ }
+ }
+ }
+ }
+
+ @Override
+ public int numDocs() {
+ // Returns numDocs of the current transactional reader (reflecting adds/updates in this TX)
+ try { final IndexReader reader = getTxReader(); // do NOT close the cached txReader: release only our ref
+ try { return reader.numDocs(); } finally { reader.decRef(); }
+ } catch (IOException e) {
+ logger.log(Level.SEVERE, "Cannot get numDocs from transactional reader", e);
+ return 0;
+ }
+ }
+
+ @Override
+ public int maxDoc() throws IOException {
+ // Do not close the shared txReader here; closing would invalidate the cached NRT reader.
+ final IndexReader reader = getTxReader();
+ try { return reader.maxDoc(); } finally { reader.decRef(); }
+ }
+
+ @Override
+ public boolean hasDeletions() {
+ // Check deletions in the context of the main writer for NRT changes
+ return writer.hasDeletions();
+ }
+
+ @Override
+ public TopDocs query(Query query, int n) throws IOException {
+ NRTReaderReopen(); // Ensure searcher is up-to-date
+ return txSearcher.search(query, n);
+ }
+
+ @Override
+ public Document doc(int docId) throws IOException {
+ NRTReaderReopen();
+ return txSearcher.storedFields().document(docId);
+ }
+
+ @Override
+ public Document doc(int docId, Set<String> fieldsToLoad) throws IOException {
+ NRTReaderReopen();
+ return txSearcher.storedFields().document(docId, fieldsToLoad);
+ }
+
+ // Methods requiring more specific state tracking, to be implemented by concrete classes or left as default/abstract.
+ // These were not in the original OLuceneTxChangesAbstract.
+
+ @Override
+ public abstract void put(Object key, Identifiable value, Document doc);
+
+ @Override
+ public abstract void remove(Object key, Identifiable value);
+
+ @Override
+ public abstract boolean isDeleted(Document document, Object key, Identifiable value);
+
+ @Override
+ public abstract boolean isUpdated(Document document, Object key, Identifiable value);
+
+ @Override
+ public boolean isUpdated(Document doc, Analyzer analyzer, Query query) {
+ // Default: Not supported or needs concrete implementation
+ logger.warning("isUpdated(doc, analyzer, query) not implemented in abstract class.");
+ return false;
+ }
+
+ @Override
+ public boolean isDeleted(Document doc, Analyzer analyzer, Query query) {
+ // Default: Not supported or needs concrete implementation
+ logger.warning("isDeleted(doc, analyzer, query) not implemented in abstract class.");
+ return false;
+ }
+
+ @Override
+ public int nDoc(Query query) {
+ // Number of documents matching query in current TX state
+ try {
+ TopDocs results = query(query, 1); // Just need total hits
+ return (int) results.totalHits.value;
+ } catch (IOException e) {
+ logger.log(Level.SEVERE, "Error executing nDoc query", e);
+ return 0;
+ }
+ }
+
+ // These typically require tracking specific operations, left abstract or default.
+ @Override
+ public Set<Query> getDeletedDocuments() {
+ logger.warning("getDeletedDocuments() not implemented in abstract class, returning empty set.");
+ return Collections.emptySet();
+ }
+
+ @Override
+ public Map<Query, Document> getUpdatedDocuments() {
+ logger.warning("getUpdatedDocuments() not implemented in abstract class, returning empty map.");
+ return Collections.emptyMap();
+ }
+
+ @Override
+ public List<Document> getAddedDocuments() {
+ logger.warning("getAddedDocuments() not implemented in abstract class, returning empty list.");
+ return Collections.emptyList();
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesMultiRid.java b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesMultiRid.java
new file mode 100644
index 0000000000..20e8874497
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesMultiRid.java
@@ -0,0 +1,191 @@
+/*
+ *
+ * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ * * Copyright 2023 Arcade Data Ltd
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+
+package com.arcadedb.lucene.tx;
+
+import com.arcadedb.database.Identifiable; // Changed
+import com.arcadedb.exception.ArcadeDBException; // Changed
+import com.arcadedb.lucene.engine.LuceneIndexEngine; // Changed
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.lucene.analysis.Analyzer; // For isDeleted/isUpdated with Analyzer
+import org.apache.lucene.analysis.core.KeywordAnalyzer; // For MemoryIndex in isDeleted
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.memory.MemoryIndex; // For isDeleted
+import org.apache.lucene.search.Query;
+
+/** Created by Enrico Risa on 15/09/15. */
+public class LuceneTxChangesMultiRid extends LuceneTxChangesAbstract { // Changed class name and base class
+ // Stores RID string to a list of associated keys that were part of a delete operation for that RID.
+ private final Map<String, List<Object>> deletedRidToKeys = new HashMap<>();
+
+ // To support new interface methods
+ private final List<Document> addedDocuments = new ArrayList<>();
+ // For MultiRid, an "update" is typically a delete of an old key-RID pair (doc) and an add of a new one.
+ // Tracking specific "updates" as Query->Document is complex here if not just delete+add.
+ private final Map<Query, Document> updatedDocumentsMap = new HashMap<>();
+ private final Set<Query> deletedQueries = new HashSet<>();
+
+
+ public LuceneTxChangesMultiRid( // Changed
+ final LuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletesExecutor) {
+ super(engine, writer, deletesExecutor);
+ }
+
+ @Override
+ public void put(final Object key, final Identifiable value, final Document doc) {
+ try {
+ super.addDocument(doc); // Use base class to add to writer
+ addedDocuments.add(doc); // Track for getAddedDocuments()
+ } catch (IOException e) {
+ throw ArcadeDBException.wrapException( // Changed
+ new ArcadeDBException("Unable to add document to transactional Lucene index for multi-RID"), e); // Changed
+ }
+ }
+
+ @Override
+ public void remove(final Object key, final Identifiable value) {
+ Query deleteQuery;
+ if (value == null) { // Delete by key - affects all RIDs for this key
+ deleteQuery = engine.deleteQuery(key, null);
+ } else { // Delete a specific key-RID association
+ deleteQuery = engine.deleteQuery(key, value);
+ }
+
+ try {
+ super.deleteDocument(deleteQuery); // Apply to current transaction's writer
+ deletedQueries.add(deleteQuery); // Track query for getDeletedDocuments()
+
+ if (value != null && value.getIdentity() != null && !value.getIdentity().isNew()) {
+ // Track that this RID was involved in a delete operation with this key
+ String ridString = value.getIdentity().toString();
+ deletedRidToKeys.computeIfAbsent(ridString, k -> new ArrayList<>()).add(key);
+
+ // Original logic added the specific doc to deletedIdx (deletesExecutor).
+ // This implies deletesExecutor might track full documents to be deleted from the main index.
+ // If super.deleteDocument also routes to deletesExecutor based on query, this might be redundant
+ // or requires deletesExecutor to handle full document additions for its own logic.
+ // For now, let's assume super.deleteDocument(query) is sufficient for deletesExecutor if it's configured for queries.
+ // If deletesExecutor *must* have the full doc:
+ // final Document docToDelete = engine.buildDocument(key, value); // FIXME: engine.buildDocument dependency
+ // if (deletesExecutor != null) deletesExecutor.addDocument(docToDelete);
+ }
+ } catch (final IOException e) {
+ throw ArcadeDBException.wrapException( // Changed
+ new ArcadeDBException("Error while deleting documents in transaction from Lucene index (multi-RID)"), e); // Changed
+ }
+ }
+
+ @Override
+ public int numDocs() {
+ // The base class numDocs() provides NRT view of `writer`.
+ // Original OLuceneTxChangesMultiRid subtracted deletedDocs.size().
+ // `deletedDocs` (now represented by deletedQueries or deletedRidToKeys) refers to deletions
+ // that will be applied to the main index.
+ // A precise count is complex. For now, relying on base class numDocs which reflects writer's current state.
+ // A more accurate count of "net new documents in this TX" would be addedDocuments.size() minus
+ // documents that were added then deleted within the same TX (if tracked).
+ // If numDocs should reflect the final state after commit, it's more complex.
+ // Let's return the NRT view of the current writer.
+ return super.numDocs();
+ }
+
+ @Override
+ public Set<Document> getDeletedLuceneDocs() {
+ // The original stored actual Document objects that were deleted.
+ // This is hard to reconstruct if we only store queries or (RID,Key) pairs.
+ // FIXME: If this exact Set is needed, logic in remove() must re-build and store them.
+ // For now, returning empty as per LuceneTxChangesSingleRid refactoring.
+ return Collections.emptySet();
+ }
+
+ @Override
+ public boolean isDeleted(final Document document, final Object key, final Identifiable value) { // Changed
+ if (value == null || value.getIdentity() == null) return false;
+
+ final List<Object> associatedKeys = deletedRidToKeys.get(value.getIdentity().toString());
+ if (associatedKeys != null) {
+ // Check if the provided 'key' (or a general match for the document) is among those deleted for this RID
+ if (associatedKeys.contains(key)) return true; // Exact key match
+
+ // More complex check: does the 'document' match any of the delete operations for this RID?
+ // This matches the original MemoryIndex check.
+ final MemoryIndex memoryIndex = new MemoryIndex();
+ // Populate memoryIndex with the fields of the 'document' parameter
+ for (final IndexableField field : document.getFields()) {
+ // TODO: This needs proper handling for different field types.
+ // stringValue() might not be universally appropriate.
+ // Using KeywordAnalyzer, so it's mostly for exact term matching.
+ // This part is tricky and might need to use the actual field value from IndexableField.
+ // For now, assuming stringValue is a simplified placeholder.
+ if (field.stringValue() != null) { // MemoryIndex cannot add null values
+ memoryIndex.addField(field.name(), field.stringValue(), new KeywordAnalyzer());
+ }
+ }
+
+ for (final Object deletedKey : associatedKeys) {
+ // engine.deleteQuery should generate a query that identifies the specific key-RID pair
+ final Query q = engine.deleteQuery(deletedKey, value); // Query for specific key-RID pair
+ if (memoryIndex.search(q) > 0.0f) {
+ return true; // The document matches one of the delete operations for this RID
+ }
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public boolean isUpdated(final Document document, final Object key, final Identifiable value) { // Changed
+ // For MultiRid, an update is typically a delete of an old association and an add of a new one.
+ // This class doesn't explicitly track "updates" in a separate set like SingleRid did.
+ // One could argue an entry is "updated" if it was deleted and then re-added with the same RID but different key/doc.
+ // However, without more state, this is hard to determine accurately here.
+ // The original returned false.
+ return false;
+ }
+
+ // Implementations for new methods from LuceneTxChanges interface
+ @Override
+ public List<Document> getAddedDocuments() {
+ return Collections.unmodifiableList(addedDocuments);
+ }
+
+ @Override
+ public Set<Query> getDeletedDocuments() {
+ return Collections.unmodifiableSet(deletedQueries);
+ }
+
+ @Override
+ public Map<Query, Document> getUpdatedDocuments() {
+ // Updates are not explicitly tracked as Query->Document in this multi-value implementation.
+ // An update is a delete of one Lucene document and an add of another.
+ // To fulfill this, one might need to capture the delete query and the newly added document
+ // if a "key" conceptually remains the same but its associated RIDs change.
+ // For now, returning empty, as this requires more specific tracking.
+ return Collections.unmodifiableMap(updatedDocumentsMap);
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesSingleRid.java b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesSingleRid.java
new file mode 100644
index 0000000000..967ac52fba
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/tx/LuceneTxChangesSingleRid.java
@@ -0,0 +1,203 @@
+/*
+ *
+ * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ * * Copyright 2023 Arcade Data Ltd
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+
+package com.arcadedb.lucene.tx;
+
+import com.arcadedb.database.Identifiable; // Changed
+import com.arcadedb.exception.ArcadeDBException; // Changed
+import com.arcadedb.lucene.engine.LuceneIndexEngine; // Changed
+import com.arcadedb.lucene.index.ArcadeLuceneIndexType; // Changed for createField
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field; // For Field.Store
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.Query; // For getDeletedDocuments & getUpdatedDocuments
+
+/** Created by Enrico Risa on 15/09/15. */
+public class LuceneTxChangesSingleRid extends LuceneTxChangesAbstract { // Changed class name and base class
+ private final Set<String> deletedRids = new HashSet<>(); // RIDs marked for deletion from main index
+ private final Set<String> updatedRids = new HashSet<>(); // RIDs that were deleted and then re-added (i.e., updated)
+
+ // To support new interface methods
+ private final List<Document> addedDocuments = new ArrayList<>();
+ private final Map<Query, Document> updatedDocumentsMap = new HashMap<>(); // Query to delete old, Document is new
+ private final Set<Query> deletedQueries = new HashSet<>();
+
+
+ public LuceneTxChangesSingleRid( // Changed
+ final LuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletesExecutor) {
+ super(engine, writer, deletesExecutor);
+ }
+
+ @Override
+ public void put(final Object key, final Identifiable value, final Document doc) {
+ // This method is called when a key/value is to be associated in the index.
+ // The `doc` is the Lucene document representing this association.
+ try {
+ if (value != null && value.getIdentity() != null && !value.getIdentity().isNew()) {
+ String ridString = value.getIdentity().toString();
+ if (deletedRids.remove(ridString)) {
+ // If it was previously deleted in this transaction, it's now an update.
+ // The TMP field was used to mark such docs for special handling during merge/query,
+ // but it's unclear if that's needed with current Lucene NRT capabilities or specific merge logic.
+ // For now, we track it as updated.
+ doc.add(ArcadeLuceneIndexType.createField(TMP, ridString, Field.Store.YES)); // Changed OLuceneIndexType
+ updatedRids.add(ridString);
+ // The document for this RID might have been in `deletesExecutor`;
+ // an update means it shouldn't be deleted from the main index.
+ // This might require removing it from `deletesExecutor` if it was added there.
+ // This is complex and depends on how commit logic handles deletesExecutor.
+ // For now, just adding to writer.
+ }
+ }
+ super.addDocument(doc); // Use base class to add to writer
+ addedDocuments.add(doc); // Track for getAddedDocuments()
+ } catch (IOException e) {
+ throw ArcadeDBException.wrapException( // Changed
+ new ArcadeDBException("Unable to add document to transactional Lucene index"), e); // Changed
+ }
+ }
+
+ @Override
+ public void remove(final Object key, final Identifiable value) {
+ // This method is called to disassociate a key/value.
+ // `value` is the RID to be removed.
+ // `key` might be used to construct a more specific delete query if needed, but typically deletion by RID is sufficient.
+ Query deleteQuery;
+ if (value == null) {
+ // Delete by key - this is dangerous for non-unique indexes, but Lucene handles it by query
+ deleteQuery = engine.deleteQuery(key, null); // engine.deleteQuery should handle null value for key-based delete
+ } else {
+ deleteQuery = engine.deleteQuery(key, value); // Specific RID deletion query
+ }
+
+ try {
+ super.deleteDocument(deleteQuery); // Use base class to delete from writer (current TX view)
+ deletedQueries.add(deleteQuery); // Track for getDeletedDocuments()
+
+ if (value != null && value.getIdentity() != null && !value.getIdentity().isNew()) {
+ // If it's a persistent RID, track it for specific management.
+ // This logic matches original: add to deletedRids and also add its document to deletesExecutor
+ String ridString = value.getIdentity().toString();
+ deletedRids.add(ridString);
+ updatedRids.remove(ridString); // If it was updated then deleted, it's just a delete.
+
+ // The original added the full document to deletedIdx (deletesExecutor).
+ // This implies deletesExecutor might be a "negative" index.
+ if (deletesExecutor != null) {
+ // We need the document as it was in the main index to correctly mark it for deletion.
+ // Building it here might not be accurate if fields changed.
+ // FIXME: This needs a robust way to get the "old" document or rely on query for deletion.
+ // For now, if we have 'value', we assume `engine.deleteQuery` is by specific ID.
+ // If `deletesExecutor` is meant to hold docs to be deleted from main index on commit:
+ // Document docToDelete = engine.buildDocument(key, value); // This builds NEW doc.
+ // Instead of adding doc, we add the query. Commit logic will use these queries.
+ }
+ }
+ } catch (final IOException e) {
+ throw ArcadeDBException.wrapException( // Changed
+ new ArcadeDBException("Error while deleting documents in transaction from Lucene index"), e); // Changed
+ }
+ }
+
+ @Override
+ public int numDocs() {
+ // The base class numDocs() returns writer.getDocStats().numDocs or similar NRT count from writer.
+ // This reflects documents added/updated in the current TX.
+ // The original OLuceneTxChangesSingleRid subtracted deleted.size() and updated.size().
+ // Subtracting deletedRids makes sense if these are deletions from the main index state.
+ // Subtracting updatedRids from writer's NRT count is tricky; an update is a delete + add.
+ // The NRT reader from `writer` already accounts for its own adds/deletes.
+ // If `deletedRids` tracks docs to be deleted from the *main committed index*, then this makes sense.
+ // Let's assume the base `numDocs()` gives count from `writer` (adds/updates in tx).
+ // We need to subtract those in `deletedRids` that were not re-added/updated.
+ int writerDocs = super.numDocs();
+ int netDeletes = 0;
+ for (String rid : deletedRids) {
+ if (!updatedRids.contains(rid)) { // If it was deleted and not subsequently updated/re-added
+ netDeletes++;
+ }
+ }
+ // This is still an approximation of the final count after commit.
+ // A true transactional count would need to consider the main index count + adds - (deletes not in adds).
+ // For now, this is an estimate of the TX view.
+ return writerDocs - netDeletes;
+ }
+
+ @Override
+ public Set<Document> getDeletedLuceneDocs() {
+ // This method from the original interface returned Lucene docs marked for deletion.
+ // The new interface has getDeletedDocuments returning Set<Query>.
+ // This method can be implemented if still needed, but might be redundant.
+ // For now, let's try to build it from deletedQueries if possible, or keep original logic if it made sense.
+ // The original stored `deletedDocs` (actual Document objects).
+ // Let's return empty for now, assuming getDeletedDocuments() is the primary.
+ // FIXME: Review if this specific Set is still needed.
+ return Collections.emptySet();
+ }
+
+ @Override
+ public boolean isDeleted(Document document, Object key, Identifiable value) { // Changed
+ return value != null && value.getIdentity() != null && deletedRids.contains(value.getIdentity().toString());
+ }
+
+ @Override
+ public boolean isUpdated(Document document, Object key, Identifiable value) { // Changed
+ return value != null && value.getIdentity() != null && updatedRids.contains(value.getIdentity().toString());
+ }
+
+ // Implementations for new methods from LuceneTxChanges interface
+ @Override
+ public List<Document> getAddedDocuments() {
+ return Collections.unmodifiableList(addedDocuments);
+ }
+
+ @Override
+ public Set<Query> getDeletedDocuments() {
+ return Collections.unmodifiableSet(deletedQueries);
+ }
+
+ @Override
+ public Map<Query, Document> getUpdatedDocuments() {
+ // This class tracks updatedRids. To fulfill Map,
+ // we'd need to store the delete query and the new document for each update.
+ // The current `put` logic handles updates by re-adding.
+ // FIXME: This needs more sophisticated tracking if specific update queries are required.
+ // For now, returning based on `updatedRids` and `addedDocuments`.
+ // This is an approximation.
+ Map<Query, Document> approxUpdated = new HashMap<>();
+ for (Document doc : addedDocuments) {
+ String tmpRid = doc.get(TMP);
+ if (tmpRid != null && updatedRids.contains(tmpRid)) {
+ // This doc is an update. What was the query to delete the old one?
+ // We don't store the original key for the RID directly here for updates.
+ // This highlights a gap if this specific Map is needed.
+ // For now, this will be empty or needs more info.
+ }
+ }
+ return Collections.unmodifiableMap(updatedDocumentsMap); // Requires populating this map during put/update
+ }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/util/LuceneDateTools.java b/lucene/src/main/java/com/arcadedb/lucene/util/LuceneDateTools.java
new file mode 100644
index 0000000000..ca4c7b775a
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/util/LuceneDateTools.java
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2023 Arcade Data Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.arcadedb.lucene.util;
+
+import java.text.ParsePosition;
+import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.OffsetDateTime;
+import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeParseException;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.TimeZone;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+public class LuceneDateTools {
+
+ private static final Logger logger = Logger.getLogger(LuceneDateTools.class.getName());
+
+ // Prioritized list of date/datetime formatters
+ // ISO 8601 with Z / offset / local
+ private static final DateTimeFormatter ISO_OFFSET_DATE_TIME = DateTimeFormatter.ISO_OFFSET_DATE_TIME; // Handles 'Z' and offsets like +01:00
+ private static final DateTimeFormatter ISO_LOCAL_DATE_TIME = DateTimeFormatter.ISO_LOCAL_DATE_TIME; // Handles 'yyyy-MM-ddTHH:mm:ss.SSS'
+ private static final DateTimeFormatter ISO_LOCAL_DATE = DateTimeFormatter.ISO_LOCAL_DATE; // Handles 'yyyy-MM-dd'
+
+ // Common alternative formats
+ private static final String ALT_DATETIME_FORMAT_NO_T = "yyyy-MM-dd HH:mm:ss.SSS";
+ private static final String ALT_DATETIME_FORMAT_NO_T_NO_MS = "yyyy-MM-dd HH:mm:ss";
+ private static final String ALT_DATETIME_FORMAT_NO_T_NO_S_NO_MS = "yyyy-MM-dd HH:mm";
+
+
+ public static Long parseDateTimeToMillis(String dateTimeString) {
+ if (dateTimeString == null || dateTimeString.isEmpty() || "*".equals(dateTimeString)) {
+ return null;
+ }
+
+ // 1. Try parsing as plain long (epoch millis)
+ try {
+ return Long.parseLong(dateTimeString);
+ } catch (NumberFormatException e) {
+ // Not a long, proceed to date formats
+ }
+
+ // 2. Try ISO_OFFSET_DATE_TIME (handles 'Z' for UTC and offsets)
+ try {
+ OffsetDateTime odt = OffsetDateTime.parse(dateTimeString, ISO_OFFSET_DATE_TIME);
+ return odt.toInstant().toEpochMilli();
+ } catch (DateTimeParseException e) {
+ // ignore and try next format
+ }
+
+ // 3. Try ISO_LOCAL_DATE_TIME (no offset in the input string).
+ // The value is interpreted as UTC — not the system default timezone — for consistency
+ // with Lucene's DateTools convention; make the zone configurable if local-time input is expected.
+ try {
+ LocalDateTime ldt = LocalDateTime.parse(dateTimeString, ISO_LOCAL_DATE_TIME);
+ return ldt.toInstant(ZoneOffset.UTC).toEpochMilli(); // Assume UTC if no offset
+ } catch (DateTimeParseException e) {
+ // ignore and try next format
+ }
+
+ // 4. Try ISO_LOCAL_DATE (assumes start of day, UTC)
+ try {
+ LocalDate ld = LocalDate.parse(dateTimeString, ISO_LOCAL_DATE);
+ return ld.atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
+ } catch (DateTimeParseException e) {
+ // ignore and try next format
+ }
+
+ // 5. Try alternative SimpleDateFormat patterns (less robust, more ambiguous)
+ // These assume UTC. If local timezone is implied by strings, SimpleDateFormat needs setTimeZone(TimeZone.getDefault())
+ // but for consistency with Lucene (which often uses UTC via DateTools), UTC is safer.
+ String[] altPatterns = {
+ ALT_DATETIME_FORMAT_NO_T,
+ ALT_DATETIME_FORMAT_NO_T_NO_MS,
+ ALT_DATETIME_FORMAT_NO_T_NO_S_NO_MS
+ };
+
+ for (String pattern : altPatterns) {
+ try {
+ SimpleDateFormat sdf = new SimpleDateFormat(pattern);
+ sdf.setTimeZone(TimeZone.getTimeZone("UTC")); // Assume UTC for these patterns too
+ sdf.setLenient(false);
+ Date date = sdf.parse(dateTimeString);
+ return date.getTime();
+ } catch (java.text.ParseException ex) {
+ // ignore and try next pattern
+ }
+ }
+
+ logger.log(Level.WARNING, "Failed to parse date/datetime string: {0}", dateTimeString);
+ return null; // Or throw ParseException if strict parsing is required
+ }
+
+ public static Long normalizeToDayEpochMillis(long epochMillis) {
+ Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+ cal.setTimeInMillis(epochMillis);
+ cal.set(Calendar.HOUR_OF_DAY, 0);
+ cal.set(Calendar.MINUTE, 0);
+ cal.set(Calendar.SECOND, 0);
+ cal.set(Calendar.MILLISECOND, 0);
+ return cal.getTimeInMillis();
+ }
+
+ public static Long parseDateToMillis(String dateString) {
+ Long epochMillis = parseDateTimeToMillis(dateString);
+ if (epochMillis != null) {
+ return normalizeToDayEpochMillis(epochMillis);
+ }
+ return null;
+ }
+}
diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory
new file mode 100644
index 0000000000..2dbcff89d3
--- /dev/null
+++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory
@@ -0,0 +1,21 @@
+#
+# /*
+# * Copyright 2014 Orient Technologies.
+# *
+# * Licensed under the Apache License, Version 2.0 (the "License");
+# * you may not use this file except in compliance with the License.
+# * You may obtain a copy of the License at
+# *
+# * http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+#
+
+com.arcadedb.lucene.OLuceneIndexFactory
+# com.orientechnologies.spatial.OLuceneSpatialIndexFactory  # FIXME(review): OrientDB class not present in this repo; ServiceLoader would throw ServiceConfigurationError — re-enable once a spatial module is ported
+com.arcadedb.lucene.OLuceneCrossClassIndexFactory
diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory
new file mode 100644
index 0000000000..72a6b3fbab
--- /dev/null
+++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory
@@ -0,0 +1,21 @@
+#
+# /*
+# * Copyright 2015 Orient Technologies.
+# *
+# * Licensed under the Apache License, Version 2.0 (the "License");
+# * you may not use this file except in compliance with the License.
+# * You may obtain a copy of the License at
+# *
+# * http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+#
+
+com.arcadedb.lucene.functions.OLuceneFunctionsFactory
+# com.orientechnologies.spatial.functions.OSpatialFunctionsFactory  # FIXME(review): OrientDB class not present in this repo; ServiceLoader would throw ServiceConfigurationError — re-enable once a spatial module is ported
+com.arcadedb.lucene.functions.OLuceneCrossClassFunctionsFactory
diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory
new file mode 100644
index 0000000000..02b1024bcd
--- /dev/null
+++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory
@@ -0,0 +1,20 @@
+#
+# /*
+# * Copyright 2014 Orient Technologies.
+# *
+# * Licensed under the Apache License, Version 2.0 (the "License");
+# * you may not use this file except in compliance with the License.
+# * You may obtain a copy of the License at
+# *
+# * http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+#
+
+com.arcadedb.lucene.operator.OLuceneOperatorFactory
+# com.orientechnologies.spatial.operator.OLuceneSpatialOperatorFactory  # FIXME(review): OrientDB class not present in this repo; ServiceLoader would throw ServiceConfigurationError — re-enable once a spatial module is ported
diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler b/lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler
new file mode 100644
index 0000000000..db660fa15e
--- /dev/null
+++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler
@@ -0,0 +1 @@
+com.arcadedb.lucene.ArcadeLuceneIndexFactoryHandler
diff --git a/lucene/src/main/resources/plugin.json b/lucene/src/main/resources/plugin.json
new file mode 100644
index 0000000000..c0ef9caa33
--- /dev/null
+++ b/lucene/src/main/resources/plugin.json
@@ -0,0 +1,8 @@
+{
+ "name": "lucene-index",
+ "version": "1.7",
+ "javaClass": "com.arcadedb.lucene.ArcadeLuceneLifecycleManager",
+ "parameters": {},
+ "description": "This is the Lucene Index integration",
+ "copyrights": "Arcade Data Ltd"
+}
diff --git a/pom.xml b/pom.xml
index 4dc3484765..a07eaa3c68 100644
--- a/pom.xml
+++ b/pom.xml
@@ -120,6 +120,7 @@
studio
package
e2e
+ lucene