Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,28 @@
# Changelog

## [0.6.14] - 2026-05-06

### Changed
- Knowledge defaults are now declared directly in `Knowledge.default_settings`, and helpers, runners, actors, and JSON sidecar persistence use Legion logging, settings, and JSON helpers end to end.

## [0.6.13] - 2026-05-06

### Added
- Knowledge ingest now supports optional LLM-based chunk filtering through `knowledge.ingest.filter_prompt`, with confidence thresholding, content-hash caching, and a runner-level `filter: false` bypass for no-filter ingest flows.

## [0.6.12] - 2026-05-06

### Added
- Query and retrieve runners now support optional neighbor expansion (`expand_neighbors: true`, `neighbor_radius:`) to include adjacent document chunks around Apollo retrieval hits.

### Fixed
- Knowledge ingest now sends chunk source metadata as Apollo `context` so `source_file` and `chunk_index` are available for neighbor retrieval.

## [0.6.11] - 2026-05-06

### Fixed
- Knowledge ingest and maintenance now resolve Apollo data models through the namespaced `Legion::Data::Model::Apollo::*` classes introduced by the legion-data schema cleanup, with fallback support for legacy Apollo model constants.

## [0.6.10] - 2026-04-28

### Fixed
Expand Down
34 changes: 34 additions & 0 deletions lib/legion/extensions/knowledge.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# frozen_string_literal: true

require 'legion/logging'
require 'legion/settings'
require 'legion/json'
require_relative 'knowledge/version'
require_relative 'knowledge/helpers/manifest'
require_relative 'knowledge/helpers/manifest_store'
require_relative 'knowledge/helpers/parser'
require_relative 'knowledge/helpers/chunker'
require_relative 'knowledge/helpers/apollo_models'
require_relative 'knowledge/runners/ingest'
require_relative 'knowledge/runners/query'
require_relative 'knowledge/runners/corpus'
Expand All @@ -27,11 +31,41 @@
module Legion
  module Extensions
    # Root module of the knowledge extension. It mixes the Legion logging and
    # settings helpers into the module itself and declares the extension's
    # default settings.
    module Knowledge
      extend Legion::Logging::Helper
      extend Legion::Settings::Helper
      # Core is optional: only extended when it has already been loaded.
      extend Legion::Extensions::Core if defined?(Legion::Extensions::Core)

      class << self
        # @return [Boolean] always false
        def remote_invocable? = false

        # Default configuration for the extension. A new Hash is built on
        # every call, so callers may mutate the result freely.
        #
        # @return [Hash] nested defaults keyed by symbol
        def default_settings
          {
            corpus_path: nil,
            monitors: [],
            chunker: { max_tokens: 512, overlap_tokens: 128 },
            query: { top_k: 5, neighbor_radius: 1 },
            ingest: { filter_prompt: nil, filter_threshold: 0.5 },
            maintenance: { stale_threshold: 0.3, cold_chunk_days: 7, quality_report_limit: 10 },
            actors: { watcher_interval: 300, maintenance_interval: 21_600 }
          }
        end
      end
    end
  end
end
6 changes: 5 additions & 1 deletion lib/legion/extensions/knowledge/actors/corpus_ingest.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ module Extensions
module Knowledge
module Actor
class CorpusIngest < Legion::Extensions::Actors::Subscription
include Legion::Logging::Helper
include Legion::Settings::Helper

# Name of the runner class, as a String.
def runner_class
  'Legion::Extensions::Knowledge::Runners::Ingest'
end

# Name of the runner function, as a String.
def runner_function
  'ingest_file'
end

# @return [Boolean] always false
def check_subtask?
  false
end
Expand All @@ -13,7 +16,8 @@ def generate_task? = false
# Reports whether this actor should run.
#
# The result is truthy only when `Transport` is defined directly on `Legion`
# and the Ingest runner constant is defined. Any StandardError raised while
# checking is passed to handle_exception (level :warn) and the actor is
# treated as disabled.
#
# @return [Object] truthy when both constants are present, falsy otherwise;
#   exactly false after a rescued error
def enabled? # rubocop:disable Legion/Extension/ActorEnabledSideEffects
  Legion.const_defined?(:Transport, false) &&
    defined?(Legion::Extensions::Knowledge::Runners::Ingest)
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.corpus_ingest.enabled')
  false
end
end
Expand Down
19 changes: 7 additions & 12 deletions lib/legion/extensions/knowledge/actors/corpus_watcher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,25 @@ module Extensions
module Knowledge
module Actor
class CorpusWatcher < Legion::Extensions::Actors::Every # rubocop:disable Legion/Extension/EveryActorRequiresTime
include Legion::Logging::Helper
include Legion::Settings::Helper

# Name of the runner class, as a String.
def runner_class
  'Legion::Extensions::Knowledge::Runners::Ingest'
end

# Name of the runner function, as a String.
def runner_function
  'ingest_corpus'
end

# @return [Boolean] always false
def check_subtask?
  false
end

# @return [Boolean] always false
def generate_task?
  false
end

# Interval for this actor, read from settings[:actors][:watcher_interval].
#
# If the settings lookup raises a StandardError, the error is passed to
# handle_exception (level :warn) and 300 is returned instead.
#
# @return [Object] the configured interval, or 300 after a rescued error
def time
  settings[:actors][:watcher_interval]
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.corpus_watcher.time')
  300
end

# Reports whether this actor should run: true when resolve_monitors returns
# at least one truthy element.
#
# A StandardError raised during the check is passed to handle_exception
# (level :warn) and the actor is treated as disabled.
#
# @return [Boolean]
def enabled? # rubocop:disable Legion/Extension/ActorEnabledSideEffects
  resolve_monitors.any?
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.corpus_watcher.enabled')
  false
end

Expand All @@ -34,14 +33,10 @@ def args

private

# Logger for this actor: the Legion::Logging module itself.
# NOTE(review): the class also includes Legion::Logging::Helper; if that helper
# already provides `log`, this definition may be redundant — confirm.
def log = Legion::Logging

# Delegates to Runners::Monitor.resolve_monitors.
#
# A StandardError raised by the lookup (including the runner constant not
# being defined) is passed to handle_exception (level :warn) and an empty
# Array is returned.
#
# @return [Object] whatever the runner returns, or [] after a rescued error
def resolve_monitors
  Runners::Monitor.resolve_monitors
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.corpus_watcher.resolve_monitors')
  []
end
end
Expand Down
23 changes: 8 additions & 15 deletions lib/legion/extensions/knowledge/actors/maintenance_runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,18 @@ module Extensions
module Knowledge
module Actor
class MaintenanceRunner < Legion::Extensions::Actors::Every # rubocop:disable Legion/Extension/EveryActorRequiresTime
include Legion::Logging::Helper
include Legion::Settings::Helper

# Name of the runner class, as a String.
def runner_class
  'Legion::Extensions::Knowledge::Runners::Maintenance'
end

# Name of the runner function, as a String.
def runner_function
  'health'
end

# @return [Boolean] always false
def check_subtask?
  false
end

# @return [Boolean] always false
def generate_task?
  false
end

# Interval for this actor, read from settings[:actors][:maintenance_interval].
#
# If the settings lookup raises a StandardError, the error is passed to
# handle_exception (level :warn) and 21_600 is returned instead.
#
# @return [Object] the configured interval, or 21_600 after a rescued error
def time
  settings[:actors][:maintenance_interval]
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.maintenance_runner.time')
  21_600
end

Expand All @@ -26,7 +25,7 @@ def enabled? # rubocop:disable Legion/Extension/ActorEnabledSideEffects

true
rescue StandardError => e
log.warn(e.message)
handle_exception(e, level: :warn, operation: 'knowledge.maintenance_runner.enabled')
false
end

Expand All @@ -36,16 +35,10 @@ def args

private

# Logger for this actor: the Legion::Logging module itself.
# NOTE(review): the class also includes Legion::Logging::Helper; if that helper
# already provides `log`, this definition may be redundant — confirm.
def log = Legion::Logging

# Corpus path read from settings[:corpus_path].
#
# If the settings lookup raises a StandardError, the error is passed to
# handle_exception (level :warn) and nil is returned.
#
# @return [Object, nil] the configured value, or nil when unset or on error
def corpus_path
  settings[:corpus_path]
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.maintenance_runner.corpus_path')
  nil
end
end
Expand Down
45 changes: 45 additions & 0 deletions lib/legion/extensions/knowledge/helpers/apollo_models.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# frozen_string_literal: true

module Legion
  module Extensions
    module Knowledge
      module Helpers
        # Resolves the Apollo data-model constants. Each lookup prefers the
        # namespaced Legion::Data::Model::Apollo::<Name> constant and falls
        # back to the legacy Legion::Data::Model::Apollo<Name> constant.
        # A lookup yields nil when the namespace or the constant is missing.
        module ApolloModels
          class << self
            # @return [Object, nil] Apollo::Entry, else ApolloEntry, else nil
            def entry = namespaced_apollo_model(:Entry) || legacy_model(:ApolloEntry)

            # @return [Object, nil] Apollo::AccessLog, else ApolloAccessLog, else nil
            def access_log = namespaced_apollo_model(:AccessLog) || legacy_model(:ApolloAccessLog)

            # @return [Boolean] whether #entry resolves to something
            def entry_available? = !entry.nil?

            # @return [Boolean] whether #access_log resolves to something
            def access_log_available? = !access_log.nil?

            private

            # Looks +name+ up under Legion::Data::Model::Apollo, if that exists.
            def namespaced_apollo_model(name)
              own_constant(Legion::Data::Model::Apollo, name) if defined?(Legion::Data::Model::Apollo)
            end

            # Looks +name+ up under Legion::Data::Model, if that exists.
            def legacy_model(name)
              own_constant(Legion::Data::Model, name) if defined?(Legion::Data::Model)
            end

            # Fetches a constant defined directly on +namespace+ (ancestors are
            # not searched); nil when it is not defined there.
            def own_constant(namespace, name)
              namespace.const_get(name, false) if namespace.const_defined?(name, false)
            end
          end
        end
      end
    end
  end
end
25 changes: 5 additions & 20 deletions lib/legion/extensions/knowledge/helpers/chunker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@ module Extensions
module Knowledge
module Helpers
module Chunker
extend Legion::Logging::Helper
extend Legion::Settings::Helper

CHARS_PER_TOKEN = 4

module_function

def chunk(sections:, max_tokens: nil, overlap_tokens: nil)
resolved_max = max_tokens || settings_max_tokens || 512
resolved_overlap = overlap_tokens || settings_overlap_tokens || 128
resolved_max = max_tokens || settings[:chunker][:max_tokens]
resolved_overlap = overlap_tokens || settings[:chunker][:overlap_tokens]

max_chars = resolved_max * CHARS_PER_TOKEN
overlap_chars = resolved_overlap * CHARS_PER_TOKEN
Expand Down Expand Up @@ -89,24 +92,6 @@ def apollo_compatible_content_hash(content)
end
end
private_class_method :apollo_compatible_content_hash

# Looks up knowledge.chunker.max_tokens through Legion::Settings.dig.
# Returns nil when Legion::Settings is not defined or the lookup raises.
def settings_max_tokens
  Legion::Settings.dig(:knowledge, :chunker, :max_tokens) if defined?(Legion::Settings)
rescue StandardError
  nil
end
private_class_method :settings_max_tokens

# Looks up knowledge.chunker.overlap_tokens through Legion::Settings.dig.
# Returns nil when Legion::Settings is not defined or the lookup raises.
def settings_overlap_tokens
  Legion::Settings.dig(:knowledge, :chunker, :overlap_tokens) if defined?(Legion::Settings)
rescue StandardError
  nil
end
private_class_method :settings_overlap_tokens
end
end
end
Expand Down
9 changes: 3 additions & 6 deletions lib/legion/extensions/knowledge/helpers/manifest.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ module Extensions
module Knowledge
module Helpers
module Manifest
extend Legion::Logging::Helper

module_function

def scan(path:, extensions: %w[.md .txt .docx .pdf])
Expand All @@ -25,15 +27,10 @@ def walk(entry, extensions, results)
results << build_entry(entry)
end
rescue Errno::EPERM, Errno::EACCES, Errno::ELOOP, Errno::ENOENT => e
log.debug("[manifest] skipping unreadable #{entry}: #{e.class}: #{e.message}")
handle_exception(e, level: :warn, operation: 'knowledge.manifest.walk', entry: entry)
end
private_class_method :walk

# Logger for this module: the Legion::Logging module itself.
# NOTE(review): the module also extends Legion::Logging::Helper; if that helper
# already provides `log`, this definition may be redundant — confirm.
def log = Legion::Logging
private_class_method :log

def diff(current:, previous:)
current_map = current.to_h { |e| [e[:path], e[:sha256]] }
previous_map = previous.to_h { |e| [e[:path], e[:sha256]] }
Expand Down
15 changes: 10 additions & 5 deletions lib/legion/extensions/knowledge/helpers/manifest_store.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@

require 'digest'
require 'fileutils'
require 'json'
require 'legion/json'
require 'tempfile'

module Legion
module Extensions
module Knowledge
module Helpers
module ManifestStore
extend Legion::Logging::Helper
extend Legion::JSON::Helper

module_function

STORE_DIR = ::File.expand_path('~/.legionio/knowledge').freeze
Expand All @@ -19,19 +22,21 @@ def load(corpus_path:)
return [] unless ::File.exist?(path)

raw = ::File.read(path, encoding: 'utf-8')
::JSON.parse(raw, symbolize_names: true)
rescue StandardError => _e
json_parse(raw)
rescue StandardError => e
handle_exception(e, level: :warn, operation: 'knowledge.manifest_store.load', corpus_path: corpus_path)
[]
end

# Persists +manifest+ for +corpus_path+ as JSON under STORE_DIR.
#
# The payload is written once to a sibling "<path>.tmp" file, which is then
# renamed onto the final store path. Any StandardError along the way is
# passed to handle_exception (level :warn) and reported to the caller as
# false.
#
# @param corpus_path [Object] identifies the corpus; forwarded to store_path
# @param manifest [#map] entries, each passed through serialize_entry
# @return [Boolean] true on success, false after a rescued error
def save(corpus_path:, manifest:)
  ::FileUtils.mkdir_p(STORE_DIR)
  path = store_path(corpus_path: corpus_path)
  tmp = "#{path}.tmp"
  ::File.write(tmp, json_generate(manifest.map { |e| serialize_entry(e) }))
  ::File.rename(tmp, path)
  true
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.manifest_store.save', corpus_path: corpus_path)
  false
end

Expand Down
3 changes: 3 additions & 0 deletions lib/legion/extensions/knowledge/helpers/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ module Extensions
module Knowledge
module Helpers
module Parser
extend Legion::Logging::Helper

module_function

def parse(file_path:)
Expand Down Expand Up @@ -57,6 +59,7 @@ def extract_via_data(file_path:)
heading = ::File.basename(file_path, '.*')
[{ heading: heading, section_path: [], content: result[:text].strip, source_file: file_path }]
rescue StandardError => e
handle_exception(e, level: :warn, operation: 'knowledge.parser.extract_via_data', file_path: file_path)
[{ error: 'extraction_failed', source_file: file_path, detail: e.message }]
end

Expand Down
Loading
Loading