diff --git a/src/main/kotlin/simplerag/ragback/RagBackApplication.kt b/src/main/kotlin/simplerag/ragback/RagBackApplication.kt index 20598b4..e7cc2d5 100644 --- a/src/main/kotlin/simplerag/ragback/RagBackApplication.kt +++ b/src/main/kotlin/simplerag/ragback/RagBackApplication.kt @@ -2,8 +2,10 @@ package simplerag.ragback import org.springframework.boot.autoconfigure.SpringBootApplication import org.springframework.boot.runApplication +import org.springframework.data.jpa.repository.config.EnableJpaAuditing @SpringBootApplication +@EnableJpaAuditing class RagBackApplication fun main(args: Array) { diff --git a/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt new file mode 100644 index 0000000..5d7a193 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt @@ -0,0 +1,30 @@ +package simplerag.ragback.domain.chat.entity + +import jakarta.persistence.* +import simplerag.ragback.domain.index.entity.Index +import simplerag.ragback.domain.prompt.entity.Prompt +import simplerag.ragback.global.entity.BaseEntity + +/** JPA entity for the `models` table: a uniquely named model entry (`llmModel`) bound to one [Index] and one [Prompt]; both join columns are declared NOT NULL to match the non-null Kotlin properties. */ +@Entity +@Table(name = "models") +class Model( + + @Column(name = "name", nullable = false, unique = true, length = 100) + val name: String, + + @Column(name = "llm_model", nullable = false) + val llmModel: String, + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "indexes_id", nullable = false) + val index: Index, + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "prompts_id", nullable = false) + val prompt: Prompt, + + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "models_id") + val id: Long? 
= null, +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt index 6a2cc6b..1169374 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt @@ -1,11 +1,12 @@ package simplerag.ragback.domain.document.entity import jakarta.persistence.* +import simplerag.ragback.global.entity.BaseEntity import java.time.LocalDateTime @Entity @Table( - name = "data_file", + name = "data_files", uniqueConstraints = [UniqueConstraint(columnNames = ["sha256"])] ) class DataFile( @@ -22,15 +23,10 @@ class DataFile( @Column(nullable = false, length = 64) val sha256: String, - @Column(nullable = false, length = 2048) + @Column(nullable = false, length = 2048, name = "file_url") val fileUrl: String, - @Column(nullable = false) - val updatedAt: LocalDateTime, - - @Column(nullable = false) - val createdAt: LocalDateTime, - @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "data_files_id") val id: Long? 
= null, -) +) : BaseEntity() diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt index a9771db..6994eed 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt @@ -1,22 +1,24 @@ package simplerag.ragback.domain.document.entity import jakarta.persistence.* +import simplerag.ragback.global.entity.BaseEntity @Entity @Table( - name = "data_file_tags", - uniqueConstraints = [UniqueConstraint(columnNames = ["data_file_id", "tag_id"])] + name = "data_files_tags", + uniqueConstraints = [UniqueConstraint(columnNames = ["data_files_id", "tags_id"])] ) class DataFileTag( @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "tag_id", nullable = false) + @JoinColumn(name = "tags_id", nullable = false) var tag: Tag, @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "data_file_id", nullable = false) + @JoinColumn(name = "data_files_id", nullable = false) var dataFile: DataFile, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "data_files_tags_id") val id: Long? = null, -) \ No newline at end of file +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/Tag.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/Tag.kt index 7669439..fe17647 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/Tag.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/Tag.kt @@ -1,6 +1,7 @@ package simplerag.ragback.domain.document.entity import jakarta.persistence.* +import simplerag.ragback.global.entity.BaseEntity @Entity @Table( @@ -13,5 +14,6 @@ class Tag( val name: String, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "tags_id") val id: Long? 
= null, -) \ No newline at end of file +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt b/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt index bcde47d..8678574 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt @@ -42,7 +42,6 @@ class DataFileService( throw CustomException(ErrorCode.INVALID_INPUT) } - val now = LocalDateTime.now() val uploadedUrls = mutableListOf() registerRollbackCleanup(uploadedUrls) @@ -63,7 +62,7 @@ class DataFileService( uploadedUrls += fileUrl val dataFile = try { - dataFileRepository.save(DataFile(meta.title, type, sizeBytes, sha256, fileUrl, now, now)) + dataFileRepository.save(DataFile(meta.title, type, sizeBytes, sha256, fileUrl)) } catch (ex: DataIntegrityViolationException) { throw FileException(ErrorCode.ALREADY_FILE, sha256) } diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt new file mode 100644 index 0000000..3c1862f --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt @@ -0,0 +1,42 @@ +package simplerag.ragback.domain.index.entity + +import jakarta.persistence.* +import simplerag.ragback.global.entity.BaseEntity +import simplerag.ragback.global.util.FloatArrayToPgVectorStringConverter + +// The embedding size must be validated in the service layer +@Entity +@Table(name = "chunk_embeddings") +class ChunkEmbedding( + + @Column(name = "content", nullable = false) + @Lob + val content: String, + + @Convert(converter = FloatArrayToPgVectorStringConverter::class) + @Column(name = "embedding", nullable = false) + private var _embedding: FloatArray, + + @Column(name = "embedding_dim", nullable = false) + val embeddingDim: Int, + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = 
"indexes_id", nullable = false) + val index: Index, + + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "chunk_embeddings_id") + val id: Long? = null, +): BaseEntity() { + + @get:Transient + val embedding: FloatArray get() = _embedding.copyOf() + + fun updateEmbedding(newVec: FloatArray) { + require(newVec.size == embeddingDim) { + "Embedding dimension mismatch: expected=$embeddingDim, got=${newVec.size}" + } + _embedding = newVec.copyOf() + } + +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt new file mode 100644 index 0000000..060722c --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt @@ -0,0 +1,22 @@ +package simplerag.ragback.domain.index.entity + +import jakarta.persistence.* +import simplerag.ragback.domain.document.entity.DataFile +import simplerag.ragback.global.entity.BaseEntity + +@Entity +@Table(name = "data_files_indexes") +class DataFileIndex( + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "data_files_id", nullable = false) + val dataFile: DataFile, + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "indexes_id", nullable = false) + val index: Index, + + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "data_files_indexes_id") + val id: Long? 
= null, +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt new file mode 100644 index 0000000..630783a --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt @@ -0,0 +1,43 @@ +package simplerag.ragback.domain.index.entity + +import jakarta.persistence.* +import jakarta.validation.constraints.Min +import simplerag.ragback.domain.index.entity.enums.EmbeddingModel +import simplerag.ragback.domain.index.entity.enums.SimilarityMetric +import simplerag.ragback.global.entity.BaseEntity + +/** JPA entity for the `indexes` table holding an index's snapshot name, chunking and overlap sizes, similarity metric, top-k, embedding model and reranker flag. The `Min` constraints use the `field:` use-site target because an untargeted annotation on a constructor property is applied to the constructor parameter rather than to the persisted field. */ +@Entity +@Table(name = "indexes") +class Index( + + @Column(name = "snapshot_name", length = 255, nullable = false) + val snapshotName: String, + + @Column(name = "chunking_size", nullable = false) + @field:Min(1) + val chunkingSize: Int, + + @Column(name = "overlap_size", nullable = false) + @field:Min(0) + val overlapSize: Int, + + @Column(name = "similarity_metric", nullable = false) + @Enumerated(EnumType.STRING) + val similarityMetric: SimilarityMetric, + + @Column(name = "top_k", nullable = false) + @field:Min(1) + val topK: Int, + + @Column(name = "embedding_model", nullable = false, length = 255) + @Enumerated(EnumType.STRING) + val embeddingModel: EmbeddingModel, + + @Column(name = "reranker", nullable = false) + val reranker: Boolean, + + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "indexes_id") + val id: Long? 
= null, +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt new file mode 100644 index 0000000..6729034 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt @@ -0,0 +1,50 @@ +package simplerag.ragback.domain.index.entity.enums + +/** Embedding models known to the application; `dim` is the expected embedding vector length and `modelId` is the provider-side model identifier. BM_KO_SMALL and INSTRUCTOR_XL are 768-dimensional according to their published model cards. */ +enum class EmbeddingModel( + val dim: Int, + val modelId: String +) { + // OpenAI + TEXT_EMBEDDING_3_SMALL(1536, "text-embedding-3-small"), + TEXT_EMBEDDING_3_LARGE(3072, "text-embedding-3-large"), + + // SBERT / HuggingFace + ALL_MINILM_L6_V2(384, "sentence-transformers/all-MiniLM-L6-v2"), + ALL_MP_NET_BASE_V2(768, "sentence-transformers/all-mpnet-base-v2"), + MULTI_QA_MP_NET_BASE_DOT_V1(768, "sentence-transformers/multi-qa-mpnet-base-dot-v1"), + DISTILUSE_BASE_MULTILINGUAL_CASED_V2(512, "sentence-transformers/distiluse-base-multilingual-cased-v2"), + PARAPHRASE_MULTILINGUAL_MINILM_L12_V2(384, "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"), + KO_SBERT_V1(768, "jhgan/ko-sbert-v1"), + KOR_SROBERTA(768, "jhgan/ko-sroberta-medium-nli"), + + // Korean specific + BM_KO_SMALL(768, "bespin-global/klue-sroberta-base-continue-learning-by-mnr"), + + // Instructor / Mistral + INSTRUCTOR_BASE(768, "hkunlp/instructor-base"), + INSTRUCTOR_XL(768, "hkunlp/instructor-xl"), + MISTRAL_EMBED(1024, "mistral-embed"), + + // BGE / E5 etc + BGE_SMALL_EN(384, "BAAI/bge-small-en-v1.5"), + BGE_BASE_EN(768, "BAAI/bge-base-en-v1.5"), + BGE_LARGE_EN(1024, "BAAI/bge-large-en-v1.5"), + BGE_M3(1024, "BAAI/bge-m3"), + E5_SMALL(384, "intfloat/e5-small-v2"), + E5_BASE(768, "intfloat/e5-base-v2"), + E5_LARGE(1024, "intfloat/e5-large-v2"), + + // Old word vectors + FASTTEXT_KO(300, "fasttext-ko-300d"); + + companion object { + fun findByModelId(modelId: String): EmbeddingModel? 
{ + return entries.find { it.modelId == modelId } + } + + fun getAllModelIds(): List { + return entries.map { it.modelId } + } + } +} diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/SimilarityMetric.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/SimilarityMetric.kt new file mode 100644 index 0000000..a4bf0ac --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/SimilarityMetric.kt @@ -0,0 +1,9 @@ +package simplerag.ragback.domain.index.entity.enums + +enum class SimilarityMetric( + val description: String +) { + COSINE("코사인 유사도"), + EUCLIDEAN("유클리드 거리"), + DOT_PRODUCT("내적 유사도") +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt new file mode 100644 index 0000000..270be2f --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt @@ -0,0 +1,28 @@ +package simplerag.ragback.domain.prompt.entity + +import jakarta.persistence.* +import simplerag.ragback.global.entity.BaseEntity + +@Entity +@Table(name = "few_shots") +class FewShot( + + @Column(name = "question", nullable = false, length = 255) + val question: String, + + @Column(name = "answer", nullable = false) + @Lob + val answer: String, + + @Column(name = "evidence", nullable = false) + @Lob + val evidence: String, + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "prompts_id", nullable = false) + val prompt: Prompt, + + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "few_shots_id") + val id: Long? 
= null, +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt new file mode 100644 index 0000000..17a920e --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt @@ -0,0 +1,25 @@ +package simplerag.ragback.domain.prompt.entity + +import jakarta.persistence.* +import simplerag.ragback.domain.prompt.entity.enums.PreSet +import simplerag.ragback.global.entity.BaseEntity + +@Entity +@Table(name = "prompts") +class Prompt( + + @Column(name = "name", length = 100, nullable = false) + val name: String, + + @Enumerated(EnumType.STRING) + @Column(name = "pre_set", nullable = false) + val preSet: PreSet, + + @Column(name = "system_prompt", nullable = false) + @Lob + val systemPrompt: String, + + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "prompts_id") + val id: Long? = null, +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/enums/PreSet.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/enums/PreSet.kt new file mode 100644 index 0000000..21dab94 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/enums/PreSet.kt @@ -0,0 +1,5 @@ +package simplerag.ragback.domain.prompt.entity.enums + +enum class PreSet { + NONE +} diff --git a/src/main/kotlin/simplerag/ragback/global/entity/BaseEntity.kt b/src/main/kotlin/simplerag/ragback/global/entity/BaseEntity.kt new file mode 100644 index 0000000..272f5ab --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/global/entity/BaseEntity.kt @@ -0,0 +1,21 @@ +package simplerag.ragback.global.entity + +import jakarta.persistence.Column +import jakarta.persistence.EntityListeners +import jakarta.persistence.MappedSuperclass +import org.springframework.data.annotation.CreatedDate +import org.springframework.data.annotation.LastModifiedDate +import 
org.springframework.data.jpa.domain.support.AuditingEntityListener +import java.time.LocalDateTime + +@MappedSuperclass +@EntityListeners(AuditingEntityListener::class) +abstract class BaseEntity { + @CreatedDate + @Column(name = "created_at", nullable = false, updatable = false) + lateinit var createdAt: LocalDateTime + + @LastModifiedDate + @Column(name = "updated_at", nullable = false) + lateinit var updatedAt: LocalDateTime +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/util/FloatArrayToPgVectorStringConverter.kt b/src/main/kotlin/simplerag/ragback/global/util/FloatArrayToPgVectorStringConverter.kt new file mode 100644 index 0000000..04e1d23 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/global/util/FloatArrayToPgVectorStringConverter.kt @@ -0,0 +1,34 @@ +package simplerag.ragback.global.util + +import jakarta.persistence.AttributeConverter +import jakarta.persistence.Converter + +@Converter(autoApply = false) +class FloatArrayToPgVectorStringConverter : AttributeConverter { + override fun convertToDatabaseColumn(attribute: FloatArray?): String { + requireNotNull(attribute) { "Embedding (FloatArray) must not be null" } + require(attribute.isNotEmpty()) { "Embedding must not be empty; expected fixed dimension (e.g., 1536)" } + require(attribute.all { !it.isNaN() && !it.isInfinite() }) { + "Embedding must not contain NaN/Infinity" + } + return attribute.joinToString(prefix = "[", postfix = "]", separator = ",") { it.toString() } + } + + override fun convertToEntityAttribute(dbData: String?): FloatArray { + if (dbData.isNullOrBlank()) return floatArrayOf() + val body = dbData.trim().removePrefix("[").removeSuffix("]").trim() + if (body.isBlank()) return floatArrayOf() + return try { + body.split(',') + .map { it.trim().toFloat() } + .toFloatArray() + .also { arr -> + require(arr.all { it.isFinite() }) { + "Embedding must not contain NaN/Infinity (db → entity)" + } + } + } catch (e: NumberFormatException) { + throw 
IllegalArgumentException("Invalid vector literal for pgvector: '$dbData'", e) + } + } +} \ No newline at end of file diff --git a/src/test/kotlin/simplerag/ragback/domain/document/service/DataFileServiceTest.kt b/src/test/kotlin/simplerag/ragback/domain/document/service/DataFileServiceTest.kt index 2d58831..31264d1 100644 --- a/src/test/kotlin/simplerag/ragback/domain/document/service/DataFileServiceTest.kt +++ b/src/test/kotlin/simplerag/ragback/domain/document/service/DataFileServiceTest.kt @@ -94,7 +94,6 @@ class DataFileServiceTest( // given val bytes = "same".toByteArray() val sha = sha256Hex(bytes) - val now = LocalDateTime.now() dataFileRepository.save( DataFile( title = "exists", @@ -102,8 +101,6 @@ class DataFileServiceTest( sizeBytes = 0, sha256 = sha, fileUrl = "fake://original/exists.txt", - updatedAt = now, - createdAt = now ) ) val req = DataFileBulkCreateRequest(listOf(DataFileCreateItem("dup", listOf("tag"))))