From d63c940d4668b77f98cba3852c2b6ced1ca18b6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 15:33:17 +0900 Subject: [PATCH 01/30] =?UTF-8?q?:sparkles:=20feature:=20=EC=8B=9C?= =?UTF-8?q?=EA=B0=84=20=EC=9E=90=EB=8F=99=20=EC=83=9D=EC=84=B1=20=EB=A1=9C?= =?UTF-8?q?=EC=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../simplerag/ragback/RagBackApplication.kt | 2 ++ .../ragback/global/entity/BaseEntity.kt | 21 +++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 src/main/kotlin/simplerag/ragback/global/entity/BaseEntity.kt diff --git a/src/main/kotlin/simplerag/ragback/RagBackApplication.kt b/src/main/kotlin/simplerag/ragback/RagBackApplication.kt index 20598b4..e7cc2d5 100644 --- a/src/main/kotlin/simplerag/ragback/RagBackApplication.kt +++ b/src/main/kotlin/simplerag/ragback/RagBackApplication.kt @@ -2,8 +2,10 @@ package simplerag.ragback import org.springframework.boot.autoconfigure.SpringBootApplication import org.springframework.boot.runApplication +import org.springframework.data.jpa.repository.config.EnableJpaAuditing @SpringBootApplication +@EnableJpaAuditing class RagBackApplication fun main(args: Array) { diff --git a/src/main/kotlin/simplerag/ragback/global/entity/BaseEntity.kt b/src/main/kotlin/simplerag/ragback/global/entity/BaseEntity.kt new file mode 100644 index 0000000..272f5ab --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/global/entity/BaseEntity.kt @@ -0,0 +1,21 @@ +package simplerag.ragback.global.entity + +import jakarta.persistence.Column +import jakarta.persistence.EntityListeners +import jakarta.persistence.MappedSuperclass +import org.springframework.data.annotation.CreatedDate +import org.springframework.data.annotation.LastModifiedDate +import org.springframework.data.jpa.domain.support.AuditingEntityListener +import java.time.LocalDateTime + +@MappedSuperclass +@EntityListeners(AuditingEntityListener::class) +abstract class BaseEntity { + @CreatedDate + @Column(name = "created_at", nullable = false, updatable = false) + lateinit var createdAt: LocalDateTime + + @LastModifiedDate + @Column(name = "updated_at", nullable = false) + lateinit var updatedAt: LocalDateTime +} \ No newline at end of file From b342d3ca9c6873cdfbc9eaeba64a7683773c1178 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 15:33:34 +0900 Subject: [PATCH 02/30] =?UTF-8?q?:sparkles:=20feature:=20datafile=20?= =?UTF-8?q?=EA=B4=80=EB=A0=A8=20=EC=97=94=ED=8B=B0=ED=8B=B0=20=EC=88=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ragback/domain/document/entity/DataFile.kt | 10 +++------- .../ragback/domain/document/entity/DataFileTag.kt | 4 +++- .../simplerag/ragback/domain/document/entity/Tag.kt | 4 +++- .../ragback/domain/document/service/DataFileService.kt | 3 +-- .../domain/document/service/DataFileServiceTest.kt | 3 --- 5 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt index 6a2cc6b..90f8251 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt @@ -1,6 +1,7 @@ package simplerag.ragback.domain.document.entity import jakarta.persistence.* +import simplerag.ragback.global.entity.BaseEntity import java.time.LocalDateTime @Entity @@ -25,12 +26,7 @@ class DataFile( @Column(nullable = false, length = 2048) val fileUrl: String, - @Column(nullable = false) - val updatedAt: LocalDateTime, - - @Column(nullable = false) - val createdAt: LocalDateTime, - @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "data_file_id") val id: Long? = null, -) +) : BaseEntity() diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt index a9771db..9328d4e 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt @@ -1,6 +1,7 @@ package simplerag.ragback.domain.document.entity import jakarta.persistence.* +import simplerag.ragback.global.entity.BaseEntity @Entity @Table( @@ -18,5 +19,6 @@ class DataFileTag( var dataFile: DataFile, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "data_file_tag_id") val id: Long? = null, -) \ No newline at end of file +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/Tag.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/Tag.kt index 7669439..fe17647 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/Tag.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/Tag.kt @@ -1,6 +1,7 @@ package simplerag.ragback.domain.document.entity import jakarta.persistence.* +import simplerag.ragback.global.entity.BaseEntity @Entity @Table( @@ -13,5 +14,6 @@ class Tag( val name: String, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "tags_id") val id: Long? = null, -) \ No newline at end of file +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt b/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt index bcde47d..8678574 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt @@ -42,7 +42,6 @@ class DataFileService( throw CustomException(ErrorCode.INVALID_INPUT) } - val now = LocalDateTime.now() val uploadedUrls = mutableListOf() registerRollbackCleanup(uploadedUrls) @@ -63,7 +62,7 @@ class DataFileService( uploadedUrls += fileUrl val dataFile = try { - dataFileRepository.save(DataFile(meta.title, type, sizeBytes, sha256, fileUrl, now, now)) + dataFileRepository.save(DataFile(meta.title, type, sizeBytes, sha256, fileUrl)) } catch (ex: DataIntegrityViolationException) { throw FileException(ErrorCode.ALREADY_FILE, sha256) } diff --git a/src/test/kotlin/simplerag/ragback/domain/document/service/DataFileServiceTest.kt b/src/test/kotlin/simplerag/ragback/domain/document/service/DataFileServiceTest.kt index 2d58831..31264d1 100644 --- a/src/test/kotlin/simplerag/ragback/domain/document/service/DataFileServiceTest.kt +++ b/src/test/kotlin/simplerag/ragback/domain/document/service/DataFileServiceTest.kt @@ -94,7 +94,6 @@ class DataFileServiceTest( // given val bytes = "same".toByteArray() val sha = sha256Hex(bytes) - val now = LocalDateTime.now() dataFileRepository.save( DataFile( title = "exists", @@ -102,8 +101,6 @@ class DataFileServiceTest( sizeBytes = 0, sha256 = sha, fileUrl = "fake://original/exists.txt", - updatedAt = now, - createdAt = now ) ) val req = DataFileBulkCreateRequest(listOf(DataFileCreateItem("dup", listOf("tag")))) From 79115f7e36c15369e65f1e677d6a25dd51a15b98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 15:33:43 +0900 Subject: [PATCH 03/30] =?UTF-8?q?:sparkles:=20feature:=20=ED=94=84?= =?UTF-8?q?=EB=A1=AC=ED=94=84=ED=8A=B8=20=EA=B4=80=EB=A0=A8=20=EC=97=94?= =?UTF-8?q?=ED=8B=B0=ED=8B=B0=20=EC=83=9D=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ragback/domain/prompt/entity/FewShot.kt | 26 +++++++++++++++++++ .../ragback/domain/prompt/entity/Prompt.kt | 24 +++++++++++++++++ .../domain/prompt/entity/enums/PreSet.kt | 5 ++++ 3 files changed, 55 insertions(+) create mode 100644 src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt create mode 100644 src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt create mode 100644 src/main/kotlin/simplerag/ragback/domain/prompt/entity/enums/PreSet.kt diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt new file mode 100644 index 0000000..ded14fe --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt @@ -0,0 +1,26 @@ +package simplerag.ragback.domain.prompt.entity + +import jakarta.persistence.* +import simplerag.ragback.global.entity.BaseEntity + +@Entity +@Table(name = "few_shot") +class FewShot( + + @Column(name = "question", nullable = false, length = 255) + val question: String, + + @Column(name = "answer", nullable = false, columnDefinition = "TEXT") + val answer: String, + + @Column(name = "evidence", nullable = false, columnDefinition = "TEXT") + val evidence: String, + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "prompt_id", nullable = false) + val prompt: Prompt, + + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "few_shot_id") + val id: Long? = null, +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt new file mode 100644 index 0000000..465ddf1 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt @@ -0,0 +1,24 @@ +package simplerag.ragback.domain.prompt.entity + +import jakarta.persistence.* +import simplerag.ragback.domain.prompt.entity.enums.PreSet +import simplerag.ragback.global.entity.BaseEntity + +@Entity +@Table(name = "prompt") +class Prompt( + + @Column(name = "name", length = 100, nullable = false) + val name: String, + + @Enumerated(EnumType.STRING) + @Column(name = "pre_set", nullable = false) + val preSet: PreSet, + + @Column(name = "system_prompt", nullable = false, columnDefinition = "TEXT") + val systemPrompt: String, + + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "prompt_id") + val id: Long? = null, +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/enums/PreSet.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/enums/PreSet.kt new file mode 100644 index 0000000..c9650d2 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/enums/PreSet.kt @@ -0,0 +1,5 @@ +package simplerag.ragback.domain.prompt.entity.enums + +enum class PreSet { + +} From 31c2465c78a31ee86c5934a1bbb74be9c0bb011b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 15:48:22 +0900 Subject: [PATCH 04/30] =?UTF-8?q?:sparkles:=20feature:=20=EC=9D=B8?= =?UTF-8?q?=EB=8D=B1=EC=8A=A4=20=EA=B4=80=EB=A0=A8=20=EC=97=94=ED=8B=B0?= =?UTF-8?q?=ED=8B=B0=20=EC=83=9D=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../domain/index/entity/ChunkEmbedding.kt | 23 +++++++++++++ .../domain/index/entity/DataFileIndex.kt | 22 ++++++++++++ .../ragback/domain/index/entity/Index.kt | 34 +++++++++++++++++++ .../index/entity/enums/SimilarityMetric.kt | 9 +++++ 4 files changed, 88 insertions(+) create mode 100644 src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt create mode 100644 src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt create mode 100644 src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt create mode 100644 src/main/kotlin/simplerag/ragback/domain/index/entity/enums/SimilarityMetric.kt diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt new file mode 100644 index 0000000..c499b1e --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt @@ -0,0 +1,23 @@ +package simplerag.ragback.domain.index.entity + +import jakarta.persistence.* +import simplerag.ragback.global.entity.BaseEntity + +@Entity +@Table(name = "chunk_embedding") +class ChunkEmbedding( + + @Column(name = "content", nullable = false, columnDefinition = "TEXT") + val content: String, + + @Column(name = "embedding", nullable = false) + val embedding: FloatArray, + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "indexes_id") + val index: Index, + + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "chunk_embedding_id") + val id: Long? = null, +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt new file mode 100644 index 0000000..344cc46 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt @@ -0,0 +1,22 @@ +package simplerag.ragback.domain.index.entity + +import jakarta.persistence.* +import simplerag.ragback.domain.document.entity.DataFile +import simplerag.ragback.global.entity.BaseEntity + +@Entity +@Table(name = "data_file_index") +class DataFileIndex( + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "data_file_id", nullable = false) + val dataFile: DataFile, + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "indexes_id", nullable = false) + val index: Index, + + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "data_file_index_id") + val id: Long? = null, +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt new file mode 100644 index 0000000..2d67495 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt @@ -0,0 +1,34 @@ +package simplerag.ragback.domain.index.entity + +import jakarta.persistence.* +import simplerag.ragback.domain.index.entity.enums.SimilarityMetric +import simplerag.ragback.global.entity.BaseEntity + +@Entity +@Table(name = "indexes") +class Index( + + @Column(name = "snapshot_name", length = 255, nullable = false) + val snapshotName: String, + + @Column(name = "chunking_size", nullable = false) + val chunkingSize: Int, + + @Column(name = "overlap_size", nullable = false) + val overlapSize: Int, + + @Column(name = "similarity_metric", nullable = false) + val similarityMetric: SimilarityMetric, + + @Column(name = "top_k", nullable = false) + val topK: Int, + + @Column(name = "embedding_model", nullable = false, length = 255) + val embeddingModel: String, + + @Column(name = "reranker", nullable = false) + val reranker: Boolean, + + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + val id: Long? = null, +): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/SimilarityMetric.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/SimilarityMetric.kt new file mode 100644 index 0000000..a4bf0ac --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/SimilarityMetric.kt @@ -0,0 +1,9 @@ +package simplerag.ragback.domain.index.entity.enums + +enum class SimilarityMetric( + val description: String +) { + COSINE("코사인 유사도"), + EUCLIDEAN("유클리드 거리"), + DOT_PRODUCT("내적 유사도") +} \ No newline at end of file From 6e43a37bb2f7a555b3a859b6f2c219666493e68d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 15:48:28 +0900 Subject: [PATCH 05/30] =?UTF-8?q?:sparkles:=20feature:=20=EB=AA=A8?= =?UTF-8?q?=EB=8D=B8=20=EC=97=94=ED=8B=B0=ED=8B=B0=20=EC=83=9D=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ragback/domain/chat/entity/Model.kt | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt diff --git a/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt new file mode 100644 index 0000000..88849a3 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt @@ -0,0 +1,27 @@ +package simplerag.ragback.domain.chat.entity + +import jakarta.persistence.* +import simplerag.ragback.domain.prompt.entity.Prompt +import simplerag.ragback.global.entity.BaseEntity + +@Entity +@Table(name = "model") +class Model( + + @Column(name = "name", nullable = false, unique = true, length = 100) + val name: String, + + @Column(name = "llm_model", nullable = false, unique = true) + val llmModel: String, + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "indexes_id", nullable = false) + val index: Index, + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "prompt_id", nullable = false) + val prompt: Prompt, + + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + val id: Long? = null, +): BaseEntity() \ No newline at end of file From 4ca3f09bec43f931045f830649ef704ee8f1f6c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 16:10:55 +0900 Subject: [PATCH 06/30] =?UTF-8?q?:bug:=20fix:=20=EC=9D=B8=EB=8D=B1?= =?UTF-8?q?=EC=8A=A4=20=EC=8B=AC=EB=B3=BC=20=EC=83=9D=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt index 88849a3..89af170 100644 --- a/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt +++ b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt @@ -1,6 +1,7 @@ package simplerag.ragback.domain.chat.entity import jakarta.persistence.* +import simplerag.ragback.domain.index.entity.Index import simplerag.ragback.domain.prompt.entity.Prompt import simplerag.ragback.global.entity.BaseEntity From 66c5f143ac538a914b7f32c5d29b22eb17f6e8a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 16:18:39 +0900 Subject: [PATCH 07/30] =?UTF-8?q?:bug:=20fix:=20=EC=97=94=ED=8B=B0?= =?UTF-8?q?=ED=8B=B0=20=EB=AA=85=EB=AA=85=20=EA=B7=9C=EC=B9=99=20=EC=88=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ragback/domain/chat/entity/Model.kt | 5 +++-- .../domain/document/entity/DataFile.kt | 2 +- .../domain/document/entity/DataFileTag.kt | 8 ++++---- .../domain/index/entity/ChunkEmbedding.kt | 6 +++--- .../domain/index/entity/DataFileIndex.kt | 6 +++--- .../ragback/domain/index/entity/Index.kt | 1 + .../ragback/domain/prompt/entity/FewShot.kt | 6 +++--- .../ragback/domain/prompt/entity/Prompt.kt | 4 ++-- .../ragback/global/util/FloatJsonConverter.kt | 19 +++++++++++++++++++ src/main/resources/application-local.yml | 2 +- 10 files changed, 40 insertions(+), 19 deletions(-) create mode 100644 src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt diff --git a/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt index 89af170..2a8a3f8 100644 --- a/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt +++ b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt @@ -6,7 +6,7 @@ import simplerag.ragback.domain.prompt.entity.Prompt import simplerag.ragback.global.entity.BaseEntity @Entity -@Table(name = "model") +@Table(name = "models") class Model( @Column(name = "name", nullable = false, unique = true, length = 100) @@ -20,9 +20,10 @@ class Model( val index: Index, @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "prompt_id", nullable = false) + @JoinColumn(name = "prompts_id", nullable = false) val prompt: Prompt, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "models_id") val id: Long? = null, ): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt index 90f8251..9b4e59d 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt @@ -23,7 +23,7 @@ class DataFile( @Column(nullable = false, length = 64) val sha256: String, - @Column(nullable = false, length = 2048) + @Column(nullable = false, length = 2048, name = "file_url") val fileUrl: String, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt index 9328d4e..47d68a0 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt @@ -5,20 +5,20 @@ import simplerag.ragback.global.entity.BaseEntity @Entity @Table( - name = "data_file_tags", + name = "data_files_tags", uniqueConstraints = [UniqueConstraint(columnNames = ["data_file_id", "tag_id"])] ) class DataFileTag( @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "tag_id", nullable = false) + @JoinColumn(name = "tags_id", nullable = false) var tag: Tag, @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "data_file_id", nullable = false) + @JoinColumn(name = "data_files_id", nullable = false) var dataFile: DataFile, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) - @Column(name = "data_file_tag_id") + @Column(name = "data_files_tags_id") val id: Long? = null, ): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt index c499b1e..4a2682e 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt @@ -4,7 +4,7 @@ import jakarta.persistence.* import simplerag.ragback.global.entity.BaseEntity @Entity -@Table(name = "chunk_embedding") +@Table(name = "chunk_embeddings") class ChunkEmbedding( @Column(name = "content", nullable = false, columnDefinition = "TEXT") @@ -14,10 +14,10 @@ class ChunkEmbedding( val embedding: FloatArray, @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "indexes_id") + @JoinColumn(name = "indexes_id", nullable = false) val index: Index, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) - @Column(name = "chunk_embedding_id") + @Column(name = "chunk_embeddings_id") val id: Long? = null, ): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt index 344cc46..0a287eb 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt @@ -5,11 +5,11 @@ import simplerag.ragback.domain.document.entity.DataFile import simplerag.ragback.global.entity.BaseEntity @Entity -@Table(name = "data_file_index") +@Table(name = "data_file_indexes") class DataFileIndex( @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "data_file_id", nullable = false) + @JoinColumn(name = "data_files_id", nullable = false) val dataFile: DataFile, @ManyToOne(fetch = FetchType.LAZY) @@ -17,6 +17,6 @@ class DataFileIndex( val index: Index, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) - @Column(name = "data_file_index_id") + @Column(name = "data_file_indexes_id") val id: Long? = null, ): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt index 2d67495..12af56b 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt @@ -30,5 +30,6 @@ class Index( val reranker: Boolean, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "indexes_id", nullable = false) val id: Long? = null, ): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt index ded14fe..8dff2e6 100644 --- a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt @@ -4,7 +4,7 @@ import jakarta.persistence.* import simplerag.ragback.global.entity.BaseEntity @Entity -@Table(name = "few_shot") +@Table(name = "few_shots") class FewShot( @Column(name = "question", nullable = false, length = 255) @@ -17,10 +17,10 @@ class FewShot( val evidence: String, @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "prompt_id", nullable = false) + @JoinColumn(name = "prompts_id", nullable = false) val prompt: Prompt, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) - @Column(name = "few_shot_id") + @Column(name = "few_shots_id") val id: Long? = null, ): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt index 465ddf1..697e3af 100644 --- a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt @@ -5,7 +5,7 @@ import simplerag.ragback.domain.prompt.entity.enums.PreSet import simplerag.ragback.global.entity.BaseEntity @Entity -@Table(name = "prompt") +@Table(name = "prompts") class Prompt( @Column(name = "name", length = 100, nullable = false) @@ -19,6 +19,6 @@ class Prompt( val systemPrompt: String, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) - @Column(name = "prompt_id") + @Column(name = "prompts_id") val id: Long? = null, ): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt b/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt new file mode 100644 index 0000000..fda7933 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt @@ -0,0 +1,19 @@ +package simplerag.ragback.global.util + +import jakarta.persistence.AttributeConverter +import jakarta.persistence.Converter + +@Converter(autoApply = false) +class FloatArrayJsonConverter : AttributeConverter { + override fun convertToDatabaseColumn(attribute: FloatArray?): String = + attribute?.joinToString(prefix = "[", postfix = "]") { it.toString() } ?: "[]" + + override fun convertToEntityAttribute(dbData: String?): FloatArray { + if (dbData.isNullOrBlank()) return floatArrayOf() + // 매우 단순한 파서 (필요시 Jackson 등으로 교체) + return dbData.trim().removePrefix("[").removeSuffix("]") + .split(",") + .mapNotNull { it.trim().toFloatOrNull() } + .toFloatArray() + } +} \ No newline at end of file diff --git a/src/main/resources/application-local.yml b/src/main/resources/application-local.yml index 5019101..24a4c0f 100644 --- a/src/main/resources/application-local.yml +++ b/src/main/resources/application-local.yml @@ -6,7 +6,7 @@ spring: driver-class-name: org.postgresql.Driver jpa: hibernate: - ddl-auto: update + ddl-auto: create-drop show-sql: true logging: From 0c0ea962c5dbfe05ff17f07b71633f5ca08cca77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 16:27:13 +0900 Subject: [PATCH 08/30] =?UTF-8?q?:bug:=20fix:=20=EC=97=94=ED=8B=B0?= =?UTF-8?q?=ED=8B=B0=20=EC=98=A4=EB=A5=98=20=EC=88=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../domain/document/entity/DataFile.kt | 4 ++-- .../domain/document/entity/DataFileTag.kt | 2 +- .../domain/index/entity/ChunkEmbedding.kt | 6 +++-- .../domain/index/entity/DataFileIndex.kt | 4 ++-- .../domain/prompt/entity/enums/PreSet.kt | 2 +- .../ragback/global/util/FloatJsonConverter.kt | 24 ++++++++++++------- 6 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt index 9b4e59d..1169374 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt @@ -6,7 +6,7 @@ import java.time.LocalDateTime @Entity @Table( - name = "data_file", + name = "data_files", uniqueConstraints = [UniqueConstraint(columnNames = ["sha256"])] ) class DataFile( @@ -27,6 +27,6 @@ class DataFile( val fileUrl: String, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) - @Column(name = "data_file_id") + @Column(name = "data_files_id") val id: Long? = null, ) : BaseEntity() diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt index 47d68a0..6994eed 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt @@ -6,7 +6,7 @@ import simplerag.ragback.global.entity.BaseEntity @Entity @Table( name = "data_files_tags", - uniqueConstraints = [UniqueConstraint(columnNames = ["data_file_id", "tag_id"])] + uniqueConstraints = [UniqueConstraint(columnNames = ["data_files_id", "tags_id"])] ) class DataFileTag( diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt index 4a2682e..0574744 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt @@ -2,6 +2,7 @@ package simplerag.ragback.domain.index.entity import jakarta.persistence.* import simplerag.ragback.global.entity.BaseEntity +import simplerag.ragback.global.util.FloatArrayToPgVectorStringConverter @Entity @Table(name = "chunk_embeddings") @@ -10,8 +11,9 @@ class ChunkEmbedding( @Column(name = "content", nullable = false, columnDefinition = "TEXT") val content: String, - @Column(name = "embedding", nullable = false) - val embedding: FloatArray, + @Convert(converter = FloatArrayToPgVectorStringConverter::class) + @Column(name = "embedding", nullable = false, columnDefinition = "vector(1536)") // 차원 수에 맞추세요 + var embedding: FloatArray, @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "indexes_id", nullable = false) diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt index 0a287eb..060722c 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt @@ -5,7 +5,7 @@ import simplerag.ragback.domain.document.entity.DataFile import simplerag.ragback.global.entity.BaseEntity @Entity -@Table(name = "data_file_indexes") +@Table(name = "data_files_indexes") class DataFileIndex( @ManyToOne(fetch = FetchType.LAZY) @@ -17,6 +17,6 @@ class DataFileIndex( val index: Index, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) - @Column(name = "data_file_indexes_id") + @Column(name = "data_files_indexes_id") val id: Long? = null, ): BaseEntity() \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/enums/PreSet.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/enums/PreSet.kt index c9650d2..21dab94 100644 --- a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/enums/PreSet.kt +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/enums/PreSet.kt @@ -1,5 +1,5 @@ package simplerag.ragback.domain.prompt.entity.enums enum class PreSet { - + NONE } diff --git a/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt b/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt index fda7933..e431372 100644 --- a/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt +++ b/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt @@ -4,16 +4,22 @@ import jakarta.persistence.AttributeConverter import jakarta.persistence.Converter @Converter(autoApply = false) -class FloatArrayJsonConverter : AttributeConverter { - override fun convertToDatabaseColumn(attribute: FloatArray?): String = - attribute?.joinToString(prefix = "[", postfix = "]") { it.toString() } ?: "[]" - +class FloatArrayToPgVectorStringConverter : AttributeConverter { + override fun convertToDatabaseColumn(attribute: FloatArray?): String { + if (attribute == null) return "[]" + return buildString { + append('[') + attribute.forEachIndexed { i, v -> + if (i > 0) append(',') + append(v.toString()) + } + append(']') + } + } override fun convertToEntityAttribute(dbData: String?): FloatArray { if (dbData.isNullOrBlank()) return floatArrayOf() - // 매우 단순한 파서 (필요시 Jackson 등으로 교체) - return dbData.trim().removePrefix("[").removeSuffix("]") - .split(",") - .mapNotNull { it.trim().toFloatOrNull() } - .toFloatArray() + val body = dbData.trim().removePrefix("[").removeSuffix("]") + if (body.isBlank()) return floatArrayOf() + return body.split(',').map { it.trim().toFloat() }.toFloatArray() } } \ No newline at end of file From 625a91e2a00090400464e8b3fa00f31dd50dc20b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 16:28:37 +0900 Subject: [PATCH 09/30] =?UTF-8?q?:bug:=20fix:=20size=20check=20=EB=A1=9C?= =?UTF-8?q?=EC=A7=81=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kotlin/simplerag/ragback/domain/index/entity/Index.kt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt index 12af56b..8167cb4 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt @@ -1,6 +1,7 @@ package simplerag.ragback.domain.index.entity import jakarta.persistence.* +import jakarta.validation.constraints.Min import simplerag.ragback.domain.index.entity.enums.SimilarityMetric import simplerag.ragback.global.entity.BaseEntity @@ -12,15 +13,18 @@ class Index( val snapshotName: String, @Column(name = "chunking_size", nullable = false) + @Min(1) val chunkingSize: Int, @Column(name = "overlap_size", nullable = false) + @Min(0) val overlapSize: Int, @Column(name = "similarity_metric", nullable = false) val similarityMetric: SimilarityMetric, @Column(name = "top_k", nullable = false) + @Min(1) val topK: Int, @Column(name = "embedding_model", nullable = false, length = 255) From cbdf1dd725533f9726a618dc85f523c221e36326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 16:29:03 +0900 Subject: [PATCH 10/30] =?UTF-8?q?:bug:=20fix:=20enum=20=EC=A0=80=EC=9E=A5?= =?UTF-8?q?=20=EA=B7=9C=EC=B9=99=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt index 8167cb4..e5d6c7c 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt @@ -21,6 +21,7 @@ class Index( val overlapSize: Int, @Column(name = "similarity_metric", nullable = false) + @Enumerated(EnumType.STRING) val similarityMetric: SimilarityMetric, @Column(name = "top_k", nullable = false) From 6669cb5817383d028f160c8434246400fb5f351e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 16:32:14 +0900 Subject: [PATCH 11/30] =?UTF-8?q?:recycle:=20refactor:=20Lob=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80=EB=A1=9C=20=EB=A9=80=ED=8B=B0=20=EB=B0=B4=EB=8D=94=20?= =?UTF-8?q?=ED=99=95=EB=B3=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../simplerag/ragback/domain/prompt/entity/FewShot.kt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt index 8dff2e6..270be2f 100644 --- a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt @@ -10,10 +10,12 @@ class FewShot( @Column(name = "question", nullable = false, length = 255) val question: String, - @Column(name = "answer", nullable = false, columnDefinition = "TEXT") + @Column(name = "answer", nullable = false) + @Lob val answer: String, - @Column(name = "evidence", nullable = false, columnDefinition = "TEXT") + @Column(name = "evidence", nullable = false) + @Lob val evidence: String, @ManyToOne(fetch = FetchType.LAZY) From e929732cac28b2efd7525ef6b02b6439de9e6daf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 17:30:42 +0900 Subject: [PATCH 12/30] =?UTF-8?q?:rocket:=20chore:=20=EC=97=94=ED=8B=B0?= =?UTF-8?q?=ED=8B=B0=20=EC=88=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt index 2a8a3f8..08596d1 100644 --- a/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt +++ b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt @@ -15,11 +15,11 @@ class Model( @Column(name = "llm_model", nullable = false, unique = true) val llmModel: String, - @ManyToOne(fetch = FetchType.LAZY) + @OneToOne(fetch = FetchType.LAZY) @JoinColumn(name = "indexes_id", nullable = false) val index: Index, - @ManyToOne(fetch = FetchType.LAZY) + @OneToOne(fetch = FetchType.LAZY) @JoinColumn(name = "prompts_id", nullable = false) val prompt: Prompt, From db5816fb86fc1a9fdc8226fabcab387cc13eee54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 17:32:16 +0900 Subject: [PATCH 13/30] =?UTF-8?q?:rocket:=20chore:=20=EC=97=94=ED=8B=B0?= =?UTF-8?q?=ED=8B=B0=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../simplerag/ragback/domain/index/entity/ChunkEmbedding.kt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt index 0574744..c4cbbf5 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt @@ -8,7 +8,8 @@ import simplerag.ragback.global.util.FloatArrayToPgVectorStringConverter @Table(name = "chunk_embeddings") class ChunkEmbedding( - @Column(name = "content", nullable = false, columnDefinition = "TEXT") + @Column(name = "content", nullable = false) + @Lob val content: String, @Convert(converter = FloatArrayToPgVectorStringConverter::class) From 4671e1a6b698456fa5d60c6535ddbc1ec1edaab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 17:34:01 +0900 Subject: [PATCH 14/30] =?UTF-8?q?:rocket:=20chore:=20=EC=97=94=ED=8B=B0?= =?UTF-8?q?=ED=8B=B0=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../simplerag/ragback/domain/index/entity/ChunkEmbedding.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt index c4cbbf5..044c385 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt @@ -13,7 +13,7 @@ class ChunkEmbedding( val content: String, @Convert(converter = FloatArrayToPgVectorStringConverter::class) - @Column(name = "embedding", nullable = false, columnDefinition = "vector(1536)") // 차원 수에 맞추세요 + @Column(name = "embedding", nullable = false) var embedding: FloatArray, @ManyToOne(fetch = FetchType.LAZY) From 8b9bf4644136abdb1a7e9bd15e73115592826e4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 17:34:37 +0900 Subject: [PATCH 15/30] =?UTF-8?q?:rocket:=20chore:=20=EC=9D=B8=EB=8D=B1?= =?UTF-8?q?=EC=8A=A4=20id=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt index e5d6c7c..2cf45a7 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt @@ -35,6 +35,6 @@ class Index( val reranker: Boolean, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) - @Column(name = "indexes_id", nullable = false) + @Column(name = "indexes_id") val id: Long? = null, ): BaseEntity() \ No newline at end of file From 8eeb524118c79c5b8ffa94f8f09bee5c8a11acfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 17:35:08 +0900 Subject: [PATCH 16/30] =?UTF-8?q?:rocket:=20chore:=20=ED=94=84=EB=A1=AC?= =?UTF-8?q?=ED=94=84=ED=8A=B8=20=EC=97=94=ED=8B=B0=ED=8B=B0=20Lob=20?= =?UTF-8?q?=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt index 697e3af..17a920e 100644 --- a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt @@ -15,7 +15,8 @@ class Prompt( @Column(name = "pre_set", nullable = false) val preSet: PreSet, - @Column(name = "system_prompt", nullable = false, columnDefinition = "TEXT") + @Column(name = "system_prompt", nullable = false) + @Lob val systemPrompt: String, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) From 1b6cb9680e3b790b8b05b6061f7af2fb1ebfeadb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 17:36:44 +0900 Subject: [PATCH 17/30] =?UTF-8?q?:bug:=20fix:=20convertToDatabaseColumn=20?= =?UTF-8?q?=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ragback/global/util/FloatJsonConverter.kt | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt b/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt index e431372..83675d0 100644 --- a/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt +++ b/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt @@ -6,16 +6,11 @@ import jakarta.persistence.Converter @Converter(autoApply = false) class FloatArrayToPgVectorStringConverter : AttributeConverter { override fun convertToDatabaseColumn(attribute: FloatArray?): String { - if (attribute == null) return "[]" - return buildString { - append('[') - attribute.forEachIndexed { i, v -> - if (i > 0) append(',') - append(v.toString()) - } - append(']') - } + requireNotNull(attribute) { "Embedding (FloatArray) must not be null" } + require(attribute.isNotEmpty()) { "Embedding must not be empty; expected fixed dimension (e.g., 1536)" } + return attribute.joinToString(prefix = "[", postfix = "]", separator = ",") { it.toString() } } + override fun convertToEntityAttribute(dbData: String?): FloatArray { if (dbData.isNullOrBlank()) return floatArrayOf() val body = dbData.trim().removePrefix("[").removeSuffix("]") From b39b21cff5b3dfa249cbdb1728c25daae8d61681 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 17:37:10 +0900 Subject: [PATCH 18/30] =?UTF-8?q?:bug:=20fix:=20convertToEntityAttribute?= =?UTF-8?q?=20=EB=94=94=EB=B2=84=EA=B9=85=20=EC=9A=A9=EC=9D=B4=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ragback/global/util/FloatJsonConverter.kt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt b/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt index 83675d0..e3cfc6d 100644 --- a/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt +++ b/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt @@ -13,8 +13,14 @@ class FloatArrayToPgVectorStringConverter : AttributeConverter Date: Sun, 17 Aug 2025 17:37:39 +0900 Subject: [PATCH 19/30] =?UTF-8?q?:rocket:=20chore:=20ddl-auto=20=EC=88=98?= =?UTF-8?q?=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/resources/application-local.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/application-local.yml b/src/main/resources/application-local.yml index 24a4c0f..5019101 100644 --- a/src/main/resources/application-local.yml +++ b/src/main/resources/application-local.yml @@ -6,7 +6,7 @@ spring: driver-class-name: org.postgresql.Driver jpa: hibernate: - ddl-auto: create-drop + ddl-auto: update show-sql: true logging: From 75c51bdea07e7870a128fdabd8e2b44fb4d6b48b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 21:29:11 +0900 Subject: [PATCH 20/30] =?UTF-8?q?:rocket:=20chore:=20unique=20=EC=A0=9C?= =?UTF-8?q?=EC=95=BD=20=EC=A1=B0=EA=B1=B4=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt index 08596d1..05bafad 100644 --- a/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt +++ b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt @@ -16,11 +16,11 @@ class Model( val llmModel: String, @OneToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "indexes_id", nullable = false) + @JoinColumn(name = "indexes_id", nullable = false, unique = true) val index: Index, @OneToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "prompts_id", nullable = false) + @JoinColumn(name = "prompts_id", nullable = false, unique = true) val prompt: Prompt, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) From 1f1aefa0da1e0b0049abea5b1f059ce4d0fbceba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 21:42:47 +0900 Subject: [PATCH 21/30] =?UTF-8?q?:bug:=20fix:=20=EC=9E=84=EB=B2=A0?= =?UTF-8?q?=EB=94=A9=20=EC=82=AC=EC=9D=B4=EC=A6=88=20=EC=A0=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../domain/index/entity/ChunkEmbedding.kt | 33 ++++++++++++++- .../ragback/domain/index/entity/Index.kt | 4 +- .../index/entity/enums/EmbeddingModel.kt | 40 +++++++++++++++++++ 3 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt index 044c385..3f1a04d 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt @@ -14,7 +14,7 @@ class ChunkEmbedding( @Convert(converter = FloatArrayToPgVectorStringConverter::class) @Column(name = "embedding", nullable = false) - var embedding: FloatArray, + var _embedding: FloatArray, @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "indexes_id", nullable = false) @@ -23,4 +23,33 @@ class ChunkEmbedding( @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @Column(name = "chunk_embeddings_id") val id: Long? = null, -): BaseEntity() \ No newline at end of file +): BaseEntity() { + + val embedding: FloatArray get() = _embedding.copyOf() + + init { + verifyDimAndValues(index.embeddingModel.dim, _embedding) + } + + fun updateEmbedding(newVec: FloatArray) { + verifyDimAndValues(index.embeddingModel.dim, newVec) + _embedding = newVec.copyOf() + } + + @PrePersist + @PreUpdate + fun verifyBeforeSave() { + verifyDimAndValues(index.embeddingModel.dim, _embedding) + } + + private fun verifyDimAndValues(expected: Int, vec: FloatArray) { + require(vec.isNotEmpty()) { "Embedding must not be empty" } + require(vec.size == expected) { + "Embedding dimension must be $expected but was ${vec.size}" + } + require(vec.all { it.isFinite() }) { + "Embedding must not contain NaN/Infinity" + } + } + +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt index 2cf45a7..630783a 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt @@ -2,6 +2,7 @@ package simplerag.ragback.domain.index.entity import jakarta.persistence.* import jakarta.validation.constraints.Min +import simplerag.ragback.domain.index.entity.enums.EmbeddingModel import simplerag.ragback.domain.index.entity.enums.SimilarityMetric import simplerag.ragback.global.entity.BaseEntity @@ -29,7 +30,8 @@ class Index( val topK: Int, @Column(name = "embedding_model", nullable = false, length = 255) - val embeddingModel: String, + @Enumerated(EnumType.STRING) + val embeddingModel: EmbeddingModel, @Column(name = "reranker", nullable = false) val reranker: Boolean, diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt new file mode 100644 index 0000000..c72cd30 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt @@ -0,0 +1,40 @@ +package simplerag.ragback.domain.index.entity.enums + +enum class EmbeddingModel(val dim: Int) { + // OpenAI + TEXT_EMBEDDING_ADA_002(1536), + TEXT_EMBEDDING_3_SMALL(1536), + TEXT_EMBEDDING_3_LARGE(3072), + + // SBERT / HuggingFace + ALL_MINILM_L6_V2(384), + ALL_MP_NET_BASE_V2(768), + MULTI_QA_MP_NET_BASE_DOT_V1(768), + DISTILUSE_BASE_MULTILINGUAL_CASUAL(512), + PARAPHRASE_MULTILINGUAL_MINILM_L12_V2(384), + KO_SBERT_V1(768), + KOR_SROBERTA(768), + + // Korean specific + KPF_BERT_KOREAN(768), + KOCSEBERT(768), + BM_KO_SMALL(512), + BM_KO_LARGE(1024), + + // Instructor / Mistral + INSTRUCTOR_BASE(768), + INSTRUCTOR_XL(1024), + MISTRAL_EMBED(4096), + + // BGE / E5 etc + BGE_SMALL_EN(384), + BGE_BASE_EN(768), + BGE_LARGE_EN(1024), + BGE_M3(1024), + E5_SMALL(384), + E5_BASE(768), + E5_LARGE(1024), + + // Old word vectors + FASTTEXT_KO(300), +} From 65793347686432c0a7618aba93e247c04caa548f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 21:43:33 +0900 Subject: [PATCH 22/30] =?UTF-8?q?:recycle:=20refactor:=20pgvector=20?= =?UTF-8?q?=EC=9E=85=EB=A0=A5=EC=97=90=20NaN/Infinity=20=EA=B0=92=20?= =?UTF-8?q?=EC=9C=A0=EC=9E=85=20=EC=B0=A8=EB=8B=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt b/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt index e3cfc6d..cff374e 100644 --- a/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt +++ b/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt @@ -8,6 +8,9 @@ class FloatArrayToPgVectorStringConverter : AttributeConverter Date: Sun, 17 Aug 2025 21:44:12 +0900 Subject: [PATCH 23/30] =?UTF-8?q?:recycle:=20refactor:=20converter=20?= =?UTF-8?q?=EB=AA=85=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...oatJsonConverter.kt => FloatArrayToPgVectorStringConverter.kt} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/kotlin/simplerag/ragback/global/util/{FloatJsonConverter.kt => FloatArrayToPgVectorStringConverter.kt} (100%) diff --git a/src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt b/src/main/kotlin/simplerag/ragback/global/util/FloatArrayToPgVectorStringConverter.kt similarity index 100% rename from src/main/kotlin/simplerag/ragback/global/util/FloatJsonConverter.kt rename to src/main/kotlin/simplerag/ragback/global/util/FloatArrayToPgVectorStringConverter.kt From 1349b696c5064489ac3afaae267e9be85835f0bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 21:59:37 +0900 Subject: [PATCH 24/30] =?UTF-8?q?:bug:=20fix:=20=ED=8C=8C=EC=83=9D=20?= =?UTF-8?q?=EB=A7=A4=ED=95=91=20=EC=A0=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../simplerag/ragback/domain/index/entity/ChunkEmbedding.kt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt index 3f1a04d..ec61e60 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt @@ -14,7 +14,7 @@ class ChunkEmbedding( @Convert(converter = FloatArrayToPgVectorStringConverter::class) @Column(name = "embedding", nullable = false) - var _embedding: FloatArray, + private var _embedding: FloatArray, @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "indexes_id", nullable = false) @@ -25,6 +25,7 @@ class ChunkEmbedding( val id: Long? = null, ): BaseEntity() { + @get:Transient val embedding: FloatArray get() = _embedding.copyOf() init { From b2c0a181921d6f94b62c7580bd62645a9357f2fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 22:00:06 +0900 Subject: [PATCH 25/30] =?UTF-8?q?:bug:=20fix:=20JPA=20init=20=EC=A0=9C?= =?UTF-8?q?=EA=B1=B0=20(NPE)=20=EB=B0=A9=EC=A7=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../simplerag/ragback/domain/index/entity/ChunkEmbedding.kt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt index ec61e60..b961583 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt @@ -28,10 +28,6 @@ class ChunkEmbedding( @get:Transient val embedding: FloatArray get() = _embedding.copyOf() - init { - verifyDimAndValues(index.embeddingModel.dim, _embedding) - } - fun updateEmbedding(newVec: FloatArray) { verifyDimAndValues(index.embeddingModel.dim, newVec) _embedding = newVec.copyOf() From 4b0f30305e7d65ec0f105a9a9de78c34b244a6f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 22:04:58 +0900 Subject: [PATCH 26/30] =?UTF-8?q?:zap:=20perf:=20=EB=B0=B0=EC=B9=98=20?= =?UTF-8?q?=EC=A0=80=EC=9E=A5=20N=20+=201=20=EB=AC=B8=EC=A0=9C=20=ED=95=B4?= =?UTF-8?q?=EA=B2=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../domain/index/entity/ChunkEmbedding.kt | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt index b961583..3c1862f 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt @@ -4,6 +4,7 @@ import jakarta.persistence.* import simplerag.ragback.global.entity.BaseEntity import simplerag.ragback.global.util.FloatArrayToPgVectorStringConverter +// 임베딩 크기를 서비스단에서 검증을 해줘야함 @Entity @Table(name = "chunk_embeddings") class ChunkEmbedding( @@ -16,6 +17,9 @@ class ChunkEmbedding( @Column(name = "embedding", nullable = false) private var _embedding: FloatArray, + @Column(name = "embedding_dim", nullable = false) + val embeddingDim: Int, + @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "indexes_id", nullable = false) val index: Index, @@ -29,24 +33,10 @@ class ChunkEmbedding( val embedding: FloatArray get() = _embedding.copyOf() fun updateEmbedding(newVec: FloatArray) { - verifyDimAndValues(index.embeddingModel.dim, newVec) - _embedding = newVec.copyOf() - } - - @PrePersist - @PreUpdate - fun verifyBeforeSave() { - verifyDimAndValues(index.embeddingModel.dim, _embedding) - } - - private fun verifyDimAndValues(expected: Int, vec: FloatArray) { - require(vec.isNotEmpty()) { "Embedding must not be empty" } - require(vec.size == expected) { - "Embedding dimension must be $expected but was ${vec.size}" - } - require(vec.all { it.isFinite() }) { - "Embedding must not contain NaN/Infinity" + require(newVec.size == embeddingDim) { + "Embedding dimension mismatch: expected=$embeddingDim, got=${newVec.size}" } + _embedding = newVec.copyOf() } } \ No newline at end of file From 90ada3138bbb17d7bc03700404123ff7d1f9e800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 22:06:18 +0900 Subject: [PATCH 27/30] =?UTF-8?q?:bug:=20fix:=20=EC=97=AD=EC=A7=81?= =?UTF-8?q?=EB=A0=AC=ED=99=94=20=EA=B2=80=EC=A6=9D=20=EB=A1=9C=EC=A7=81=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../global/util/FloatArrayToPgVectorStringConverter.kt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/kotlin/simplerag/ragback/global/util/FloatArrayToPgVectorStringConverter.kt b/src/main/kotlin/simplerag/ragback/global/util/FloatArrayToPgVectorStringConverter.kt index cff374e..04e1d23 100644 --- a/src/main/kotlin/simplerag/ragback/global/util/FloatArrayToPgVectorStringConverter.kt +++ b/src/main/kotlin/simplerag/ragback/global/util/FloatArrayToPgVectorStringConverter.kt @@ -22,6 +22,11 @@ class FloatArrayToPgVectorStringConverter : AttributeConverter + require(arr.all { it.isFinite() }) { + "Embedding must not contain NaN/Infinity (db → entity)" + } + } } catch (e: NumberFormatException) { throw IllegalArgumentException("Invalid vector literal for pgvector: '$dbData'", e) } From ccbfcf05271f793fbaa82f955c9278829c43400b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 22:08:52 +0900 Subject: [PATCH 28/30] =?UTF-8?q?:sparkles:=20feature:=20=EB=AA=A8?= =?UTF-8?q?=EB=8D=B8=20=EC=9D=B4=EB=A6=84=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../index/entity/enums/EmbeddingModel.kt | 65 +++++++++++-------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt index c72cd30..a53008a 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt @@ -1,40 +1,53 @@ package simplerag.ragback.domain.index.entity.enums -enum class EmbeddingModel(val dim: Int) { +enum class EmbeddingModel( + val dim: Int, + val modelId: String +) { // OpenAI - TEXT_EMBEDDING_ADA_002(1536), - TEXT_EMBEDDING_3_SMALL(1536), - TEXT_EMBEDDING_3_LARGE(3072), + TEXT_EMBEDDING_ADA_002(1536, "text-embedding-ada-002"), + TEXT_EMBEDDING_3_SMALL(1536, "text-embedding-3-small"), + TEXT_EMBEDDING_3_LARGE(3072, "text-embedding-3-large"), // SBERT / HuggingFace - ALL_MINILM_L6_V2(384), - ALL_MP_NET_BASE_V2(768), - MULTI_QA_MP_NET_BASE_DOT_V1(768), - DISTILUSE_BASE_MULTILINGUAL_CASUAL(512), - PARAPHRASE_MULTILINGUAL_MINILM_L12_V2(384), - KO_SBERT_V1(768), - KOR_SROBERTA(768), + ALL_MINILM_L6_V2(384, "sentence-transformers/all-MiniLM-L6-v2"), + ALL_MP_NET_BASE_V2(768, "sentence-transformers/all-mpnet-base-v2"), + MULTI_QA_MP_NET_BASE_DOT_V1(768, "sentence-transformers/multi-qa-mpnet-base-dot-v1"), + DISTILUSE_BASE_MULTILINGUAL_CASED_V2(512, "sentence-transformers/distiluse-base-multilingual-cased"), + PARAPHRASE_MULTILINGUAL_MINILM_L12_V2(384, "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"), + KO_SBERT_V1(768, "jhgan/ko-sbert-v1"), + KOR_SROBERTA(768, "jhgan/ko-sroberta-medium-nli"), // Korean specific - KPF_BERT_KOREAN(768), - KOCSEBERT(768), - BM_KO_SMALL(512), - BM_KO_LARGE(1024), + KPF_BERT_KOREAN(768, "kpfbert/kpfbert-base-korean"), + KOCSEBERT(768, "BM-K/KoCSE-BERT"), + BM_KO_SMALL(512, "bespin-global/klue-sroberta-base-continue-learning-by-mnr"), + BM_KO_LARGE(1024, "bespin-global/klue-roberta-large-continue-learning-by-mnr"), // Instructor / Mistral - INSTRUCTOR_BASE(768), - INSTRUCTOR_XL(1024), - MISTRAL_EMBED(4096), + INSTRUCTOR_BASE(768, "hkunlp/instructor-base"), + INSTRUCTOR_XL(1024, "hkunlp/instructor-xl"), + MISTRAL_EMBED(4096, "mistral-embed"), // BGE / E5 etc - BGE_SMALL_EN(384), - BGE_BASE_EN(768), - BGE_LARGE_EN(1024), - BGE_M3(1024), - E5_SMALL(384), - E5_BASE(768), - E5_LARGE(1024), + BGE_SMALL_EN(384, "BAAI/bge-small-en-v1.5"), + BGE_BASE_EN(768, "BAAI/bge-base-en-v1.5"), + BGE_LARGE_EN(1024, "BAAI/bge-large-en-v1.5"), + BGE_M3(1024, "BAAI/bge-m3"), + E5_SMALL(384, "intfloat/e5-small-v2"), + E5_BASE(768, "intfloat/e5-base-v2"), + E5_LARGE(1024, "intfloat/e5-large-v2"), // Old word vectors - FASTTEXT_KO(300), + FASTTEXT_KO(300, "fasttext-ko-300d"); + + companion object { + fun findByModelId(modelId: String): EmbeddingModel? { + return entries.find { it.modelId == modelId } + } + + fun getAllModelIds(): List { + return entries.map { it.modelId } + } + } } From a2db08a7e711996d28abaa3111a29a79c7054dd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 22:17:45 +0900 Subject: [PATCH 29/30] =?UTF-8?q?:bug:=20fix:=20=EB=AA=A8=EB=8D=B8=20?= =?UTF-8?q?=EA=B4=80=EA=B3=84=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../simplerag/ragback/domain/chat/entity/Model.kt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt index 05bafad..5d7a193 100644 --- a/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt +++ b/src/main/kotlin/simplerag/ragback/domain/chat/entity/Model.kt @@ -12,15 +12,15 @@ class Model( @Column(name = "name", nullable = false, unique = true, length = 100) val name: String, - @Column(name = "llm_model", nullable = false, unique = true) + @Column(name = "llm_model", nullable = false) val llmModel: String, - @OneToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "indexes_id", nullable = false, unique = true) + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "indexes_id") val index: Index, - @OneToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "prompts_id", nullable = false, unique = true) + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "prompts_id") val prompt: Prompt, @Id @GeneratedValue(strategy = GenerationType.IDENTITY) From 3811cbc08cba4b4b02761c8c8a1bb4ea4ff6c40c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Sun, 17 Aug 2025 22:23:22 +0900 Subject: [PATCH 30/30] =?UTF-8?q?:rocket:=20chore:=20=EB=AA=A8=EB=8D=B8=20?= =?UTF-8?q?=EC=A0=95=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ragback/domain/index/entity/enums/EmbeddingModel.kt | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt index a53008a..6729034 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt @@ -5,7 +5,6 @@ enum class EmbeddingModel( val modelId: String ) { // OpenAI - TEXT_EMBEDDING_ADA_002(1536, "text-embedding-ada-002"), TEXT_EMBEDDING_3_SMALL(1536, "text-embedding-3-small"), TEXT_EMBEDDING_3_LARGE(3072, "text-embedding-3-large"), @@ -13,21 +12,18 @@ enum class EmbeddingModel( ALL_MINILM_L6_V2(384, "sentence-transformers/all-MiniLM-L6-v2"), ALL_MP_NET_BASE_V2(768, "sentence-transformers/all-mpnet-base-v2"), MULTI_QA_MP_NET_BASE_DOT_V1(768, "sentence-transformers/multi-qa-mpnet-base-dot-v1"), - DISTILUSE_BASE_MULTILINGUAL_CASED_V2(512, "sentence-transformers/distiluse-base-multilingual-cased"), + DISTILUSE_BASE_MULTILINGUAL_CASED_V2(512, "sentence-transformers/distiluse-base-multilingual-cased-v2"), PARAPHRASE_MULTILINGUAL_MINILM_L12_V2(384, "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"), KO_SBERT_V1(768, "jhgan/ko-sbert-v1"), KOR_SROBERTA(768, "jhgan/ko-sroberta-medium-nli"), // Korean specific - KPF_BERT_KOREAN(768, "kpfbert/kpfbert-base-korean"), - KOCSEBERT(768, "BM-K/KoCSE-BERT"), BM_KO_SMALL(512, "bespin-global/klue-sroberta-base-continue-learning-by-mnr"), - BM_KO_LARGE(1024, "bespin-global/klue-roberta-large-continue-learning-by-mnr"), // Instructor / Mistral INSTRUCTOR_BASE(768, "hkunlp/instructor-base"), INSTRUCTOR_XL(1024, "hkunlp/instructor-xl"), - MISTRAL_EMBED(4096, "mistral-embed"), + MISTRAL_EMBED(1024, "mistral-embed"), // BGE / E5 etc BGE_SMALL_EN(384, "BAAI/bge-small-en-v1.5"),