From a8554d99b7a45664bb8b2d744c0ee46839f3a71a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Fri, 22 Aug 2025 17:56:30 +0900 Subject: [PATCH 1/6] =?UTF-8?q?:recycle:=20Refactor:=20val=20=EB=B6=88?= =?UTF-8?q?=EB=B3=80=EC=84=B1=20=ED=99=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build.gradle | 4 +++ .../domain/document/dto/DataFileRequestDTO.kt | 18 +++++------ .../document/dto/DataFileResponseDTO.kt | 6 ++-- .../domain/document/entity/DataFile.kt | 6 ++-- .../domain/document/entity/DataFileTag.kt | 13 ++++---- .../ragback/domain/document/entity/Tag.kt | 6 ++-- .../document/service/DataFileService.kt | 31 ++++++++++--------- .../domain/index/dto/IndexResponseDTO.kt | 4 +-- .../domain/index/entity/ChunkEmbedding.kt | 7 +++-- .../domain/index/entity/DataFileIndex.kt | 6 ++-- .../ragback/domain/index/entity/Index.kt | 5 ++- .../domain/index/service/IndexService.kt | 10 +++--- .../ragback/domain/prompt/entity/FewShot.kt | 5 +-- .../ragback/domain/prompt/entity/Prompt.kt | 5 +-- .../ragback/global/error/ErrorCode.kt | 1 + .../global/util/converter/FileConvertUtil.kt | 12 ++----- .../domain/index/service/IndexServiceTest.kt | 4 +-- 17 files changed, 74 insertions(+), 69 deletions(-) diff --git a/build.gradle b/build.gradle index e7c93c9..8f7aed0 100644 --- a/build.gradle +++ b/build.gradle @@ -60,6 +60,10 @@ dependencies { // s3 implementation(platform("software.amazon.awssdk:bom:2.25.70")) implementation("software.amazon.awssdk:s3") + + // extractor + implementation 'org.apache.pdfbox:pdfbox:2.0.30' + implementation 'org.apache.poi:poi-ooxml:5.2.5' } dependencyManagement { diff --git a/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileRequestDTO.kt b/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileRequestDTO.kt index 985c912..04a1135 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileRequestDTO.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileRequestDTO.kt @@ -8,13 +8,13 @@ data class DataFileBulkCreateRequest( @field:Size(min = 1, message = "최소 하나 이상 업로드해야 합니다") @Valid val items: List -) +) { + data class DataFileCreateItem( + @field:NotBlank(message = "title은 비어있을 수 없습니다") + @field:Size(max = 100) + val title: String, -data class DataFileCreateItem( - @field:NotBlank(message = "title은 비어있을 수 없습니다") - @field:Size(max = 100) - val title: String, - - @field:Size(max = 10, message = "태그는 최대 10개까지 가능합니다") - val tags: List = emptyList() -) + @field:Size(max = 10, message = "태그는 최대 10개까지 가능합니다") + val tags: List = emptyList() + ) +} diff --git a/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileResponseDTO.kt b/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileResponseDTO.kt index 2f59d93..7bf60b7 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileResponseDTO.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileResponseDTO.kt @@ -43,7 +43,7 @@ data class DataFileDetailResponseList( } data class DataFileDetailResponse( - var id: Long?, + val id: Long, val title: String, val type: String, val lastModified: LocalDateTime, @@ -54,7 +54,7 @@ data class DataFileDetailResponse( companion object { fun from(file: DataFile, tags: List): DataFileDetailResponse = DataFileDetailResponse( - id = requireNotNull(file.id) { "DataFile.id is null" }, + id = file.id, title = file.title, type = file.type, lastModified = file.updatedAt, @@ -66,7 +66,7 @@ data class DataFileDetailResponse( } data class TagDTO( - val id: Long?, + val id: Long, val name: String, ) { companion object { diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt index 314edf7..f6c6646 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt @@ -24,11 +24,11 @@ class DataFile( @Column(nullable = false, length = 2048, name = "file_url") val fileUrl: String, - +) : BaseEntity() { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @Column(name = "data_files_id") - val id: Long? = null, -) : BaseEntity() { + val id: Long = 0 + companion object { fun from(title: String, type: String, sizeBytes : Long, sha256 : String, fileUrl: String): DataFile { return DataFile(title, type, sizeBytes, sha256, fileUrl) diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt index a00b9a6..b367638 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFileTag.kt @@ -9,16 +9,15 @@ import simplerag.ragback.global.entity.BaseEntity uniqueConstraints = [UniqueConstraint(columnNames = ["data_files_id", "tags_id"])] ) class DataFileTag( - @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "tags_id", nullable = false) + @JoinColumn(name = "tags_id") var tag: Tag, @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "data_files_id", nullable = false) - var dataFile: DataFile, - + @JoinColumn(name = "data_files_id") + var dataFile: DataFile +) : BaseEntity() { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @Column(name = "data_files_tags_id") - val id: Long? = null, -) : BaseEntity() \ No newline at end of file + val id: Long = 0 +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/Tag.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/Tag.kt index 0ce9934..3886444 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/Tag.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/Tag.kt @@ -12,8 +12,8 @@ class Tag( @Column(nullable = false, length = 60) val name: String, - +) : BaseEntity() { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @Column(name = "tags_id") - val id: Long? = null, -) : BaseEntity() \ No newline at end of file + val id: Long = 0 +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt b/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt index f45cd31..e0c48ad 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt @@ -66,7 +66,7 @@ class DataFileService( val tags = getOrCreateTags(meta.tags) attachTagsIfMissing(dataFile, tags) - DataFilePreviewResponse.from(dataFile) + return@mapIndexed DataFilePreviewResponse.from(dataFile) } return DataFilePreviewResponseList(responses) @@ -82,7 +82,7 @@ class DataFileService( val allLinks = dataFileTagRepository.findAllByDataFileIn(files.content) val tagsByFileId: Map> = allLinks.groupBy( - keySelector = { requireNotNull(it.dataFile.id) { "DataFile.id is null" } } + { requireNotNull(it.dataFile.id) { "DataFile.id is null" } } ).mapValues { (_, links) -> TagDTO.from(links) } val nextCursor = files.content.lastOrNull()?.id @@ -92,17 +92,18 @@ class DataFileService( @Transactional fun deleteFile(dataFilesId: Long) { - val dataFile = dataFileRepository.findDataFileById(dataFilesId) ?: throw FileException( - ErrorCode.NOT_FOUND, - dataFilesId.toString() - ) + val dataFile = dataFileRepository.findDataFileById(dataFilesId) + ?: throw FileException( + ErrorCode.NOT_FOUND, + dataFilesId.toString() + ) dataFileTagRepository.deleteAllByDataFile(dataFile) dataFileRepository.delete(dataFile) } - private fun registerRollbackCleanup(uploadedUrls: MutableList) { + private fun registerRollbackCleanup(uploadedUrls: List) { if (TransactionSynchronizationManager.isSynchronizationActive()) { TransactionSynchronizationManager.registerSynchronization(object : TransactionSynchronization { override fun afterCompletion(status: Int) { @@ -145,14 +146,14 @@ class DataFileService( private fun attachTagsIfMissing(dataFile: DataFile, tags: List) { - val fileId = dataFile.id ?: return - tags.forEach { tag -> - val tagId = tag.id ?: return@forEach - val exists = dataFileTagRepository.existsByDataFileIdAndTagId(fileId, tagId) - if (!exists) { - dataFileTagRepository.save(DataFileTag(tag = tag, dataFile = dataFile)) - } + val temp = tags.mapNotNull { tag -> + val exists = dataFileTagRepository.existsByDataFileIdAndTagId(dataFile.id, tag.id) + + if (exists) return@mapNotNull null + + return@mapNotNull DataFileTag(tag = tag, dataFile = dataFile) } - } + dataFileTagRepository.saveAll(temp) + } } diff --git a/src/main/kotlin/simplerag/ragback/domain/index/dto/IndexResponseDTO.kt b/src/main/kotlin/simplerag/ragback/domain/index/dto/IndexResponseDTO.kt index 112548a..96b78fe 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/dto/IndexResponseDTO.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/dto/IndexResponseDTO.kt @@ -18,7 +18,7 @@ data class IndexPreviewResponseList( } data class IndexPreviewResponse( - var indexId: Long?, + var indexId: Long, val snapshotName: String, ) { companion object { @@ -32,7 +32,7 @@ data class IndexPreviewResponse( } data class IndexDetailResponse( - var indexId: Long?, + val indexId: Long, val snapshotName: String, val chunkingSize: Int, val overlapSize: Int, diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt index 0c49a9f..988aa7d 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/ChunkEmbedding.kt @@ -20,8 +20,9 @@ class ChunkEmbedding( @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "indexes_id", nullable = false) val index: Index, - +) : BaseEntity() { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @Column(name = "chunk_embeddings_id") - val id: Long? = null, -) : BaseEntity() \ No newline at end of file + val id: Long = 0 + +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt index 2e60163..35486e4 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/DataFileIndex.kt @@ -16,7 +16,9 @@ class DataFileIndex( @JoinColumn(name = "indexes_id", nullable = false) val index: Index, +) : BaseEntity() { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @Column(name = "data_files_indexes_id") - val id: Long? = null, -) : BaseEntity() \ No newline at end of file + val id: Long = 0 + +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt index a95cde9..3d8938c 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/Index.kt @@ -40,11 +40,10 @@ class Index( @OneToMany(cascade = [CascadeType.ALL], orphanRemoval = true, mappedBy = "index") val chunkEmbeddings: MutableList = mutableListOf(), - +) : BaseEntity() { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @Column(name = "indexes_id") - var id: Long? = null, -) : BaseEntity() { + val id: Long = 0 companion object { fun toIndex(createRequest: IndexCreateRequest): Index { diff --git a/src/main/kotlin/simplerag/ragback/domain/index/service/IndexService.kt b/src/main/kotlin/simplerag/ragback/domain/index/service/IndexService.kt index e5971b5..3f769dc 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/service/IndexService.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/service/IndexService.kt @@ -41,7 +41,6 @@ class IndexService( for (file in files) { val url = file.fileUrl val content = contentLoader.load(url) - println(content) if (content.isBlank()) continue val chunks = TextChunker.chunkByCharsSeq(content, req.chunkingSize, req.overlapSize) @@ -68,7 +67,8 @@ class IndexService( @Transactional(readOnly = true) fun getIndex(indexId: Long): IndexDetailResponse { - val index = indexRepository.findByIdOrNull(indexId) ?: throw IndexException(ErrorCode.NOT_FOUND) + val index = indexRepository.findByIdOrNull(indexId) + ?: throw IndexException(ErrorCode.NOT_FOUND) return IndexDetailResponse.toIndexDetailResponse(index) } @@ -78,7 +78,8 @@ class IndexService( indexId: Long, indexUpdateRequest: IndexUpdateRequest ): IndexPreviewResponse { - val index = indexRepository.findByIdOrNull(indexId) ?: throw IndexException(ErrorCode.NOT_FOUND) + val index = indexRepository.findByIdOrNull(indexId) + ?: throw IndexException(ErrorCode.NOT_FOUND) validateOverlap(indexUpdateRequest.overlapSize, indexUpdateRequest.chunkingSize) @@ -89,7 +90,8 @@ class IndexService( @Transactional fun deleteIndex(indexId: Long) { - val index = indexRepository.findByIdOrNull(indexId) ?: throw IndexException(ErrorCode.NOT_FOUND) + val index = indexRepository.findByIdOrNull(indexId) + ?: throw IndexException(ErrorCode.NOT_FOUND) indexRepository.delete(index) } diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt index 3a4cec4..c36b702 100644 --- a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/FewShot.kt @@ -22,7 +22,8 @@ class FewShot( @JoinColumn(name = "prompts_id", nullable = false) val prompt: Prompt, +) : BaseEntity() { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @Column(name = "few_shots_id") - val id: Long? = null, -) : BaseEntity() \ No newline at end of file + val id: Long = 0 +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt index 1a787e5..0fd5cc2 100644 --- a/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt +++ b/src/main/kotlin/simplerag/ragback/domain/prompt/entity/Prompt.kt @@ -19,7 +19,8 @@ class Prompt( @Lob val systemPrompt: String, +) : BaseEntity() { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @Column(name = "prompts_id") - val id: Long? = null, -) : BaseEntity() \ No newline at end of file + val id: Long = 0 +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/error/ErrorCode.kt b/src/main/kotlin/simplerag/ragback/global/error/ErrorCode.kt index 16e8c49..308a7ed 100644 --- a/src/main/kotlin/simplerag/ragback/global/error/ErrorCode.kt +++ b/src/main/kotlin/simplerag/ragback/global/error/ErrorCode.kt @@ -13,6 +13,7 @@ enum class ErrorCode( ALREADY_FILE(HttpStatus.BAD_REQUEST, "ALREADY_FILE", "같은 내용의 파일이 이미 존재합니다."), FILE_PART_MISSING(HttpStatus.BAD_REQUEST, "FILE_PART_MISSING", "필수 파트가 존재하지 않습니다."), INVALID_JSON(HttpStatus.BAD_REQUEST, "INVALID_JSON", "JSON이 유효하지 않습니다."), + INVALID_FILE_TYPE(HttpStatus.BAD_REQUEST, "INVALID_FILE_TYPE", "FILE TYPE이 유효하지 않습니다."), // S3 S3_OBJECT_NOT_FOUND(HttpStatus.NOT_FOUND, "S3_001", "S3 오브젝트를 찾을 수 없습니다."), diff --git a/src/main/kotlin/simplerag/ragback/global/util/converter/FileConvertUtil.kt b/src/main/kotlin/simplerag/ragback/global/util/converter/FileConvertUtil.kt index 2fc19c9..ff51f56 100644 --- a/src/main/kotlin/simplerag/ragback/global/util/converter/FileConvertUtil.kt +++ b/src/main/kotlin/simplerag/ragback/global/util/converter/FileConvertUtil.kt @@ -1,6 +1,8 @@ package simplerag.ragback.global.util.converter import org.springframework.web.multipart.MultipartFile +import simplerag.ragback.global.error.CustomException +import simplerag.ragback.global.error.ErrorCode import java.io.BufferedInputStream import java.security.DigestInputStream import java.security.MessageDigest @@ -19,21 +21,13 @@ fun MultipartFile.resolveContentType(): String { if (!this.contentType.isNullOrBlank()) return this.contentType!! val ext = this.originalFilename?.substringAfterLast('.', "")?.lowercase() return when (ext) { - "png" -> "image/png" - "jpg", "jpeg" -> "image/jpeg" "pdf" -> "application/pdf" "txt" -> "text/plain" "csv" -> "text/csv" "md" -> "text/markdown" "json" -> "application/json" - "zip" -> "application/zip" - "doc" -> "application/msword" "docx" -> "application/vnd.openxmlformats-officedocument.wordprocessingml.document" - "xls" -> "application/vnd.ms-excel" - "xlsx" -> "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - "ppt" -> "application/vnd.ms-powerpoint" - "pptx" -> "application/vnd.openxmlformats-officedocument.presentationml.presentation" - else -> "application/octet-stream" + else -> throw CustomException(ErrorCode.INVALID_FILE_TYPE) } } diff --git a/src/test/kotlin/simplerag/ragback/domain/index/service/IndexServiceTest.kt b/src/test/kotlin/simplerag/ragback/domain/index/service/IndexServiceTest.kt index 3dd32f1..26c85b5 100644 --- a/src/test/kotlin/simplerag/ragback/domain/index/service/IndexServiceTest.kt +++ b/src/test/kotlin/simplerag/ragback/domain/index/service/IndexServiceTest.kt @@ -56,7 +56,7 @@ class IndexServiceTest( fun createIndexTest() { // given val indexCreateRequest = - IndexCreateRequest("test", 1, 0, SimilarityMetric.COSINE, 1, EmbeddingModel.TEXT_EMBEDDING_3_LARGE, true) + IndexCreateRequest(listOf(1), "test", 1, 0, SimilarityMetric.COSINE, 1, EmbeddingModel.TEXT_EMBEDDING_3_LARGE, true) // when val createIndexResponse = indexService.createIndex(indexCreateRequest) @@ -73,7 +73,7 @@ class IndexServiceTest( fun createIndexTestWithOverlapSize() { // given val indexCreateRequest = - IndexCreateRequest("test", 1, 1, SimilarityMetric.COSINE, 1, EmbeddingModel.TEXT_EMBEDDING_3_LARGE, true) + IndexCreateRequest(listOf(1),"test", 1, 1, SimilarityMetric.COSINE, 1, EmbeddingModel.TEXT_EMBEDDING_3_LARGE, true) // when * then val message = assertThrows { From 97c34aa9f56427f3580f8dcac70a0defc87704b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Fri, 22 Aug 2025 17:57:44 +0900 Subject: [PATCH 2/6] =?UTF-8?q?:recycle:=20Refactor:=20dto=20=EB=82=B4?= =?UTF-8?q?=EC=9E=AC=ED=99=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../document/dto/DataFileResponseDTO.kt | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileResponseDTO.kt b/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileResponseDTO.kt index 7bf60b7..3a69023 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileResponseDTO.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileResponseDTO.kt @@ -9,18 +9,18 @@ import kotlin.math.round data class DataFilePreviewResponseList( val dataFilePreviewResponseList: List, -) - -data class DataFilePreviewResponse( - val id: Long, - val sha256: String, ) { - companion object { - fun from(file: DataFile): DataFilePreviewResponse = - DataFilePreviewResponse( - id = requireNotNull(file.id) { "DataFile.id is null" }, - sha256 = file.sha256, - ) + data class DataFilePreviewResponse( + val id: Long, + val sha256: String, + ) { + companion object { + fun from(file: DataFile): DataFilePreviewResponse = + DataFilePreviewResponse( + id = file.id, + sha256 = file.sha256, + ) + } } } @@ -40,28 +40,28 @@ data class DataFileDetailResponseList( hasNext = hasNext, ) } -} -data class DataFileDetailResponse( - val id: Long, - val title: String, - val type: String, - val lastModified: LocalDateTime, - val sizeMB: Double, - val tags: List, - val sha256: String, -) { - companion object { - fun from(file: DataFile, tags: List): DataFileDetailResponse = - DataFileDetailResponse( - id = file.id, - title = file.title, - type = file.type, - lastModified = file.updatedAt, - sizeMB = file.sizeBytes.toMegaBytes(2), - tags = tags, - sha256 = file.sha256, - ) + data class DataFileDetailResponse( + val id: Long, + val title: String, + val type: String, + val lastModified: LocalDateTime, + val sizeMB: Double, + val tags: List, + val sha256: String, + ) { + companion object { + fun from(file: DataFile, tags: List): DataFileDetailResponse = + DataFileDetailResponse( + id = file.id, + title = file.title, + type = file.type, + lastModified = file.updatedAt, + sizeMB = file.sizeBytes.toMegaBytes(2), + tags = tags, + sha256 = file.sha256, + ) + } } } From 1fb358dcb278def3dd1cb58e5db8187c0eb4e587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Fri, 22 Aug 2025 17:59:20 +0900 Subject: [PATCH 3/6] =?UTF-8?q?:bug:=20Fix:=20dto=20=EC=97=90=EB=9F=AC=20?= =?UTF-8?q?=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../document/dto/DataFileResponseDTO.kt | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileResponseDTO.kt b/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileResponseDTO.kt index 3a69023..f214e20 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileResponseDTO.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/dto/DataFileResponseDTO.kt @@ -9,18 +9,18 @@ import kotlin.math.round data class DataFilePreviewResponseList( val dataFilePreviewResponseList: List, +) + +data class DataFilePreviewResponse( + val id: Long, + val sha256: String, ) { - data class DataFilePreviewResponse( - val id: Long, - val sha256: String, - ) { - companion object { - fun from(file: DataFile): DataFilePreviewResponse = - DataFilePreviewResponse( - id = file.id, - sha256 = file.sha256, - ) - } + companion object { + fun from(file: DataFile): DataFilePreviewResponse = + DataFilePreviewResponse( + id = file.id, + sha256 = file.sha256, + ) } } From abf077d54fb2c28463db62bcc8eeca03b9180847 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Fri, 22 Aug 2025 17:59:58 +0900 Subject: [PATCH 4/6] :sparkles: Feature: add ContentExtractor --- .../global/util/extractor/ContentExtractor.kt | 7 +++++++ .../util/extractor/DocxContentExtractor.kt | 16 ++++++++++++++++ .../util/extractor/PdfContentExtractor.kt | 18 ++++++++++++++++++ .../util/extractor/TxtContentExtractor.kt | 11 +++++++++++ 4 files changed, 52 insertions(+) create mode 100644 src/main/kotlin/simplerag/ragback/global/util/extractor/ContentExtractor.kt create mode 100644 src/main/kotlin/simplerag/ragback/global/util/extractor/DocxContentExtractor.kt create mode 100644 src/main/kotlin/simplerag/ragback/global/util/extractor/PdfContentExtractor.kt create mode 100644 src/main/kotlin/simplerag/ragback/global/util/extractor/TxtContentExtractor.kt diff --git a/src/main/kotlin/simplerag/ragback/global/util/extractor/ContentExtractor.kt b/src/main/kotlin/simplerag/ragback/global/util/extractor/ContentExtractor.kt new file mode 100644 index 0000000..86d036d --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/global/util/extractor/ContentExtractor.kt @@ -0,0 +1,7 @@ +package simplerag.ragback.global.util.extractor + +import org.springframework.web.multipart.MultipartFile + +interface ContentExtractor { + fun extract(file: MultipartFile): String +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/util/extractor/DocxContentExtractor.kt b/src/main/kotlin/simplerag/ragback/global/util/extractor/DocxContentExtractor.kt new file mode 100644 index 0000000..a12a679 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/global/util/extractor/DocxContentExtractor.kt @@ -0,0 +1,16 @@ +package simplerag.ragback.global.util.extractor + +import org.apache.poi.xwpf.usermodel.XWPFDocument +import org.springframework.stereotype.Component +import org.springframework.web.multipart.MultipartFile + +@Component +class DocxContentExtractor : ContentExtractor { + override fun extract(file: MultipartFile): String { + file.inputStream.use { input -> + XWPFDocument(input).use { doc -> + return doc.paragraphs.joinToString("\n") { it.text } + } + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/util/extractor/PdfContentExtractor.kt b/src/main/kotlin/simplerag/ragback/global/util/extractor/PdfContentExtractor.kt new file mode 100644 index 0000000..995c874 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/global/util/extractor/PdfContentExtractor.kt @@ -0,0 +1,18 @@ +package simplerag.ragback.global.util.extractor + +import org.apache.pdfbox.pdmodel.PDDocument +import org.apache.pdfbox.text.PDFTextStripper +import org.springframework.stereotype.Component +import org.springframework.web.multipart.MultipartFile + +@Component +class PdfContentExtractor : ContentExtractor { + override fun extract(file: MultipartFile): String { + file.inputStream.use { input -> + PDDocument.load(input).use { doc -> + val stripper = PDFTextStripper() + return stripper.getText(doc) + } + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/util/extractor/TxtContentExtractor.kt b/src/main/kotlin/simplerag/ragback/global/util/extractor/TxtContentExtractor.kt new file mode 100644 index 0000000..ad62120 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/global/util/extractor/TxtContentExtractor.kt @@ -0,0 +1,11 @@ +package simplerag.ragback.global.util.extractor + +import org.springframework.stereotype.Component +import org.springframework.web.multipart.MultipartFile + +@Component +class TxtContentExtractor : ContentExtractor { + override fun extract(file: MultipartFile): String { + return file.inputStream.bufferedReader(Charsets.UTF_8).use { it.readText() } + } +} \ No newline at end of file From fe097837f89bdc8ca33fb31735806f3daea6999b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Fri, 22 Aug 2025 19:03:02 +0900 Subject: [PATCH 5/6] =?UTF-8?q?:recycle:=20Refactor:=20loader=EB=A5=BC=20e?= =?UTF-8?q?xtractor=EB=A1=9C=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build.gradle | 8 +- .../domain/document/entity/DataFile.kt | 9 +- .../document/service/DataFileService.kt | 29 +---- .../domain/index/service/IndexService.kt | 5 +- .../ragback/global/config/S3Config.kt | 71 ------------ .../ragback/global/storage/FakeS3Util.kt | 54 --------- .../global/util/extractor/ContentExtractor.kt | 1 + .../extractor/ContentExtractorResolver.kt | 16 +++ .../util/extractor/DocxContentExtractor.kt | 7 ++ .../util/extractor/PdfContentExtractor.kt | 7 ++ .../util/extractor/TxtContentExtractor.kt | 13 +++ .../global/util/loader/ContentLoader.kt | 6 - .../global/util/loader/HttpContentLoader.kt | 21 ---- .../ragback/global/util/s3/S3Type.kt | 8 -- .../ragback/global/util/s3/S3Util.kt | 11 -- .../ragback/global/util/s3/S3UtilImpl.kt | 108 ------------------ src/main/resources/application-local.yml | 14 +-- src/test/resources/application-test.yml | 9 +- 18 files changed, 67 insertions(+), 330 deletions(-) delete mode 100644 src/main/kotlin/simplerag/ragback/global/config/S3Config.kt delete mode 100644 src/main/kotlin/simplerag/ragback/global/storage/FakeS3Util.kt create mode 100644 src/main/kotlin/simplerag/ragback/global/util/extractor/ContentExtractorResolver.kt delete mode 100644 src/main/kotlin/simplerag/ragback/global/util/loader/ContentLoader.kt delete mode 100644 src/main/kotlin/simplerag/ragback/global/util/loader/HttpContentLoader.kt delete mode 100644 src/main/kotlin/simplerag/ragback/global/util/s3/S3Type.kt delete mode 100644 src/main/kotlin/simplerag/ragback/global/util/s3/S3Util.kt delete mode 100644 src/main/kotlin/simplerag/ragback/global/util/s3/S3UtilImpl.kt diff --git a/build.gradle b/build.gradle index 8f7aed0..1bc4270 100644 --- a/build.gradle +++ b/build.gradle @@ -46,21 +46,17 @@ dependencies { // postgresql implementation 'org.postgresql:postgresql' - implementation 'org.springframework.ai:spring-ai-starter-vector-store-pgvector' + implementation 'com.pgvector:pgvector:0.1.6' // test testImplementation "org.springframework.boot:spring-boot-testcontainers" testImplementation "org.testcontainers:postgresql" testImplementation "org.testcontainers:junit-jupiter" - testImplementation 'org.springframework.ai:spring-ai-starter-vector-store-pgvector' + testImplementation 'com.pgvector:pgvector:0.1.6' // swagger implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.7.0' - // s3 - implementation(platform("software.amazon.awssdk:bom:2.25.70")) - implementation("software.amazon.awssdk:s3") - // extractor implementation 'org.apache.pdfbox:pdfbox:2.0.30' implementation 'org.apache.poi:poi-ooxml:5.2.5' diff --git a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt index f6c6646..547da30 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/entity/DataFile.kt @@ -22,16 +22,17 @@ class DataFile( @Column(nullable = false, length = 64) val sha256: String, - @Column(nullable = false, length = 2048, name = "file_url") - val fileUrl: String, + @Column(nullable = false) + @Lob + val content: String, ) : BaseEntity() { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @Column(name = "data_files_id") val id: Long = 0 companion object { - fun from(title: String, type: String, sizeBytes : Long, sha256 : String, fileUrl: String): DataFile { - return DataFile(title, type, sizeBytes, sha256, fileUrl) + fun from(title: String, type: String, sizeBytes : Long, sha256 : String, content: String): DataFile { + return DataFile(title, type, sizeBytes, sha256, content) } } } diff --git a/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt b/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt index e0c48ad..35dc65a 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt @@ -4,8 +4,6 @@ import org.springframework.dao.DataIntegrityViolationException import org.springframework.data.domain.PageRequest import org.springframework.stereotype.Service import org.springframework.transaction.annotation.Transactional -import org.springframework.transaction.support.TransactionSynchronization -import org.springframework.transaction.support.TransactionSynchronizationManager import org.springframework.web.multipart.MultipartFile import simplerag.ragback.domain.document.dto.* import simplerag.ragback.domain.document.entity.DataFile @@ -17,18 +15,18 @@ import simplerag.ragback.domain.document.repository.TagRepository import simplerag.ragback.global.error.CustomException import simplerag.ragback.global.error.ErrorCode import simplerag.ragback.global.error.FileException -import simplerag.ragback.global.util.s3.S3Type -import simplerag.ragback.global.util.s3.S3Util import simplerag.ragback.global.util.converter.computeMetricsStreaming import simplerag.ragback.global.util.converter.resolveContentType +import simplerag.ragback.global.util.extractor.ContentExtractorResolver import java.util.* @Service +@Transactional(readOnly = true) class DataFileService( private val dataFileRepository: DataFileRepository, private val tagRepository: TagRepository, private val dataFileTagRepository: DataFileTagRepository, - private val s3Util: S3Util, + private val contentExtractorResolver: ContentExtractorResolver ) { @Transactional @@ -40,9 +38,6 @@ class DataFileService( throw CustomException(ErrorCode.INVALID_INPUT) } - val uploadedUrls = mutableListOf() - registerRollbackCleanup(uploadedUrls) - val responses = files.mapIndexed { idx, file -> val meta = req.items[idx] val metrics = file.computeMetricsStreaming() @@ -54,11 +49,10 @@ class DataFileService( throw FileException(ErrorCode.ALREADY_FILE, sha256) } - val fileUrl = s3Util.upload(file, S3Type.ORIGINAL_FILE) - uploadedUrls += fileUrl + val content = contentExtractorResolver.extractContent(file, type) val dataFile = try { - dataFileRepository.save(DataFile.from(meta.title, type, sizeBytes, sha256, fileUrl)) + dataFileRepository.save(DataFile.from(meta.title, type, sizeBytes, sha256, content)) } catch (ex: DataIntegrityViolationException) { throw FileException(ErrorCode.ALREADY_FILE, sha256) } @@ -72,7 +66,6 @@ class DataFileService( return DataFilePreviewResponseList(responses) } - @Transactional(readOnly = true) fun getDataFiles( cursor: Long, take: Int @@ -103,18 +96,6 @@ class DataFileService( dataFileRepository.delete(dataFile) } - private fun registerRollbackCleanup(uploadedUrls: List) { - if (TransactionSynchronizationManager.isSynchronizationActive()) { - TransactionSynchronizationManager.registerSynchronization(object : TransactionSynchronization { - override fun afterCompletion(status: Int) { - if (status == TransactionSynchronization.STATUS_ROLLED_BACK) { - uploadedUrls.forEach { runCatching { s3Util.deleteByUrl(it) } } - } - } - }) - } - } - private fun getOrCreateTags(names: List): List { val normalized = names diff --git a/src/main/kotlin/simplerag/ragback/domain/index/service/IndexService.kt b/src/main/kotlin/simplerag/ragback/domain/index/service/IndexService.kt index 3f769dc..cc1efad 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/service/IndexService.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/service/IndexService.kt @@ -13,7 +13,6 @@ import simplerag.ragback.domain.index.repository.IndexRepository import simplerag.ragback.global.error.CustomException import simplerag.ragback.global.error.ErrorCode import simplerag.ragback.global.error.IndexException -import simplerag.ragback.global.util.loader.ContentLoader import simplerag.ragback.global.util.TextChunker @Service @@ -21,7 +20,6 @@ class IndexService( private val indexRepository: IndexRepository, private val embedder: Embedder, private val dataFileRepository: DataFileRepository, - private val contentLoader: ContentLoader, ) { @Transactional @@ -39,8 +37,7 @@ class IndexService( val index = indexRepository.save(Index.toIndex(req)) for (file in files) { - val url = file.fileUrl - val content = contentLoader.load(url) + val content = file.content if (content.isBlank()) continue val chunks = TextChunker.chunkByCharsSeq(content, req.chunkingSize, req.overlapSize) diff --git a/src/main/kotlin/simplerag/ragback/global/config/S3Config.kt b/src/main/kotlin/simplerag/ragback/global/config/S3Config.kt deleted file mode 100644 index c71b40c..0000000 --- a/src/main/kotlin/simplerag/ragback/global/config/S3Config.kt +++ /dev/null @@ -1,71 +0,0 @@ -package simplerag.ragback.global.config - -import org.springframework.beans.factory.annotation.Value -import org.springframework.context.annotation.Bean -import org.springframework.context.annotation.Configuration -import org.springframework.context.annotation.Profile -import software.amazon.awssdk.auth.credentials.AwsBasicCredentials -import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider -import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider -import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider -import software.amazon.awssdk.regions.Region -import software.amazon.awssdk.services.s3.S3Client -import software.amazon.awssdk.services.s3.S3Configuration -import software.amazon.awssdk.services.s3.presigner.S3Presigner - -@Configuration -@Profile("!test") -class S3Config( - - @Value("\${cloud.aws.region.static}") - private val region: String, - - @Value("\${cloud.aws.s3.bucket}") - val bucket: String, - - @Value("\${cloud.aws.credentials.access-key:}") - private val accessKey: String, - - @Value("\${cloud.aws.credentials.secret-key:}") - private val secretKey: String, -) { - - @Bean - fun s3Client(): S3Client { - val regionObj = Region.of(region) - - val creds: AwsCredentialsProvider = - if (accessKey.isNotBlank() && secretKey.isNotBlank()) - StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKey, secretKey)) - else - DefaultCredentialsProvider.create() - - val builder = S3Client.builder() - .region(regionObj) - .credentialsProvider(creds) - .serviceConfiguration( - S3Configuration.builder() - .checksumValidationEnabled(true) - .build() - ) - - return builder.build() - } - - @Bean - fun s3Presigner(): S3Presigner { - val regionObj = Region.of(region) - - val creds: AwsCredentialsProvider = - if (accessKey.isNotBlank() && secretKey.isNotBlank()) - StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKey, secretKey)) - else - DefaultCredentialsProvider.create() - - val builder = S3Presigner.builder() - .region(regionObj) - .credentialsProvider(creds) - - return builder.build() - } -} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/storage/FakeS3Util.kt b/src/main/kotlin/simplerag/ragback/global/storage/FakeS3Util.kt deleted file mode 100644 index ba3db6f..0000000 --- a/src/main/kotlin/simplerag/ragback/global/storage/FakeS3Util.kt +++ /dev/null @@ -1,54 +0,0 @@ -package simplerag.ragback.global.storage - -import org.springframework.context.annotation.Primary -import org.springframework.context.annotation.Profile -import org.springframework.stereotype.Component -import org.springframework.web.multipart.MultipartFile -import simplerag.ragback.global.util.s3.S3Type -import simplerag.ragback.global.util.s3.S3Util -import simplerag.ragback.global.util.converter.sha256Hex -import java.util.concurrent.ConcurrentHashMap - -@Component -@Primary -@Profile("test") -class FakeS3Util : S3Util { - - private val store = ConcurrentHashMap() - - override fun upload(file: MultipartFile, dir: S3Type): String { - val clean = (file.originalFilename ?: "file") - .substringAfterLast('/') - .substringAfterLast('\\') - .ifBlank { "file" } - - val hash = sha256Hex(file.bytes).take(12) - val prefix = dir.label.trim('/') - val key = "$prefix/${hash}_$clean" - - store[key] = file.bytes - return urlFromKey(key) - } - - override fun urlFromKey(key: String): String = "fake://$key" - - override fun deleteByUrl(url: String) { - val key = keyFromUrl(url) ?: throw simplerag.ragback.global.error.S3Exception( - simplerag.ragback.global.error.ErrorCode.S3_INVALID_URL - ) - store.remove(key) - } - - override fun delete(key: String) { - store.remove(key) - } - - override fun keyFromUrl(url: String): String? = url.removePrefix("fake://") - .removePrefix("/") - .ifBlank { null } - - // 테스트 용 함수 - fun exists(url: String): Boolean = keyFromUrl(url)?.let { store.containsKey(it) } == true - fun count(): Int = store.size - fun clear() = store.clear() -} diff --git a/src/main/kotlin/simplerag/ragback/global/util/extractor/ContentExtractor.kt b/src/main/kotlin/simplerag/ragback/global/util/extractor/ContentExtractor.kt index 86d036d..5be7b05 100644 --- a/src/main/kotlin/simplerag/ragback/global/util/extractor/ContentExtractor.kt +++ b/src/main/kotlin/simplerag/ragback/global/util/extractor/ContentExtractor.kt @@ -3,5 +3,6 @@ package simplerag.ragback.global.util.extractor import org.springframework.web.multipart.MultipartFile interface ContentExtractor { + fun supports(type: String): Boolean fun extract(file: MultipartFile): String } \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/util/extractor/ContentExtractorResolver.kt b/src/main/kotlin/simplerag/ragback/global/util/extractor/ContentExtractorResolver.kt new file mode 100644 index 0000000..632de74 --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/global/util/extractor/ContentExtractorResolver.kt @@ -0,0 +1,16 @@ +package simplerag.ragback.global.util.extractor + +import org.springframework.stereotype.Component +import org.springframework.web.multipart.MultipartFile + +@Component +class ContentExtractorResolver( + private val extractors: List +) { + + fun extractContent(file: MultipartFile, type: String): String { + val extractor = extractors.find { it.supports(type) } + ?: throw IllegalArgumentException("지원하지 않는 파일 타입입니다: $type") + return extractor.extract(file) + } +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/util/extractor/DocxContentExtractor.kt b/src/main/kotlin/simplerag/ragback/global/util/extractor/DocxContentExtractor.kt index a12a679..a06b121 100644 --- a/src/main/kotlin/simplerag/ragback/global/util/extractor/DocxContentExtractor.kt +++ b/src/main/kotlin/simplerag/ragback/global/util/extractor/DocxContentExtractor.kt @@ -6,6 +6,13 @@ import org.springframework.web.multipart.MultipartFile @Component class DocxContentExtractor : ContentExtractor { + + private val TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + + override fun supports(type: String): Boolean { + return TYPE == type + } + override fun extract(file: MultipartFile): String { file.inputStream.use { input -> XWPFDocument(input).use { doc -> diff --git a/src/main/kotlin/simplerag/ragback/global/util/extractor/PdfContentExtractor.kt b/src/main/kotlin/simplerag/ragback/global/util/extractor/PdfContentExtractor.kt index 995c874..97fe56a 100644 --- a/src/main/kotlin/simplerag/ragback/global/util/extractor/PdfContentExtractor.kt +++ b/src/main/kotlin/simplerag/ragback/global/util/extractor/PdfContentExtractor.kt @@ -7,6 +7,13 @@ import org.springframework.web.multipart.MultipartFile @Component class PdfContentExtractor : ContentExtractor { + + private val TYPE = "application/pdf" + + override fun supports(type: String): Boolean { + return TYPE == type + } + override fun extract(file: MultipartFile): String { file.inputStream.use { input -> PDDocument.load(input).use { doc -> diff --git a/src/main/kotlin/simplerag/ragback/global/util/extractor/TxtContentExtractor.kt b/src/main/kotlin/simplerag/ragback/global/util/extractor/TxtContentExtractor.kt index ad62120..6526ebc 100644 --- a/src/main/kotlin/simplerag/ragback/global/util/extractor/TxtContentExtractor.kt +++ b/src/main/kotlin/simplerag/ragback/global/util/extractor/TxtContentExtractor.kt @@ -5,6 +5,19 @@ import org.springframework.web.multipart.MultipartFile @Component class TxtContentExtractor : ContentExtractor { + + private val TYPE = listOf( + "text/plain", + "text/csv", + "text/markdown", + "application/json", + "text/html", + ) + + override fun supports(type: String): Boolean { + return type in TYPE + } + override fun extract(file: MultipartFile): String { return file.inputStream.bufferedReader(Charsets.UTF_8).use { it.readText() } } diff --git a/src/main/kotlin/simplerag/ragback/global/util/loader/ContentLoader.kt b/src/main/kotlin/simplerag/ragback/global/util/loader/ContentLoader.kt deleted file mode 100644 index 743d96a..0000000 --- a/src/main/kotlin/simplerag/ragback/global/util/loader/ContentLoader.kt +++ /dev/null @@ -1,6 +0,0 @@ -package simplerag.ragback.global.util.loader - - -interface ContentLoader { - fun load(url: String): String -} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/util/loader/HttpContentLoader.kt b/src/main/kotlin/simplerag/ragback/global/util/loader/HttpContentLoader.kt deleted file mode 100644 index 71da76c..0000000 --- a/src/main/kotlin/simplerag/ragback/global/util/loader/HttpContentLoader.kt +++ /dev/null @@ -1,21 +0,0 @@ -package simplerag.ragback.global.util.loader - -import org.springframework.http.converter.StringHttpMessageConverter -import org.springframework.stereotype.Component -import org.springframework.web.client.RestTemplate -import java.nio.charset.StandardCharsets - -@Component -class HttpContentLoader : ContentLoader { - - private val restTemplate: RestTemplate = RestTemplate().apply { - // 기존 String 컨버터 제거 후 UTF-8 컨버터를 맨 앞에 추가 - val replaced = messageConverters.filterNot { it is StringHttpMessageConverter }.toMutableList() - replaced.add(0, StringHttpMessageConverter(StandardCharsets.UTF_8)) - messageConverters = replaced - } - - override fun load(url: String): String { - return restTemplate.getForObject(url, String::class.java) ?: "" - } -} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/util/s3/S3Type.kt b/src/main/kotlin/simplerag/ragback/global/util/s3/S3Type.kt deleted file mode 100644 index 2cbb9bd..0000000 --- a/src/main/kotlin/simplerag/ragback/global/util/s3/S3Type.kt +++ /dev/null @@ -1,8 +0,0 @@ -package simplerag.ragback.global.util.s3 - - -enum class S3Type( - val label: String, -) { - ORIGINAL_FILE("/ORIGINAL/"), -} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/util/s3/S3Util.kt b/src/main/kotlin/simplerag/ragback/global/util/s3/S3Util.kt deleted file mode 100644 index 0c0ebf0..0000000 --- a/src/main/kotlin/simplerag/ragback/global/util/s3/S3Util.kt +++ /dev/null @@ -1,11 +0,0 @@ -package simplerag.ragback.global.util.s3 - -import org.springframework.web.multipart.MultipartFile - -interface S3Util { - fun upload(file: MultipartFile, dir: S3Type): String - fun urlFromKey(key: String): String - fun deleteByUrl(url: String) - fun delete(key: String) - fun keyFromUrl(url: String): String? -} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/global/util/s3/S3UtilImpl.kt b/src/main/kotlin/simplerag/ragback/global/util/s3/S3UtilImpl.kt deleted file mode 100644 index 49a4fde..0000000 --- a/src/main/kotlin/simplerag/ragback/global/util/s3/S3UtilImpl.kt +++ /dev/null @@ -1,108 +0,0 @@ -package simplerag.ragback.global.util.s3 - -import org.slf4j.LoggerFactory -import org.springframework.context.annotation.Profile -import org.springframework.stereotype.Component -import org.springframework.web.multipart.MultipartFile -import simplerag.ragback.global.config.S3Config -import simplerag.ragback.global.error.ErrorCode -import simplerag.ragback.global.error.GlobalExceptionHandler -import simplerag.ragback.global.error.S3Exception -import software.amazon.awssdk.core.sync.RequestBody -import software.amazon.awssdk.services.s3.S3Client -import software.amazon.awssdk.services.s3.model.DeleteObjectRequest -import software.amazon.awssdk.services.s3.model.PutObjectRequest -import java.net.URI -import java.util.* - -@Component -@Profile("!test") -class S3UtilImpl( - private val s3: S3Client, - private val s3Config: S3Config, -) : S3Util { - - private val bucket get() = s3Config.bucket - private val log = LoggerFactory.getLogger(GlobalExceptionHandler::class.java) - - override fun upload(file: MultipartFile, dir: S3Type): String { - if (file.isEmpty) throw S3Exception(ErrorCode.S3_EMPTY_FILE) - - val key = buildKey(dir.label, file.originalFilename) - val contentType = file.contentType ?: "application/octet-stream" - - try { - file.inputStream.use { input -> - val putReq = PutObjectRequest.builder() - .bucket(bucket) - .key(key) - .contentType(contentType) - .build() - - val body = RequestBody.fromInputStream(input, file.size) - s3.putObject(putReq, body) - } - - return urlFromKey(key) - } catch (e: software.amazon.awssdk.services.s3.model.S3Exception) { - log.error( - "S3 putObject fail bucket={}, key={}, status={}, awsCode={}, reqId={}, msg={}", - bucket, key, e.statusCode(), e.awsErrorDetails()?.errorCode(), e.requestId(), - e.awsErrorDetails()?.errorMessage(), e - ) - throw S3Exception(ErrorCode.S3_UPLOAD_FAIL) - } catch (e: Exception) { - log.error(e.message, e) - throw S3Exception(ErrorCode.S3_UPLOAD_FAIL) - } - } - - override fun urlFromKey(key: String): String = - "https://mukit-s3.s3.ap-northeast-2.amazonaws.com/" + key - - override fun deleteByUrl(url: String) { - val key = keyFromUrl(url) ?: throw S3Exception(ErrorCode.S3_INVALID_URL) - delete(key) - } - - override fun delete(key: String) { - try { - val req = DeleteObjectRequest.builder() - .bucket(bucket) - .key(key) - .build() - s3.deleteObject(req) - } catch (e: software.amazon.awssdk.services.s3.model.S3Exception) { - // NoSuchKey 등 - throw S3Exception(ErrorCode.S3_OBJECT_NOT_FOUND) - } catch (e: Exception) { - throw S3Exception(ErrorCode.S3_DELETE_FAIL) - } - } - - private fun buildKey(dir: String, originalFilename: String?): String { - val cleanName = (originalFilename ?: "file") - .substringAfterLast('/') - .substringAfterLast('\\') - .ifBlank { "file" } - - val prefix = dir.trim('/') - - val key = if (prefix.isBlank()) { - "${UUID.randomUUID()}_$cleanName" - } else { - "$prefix/${UUID.randomUUID()}_$cleanName" - } - - return key - } - - override fun keyFromUrl(url: String): String? { - val path = try { - URI(url).path // e.g. "/market/menu/uuid_name.jpg" - } catch (_: Exception) { - return null - } - return path.removePrefix("/").ifBlank { null } - } -} diff --git a/src/main/resources/application-local.yml b/src/main/resources/application-local.yml index 89011ba..780caaf 100644 --- a/src/main/resources/application-local.yml +++ b/src/main/resources/application-local.yml @@ -6,7 +6,7 @@ spring: driver-class-name: org.postgresql.Driver jpa: hibernate: - ddl-auto: update + ddl-auto: create-drop show-sql: true ai: openai: @@ -20,14 +20,4 @@ logging: level: root: INFO org.hibernate.SQL: DEBUG - org.hibernate.type.descriptor.sql.BasicBinder: TRACE - -cloud: - aws: - region: - static: ${REGION_LOCAL} - s3: - bucket: ${BUCKET_LOCAL} - credentials: - access-key: ${AWS_ACCESS_KEY_ID_LOCAL} - secret-key: ${AWS_SECRET_ACCESS_KEY_LOCAL} \ No newline at end of file + org.hibernate.type.descriptor.sql.BasicBinder: TRACE \ No newline at end of file diff --git a/src/test/resources/application-test.yml b/src/test/resources/application-test.yml index 79ec5f2..78663f9 100644 --- a/src/test/resources/application-test.yml +++ b/src/test/resources/application-test.yml @@ -4,4 +4,11 @@ spring: jpa: database-platform: org.hibernate.dialect.PostgreSQLDialect hibernate: - ddl-auto: create-drop \ No newline at end of file + ddl-auto: create-drop + ai: + openai: + api-key: dummy + model: + embedding: + options: + model: text-embedding-3-small \ No newline at end of file From f35e708723fd3d5e4ecd5dff3e8acb1aa9a1965c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=A4=80=ED=99=98?= Date: Fri, 22 Aug 2025 21:25:38 +0900 Subject: [PATCH 6/6] :white_check_mark: Test: index test add --- .../document/service/DataFileService.kt | 2 +- .../ragback/domain/index/embed/FakeEmbder.kt | 15 ++++ .../index/entity/enums/EmbeddingModel.kt | 5 +- .../document/service/DataFileServiceTest.kt | 88 ++----------------- .../domain/index/service/IndexServiceTest.kt | 16 ++-- 5 files changed, 39 insertions(+), 87 deletions(-) create mode 100644 src/main/kotlin/simplerag/ragback/domain/index/embed/FakeEmbder.kt diff --git a/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt b/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt index 35dc65a..7e7e8ea 100644 --- a/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt +++ b/src/main/kotlin/simplerag/ragback/domain/document/service/DataFileService.kt @@ -75,7 +75,7 @@ class DataFileService( val allLinks = dataFileTagRepository.findAllByDataFileIn(files.content) val tagsByFileId: Map> = allLinks.groupBy( - { requireNotNull(it.dataFile.id) { "DataFile.id is null" } } + { it.dataFile.id } ).mapValues { (_, links) -> TagDTO.from(links) } val nextCursor = files.content.lastOrNull()?.id diff --git a/src/main/kotlin/simplerag/ragback/domain/index/embed/FakeEmbder.kt b/src/main/kotlin/simplerag/ragback/domain/index/embed/FakeEmbder.kt new file mode 100644 index 0000000..6652a8b --- /dev/null +++ b/src/main/kotlin/simplerag/ragback/domain/index/embed/FakeEmbder.kt @@ -0,0 +1,15 @@ +package simplerag.ragback.domain.index.embed + +import org.springframework.context.annotation.Primary +import org.springframework.context.annotation.Profile +import org.springframework.stereotype.Component + +@Component +@Primary +@Profile("test") +class FakeEmbder: Embedder { + override val dim: Int = 1536 + override fun embed(text: String): FloatArray { + return FloatArray(1536) { 0.0f } + } +} \ No newline at end of file diff --git a/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt index d13ec86..94b3f9b 100644 --- a/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt +++ b/src/main/kotlin/simplerag/ragback/domain/index/entity/enums/EmbeddingModel.kt @@ -21,7 +21,10 @@ enum class EmbeddingModel( BGE_M3(1024, "BAAI/bge-m3"), // E5 - E5_BASE(768, "intfloat/e5-base-v2"); + E5_BASE(768, "intfloat/e5-base-v2"), + + // fake + FAKE(1, "fake"); companion object { fun findByModelId(modelId: String): EmbeddingModel? { diff --git a/src/test/kotlin/simplerag/ragback/domain/document/service/DataFileServiceTest.kt b/src/test/kotlin/simplerag/ragback/domain/document/service/DataFileServiceTest.kt index 8cc7b79..54b4631 100644 --- a/src/test/kotlin/simplerag/ragback/domain/document/service/DataFileServiceTest.kt +++ b/src/test/kotlin/simplerag/ragback/domain/document/service/DataFileServiceTest.kt @@ -10,12 +10,11 @@ import org.springframework.boot.testcontainers.service.connection.ServiceConnect import org.springframework.mock.web.MockMultipartFile import org.springframework.test.context.ActiveProfiles import org.springframework.transaction.annotation.Transactional -import org.springframework.transaction.support.TransactionTemplate import org.springframework.web.multipart.MultipartFile import org.testcontainers.containers.PostgreSQLContainer import org.testcontainers.utility.DockerImageName import simplerag.ragback.domain.document.dto.DataFileBulkCreateRequest -import simplerag.ragback.domain.document.dto.DataFileCreateItem +import simplerag.ragback.domain.document.dto.DataFileBulkCreateRequest.DataFileCreateItem import simplerag.ragback.domain.document.entity.DataFile import simplerag.ragback.domain.document.repository.DataFileRepository import simplerag.ragback.domain.document.repository.DataFileTagRepository @@ -23,10 +22,7 @@ import simplerag.ragback.domain.document.repository.TagRepository import simplerag.ragback.global.error.CustomException import simplerag.ragback.global.error.ErrorCode import simplerag.ragback.global.error.FileException -import simplerag.ragback.global.storage.FakeS3Util -import simplerag.ragback.global.util.s3.S3Type import simplerag.ragback.global.util.converter.sha256Hex -import java.security.MessageDigest @SpringBootTest @ActiveProfiles("test") @@ -35,7 +31,6 @@ class DataFileServiceTest( @Autowired val dataFileRepository: DataFileRepository, @Autowired val tagRepository: TagRepository, @Autowired val dataFileTagRepository: DataFileTagRepository, - @Autowired val s3Util: FakeS3Util ) { @@ -52,17 +47,12 @@ class DataFileServiceTest( } } - @Autowired - lateinit var txManager: org.springframework.transaction.PlatformTransactionManager - - private fun txTemplate() = TransactionTemplate(txManager) @AfterEach fun clean() { dataFileTagRepository.deleteAll() tagRepository.deleteAll() dataFileRepository.deleteAll() - s3Util.clear() } @Test @@ -89,7 +79,7 @@ class DataFileServiceTest( assertEquals("greeting", saved.title) assertEquals("text/plain", saved.type) assertEquals(sha256Hex(bytes), saved.sha256) - assertFalse(saved.fileUrl.isNullOrBlank()) + assertFalse(saved.content.isBlank()) val ai = tagRepository.findByName("AI") val rag = tagRepository.findByName("RAG") @@ -125,7 +115,7 @@ class DataFileServiceTest( type = "text/plain", sizeBytes = 0, sha256 = sha, - fileUrl = "fake://original/exists.txt", + content = "fake://original/exists.txt", ) ) val req = DataFileBulkCreateRequest(listOf(DataFileCreateItem("dup", listOf("tag")))) @@ -140,7 +130,7 @@ class DataFileServiceTest( @Test @Transactional - @DisplayName("컨텐츠 타입 null이거나 확장자 없을 시 application/octet-stream 저장이 된다") + @DisplayName("컨텐츠 타입 지정되지 않을 거 일 시 에러가 난다") fun unknownTypeOctetStream() { // given val bytes = "x".toByteArray() @@ -148,11 +138,8 @@ class DataFileServiceTest( val f = file(name = "noext", content = bytes, contentType = null) // no extension // when - val res = dataFileService.upload(listOf(f), req) - - // then - val saved = dataFileRepository.findById(res.dataFilePreviewResponseList.first().id).orElseThrow() - assertEquals("application/octet-stream", saved.type) + assertThrows(CustomException::class.java) { dataFileService.upload(listOf(f), req) } + .message.equals("FILE TYPE이 유효하지 않습니다.") } @Test @@ -173,65 +160,6 @@ class DataFileServiceTest( assertEquals(ErrorCode.ALREADY_FILE, ex.errorCode) } - @Test - @DisplayName("트랜잭션이 커밋되면 DB와 S3에 정상 저장된다") - fun uploadCommitPersist() { - // given - val bytes = "commit-case".toByteArray() - val req = DataFileBulkCreateRequest( - listOf(DataFileCreateItem(title = "commit-title", tags = listOf("t1"))) - ) - val f = file("c.txt", bytes) - - // when - val resultIds = txTemplate().execute { - val res = dataFileService.upload(listOf(f), req) - res.dataFilePreviewResponseList.map { it.id } - }!! - - // then (DB) - assertEquals(1, resultIds.size) - val saved = dataFileRepository.findById(resultIds.first()).orElseThrow() - assertEquals("commit-title", saved.title) - assertEquals(sha256Hex(bytes), saved.sha256) - assertFalse(saved.fileUrl.isNullOrBlank()) - - // then (S3 - FakeS3Util 기준) - assertTrue(s3Util.exists(saved.fileUrl!!), "커밋 시 S3에 파일이 존재해야 합니다") - } - - - @Test - @DisplayName("파일 업로드 중 트랜잭션이 롤백되면 DB와 S3에서 모두 정리된다") - fun uploadRollbackCleansDBandS3() { - // given - val bytes = "rollback-case".toByteArray() - val filename = "r.txt" - val req = DataFileBulkCreateRequest( - listOf(DataFileCreateItem(title = "rollback-title", tags = listOf("t2"))) - ) - val f = file(filename, bytes) - - val hash12 = MessageDigest.getInstance("SHA-256") - .digest(bytes).joinToString("") { "%02x".format(it) } - .take(12) - val expectedKey = "${S3Type.ORIGINAL_FILE.label}/${hash12}_$filename" - val expectedUrl = s3Util.urlFromKey(expectedKey) - - // when: 트랜잭션 내에서 업로드 후 강제 롤백 - txTemplate().execute { status -> - dataFileService.upload(listOf(f), req) - status!!.setRollbackOnly() - } - - // then - val sha = sha256Hex(bytes) - val existsInDb = dataFileRepository.findAll().any { it.sha256 == sha } - assertFalse(existsInDb, "롤백되었으므로 DB에 남으면 안 됩니다") - - assertFalse(s3Util.exists(expectedUrl), "롤백 시 S3도 보상 삭제되어야 합니다") - } - @Test @DisplayName("데이터 조회가 잘 된다") @Transactional @@ -248,14 +176,14 @@ class DataFileServiceTest( type = "text/plain", sizeBytes = 0, sha256 = sha1, - fileUrl = "fake://original/exists.txt", + content = "fake://original/exists.txt", ), DataFile( title = "exists2", type = "text/pdf", sizeBytes = 0, sha256 = sha2, - fileUrl = "fake://original/exists.txt", + content = "fake://original/exists.txt", ) ) ) diff --git a/src/test/kotlin/simplerag/ragback/domain/index/service/IndexServiceTest.kt b/src/test/kotlin/simplerag/ragback/domain/index/service/IndexServiceTest.kt index 26c85b5..f2cf09b 100644 --- a/src/test/kotlin/simplerag/ragback/domain/index/service/IndexServiceTest.kt +++ b/src/test/kotlin/simplerag/ragback/domain/index/service/IndexServiceTest.kt @@ -7,10 +7,10 @@ import org.junit.jupiter.api.DisplayName import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows import org.springframework.beans.factory.annotation.Autowired +import org.springframework.boot.test.autoconfigure.orm.jpa.TestEntityManager import org.springframework.boot.test.context.SpringBootTest import org.springframework.test.context.ActiveProfiles import org.testcontainers.containers.PostgreSQLContainer -import org.testcontainers.junit.jupiter.Container import simplerag.ragback.domain.index.dto.IndexCreateRequest import simplerag.ragback.domain.index.dto.IndexUpdateRequest import simplerag.ragback.domain.index.entity.Index @@ -21,6 +21,9 @@ import simplerag.ragback.global.error.IndexException import org.springframework.boot.testcontainers.service.connection.ServiceConnection import org.springframework.test.context.TestConstructor import org.testcontainers.utility.DockerImageName +import simplerag.ragback.domain.document.entity.DataFile +import simplerag.ragback.domain.document.repository.DataFileRepository +import simplerag.ragback.domain.index.repository.ChunkEmbeddingRepository @SpringBootTest @@ -29,10 +32,10 @@ import org.testcontainers.utility.DockerImageName class IndexServiceTest( @Autowired val indexService: IndexService, @Autowired val indexRepository: IndexRepository, + @Autowired val dataFileRepository: DataFileRepository, + @Autowired val chunkEmbeddingRepository: ChunkEmbeddingRepository, ) { - - companion object { private val pgvectorImage = DockerImageName @@ -48,15 +51,18 @@ class IndexServiceTest( @AfterEach fun cleanUp() { - indexRepository.deleteAll() + chunkEmbeddingRepository.deleteAllInBatch() // 자식 먼저 + indexRepository.deleteAllInBatch() // 부모 나중 } @Test @DisplayName("인덱스 생성이 정상 작동한다") fun createIndexTest() { // given + val datafile = dataFileRepository.save(DataFile("a", "txt", 1000000L, "sdf", "sfd")) + val indexCreateRequest = - IndexCreateRequest(listOf(1), "test", 1, 0, SimilarityMetric.COSINE, 1, EmbeddingModel.TEXT_EMBEDDING_3_LARGE, true) + IndexCreateRequest(listOf(datafile.id), "test", 1, 0, SimilarityMetric.COSINE, 1, EmbeddingModel.TEXT_EMBEDDING_3_SMALL, true) // when val createIndexResponse = indexService.createIndex(indexCreateRequest)