@@ -3,13 +3,16 @@ package simplerag.ragback.domain.document.controller
import io.swagger.v3.oas.annotations.Parameter
import io.swagger.v3.oas.annotations.media.Content
import jakarta.validation.Valid
import jakarta.validation.constraints.Max
import jakarta.validation.constraints.Min
import jakarta.validation.constraints.Size
import org.springframework.http.HttpStatus
import org.springframework.http.MediaType
import org.springframework.validation.annotation.Validated
import org.springframework.web.bind.annotation.*
import org.springframework.web.multipart.MultipartFile
import simplerag.ragback.domain.document.dto.DataFileBulkCreateRequest
import simplerag.ragback.domain.document.dto.DataFileDetailResponseList
import simplerag.ragback.domain.document.dto.DataFileResponseList
import simplerag.ragback.domain.document.service.DataFileService
import simplerag.ragback.global.response.ApiResponse
@@ -41,4 +44,13 @@ class DataFileController(
        return ApiResponse.ok(saved, "업로드 완료")
    }

    @GetMapping
    fun getDataFiles(
        @RequestParam(name = "cursor") cursor: Long,
        @RequestParam(name = "take") @Min(1) @Max(100) take: Int,
    ): ApiResponse<DataFileDetailResponseList> {
        val data = dataFileService.getDataFiles(cursor, take)
        return ApiResponse.ok(data)
    }
}
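The endpoint pages by an exclusive id cursor: the client passes the last id it has seen (0 for the first page) and a take between 1 and 100. A minimal sketch of a client-side walk, assuming the hasNext/cursor fields of the DTO below; collectAllFiles and fetchPage are hypothetical stand-ins, not part of this PR:

// Hypothetical client loop over the cursor API; fetchPage stands in for a
// call to the GET endpoint above.
fun collectAllFiles(
    fetchPage: (cursor: Long, take: Int) -> DataFileDetailResponseList,
): List<DataFileDetailResponse> {
    val all = mutableListOf<DataFileDetailResponse>()
    var cursor = 0L                        // first page: no ids seen yet
    do {
        val page = fetchPage(cursor, 100)  // take must stay within the @Min/@Max bounds
        all += page.dataFileDetailResponseList
        cursor = page.cursor ?: break      // cursor is null when the page comes back empty
    } while (page.hasNext)                 // server-reported continuation flag
    return all
}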
@@ -1,10 +1,48 @@
package simplerag.ragback.domain.document.dto

import simplerag.ragback.domain.document.entity.DataFile
import java.time.LocalDateTime

data class DataFileResponseList(
    val dataFilePreviewResponseList: List<DataFilePreviewResponse>,
)

data class DataFilePreviewResponse(
    val id: Long,
    val sha256: String,
)

data class DataFileDetailResponseList(
    val dataFileDetailResponseList: List<DataFileDetailResponse>,
    val cursor: Long?,
    val hasNext: Boolean,
)

data class DataFileDetailResponse(
    var id: Long?,
    val title: String,
    val type: String,
    val lastModified: LocalDateTime,
    val sizeMB: Double,
    val tags: List<TagDTO>,
    val sha256: String,
) {
Comment on lines +21 to +29
🛠️ Refactor suggestion

Keep the DTO immutable: id should be val, not var

There is no reason for id to change on a response DTO, so declaring it val instead of var is the safer choice.

 data class DataFileDetailResponse(
-    var id: Long?,
+    val id: Long?,
     val title: String,
     val type: String,
     val lastModified: LocalDateTime,
     val sizeMB: Double,
     val tags: List<TagDTO>,
     val sha256: String,
 )

    companion object {
        fun of(dataFile: DataFile, tags: List<TagDTO>): DataFileDetailResponse {
            return DataFileDetailResponse(
                dataFile.id,
                dataFile.title,
                dataFile.type,
                dataFile.updatedAt,
                dataFile.sizeBytes / (1024.0 * 1024.0),
                tags,
                dataFile.sha256,
            )
        }
Comment on lines +31 to +41
🧹 Nitpick (assertive)

Remove the magic number: extract the MB conversion into a constant

Instead of hardcoding 1024.0 * 1024.0, extract it into a named constant to make the intent explicit and reusable.

 data class DataFileDetailResponse(
@@
 ) {
     companion object {
+        private const val BYTES_PER_MB = 1_048_576.0
         fun of(dataFile: DataFile, tags: List<TagDTO>): DataFileDetailResponse {
             return DataFileDetailResponse(
                 dataFile.id,
                 dataFile.title,
                 dataFile.type,
                 dataFile.updatedAt,
-                dataFile.sizeBytes / (1024.0 * 1024.0),
+                dataFile.sizeBytes / BYTES_PER_MB,
                 tags,
                 dataFile.sha256,
             )
         }
     }
 }

    }
}

data class TagDTO(
    val id: Long?,
    val name: String,
)
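A quick sanity check of the bytes-to-MB conversion in DataFileDetailResponse.of above (a standalone sketch, not part of the PR):

// Verifies the same formula DataFileDetailResponse.of applies.
fun main() {
    val sizeBytes = 1_572_864L                 // exactly 1.5 MiB
    val sizeMB = sizeBytes / (1024.0 * 1024.0) // 1_048_576.0 bytes per MiB
    println(sizeMB)                            // prints 1.5
}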
@@ -1,8 +1,12 @@
package simplerag.ragback.domain.document.repository

import org.springframework.data.domain.Pageable
import org.springframework.data.domain.Slice
import org.springframework.data.jpa.repository.JpaRepository
import simplerag.ragback.domain.document.entity.DataFile

interface DataFileRepository : JpaRepository<DataFile, Long> {
    fun existsBySha256(sha256: String): Boolean

    fun findByIdGreaterThanOrderById(cursorId: Long, pageable: Pageable): Slice<DataFile>
}
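For reference, the derived method name resolves to roughly the JPQL sketched below, and returning a Slice skips the COUNT query a Page would need: Spring Data reads one extra row to decide hasNext(). The explicit form is illustrative only, not part of the PR:

// Assumed explicit equivalent of the derived query above; Spring Data applies
// the Pageable's limit on top of the JPQL. (Would also need the @Query/@Param
// imports, which this file does not currently have.)
@Query("SELECT df FROM DataFile df WHERE df.id > :cursorId ORDER BY df.id ASC")
fun findNextPage(@Param("cursorId") cursorId: Long, pageable: Pageable): Slice<DataFile>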
@@ -1,8 +1,19 @@
package simplerag.ragback.domain.document.repository

import org.springframework.data.jpa.repository.JpaRepository
import org.springframework.data.jpa.repository.Query
import org.springframework.data.repository.query.Param
import simplerag.ragback.domain.document.entity.DataFile
import simplerag.ragback.domain.document.entity.DataFileTag

interface DataFileTagRepository : JpaRepository<DataFileTag, Long> {
    fun existsByDataFileIdAndTagId(dataFileId: Long, tagId: Long): Boolean

    @Query("""
        SELECT dft
        FROM DataFileTag dft
        JOIN FETCH dft.tag t
        WHERE dft.dataFile = :dataFile
    """)
    fun findTagsByDataFile(@Param("dataFile") dataFile: DataFile): List<DataFileTag>
Comment on lines +12 to +18
🛠️ Refactor suggestion

Possible N+1: querying tags per file during list reads is inefficient; add a batch fetch method

findTagsByDataFile can be called once per row in the page, so the query count grows with N. Add a batch method that fetch-joins the tags for several files in one query and group the results in the service.

Adding the method below (with the service grouping by dataFile.id) reduces this to a single query:

@Query(
    """
    SELECT dft
    FROM DataFileTag dft
    JOIN FETCH dft.tag t
    WHERE dft.dataFile IN :dataFiles
    """
)
fun findAllByDataFileInFetchTag(@Param("dataFiles") dataFiles: List<DataFile>): List<DataFileTag>

Service-side grouping example:

val tagsByFileId = dataFileTagRepository
    .findAllByDataFileInFetchTag(files)
    .groupBy({ it.dataFile.id!! }, { TagDTO(it.tag.id, it.tag.name) })

Note: default_batch_fetch_size helps, but an explicit fetch join like the one above is the most reliable way to eliminate the N+1.


}
@@ -1,14 +1,14 @@
package simplerag.ragback.domain.document.service

import org.springframework.dao.DataIntegrityViolationException
import org.springframework.data.domain.PageRequest
import org.springframework.data.domain.Pageable
import org.springframework.stereotype.Service
import org.springframework.transaction.annotation.Transactional
import org.springframework.transaction.support.TransactionSynchronization
import org.springframework.transaction.support.TransactionSynchronizationManager
import org.springframework.web.multipart.MultipartFile
import simplerag.ragback.domain.document.dto.DataFileBulkCreateRequest
import simplerag.ragback.domain.document.dto.DataFilePreviewResponse
import simplerag.ragback.domain.document.dto.DataFileResponseList
import simplerag.ragback.domain.document.dto.*
import simplerag.ragback.domain.document.entity.DataFile
import simplerag.ragback.domain.document.entity.DataFileTag
import simplerag.ragback.domain.document.entity.Tag
@@ -24,6 +24,7 @@ import simplerag.ragback.global.util.computeMetricsStreaming
import simplerag.ragback.global.util.resolveContentType
import java.time.LocalDateTime
import java.util.*
import kotlin.collections.ArrayList

@Service
class DataFileService(
@@ -76,6 +77,28 @@ class DataFileService(
        return DataFileResponseList(responses)
    }

    @Transactional(readOnly = true)
    fun getDataFiles(cursor: Long, take: Int): DataFileDetailResponseList {
        val dataSlice = dataFileRepository.findByIdGreaterThanOrderById(cursor, PageRequest.of(0, take))

        val dataFileList: MutableList<DataFileDetailResponse> = ArrayList()
        dataSlice.forEach { dataFile ->
            val dataFileTags: List<DataFileTag> = dataFileTagRepository.findTagsByDataFile(dataFile)

            val tagDtos: List<TagDTO> = dataFileTags.map { dataFileTag ->
                val tag = dataFileTag.tag
                TagDTO(tag.id, tag.name)
            }

            dataFileList.add(DataFileDetailResponse.of(dataFile, tagDtos))
        }
Comment on lines +85 to +96
@coderabbitai coderabbitai bot Aug 17, 2025
🛠️ Refactor suggestion

N+1 queries: refactor to fetch tags for all files in a single bulk query

Right now findTagsByDataFile is called for every DataFile in the Slice. The query count grows with the list size, and batch-fetch settings alone cannot fix this. Fetch the tags in one bulk query and groupBy to get a 1+1 pattern.

Change the service as below and add a bulk lookup method to the repository.

Service change (replace this range):

-        val dataFileList: MutableList<DataFileDetailResponse> = ArrayList()
-        dataSlice.forEach{ dataFile ->
-            val dataFileTags: List<DataFileTag> = dataFileTagRepository.findTagsByDataFile(dataFile)
-
-            val tagDtos: List<TagDTO> = dataFileTags.map{
-                dataFileTag ->
-                val tag = dataFileTag.tag
-                TagDTO(tag.id, tag.name)
-            }
-
-            dataFileList.add(DataFileDetailResponse.of(dataFile, tagDtos))
-        }
+        val ids = dataSlice.mapNotNull { it.id }
+        val tagMap: Map<Long, List<TagDTO>> =
+            dataFileTagRepository.findByDataFileIdIn(ids)
+                .groupBy({ requireNotNull(it.dataFile.id) }) { TagDTO(it.tag.id, it.tag.name) }
+
+        val dataFileList: List<DataFileDetailResponse> = dataSlice.map { dataFile ->
+            val tagDtos = tagMap[dataFile.id] ?: emptyList()
+            DataFileDetailResponse.of(dataFile, tagDtos)
+        }.toList()

Repository addition (requires editing a separate file):

// src/main/kotlin/simplerag/ragback/domain/document/repository/DataFileTagRepository.kt
@Query("select dft from DataFileTag dft join fetch dft.tag where dft.dataFile.id in :ids")
fun findByDataFileIdIn(@Param("ids") ids: List<Long>): List<DataFileTag>

This fixes the cost at (one slice query + one bulk tag query), which stays efficient as the data grows.


Contributor Author:

Since the tags are already fetched with a fetch join, doesn't that make this a non-issue?

@catturtle123 Good question! But fetch join and the N+1 query problem are different concepts.

Analysis of the current situation:

The current code still produces N+1 queries:

  1. Query 1: dataFileRepository.findByIdGreaterThanOrderById() - fetches the DataFile list
  2. Queries 2..N+1: dataFileTagRepository.findTagsByDataFile() - called once per DataFile

What fetch join does:

  • A fetch join eagerly loads the associated entities within a single query, preventing the extra queries that lazy loading would cause
  • But it cannot prevent the method itself from being called multiple times

Example: with 3 DataFiles:

-- Query 1: fetch the DataFile list
SELECT df.* FROM data_file df WHERE df.id > ? ORDER BY df.id LIMIT 3

-- Query 2: tags for the first DataFile (fetch join applied)
SELECT dft.*, t.* FROM data_file_tag dft JOIN tag t ON dft.tag_id = t.id WHERE dft.data_file_id = 1

-- Query 3: tags for the second DataFile (fetch join applied)
SELECT dft.*, t.* FROM data_file_tag dft JOIN tag t ON dft.tag_id = t.id WHERE dft.data_file_id = 2

-- Query 4: tags for the third DataFile (fetch join applied)
SELECT dft.*, t.* FROM data_file_tag dft JOIN tag t ON dft.tag_id = t.id WHERE dft.data_file_id = 3

That is 4 database round trips.

Recommended approach:

-- Query 1: fetch the DataFile list
SELECT df.* FROM data_file df WHERE df.id > ? ORDER BY df.id LIMIT 3

-- Query 2: fetch the tags for all DataFiles at once
SELECT dft.*, t.* FROM data_file_tag dft JOIN tag t ON dft.tag_id = t.id WHERE dft.data_file_id IN (1,2,3)

This drops it to 2 database round trips.

The performance gap widens as data volume grows, so refactoring to the bulk lookup is recommended.



Contributor Author:

Fair enough. But since batch_size is configured, it doesn't reach full N+1, does it?

@catturtle123 Good point! Your understanding of batch_size is accurate, but in this case it does not fully solve the problem.

How batch size works:

  • default_batch_fetch_size: 500 mainly batches lazy loading: when the same association would otherwise be loaded entity by entity, Hibernate groups the loads
  • Example: when an entity association is accessed through a .tags property, Hibernate batches the loads automatically

Limitation of the current code:

dataSlice.forEach { dataFile ->
    val dataFileTags: List<DataFileTag> = dataFileTagRepository.findTagsByDataFile(dataFile) // explicit method call
}

  • Each findTagsByDataFile() call is a separate query execution
  • Because the repository method is invoked explicitly, Hibernate cannot batch it automatically
  • So the number of queries still equals the number of DataFiles

Actual query counts:

  • Without batch_size: 1 + N (a full N+1)
  • With batch_size: still 1 + N (unchanged)

The real fix:

val ids = dataSlice.mapNotNull { it.id }
val tagMap = dataFileTagRepository.findByDataFileIdIn(ids) // a single IN query

  • This fixes the total at 1 + 1 = 2 queries

batch_size is a good optimization, but an explicit bulk-lookup pattern is more effective here!



        val nextCursor: Long? = dataFileList.lastOrNull()?.id
        return DataFileDetailResponseList(dataFileList, nextCursor, dataSlice.hasNext())
    }

    private fun registerRollbackCleanup(uploadedUrls: MutableList<String>) {
        if (TransactionSynchronizationManager.isSynchronizationActive()) {
            TransactionSynchronizationManager.registerSynchronization(object : TransactionSynchronization {
@@ -50,4 +50,5 @@ class FakeS3Util : S3Util {
    // test-only helpers
    fun exists(url: String): Boolean = keyFromUrl(url)?.let { store.containsKey(it) } == true
    fun count(): Int = store.size
    fun clear() = store.clear()
}
1 change: 1 addition & 0 deletions src/main/resources/application.yml
@@ -16,3 +16,4 @@ spring:
      hibernate:
        dialect: org.hibernate.dialect.PostgreSQLDialect
        format_sql: true
        default_batch_fetch_size: 500
🧹 Nitpick (assertive)

default_batch_fetch_size=500 may be excessive, and with the current query pattern its effect is limited

  • Depending on the DB/driver, 500 can produce very large IN lists and actually hurt performance. Values between 50 and 200 are generally safe.
  • Also, this PR fetches tags with a separate query per file (findTagsByDataFile), a pattern Hibernate's batch fetch cannot help much with. Extending the repository to fetch tags in bulk is more effective (see the other comment on the repository).

Recommendation:

  • Lower the value conservatively first (e.g., 100) and pursue the bulk tag lookup in parallel.

The following small change makes it conservative:

-        default_batch_fetch_size: 500
+        default_batch_fetch_size: 100

@@ -1,6 +1,8 @@
package simplerag.ragback.domain.document.service

import jakarta.annotation.PostConstruct
import org.junit.jupiter.api.Assertions.*
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.DisplayName
import org.junit.jupiter.api.Test
import org.springframework.beans.factory.annotation.Autowired
@@ -23,7 +25,6 @@ import simplerag.ragback.global.storage.FakeS3Util
import simplerag.ragback.global.util.S3Type
import simplerag.ragback.global.util.sha256Hex
import java.security.MessageDigest
import java.time.LocalDateTime

@SpringBootTest
@ActiveProfiles("test")
@@ -40,6 +41,14 @@ class DataFileServiceTest(

    private fun txTemplate() = TransactionTemplate(txManager)

    @BeforeEach
    fun clean() {
        // delete the join-table rows first so FK constraints are satisfied
        dataFileTagRepository.deleteAll()
        tagRepository.deleteAll()
        dataFileRepository.deleteAll()
        s3Util.clear()
    }

    @Test
    @Transactional
    @DisplayName("업로드 시 잘 저장이 된다.")
@@ -207,6 +216,57 @@ class DataFileServiceTest(
        assertFalse(s3Util.exists(expectedUrl), "롤백 시 S3도 보상 삭제되어야 합니다")
    }

    @Test
    @DisplayName("데이터 조회가 잘 된다")
    @Transactional
    fun getDataFilesOK() {
        // given
        val bytes1 = "test1".toByteArray()
        val sha1 = sha256Hex(bytes1)
        val bytes2 = "test2".toByteArray()
        val sha2 = sha256Hex(bytes2)
        dataFileRepository.saveAll(
            listOf(
                DataFile(
                    title = "exists",
                    type = "text/plain",
                    sizeBytes = 0,
                    sha256 = sha1,
                    fileUrl = "fake://original/exists.txt",
                ),
                DataFile(
                    title = "exists2",
                    type = "text/pdf",
                    sizeBytes = 0,
                    sha256 = sha2,
                    fileUrl = "fake://original/exists.txt",
                )
            )
        )

        val cursor = 0L
        val take = 2

        // when
        val dataFiles = dataFileService.getDataFiles(cursor, take)

        // then
        val dataFileDetailResponse = dataFiles.dataFileDetailResponseList[0]
        assertEquals(dataFileDetailResponse.title, "exists")
        assertEquals(dataFileDetailResponse.type, "text/plain")
        assertEquals(dataFileDetailResponse.sizeMB, 0.0)
        assertEquals(dataFileDetailResponse.sha256, sha1)

        val dataFileDetailResponse2 = dataFiles.dataFileDetailResponseList[1]
        assertEquals(dataFileDetailResponse2.title, "exists2")
        assertEquals(dataFileDetailResponse2.type, "text/pdf")
        assertEquals(dataFileDetailResponse2.sizeMB, 0.0)
        assertEquals(dataFileDetailResponse2.sha256, sha2)

        assertEquals(dataFiles.cursor, dataFileDetailResponse2.id)
        assertEquals(dataFiles.hasNext, false)
    }
Comment on lines +219 to +268
🧹 Nitpick (assertive)

Only the read happy path is covered: add tests for hasNext=true and a follow-up cursor query

The current test uses take=2 with 2 records, so it only verifies hasNext=false and nextCursor=last id. Please also cover:

  • hasNext=true (take < total record count), with nextCursor set appropriately
  • a follow-up page fetched with the returned cursor returning the expected data
  • a non-zero sizeMB conversion/rounding check (there is a bug-fix history here)

If you like, tests such as the following could be added:

@Test
@DisplayName("커서 기반 조회 - hasNext=true, cursor로 후속 페이지 조회")
@Transactional
fun getDataFiles_hasNext_and_followUpCursor() {
    // given
    val f1 = dataFileRepository.save(DataFile("t1", "text/plain", 0, "sha1", "u1"))
    val f2 = dataFileRepository.save(DataFile("t2", "text/plain", 0, "sha2", "u2"))
    val f3 = dataFileRepository.save(DataFile("t3", "text/plain", 0, "sha3", "u3"))

    // when: 첫 페이지
    val first = dataFileService.getDataFiles(cursor = 0L, take = 2)

    // then
    assertEquals(2, first.dataFileDetailResponseList.size)
    assertTrue(first.hasNext)
    val next = requireNotNull(first.cursor)

    // when: 후속 페이지
    val second = dataFileService.getDataFiles(cursor = next, take = 2)

    // then: 남은 1건
    assertEquals(1, second.dataFileDetailResponseList.size)
    assertFalse(second.hasNext)
    assertEquals("t3", second.dataFileDetailResponseList.first().title)
}

And an example sizeMB conversion check (adjust the expected values to the rounding policy):

@Test
@DisplayName("sizeBytes -> sizeMB 변환 검증")
@Transactional
fun getDataFiles_sizeMB_conversion() {
    val oneAndHalfMB = 1_572_864L // 1.5 MiB
    val sha = "shaX"
    dataFileRepository.save(DataFile("mb", "application/octet-stream", oneAndHalfMB, sha, "u"))
    val res = dataFileService.getDataFiles(cursor = 0L, take = 1)
    val item = res.dataFileDetailResponseList.first()
    // e.g., adjust the expected range below if this should render as 1.5 MB
    assertTrue(item.sizeMB in 1.49..1.51, "check the sizeMB conversion/rounding")
}

If you'd like these tests written out, I can help with a full code patch.


    // -----------------------
    // helpers
    // -----------------------