diff --git a/DESCRIPTION b/DESCRIPTION index ee2ee1c..d71d7da 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,7 @@ Description: MSstats package provide tools for preprocessing, summarization and processing larger than memory data sets. License: Artistic-2.0 Encoding: UTF-8 -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Imports: arrow, DBI, @@ -23,7 +23,9 @@ Imports: readr, sparklyr, utils -Suggests: +Suggests: + testthat, + mockery, knitr, rmarkdown VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 6aaf83d..3c5047a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(MSstatsAddAnnotationBig) export(MSstatsPreprocessBig) +export(bigDIANNtoMSstatsFormat) export(bigFragPipetoMSstatsFormat) export(bigSpectronauttoMSstatsFormat) importFrom(MSstats,dataProcess) diff --git a/R/clean_DIANN.R b/R/clean_DIANN.R new file mode 100644 index 0000000..6bd867d --- /dev/null +++ b/R/clean_DIANN.R @@ -0,0 +1,290 @@ +#' Read and clean a large DIANN file in chunks +#' +#' @param input_file Path to the input DIANN file +#' @param output_path Path to the output CSV file +#' @param MBR Boolean, whether MBR was used +#' @param quantificationColumn Name of the column containing intensity values +#' @param global_qvalue_cutoff Global Q-value cutoff +#' @param qvalue_cutoff Q-value cutoff +#' @param pg_qvalue_cutoff Protein group Q-value cutoff +#' @return NULL. Writes to file. 
+#' @keywords internal +reduceBigDIANN <- function(input_file, output_path, MBR = TRUE, + quantificationColumn = "Fragment.Quant.Corrected", + global_qvalue_cutoff = 0.01, + qvalue_cutoff = 0.01, + pg_qvalue_cutoff = 0.01) { + if (grepl("csv", input_file)) { + delim = "," + } else if (grepl("tsv|xls", input_file)) { + delim = "\t" + } else { + delim <- ";" + } + + diann_chunk <- function(x, pos) cleanDIANNChunk(x, + output_path, + MBR, + quantificationColumn, + pos, + global_qvalue_cutoff, + qvalue_cutoff, + pg_qvalue_cutoff) + readr::read_delim_chunked(input_file, + readr::DataFrameCallback$new(diann_chunk), + delim = delim, + chunk_size = 1e6) +} + +#' Clean a single chunk of DIANN data +#' +#' @param input Data frame chunk +#' @param output_path Path to output file +#' @param MBR Boolean, whether MBR was used +#' @param quantificationColumn Name of intensity column +#' @param pos Chunk position (1 for first chunk, >1 for subsequent) +#' @param global_qvalue_cutoff Global Q-value cutoff +#' @param qvalue_cutoff Q-value cutoff +#' @param pg_qvalue_cutoff Protein group Q-value cutoff +#' @return NULL +#' @keywords internal +cleanDIANNChunk = function(input, output_path, MBR, quantificationColumn, pos, + global_qvalue_cutoff = 0.01, + qvalue_cutoff = 0.01, + pg_qvalue_cutoff = 0.01) { + # 1. Handle "auto" quantification column + processed <- .handleAutoQuantification(input, quantificationColumn) + input <- processed$input + quantificationColumn <- processed$quantificationColumn + + # 2. Select required columns + input <- .selectDIANNColumns(input, MBR, quantificationColumn) + input <- .cleanDIANNAddMissingColumns(input) + + # 3. Expand concatenated rows + input <- .expandDIANNRows(input, quantificationColumn) + + # 4. Process fragment info (extract intensity, charge, ion) + input <- .processDIANNFragmentInfo(input, quantificationColumn) + + # 5. Filter invalid fragments + input <- .filterDIANNFragments(input, quantificationColumn) + + # 6. 
Standardize column names + input <- .standardizeDIANNColumns(input, quantificationColumn) + + # 7. Filter by Q-values + input <- .filterDIANNByQValues(input, MBR, global_qvalue_cutoff, qvalue_cutoff, pg_qvalue_cutoff) + + # 8. Finalize columns (select final set, add IsotopeLabelType) + input <- .finalizeDIANNColumns(input) + + # 9. Write to output + .writeDIANNChunk(input, output_path, pos) + + NULL +} + +#' Handle automatic detection of quantification columns +#' +#' @param input Data frame +#' @param quantificationColumn Name of column or "auto" +#' @return List with input data frame and updated quantification column name +#' @keywords internal +.handleAutoQuantification <- function(input, quantificationColumn) { + if (quantificationColumn == "auto") { + fragment_columns <- grep("^Fr[0-9]+Quantity$", colnames(input), value = TRUE) + if (length(fragment_columns) == 0) { + stop("No fragment quantification columns found. Please check your input.") + } + input <- tidyr::unite(input, "FragmentQuantCorrected", all_of(fragment_columns), sep = ";") + quantificationColumn <- "FragmentQuantCorrected" + } + list(input = input, quantificationColumn = quantificationColumn) +} + +#' Select required columns from DIANN output +#' +#' @param input Data frame +#' @param MBR Boolean +#' @param quantificationColumn Name of intensity column +#' @return Data frame with selected columns +#' @keywords internal +.selectDIANNColumns <- function(input, MBR, quantificationColumn) { + base_cols <- c('Protein.Names', 'Stripped.Sequence', 'Modified.Sequence', + 'Precursor.Charge', quantificationColumn, 'Q.Value', + 'Precursor.Mz', 'Fragment.Info', 'Run') + + mbr_cols <- if (MBR) { + c('Lib.Q.Value', 'Lib.PG.Q.Value') + } else { + c('Global.Q.Value', 'Global.PG.Q.Value') + } + + req_cols <- intersect(c(base_cols, mbr_cols), colnames(input)) + dplyr::select(input, all_of(req_cols)) +} + +#' Add missing required columns +#' +#' @param input Data frame +#' @return Data frame with missing columns 
added +#' @keywords internal +.cleanDIANNAddMissingColumns <- function(input) { + if (!"Precursor.Mz" %in% colnames(input)) { + input <- dplyr::mutate(input, Precursor.Mz = NA) + } + if (!"Fragment.Info" %in% colnames(input)) { + input <- dplyr::mutate(input, Fragment.Info = NA) + } + input +} + +#' Expand rows with multiple fragments +#' +#' @param input Data frame +#' @param quantificationColumn Name of intensity column +#' @return Data frame with expanded rows +#' @keywords internal +.expandDIANNRows <- function(input, quantificationColumn) { + split_cols <- intersect(c(quantificationColumn, "Fragment.Info"), colnames(input)) + if (length(split_cols) > 0) { + tidyr::separate_rows(input, all_of(split_cols), sep = ";") + } else { + input + } +} + +#' Process fragment information strings +#' +#' @param input Data frame +#' @param quantificationColumn Name of intensity column +#' @return Data frame with FragmentIon and ProductCharge columns +#' @keywords internal +.processDIANNFragmentInfo <- function(input, quantificationColumn) { + # Convert Intensity to Numeric from Char strings + input[[quantificationColumn]] <- as.numeric(input[[quantificationColumn]]) + + # Generate fragment info if missing + if (all(is.na(input$Fragment.Info))) { + input <- dplyr::group_by(input, Protein.Names, Modified.Sequence, Precursor.Charge, Run) + input <- dplyr::mutate(input, Fragment.Info = paste0("Frag", dplyr::row_number())) + input <- dplyr::ungroup(input) + } + + dplyr::mutate( + input, + FragmentIon = sub('\\^\\.\\*', '', .data$Fragment.Info), + + # Extract product charge + ProductCharge = dplyr::if_else( + grepl("/", .data$Fragment.Info), + # Extract charge (number right after "/" in string), default to 1 if parsing fails + as.integer(stringr::str_extract(.data$Fragment.Info, "(?<=/)[0-9]+")), + 1L, + missing = 1L + ) + ) +} + +#' Filter invalid fragments +#' +#' @param input Data frame +#' @param quantificationColumn Name of intensity column +#' @return Filtered data frame +#' 
@keywords internal +.filterDIANNFragments <- function(input, quantificationColumn) { + dplyr::filter( + input, + (!grepl("NH3|H2O", .data$FragmentIon) | is.na(.data$FragmentIon)) & !is.na(.data[[quantificationColumn]]) + ) +} + +#' Standardize column names to MSstats format +#' +#' @param input Data frame +#' @param quantificationColumn Name of intensity column +#' @return Data frame with renamed columns +#' @keywords internal +.standardizeDIANNColumns <- function(input, quantificationColumn) { + input <- dplyr::rename_with(input, .fn = function(x) gsub("\\.", "", x)) + + # Standardize column names + clean_quant_col <- gsub("\\.", "", quantificationColumn) + old_names <- c('ProteinNames', 'StrippedSequence', 'ModifiedSequence', + 'PrecursorCharge', clean_quant_col, 'QValue', + 'PrecursorMz', 'FragmentIon', 'Run', 'ProductCharge') + new_names <- c('ProteinName', 'PeptideSequence', 'PeptideModifiedSequence', + 'PrecursorCharge', 'Intensity', 'DetectionQValue', + 'PrecursorMz', 'FragmentIon', 'Run', 'ProductCharge') + + current_names <- colnames(input) + names_to_rename <- intersect(current_names, old_names) + + # Create a named vector for renaming in the format c(new_name = old_name) + new_names_subset <- new_names[match(names_to_rename, old_names)] + rename_map <- setNames(names_to_rename, new_names_subset) + rename_map <- rename_map[!is.na(names(rename_map))] + + dplyr::rename(input, any_of(rename_map)) +} + +#' Filter data by Q-values +#' +#' @param input Data frame +#' @param MBR Boolean +#' @param global_qvalue_cutoff Numeric +#' @param qvalue_cutoff Numeric +#' @param pg_qvalue_cutoff Numeric +#' @return Filtered data frame +#' @keywords internal +.filterDIANNByQValues <- function(input, MBR, global_qvalue_cutoff, qvalue_cutoff, pg_qvalue_cutoff) { + input <- dplyr::filter(input, DetectionQValue < global_qvalue_cutoff) + + if (MBR) { + dplyr::filter(input, LibPGQValue < pg_qvalue_cutoff & LibQValue < qvalue_cutoff) + } else { + dplyr::filter(input, 
GlobalPGQValue < pg_qvalue_cutoff & GlobalQValue < qvalue_cutoff) + } +} + +#' Finalize columns for output +#' +#' @param input Data frame +#' @return Data frame with final columns +#' @keywords internal +.finalizeDIANNColumns <- function(input) { + # Final column selection for MSstats format + msstats_cols <- c("ProteinName", "PeptideSequence", "PeptideModifiedSequence", "PrecursorCharge", + "FragmentIon", "ProductCharge", "Run", "Intensity") + + + # Add annotation columns if they exist + if ("Condition" %in% colnames(input)) msstats_cols <- c(msstats_cols, "Condition") + if ("BioReplicate" %in% colnames(input)) msstats_cols <- c(msstats_cols, "BioReplicate") + + # Add IsotopeLabelType, assuming Light for DIANN + input$IsotopeLabelType <- "L" + msstats_cols <- c(msstats_cols, "IsotopeLabelType") + + final_cols <- intersect(msstats_cols, colnames(input)) + dplyr::select(input, all_of(final_cols)) +} + +#' Write chunk to file +#' +#' @param input Data frame +#' @param output_path Path to output file +#' @param pos Chunk position +#' @return NULL +#' @keywords internal +.writeDIANNChunk <- function(input, output_path, pos) { + # Write to file + if (!is.null(pos)) { + if (pos == 1) { + readr::write_csv(input, file = output_path, append = FALSE) + } else { + readr::write_csv(input, file = output_path, append = TRUE) + } + } +} \ No newline at end of file diff --git a/R/converters.R b/R/converters.R index 90bfca1..a96fead 100644 --- a/R/converters.R +++ b/R/converters.R @@ -155,6 +155,50 @@ bigSpectronauttoMSstatsFormat <- function(input_file, output_file_name, } +#' Convert out-of-memory DIANN files to MSstats format. +#' +#' @inheritParams MSstatsPreprocessBig +#' @param MBR True if analysis was done with match between runs. +#' @param quantificationColumn Use 'Fragment.Quant.Corrected'(default) column for quantified intensities for DIANN 1.8.x. +#' Use 'FragmentQuantRaw' for quantified intensities for DIANN 1.9.x. 
+#' +#' @export +#' +#' @return either arrow object or sparklyr table that can be optionally collected +#' into memory by using dplyr::collect function. +#' +bigDIANNtoMSstatsFormat <- function(input_file, + output_file_name, + backend, + MBR = TRUE, + quantificationColumn = "Fragment.Quant.Corrected", + max_feature_count = 100, + filter_unique_peptides = FALSE, + aggregate_psms = FALSE, + filter_few_obs = FALSE, + remove_annotation = FALSE, + calculateAnomalyScores=FALSE, + anomalyModelFeatures=c(), + connection = NULL) { + + # Reduce and clean the DIANN report file in chunks + reduceBigDIANN(input_file, + paste0("reduce_output_", output_file_name), + MBR, + quantificationColumn) + + # Preprocess the cleaned data (feature selection, etc.) + msstats_data <- MSstatsPreprocessBig( + paste0("reduce_output_", output_file_name), + output_file_name, backend, max_feature_count, + filter_unique_peptides, aggregate_psms, filter_few_obs, + remove_annotation, calculateAnomalyScores, + anomalyModelFeatures, connection) + + return(msstats_data) +} + + #' Merge annotation to output of MSstatsPreprocessBig #' #' @param input output of MSstatsPreprocessBig @@ -185,4 +229,3 @@ bigSpectronauttoMSstatsFormat <- function(input_file, output_file_name, MSstatsAddAnnotationBig <- function(input, annotation) { dplyr::inner_join(input, annotation, by = "Run") } - diff --git a/man/bigDIANNtoMSstatsFormat.Rd b/man/bigDIANNtoMSstatsFormat.Rd new file mode 100644 index 0000000..e9d6360 --- /dev/null +++ b/man/bigDIANNtoMSstatsFormat.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/converters.R +\name{bigDIANNtoMSstatsFormat} +\alias{bigDIANNtoMSstatsFormat} +\title{Convert out-of-memory DIANN files to MSstats format.} +\usage{ +bigDIANNtoMSstatsFormat( + input_file, + output_file_name, + backend, + MBR = TRUE, + quantificationColumn = "Fragment.Quant.Corrected", + max_feature_count = 100, + filter_unique_peptides = FALSE, + aggregate_psms = 
FALSE, + filter_few_obs = FALSE, + remove_annotation = FALSE, + calculateAnomalyScores = FALSE, + anomalyModelFeatures = c(), + connection = NULL +) +} +\arguments{ +\item{input_file}{name of the input text file in 10-column MSstats format.} + +\item{output_file_name}{name of an output file which will be saved after pre-processing} + +\item{backend}{"arrow" or "sparklyr". Option "sparklyr" requires a spark installation +and connection to spark instance provided in the `connection` parameter.} + +\item{MBR}{True if analysis was done with match between runs.} + +\item{quantificationColumn}{Use 'Fragment.Quant.Corrected'(default) column for quantified intensities for DIANN 1.8.x. +Use 'FragmentQuantRaw' for quantified intensities for DIANN 1.9.x.} + +\item{max_feature_count}{maximum number of features per protein. Features will +be selected based on highest average intensity.} + +\item{filter_unique_peptides}{If TRUE, shared peptides will be removed. +Please refer to the `Details` section for additional information.} + +\item{aggregate_psms}{If TRUE, multiple measurements per PSM in a Run will +be aggregated (by taking maximum value). Please refer to the `Details` section for additional information.} + +\item{filter_few_obs}{If TRUE, feature with less than 3 observations across runs will be removed. +Please refer to the `Details` section for additional information.} + +\item{remove_annotation}{If TRUE, columns BioReplicate and Condition will be removed +to reduce output file size. These will need to be added manually later before +using dataProcess function. 
Only applicable to sparklyr backend.} + +\item{calculateAnomalyScores}{If TRUE, will carry anomaly model features through pipeline} + +\item{anomalyModelFeatures}{Character vector of column names to be carried through the pipeline} + +\item{connection}{Connection to a spark instance created with the +`spark_connect` function from `sparklyr` package.} +} +\value{ +either arrow object or sparklyr table that can be optionally collected +into memory by using dplyr::collect function. +} +\description{ +Convert out-of-memory DIANN files to MSstats format. +} diff --git a/tests/testthat/test-converters.R b/tests/testthat/test-converters.R new file mode 100644 index 0000000..8fee265 --- /dev/null +++ b/tests/testthat/test-converters.R @@ -0,0 +1,95 @@ +library(testthat) +library(mockery) + +context("General converter functions") + +test_that("MSstatsAddAnnotationBig adds annotation correctly", { + input_data <- data.frame( + Run = c("Run1", "Run2", "Run3"), + Intensity = c(100, 200, 300) + ) + + annotation_data <- data.frame( + Run = c("Run1", "Run2", "Run3"), + Condition = c("A", "A", "B"), + BioReplicate = c(1, 2, 1) + ) + + expected_output <- data.frame( + Run = c("Run1", "Run2", "Run3"), + Intensity = c(100, 200, 300), + Condition = c("A", "A", "B"), + BioReplicate = c(1, 2, 1) + ) + + result <- MSstatsAddAnnotationBig(input_data, annotation_data) + + expect_equal(result, expected_output) +}) + +test_that("MSstatsPreprocessBig performs feature selection correctly", { + input_file <- tempfile(fileext = ".csv") + output_file <- "preprocess_output.csv" + + # P1 has 3 features (frag1, frag2, frag3). frag3 has the highest avg intensity. + # P2 has 2 features (fragA, fragB). fragB has the highest avg intensity. 
+ msstats_data <- rbind( + data.frame(ProteinName = "P1", PeptideSequence = "PEPTIDE", PrecursorCharge = 2, FragmentIon = rep(c("frag1", "frag2", "frag3"), each = 2), ProductCharge = 1, IsotopeLabelType = "L", Condition = "A", BioReplicate = rep(1:2, 3), Run = rep(c("run1", "run2"), 3), Intensity = c(1000, 1100, 500, 550, 2000, 2100)), + data.frame(ProteinName = "P2", PeptideSequence = "PEPTIDE2", PrecursorCharge = 3, FragmentIon = rep(c("fragA", "fragB"), each = 2), ProductCharge = 1, IsotopeLabelType = "L", Condition = "B", BioReplicate = rep(1:2, 2), Run = rep(c("run1", "run2"), 2), Intensity = c(100, 150, 800, 850)) + ) + readr::write_csv(msstats_data, input_file) + + processed <- MSstatsPreprocessBig(input_file, output_file, backend = "arrow", + max_feature_count = 1) + result <- dplyr::collect(processed) + + # For P1, frag3 should be selected. For P2, fragB should be selected. + expect_equal(nrow(result), 4) + + p1_result <- result[result$ProteinName == "P1", ] + expect_equal(nrow(p1_result), 2) + expect_true(all(p1_result$FragmentIon == "frag3")) + + p2_result <- result[result$ProteinName == "P2", ] + expect_equal(nrow(p2_result), 2) + expect_true(all(p2_result$FragmentIon == "fragB")) + + # Cleanup + file.remove(input_file) + if (file.exists(output_file)) file.remove(output_file) +}) + +test_that("bigSpectronauttoMSstatsFormat works correctly", { + # Mock reduceBigSpectronaut as its source is not provided + mock_reduce <- mock(NULL) + + stub(bigSpectronauttoMSstatsFormat, "reduceBigSpectronaut", function(input_file, output_path, ...) 
{ + msstats_data <- data.frame( + ProteinName = "P1", PeptideSequence = "PEPTIDE", PrecursorCharge = 2, + FragmentIon = rep(c("frag1", "frag2"), each = 2), ProductCharge = 1, + IsotopeLabelType = "L", Condition = "A", BioReplicate = rep(1:2, 2), + Run = rep(c("run1", "run2"), 2), Intensity = c(1000, 1100, 2000, 2100) # frag2 is higher + ) + readr::write_csv(msstats_data, output_path) + }) + + input_file <- "dummy_spectro_input.csv" + output_file <- "spectro_output.csv" + + processed <- bigSpectronauttoMSstatsFormat( + input_file = input_file, + output_file_name = output_file, + backend = "arrow", + max_feature_count = 1 + ) + result <- dplyr::collect(processed) + + # The mock reduce function creates a file with 2 features for P1. + # max_feature_count = 1 should select frag2. + expect_equal(nrow(result), 2) + expect_true(all(result$FragmentIon == "frag2")) + + # Cleanup + if (file.exists(output_file)) file.remove(output_file) + if (file.exists(paste0("reduce_output_", output_file))) file.remove(paste0("reduce_output_", output_file)) +}) \ No newline at end of file diff --git a/tests/testthat/test-diann_converter.R b/tests/testthat/test-diann_converter.R new file mode 100644 index 0000000..81b82cf --- /dev/null +++ b/tests/testthat/test-diann_converter.R @@ -0,0 +1,178 @@ +library(testthat) + +context("DIANN converter functions") + +# Test for the internal cleanDIANNChunk function +test_that("cleanDIANNChunk processes data correctly", { + output_file <- tempfile(fileext = ".csv") + + diann_chunk_data <- data.frame( + Run = "run1", + Protein.Names = "ProteinA", + Stripped.Sequence = "PEPTIDE", + Modified.Sequence = "PEPTIDE(mod)", + Precursor.Charge = 2, + Fragment.Quant.Corrected = "100;200", + Q.Value = 0.005, + Precursor.Mz = 400.5, + Fragment.Info = "y7^1/1;b3-H2O^1/1", # One valid, one to be filtered + Lib.Q.Value = 0.001, + Lib.PG.Q.Value = 0.001, + stringsAsFactors = FALSE + ) + + # The function is not exported, so we use ::: + 
MSstatsBig:::cleanDIANNChunk(diann_chunk_data, output_file, MBR = TRUE, + quantificationColumn = "Fragment.Quant.Corrected", pos = 1) + + result <- read.csv(output_file) + + expect_equal(nrow(result), 1) + expect_equal(result$ProteinName, "ProteinA") + expect_equal(result$PeptideSequence, "PEPTIDE") + expect_equal(result$Intensity, 100) + expect_equal(result$FragmentIon, "y7^1/1") + expect_equal(result$ProductCharge, 1) + expect_equal(result$IsotopeLabelType, "L") + expect_true("PeptideModifiedSequence" %in% colnames(result)) + + file.remove(output_file) +}) + +test_that("cleanDIANNChunk handles 'auto' quantification column correctly", { + output_file <- tempfile(fileext = ".csv") + + # Data with wide format fragment quantification + diann_chunk_wide <- data.frame( + Run = "run1", + Protein.Names = "ProteinA", + Stripped.Sequence = "PEPTIDE", + Modified.Sequence = "PEPTIDE", + Precursor.Charge = 2, + Fr1Quantity = 100, + Fr2Quantity = 200, + Q.Value = 0.005, + Precursor.Mz = 400.5, + Fragment.Info = "y1^1/1;y2^1/1", + Lib.Q.Value = 0.001, + Lib.PG.Q.Value = 0.001, + stringsAsFactors = FALSE + ) + + MSstatsBig:::cleanDIANNChunk(diann_chunk_wide, output_file, MBR = TRUE, + quantificationColumn = "auto", pos = 1) + + result <- read.csv(output_file) + + expect_equal(nrow(result), 2) + expect_equal(sort(result$Intensity), c(100, 200)) + expect_equal(sort(result$FragmentIon), c("y1^1/1", "y2^1/1")) + + file.remove(output_file) + + # Test error when columns are missing + diann_chunk_missing <- diann_chunk_wide[, !grepl("Quantity", names(diann_chunk_wide))] + expect_error(MSstatsBig:::cleanDIANNChunk(diann_chunk_missing, output_file, MBR = TRUE, + quantificationColumn = "auto", pos = 1), + "No fragment quantification columns found") +}) + +test_that("cleanDIANNChunk handles missing Fragment.Info by defaulting ProductCharge to 1", { + output_file <- tempfile(fileext = ".csv") + + # Data with missing Fragment.Info (simulating it not being present) + diann_chunk_missing <- 
data.frame( + Run = "run1", + Protein.Names = "ProteinA", + Stripped.Sequence = "PEPTIDE", + Modified.Sequence = "PEPTIDE", + Precursor.Charge = 2, + Fragment.Quant.Corrected = 100, + Q.Value = 0.005, + Precursor.Mz = 400.5, + # Fragment.Info is missing + Lib.Q.Value = 0.001, + Lib.PG.Q.Value = 0.001, + stringsAsFactors = FALSE + ) + + MSstatsBig:::cleanDIANNChunk(diann_chunk_missing, output_file, MBR = TRUE, + quantificationColumn = "Fragment.Quant.Corrected", pos = 1) + + result <- read.csv(output_file) + + expect_equal(nrow(result), 1) + expect_equal(result$ProductCharge, 1) + expect_equal(result$FragmentIon, "Frag1") + + file.remove(output_file) +}) + +# Test for the internal reduceBigDIANN function +test_that("reduceBigDIANN processes a file correctly", { + input_file <- tempfile(fileext = ".csv") + output_file <- tempfile(fileext = ".csv") + + diann_data <- data.frame( + Run = c("run1", "run1"), + Protein.Names = c("ProteinA", "ProteinB"), + Stripped.Sequence = c("PEPTIDE_A", "PEPTIDE_B"), + Modified.Sequence = c("PEPTIDE_A(mod)", "PEPTIDE_B"), + Precursor.Charge = c(2, 3), + Fragment.Quant.Corrected = c("100;200", "300"), + Q.Value = c(0.005, 0.006), + Precursor.Mz = c(400.5, 500.5), + Fragment.Info = c("y7^1/1;b3-H2O^1/1", "y5^1/2"), + Lib.Q.Value = c(0.001, 0.002), + Lib.PG.Q.Value = c(0.001, 0.002), + stringsAsFactors = FALSE + ) + write.csv(diann_data, input_file, row.names = FALSE) + + MSstatsBig:::reduceBigDIANN(input_file, output_file, MBR = TRUE, + quantificationColumn = "Fragment.Quant.Corrected") + + result <- read.csv(output_file) + expect_equal(nrow(result), 2) + expect_equal(result$Intensity, c(100, 300)) + expect_equal(result$ProteinName, c("ProteinA", "ProteinB")) + expect_equal(result$ProductCharge, c(1, 2)) + expect_equal(result$FragmentIon, c("y7^1/1", "y5^1/2")) + + file.remove(input_file) + file.remove(output_file) +}) + +# End-to-end test for bigDIANNtoMSstatsFormat +test_that("bigDIANNtoMSstatsFormat works with arrow backend", { + 
input_file <- tempfile(fileext = ".csv") + output_file <- basename(tempfile(fileext = ".csv")) + + # 4 features for one protein. Feature selection should pick the top 2. + diann_data <- rbind( + data.frame(Run = c("r1", "r2"), Protein.Names = "P1", Stripped.Sequence = "PEPTIDE", Modified.Sequence = "PEPTIDE", Precursor.Charge = 2, Fragment.Quant.Corrected = c(1000, 1100), Q.Value = 0.001, Precursor.Mz = 500, Fragment.Info = "y1", Lib.Q.Value = 0.001, Lib.PG.Q.Value = 0.001), + data.frame(Run = c("r1", "r2"), Protein.Names = "P1", Stripped.Sequence = "PEPTIDE", Modified.Sequence = "PEPTIDE", Precursor.Charge = 2, Fragment.Quant.Corrected = c(500, 600), Q.Value = 0.001, Precursor.Mz = 500, Fragment.Info = "y2", Lib.Q.Value = 0.001, Lib.PG.Q.Value = 0.001), + data.frame(Run = c("r1", "r2"), Protein.Names = "P1", Stripped.Sequence = "PEPTIDE", Modified.Sequence = "PEPTIDE", Precursor.Charge = 2, Fragment.Quant.Corrected = c(100, 100), Q.Value = 0.001, Precursor.Mz = 500, Fragment.Info = "y3", Lib.Q.Value = 0.001, Lib.PG.Q.Value = 0.001), + data.frame(Run = c("r1", "r2"), Protein.Names = "P1", Stripped.Sequence = "PEPTIDE", Modified.Sequence = "PEPTIDE", Precursor.Charge = 2, Fragment.Quant.Corrected = c(2000, 2100), Q.Value = 0.001, Precursor.Mz = 500, Fragment.Info = "y4", Lib.Q.Value = 0.001, Lib.PG.Q.Value = 0.001) + ) + write.csv(diann_data, input_file, row.names = FALSE) + + converted <- bigDIANNtoMSstatsFormat( + input_file = input_file, + output_file_name = output_file, + backend = "arrow", + max_feature_count = 2 + ) + result <- dplyr::collect(converted) + + # Avg intensities: y1=1050, y2=550, y3=100, y4=2050. + # Top 2 features are y4 and y1. 
+ expect_equal(nrow(result), 4) # 2 features * 2 runs + expect_true(all(c("y1", "y4") %in% unique(result$FragmentIon))) + expect_false(any(c("y2", "y3") %in% unique(result$FragmentIon))) + + # Cleanup + file.remove(input_file) + if (file.exists(output_file)) file.remove(output_file) + if (file.exists(paste0("reduce_output_", output_file))) file.remove(paste0("reduce_output_", output_file)) +}) \ No newline at end of file diff --git a/tests/testthat/topN_preprocess_output.csv b/tests/testthat/topN_preprocess_output.csv new file mode 100644 index 0000000..d9afa98 --- /dev/null +++ b/tests/testthat/topN_preprocess_output.csv @@ -0,0 +1,5 @@ +"ProteinName","PeptideSequence","PrecursorCharge","FragmentIon","ProductCharge","IsotopeLabelType","Condition","BioReplicate","Run","Intensity" +"P1","PEPTIDE",2,"frag3",1,"L","A",1,"run1",2000 +"P1","PEPTIDE",2,"frag3",1,"L","A",2,"run2",2100 +"P2","PEPTIDE2",3,"fragB",1,"L","B",1,"run1",800 +"P2","PEPTIDE2",3,"fragB",1,"L","B",2,"run2",850 diff --git a/tests/testthat/topN_spectro_output.csv b/tests/testthat/topN_spectro_output.csv new file mode 100644 index 0000000..b103ba2 --- /dev/null +++ b/tests/testthat/topN_spectro_output.csv @@ -0,0 +1,3 @@ +"ProteinName","PeptideSequence","PrecursorCharge","FragmentIon","ProductCharge","IsotopeLabelType","Condition","BioReplicate","Run","Intensity" +"P1","PEPTIDE",2,"frag2",1,"L","A",1,"run1",2000 +"P1","PEPTIDE",2,"frag2",1,"L","A",2,"run2",2100 diff --git a/tests/testthat/topN_test_diann_output.csv b/tests/testthat/topN_test_diann_output.csv new file mode 100644 index 0000000..95d41b3 --- /dev/null +++ b/tests/testthat/topN_test_diann_output.csv @@ -0,0 +1,5 @@ +"ProteinName","PeptideSequence","PeptideModifiedSequence","PrecursorCharge","FragmentIon","ProductCharge","Run","Intensity","IsotopeLabelType" +"P1","PEPTIDE","PEPTIDE",2,"y1",1,"r1",1000,"L" +"P1","PEPTIDE","PEPTIDE",2,"y1",1,"r2",1100,"L" +"P1","PEPTIDE","PEPTIDE",2,"y4",1,"r1",2000,"L" 
+"P1","PEPTIDE","PEPTIDE",2,"y4",1,"r2",2100,"L"