From 2598c1ddb1dedfeb0eb1477e0e38d9d96d3940d0 Mon Sep 17 00:00:00 2001 From: forwardxu Date: Fri, 19 Dec 2025 11:04:09 +0800 Subject: [PATCH] docs: fix duplicate words in comments and error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed 7 instances of duplicate words: - 'to to' → 'to' (3 occurrences) - 'the the' → 'the' (4 occurrences) This is a documentation/comment cleanup with no functional changes. --- protos/transaction.proto | 2 +- rust/lance-datagen/src/generator.rs | 4 ++-- .../lance-encoding/src/previous/encodings/physical/bitpack.rs | 2 +- rust/lance-index/src/vector.rs | 2 +- rust/lance/src/dataset/scanner.rs | 2 +- rust/lance/src/io/exec/pushdown_scan.rs | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/protos/transaction.proto b/protos/transaction.proto index bcc49a16188..bec8155eb11 100644 --- a/protos/transaction.proto +++ b/protos/transaction.proto @@ -174,7 +174,7 @@ message Transaction { // integrity guarantees provided by the storage backend. bool is_shallow = 1; // the reference name in the source dataset - // in most cases it should be the the branch or tag name in the source dataset + // in most cases it should be the branch or tag name in the source dataset optional string ref_name = 2; // the version of the source dataset for cloning uint64 ref_version = 3; diff --git a/rust/lance-datagen/src/generator.rs b/rust/lance-datagen/src/generator.rs index bc319c1ed2e..c2a82ae062f 100644 --- a/rust/lance-datagen/src/generator.rs +++ b/rust/lance-datagen/src/generator.rs @@ -1022,7 +1022,7 @@ impl ArrayGenerator for RandomBinaryGenerator { /// Generate a sequence of strings with a prefix and a counter /// -/// For example, if the prefix is "user_" the the strings will be "user_0", "user_1", ... +/// For example, if the prefix is "user_" the strings will be "user_0", "user_1", ... 
#[derive(Debug)] pub struct PrefixPlusCounterGenerator { prefix: String, @@ -2598,7 +2598,7 @@ pub mod array { /// Creates a generator of strings with a prefix and a counter /// - /// For example, if the prefix is "user_" the the strings will be "user_0", "user_1", ... + /// For example, if the prefix is "user_" the strings will be "user_0", "user_1", ... pub fn utf8_prefix_plus_counter( prefix: impl Into, is_large: bool, diff --git a/rust/lance-encoding/src/previous/encodings/physical/bitpack.rs b/rust/lance-encoding/src/previous/encodings/physical/bitpack.rs index 7cc10e5f531..69fea9a8771 100644 --- a/rust/lance-encoding/src/previous/encodings/physical/bitpack.rs +++ b/rust/lance-encoding/src/previous/encodings/physical/bitpack.rs @@ -621,7 +621,7 @@ pub fn bitpack_params(arr: &dyn Array) -> Option { } } -// Compute the number bits to to use for bitpacking generically. +// Compute the number of bits to use for bitpacking generically. // returns None if the array is empty or all nulls fn bitpack_params_for_type(arr: &PrimitiveArray) -> Option where diff --git a/rust/lance-index/src/vector.rs b/rust/lance-index/src/vector.rs index f694810aec2..7871def65b6 100644 --- a/rust/lance-index/src/vector.rs +++ b/rust/lance-index/src/vector.rs @@ -86,7 +86,7 @@ pub struct Query { pub upper_bound: Option, /// The minimum number of probes to load and search. More partitions - /// will only be loaded if we have not found k results, or the the algorithm + /// will only be loaded if we have not found k results, or the algorithm /// determines more partitions are needed to satisfy recall requirements. /// /// The planner will always search at least this many partitions. Defaults to 1. 
diff --git a/rust/lance/src/dataset/scanner.rs b/rust/lance/src/dataset/scanner.rs index 31e7a31d68d..c83acd6bd14 100644 --- a/rust/lance/src/dataset/scanner.rs +++ b/rust/lance/src/dataset/scanner.rs @@ -1563,7 +1563,7 @@ impl Scanner { if self.autoproject_scoring_columns { if self.nearest.is_some() && output_expr.iter().all(|(_, name)| name != DIST_COL) { if self.explicit_projection { - log::warn!("Deprecation warning, this behavior will change in the future. This search specified output columns but did not include `_distance`. Currently the `_distance` column will be included. In the future it will not. Call `disable_scoring_autoprojection` to to adopt the future behavior and avoid this warning"); + log::warn!("Deprecation warning, this behavior will change in the future. This search specified output columns but did not include `_distance`. Currently the `_distance` column will be included. In the future it will not. Call `disable_scoring_autoprojection` to adopt the future behavior and avoid this warning"); } let vector_expr = expressions::col(DIST_COL, current_schema)?; output_expr.push((vector_expr, DIST_COL.to_string())); diff --git a/rust/lance/src/io/exec/pushdown_scan.rs b/rust/lance/src/io/exec/pushdown_scan.rs index c519751a0f3..6f07618655c 100644 --- a/rust/lance/src/io/exec/pushdown_scan.rs +++ b/rust/lance/src/io/exec/pushdown_scan.rs @@ -541,7 +541,7 @@ impl FragmentScanner { .project_by_schema(&self.projection.as_ref().into()) .map_err(|err| Error::Internal { message: format!( - "Failed to to select schema {} from batch with schema {}\nInner error: {}", + "Failed to select schema {} from batch with schema {}\nInner error: {}", self.projection, batch.schema(), err