Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 32 additions & 34 deletions crates/file_store/src/entry_iter.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use bincode::Options;
use std::{
fs::File,
io::{self, Seek},
io::{self, BufReader, Seek},
marker::PhantomData,
};

Expand All @@ -14,8 +14,9 @@ use crate::bincode_options;
///
/// [`next`]: Self::next
pub struct EntryIter<'t, T> {
db_file: Option<&'t mut File>,

/// Buffered reader around the file
db_file: BufReader<&'t mut File>,
finished: bool,
/// The file position for the first read of `db_file`.
start_pos: Option<u64>,
types: PhantomData<T>,
Expand All @@ -24,8 +25,9 @@ pub struct EntryIter<'t, T> {
impl<'t, T> EntryIter<'t, T> {
pub fn new(start_pos: u64, db_file: &'t mut File) -> Self {
Self {
db_file: Some(db_file),
db_file: BufReader::new(db_file),
start_pos: Some(start_pos),
finished: false,
types: PhantomData,
}
}
Expand All @@ -38,44 +40,34 @@ where
type Item = Result<T, IterError>;

fn next(&mut self) -> Option<Self::Item> {
// closure which reads a single entry starting from `self.pos`
let read_one = |f: &mut File, start_pos: Option<u64>| -> Result<Option<T>, IterError> {
let pos = match start_pos {
Some(pos) => f.seek(io::SeekFrom::Start(pos))?,
None => f.stream_position()?,
};
if self.finished {
return None;
}
(|| {
if let Some(start) = self.start_pos.take() {
self.db_file.seek(io::SeekFrom::Start(start))?;
}

match bincode_options().deserialize_from(&*f) {
Ok(changeset) => {
f.stream_position()?;
Ok(Some(changeset))
}
let pos_before_read = self.db_file.stream_position()?;
Comment thread
evanlinjin marked this conversation as resolved.
match bincode_options().deserialize_from(&mut self.db_file) {
Ok(changeset) => Ok(Some(changeset)),
Err(e) => {
self.finished = true;
let pos_after_read = self.db_file.stream_position()?;
// allow unexpected EOF if 0 bytes were read
if let bincode::ErrorKind::Io(inner) = &*e {
if inner.kind() == io::ErrorKind::UnexpectedEof {
let eof = f.seek(io::SeekFrom::End(0))?;
if pos == eof {
return Ok(None);
}
if inner.kind() == io::ErrorKind::UnexpectedEof
&& pos_after_read == pos_before_read
{
return Ok(None);
}
}
f.seek(io::SeekFrom::Start(pos))?;
self.db_file.seek(io::SeekFrom::Start(pos_before_read))?;
Err(IterError::Bincode(*e))
}
}
};

let result = read_one(self.db_file.as_mut()?, self.start_pos.take());
if result.is_err() {
self.db_file = None;
}
result.transpose()
}
}

impl From<io::Error> for IterError {
fn from(value: io::Error) -> Self {
IterError::Io(value)
})()
.transpose()
}
}
Comment on lines 67 to 72
Copy link
Copy Markdown
Member

@evanlinjin evanlinjin Jan 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we still need a Drop implementation that syncs the underlying file's offset. However, we should use self.seek(SeekFrom::Current(0)).

Copy link
Copy Markdown
Collaborator Author

@LLFourn LLFourn Jan 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we write a test that demonstrates this need? I think I understand the rationale I'll see if I can come up one.

[EDIT] or maybe we can use a BufReader in the main type (not just the iterator).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@LLFourn I don't think using a BufReader with the internal file getting written to is a good idea. We'll need to do some crazy stuff with .get_mut and flush the buffer with every write.

I thought that the test I added demonstrated the need for the Drop impl but clearly not. We need a test to write after a failed read without closing the file in-between.


Expand All @@ -97,4 +89,10 @@ impl core::fmt::Display for IterError {
}
}

impl From<io::Error> for IterError {
fn from(value: io::Error) -> Self {
IterError::Io(value)
}
}

impl std::error::Error for IterError {}
76 changes: 73 additions & 3 deletions crates/file_store/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ mod test {

use bincode::DefaultOptions;
use std::{
collections::BTreeSet,
io::{Read, Write},
vec::Vec,
};
Expand All @@ -228,7 +229,7 @@ mod test {
const TEST_MAGIC_BYTES: [u8; TEST_MAGIC_BYTES_LEN] =
[98, 100, 107, 102, 115, 49, 49, 49, 49, 49, 49, 49];

type TestChangeSet = Vec<String>;
type TestChangeSet = BTreeSet<String>;

#[derive(Debug)]
struct TestTracker;
Expand All @@ -253,7 +254,7 @@ mod test {
fn open_or_create_new() {
let temp_dir = tempfile::tempdir().unwrap();
let file_path = temp_dir.path().join("db_file");
let changeset = vec!["hello".to_string(), "world".to_string()];
let changeset = BTreeSet::from(["hello".to_string(), "world".to_string()]);

{
let mut db = Store::<TestChangeSet>::open_or_create_new(&TEST_MAGIC_BYTES, &file_path)
Expand Down Expand Up @@ -304,7 +305,7 @@ mod test {
let mut data = [255_u8; 2000];
data[..TEST_MAGIC_BYTES_LEN].copy_from_slice(&TEST_MAGIC_BYTES);

let changeset = vec!["one".into(), "two".into(), "three!".into()];
let changeset = TestChangeSet::from(["one".into(), "two".into(), "three!".into()]);

let mut file = NamedTempFile::new().unwrap();
file.write_all(&data).expect("should write");
Expand Down Expand Up @@ -340,4 +341,73 @@ mod test {

assert_eq!(got_bytes, expected_bytes);
}

#[test]
fn last_write_is_short() {
let temp_dir = tempfile::tempdir().unwrap();

let changesets = [
TestChangeSet::from(["1".into()]),
TestChangeSet::from(["2".into(), "3".into()]),
TestChangeSet::from(["4".into(), "5".into(), "6".into()]),
];
let last_changeset = TestChangeSet::from(["7".into(), "8".into(), "9".into()]);
let last_changeset_bytes = bincode_options().serialize(&last_changeset).unwrap();

for short_write_len in 1..last_changeset_bytes.len() - 1 {
let file_path = temp_dir.path().join(format!("{}.dat", short_write_len));
println!("Test file: {:?}", file_path);

// simulate creating a file, writing data where the last write is incomplete
{
let mut db =
Store::<TestChangeSet>::create_new(&TEST_MAGIC_BYTES, &file_path).unwrap();
for changeset in &changesets {
db.append_changeset(changeset).unwrap();
}
// this is the incomplete write
db.db_file
.write_all(&last_changeset_bytes[..short_write_len])
.unwrap();
}

// load file again and aggregate changesets
// write the last changeset again (this time it succeeds)
{
let mut db = Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, &file_path).unwrap();
let err = db
.aggregate_changesets()
.expect_err("should return error as last read is short");
assert_eq!(
err.changeset,
changesets.iter().cloned().reduce(|mut acc, cs| {
Append::append(&mut acc, cs);
acc
}),
"should recover all changesets that are written in full",
);
db.db_file.write_all(&last_changeset_bytes).unwrap();
}

// load file again - this time we should successfully aggregate all changesets
{
let mut db = Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, &file_path).unwrap();
let aggregated_changesets = db
.aggregate_changesets()
.expect("aggregating all changesets should succeed");
assert_eq!(
aggregated_changesets,
changesets
.iter()
.cloned()
.chain(core::iter::once(last_changeset.clone()))
.reduce(|mut acc, cs| {
Append::append(&mut acc, cs);
acc
}),
"should recover all changesets",
);
}
}
}
}