From d132205994c2c25f7c924d2b465dfd69c00563c3 Mon Sep 17 00:00:00 2001 From: zjw Date: Sat, 18 Jan 2025 19:51:53 +0800 Subject: [PATCH 1/6] w3d2 task: w1d1_test task3 capacity --- .vscode/launch.json | 40 +++++++++++++++++++- mini-lsm-starter/src/compact.rs | 2 +- mini-lsm-starter/src/lsm_iterator.rs | 12 ++++-- mini-lsm-starter/src/lsm_storage.rs | 56 ++++++++++++---------------- mini-lsm-starter/src/mem_table.rs | 17 ++++++--- 5 files changed, 83 insertions(+), 44 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 0fd118f..bb1b2b0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -40,12 +40,50 @@ "RUSTC_TOOLCHAIN=/home/zjw/.rustup/toolchains/stable-x86_64-unknown-linux-gnu" ], "preRunCommands": [ - "command script import ${workspaceFolder}/util/rust_prettifier_for_lldb.py", + "command script import ${workspaceFolder}/util/rust_prettifier_for_lldb.py" ], "args": [ "tests::week2_day5", "--show-output" ] + }, + { + "type": "lldb-dap", + "request": "launch", + "name": "test tests::week1_day2::test_task4_integration", + "cwd": "${workspaceFolder}/mini-lsm-starter", + "program": "${workspaceFolder}/target/debug/deps/mini_lsm_starter-430c4aaead80470b", + "env": [ + "RUST_BACKTRACE=short", + "RUSTC_TOOLCHAIN=/home/zjw/.rustup/toolchains/stable-x86_64-unknown-linux-gnu" + ], + "preRunCommands": [ + "command script import ${workspaceFolder}/util/rust_prettifier_for_lldb.py" + ], + "args": [ + "tests::week1_day2::test_task4_integration", + "--exact", + "--show-output" + ] + }, + { + "type": "lldb-dap", + "request": "launch", + "name": "test tests::week1_day1::test_task3_freeze_on_capacity", + "cwd": "${workspaceFolder}/mini-lsm-starter", + "program": "${workspaceFolder}/target/debug/deps/mini_lsm_starter-430c4aaead80470b", + "env": [ + "RUST_BACKTRACE=short", + "RUSTC_TOOLCHAIN=/home/zjw/.rustup/toolchains/stable-x86_64-unknown-linux-gnu" + ], + "preRunCommands": [ + "command script import ${workspaceFolder}/util/rust_prettifier_for_lldb.py" + ], + "args": [ + "tests::week1_day1::test_task3_freeze_on_capacity", + "--exact", + "--show-output" + ] } ] } \ No newline at end of file diff --git a/mini-lsm-starter/src/compact.rs b/mini-lsm-starter/src/compact.rs index 09724e8..46ce8dc 100644 --- a/mini-lsm-starter/src/compact.rs +++ b/mini-lsm-starter/src/compact.rs @@ -134,7 +134,7 @@ impl LsmStorageInner { let builder_inner = sst_builder.as_mut().unwrap(); builder_inner.add(iter.key(), iter.value()); - if builder_inner.estimated_size() >= target_sst_size && !is_same_as_prevkey{ + if builder_inner.estimated_size() >= target_sst_size && !is_same_as_prevkey { let next_sst_id = self.next_sst_id(); let block_cache = self.block_cache.clone(); let builder = sst_builder.take().unwrap(); diff --git a/mini-lsm-starter/src/lsm_iterator.rs b/mini-lsm-starter/src/lsm_iterator.rs index cb29654..54f5b8a 100644 --- a/mini-lsm-starter/src/lsm_iterator.rs +++ b/mini-lsm-starter/src/lsm_iterator.rs @@ -34,7 +34,7 @@ impl LsmIterator { end_bound, prev_key: Vec::new(), }; - valid_iter.move_to_next_key()?; + valid_iter.move_to_key()?; Ok(valid_iter) } @@ -49,7 +49,7 @@ impl LsmIterator { // Ok(()) // } - fn move_to_next_key(&mut self) -> Result<()> { + fn move_to_key(&mut self) -> Result<()> { // while self.inner.is_valid() && self.inner.value().is_empty() { // self.inner.next()?; // } @@ -66,6 +66,10 @@ impl LsmIterator { self.prev_key.clear(); self.prev_key.extend(self.inner.key().key_ref()); + + if !self.inner.value().is_empty() { + break; + } } Ok(()) } @@ -76,7 +80,7 @@ impl LsmIterator { self.is_valid = false; return Ok(()); } - + match self.end_bound.as_ref() { Bound::Included(x) => { self.is_valid = @@ -110,7 +114,7 @@ impl StorageIterator for LsmIterator { fn next(&mut self) -> Result<()> { // self.inner.next()?; self.inner_next()?; - self.move_to_next_key()?; + self.move_to_key()?; Ok(()) } diff --git a/mini-lsm-starter/src/lsm_storage.rs b/mini-lsm-starter/src/lsm_storage.rs index a16c7fd..0312a27 100644 --- a/mini-lsm-starter/src/lsm_storage.rs +++ b/mini-lsm-starter/src/lsm_storage.rs @@ -456,7 +456,7 @@ impl LsmStorageInner { compaction_controller, manifest: Some(manifest), options: options.into(), - mvcc: None, + mvcc: Some(LsmMvccInner::new(0)), compaction_filters: Arc::new(Mutex::new(Vec::new())), }; @@ -474,7 +474,7 @@ impl LsmStorageInner { compaction_filters.push(compaction_filter); } - pub fn mvcc(&self) -> &LsmMvccInner{ + pub fn mvcc(&self) -> &LsmMvccInner { self.mvcc.as_ref().unwrap() } @@ -485,7 +485,7 @@ impl LsmStorageInner { let state_guard = self.state.read(); snapshot = Arc::clone(&state_guard); } - + //first search in mem_table // if let Some(bytes) = snapshot.memtable.get(key) { // if bytes.is_empty() { @@ -496,24 +496,27 @@ impl LsmStorageInner { // } let mut memtable_iters = Vec::with_capacity(snapshot.imm_memtables.len() + 1); memtable_iters.push(Box::new(snapshot.memtable.scan( - Bound::Included(KeySlice::from_slice_with_ts(key, TS_MAX)), - Bound::Included(KeySlice::from_slice_with_ts(key, TS_MIN))))); + Bound::Included(KeySlice::from_slice_with_ts(key, TS_MAX)), + Bound::Included(KeySlice::from_slice_with_ts(key, TS_MIN)), + ))); //then search in immutable mem_table // for imme_table in &storage_guard.imm_memtables { for imme_table in snapshot.imm_memtables.iter() { - memtable_iters.push(Box::new(imme_table.scan( - Bound::Included(KeySlice::from_slice_with_ts(key, TS_MAX)), - Bound::Included(KeySlice::from_slice_with_ts(key, TS_MIN))))); + Bound::Included(KeySlice::from_slice_with_ts(key, TS_MAX)), + Bound::Included(KeySlice::from_slice_with_ts(key, TS_MIN)), + ))); } - let mut merge_memtb_iter = MergeIterator::create(memtable_iters); + let merge_memtb_iter = MergeIterator::create(memtable_iters); while merge_memtb_iter.is_valid() { if !merge_memtb_iter.value().is_empty() { return Ok(Some(Bytes::copy_from_slice(merge_memtb_iter.value()))); + } else { + return Ok(None); } - merge_memtb_iter.next()?; + // merge_memtb_iter.next()?; } //search in l0_Sstable @@ -758,7 +761,7 @@ impl LsmStorageInner { let end_bound = upper; let lower = map_bound_plus_ts(lower, TS_DEFAULT); - let upper= map_bound_plus_ts(upper, TS_DEFAULT); + let upper = map_bound_plus_ts(upper, TS_DEFAULT); //create merge_mem_iters let mut memtable_iters = Vec::with_capacity(snapshot.imm_memtables.len() + 1); memtable_iters.push(Box::new(snapshot.memtable.scan(lower, upper))); @@ -775,18 +778,10 @@ impl LsmStorageInner { continue; } let iter = match lower { - Bound::Included(x) => SsTableIterator::create_and_seek_to_key( - table, - x, - )?, + Bound::Included(x) => SsTableIterator::create_and_seek_to_key(table, x)?, Bound::Excluded(x) => { - let mut sst_tmp_iter = SsTableIterator::create_and_seek_to_key( - table, - x, - )?; - if sst_tmp_iter.is_valid() - && sst_tmp_iter.key() == x - { + let mut sst_tmp_iter = SsTableIterator::create_and_seek_to_key(table, x)?; + if sst_tmp_iter.is_valid() && sst_tmp_iter.key() == x { sst_tmp_iter.next()?; } sst_tmp_iter @@ -809,18 +804,13 @@ impl LsmStorageInner { } if !li_sstables.is_empty() { let iter = match lower { - Bound::Included(x) => SstConcatIterator::create_and_seek_to_key( - li_sstables, - x, - )?, + Bound::Included(x) => { + SstConcatIterator::create_and_seek_to_key(li_sstables, x)? + } Bound::Excluded(x) => { - let mut sst_tmp_iter = SstConcatIterator::create_and_seek_to_key( - li_sstables, - x, - )?; - if sst_tmp_iter.is_valid() - && sst_tmp_iter.key() == x - { + let mut sst_tmp_iter = + SstConcatIterator::create_and_seek_to_key(li_sstables, x)?; + if sst_tmp_iter.is_valid() && sst_tmp_iter.key() == x { sst_tmp_iter.next()?; } sst_tmp_iter diff --git a/mini-lsm-starter/src/mem_table.rs b/mini-lsm-starter/src/mem_table.rs index cc5f542..aa2e074 100644 --- a/mini-lsm-starter/src/mem_table.rs +++ b/mini-lsm-starter/src/mem_table.rs @@ -34,7 +34,7 @@ pub(crate) fn map_bound(bound: Bound<&[u8]>) -> Bound { } } -pub(crate) fn map_bound_plus_ts(bound: Bound<&[u8]>, ts:u64) -> Bound { +pub(crate) fn map_bound_plus_ts(bound: Bound<&[u8]>, ts: u64) -> Bound { match bound { Bound::Included(x) => Bound::Included(KeySlice::from_slice_with_ts(x, ts)), Bound::Excluded(x) => Bound::Excluded(KeySlice::from_slice_with_ts(x, ts)), @@ -108,7 +108,7 @@ impl MemTable { upper: Bound<&[u8]>, ) -> MemTableIterator { let lower = map_bound_plus_ts(lower, TS_DEFAULT); - let upper= map_bound_plus_ts(upper, TS_DEFAULT); + let upper = map_bound_plus_ts(upper, TS_DEFAULT); self.scan(lower, upper) } @@ -129,10 +129,12 @@ impl MemTable { /// In week 3, day 5, modify the function to use the batch API. pub fn put(&self, key: KeySlice, value: &[u8]) -> Result<()> { let inc_sizes = key.raw_len() + value.len(); + println!("try to insert key:{:?}, ts:{:?}", key.key_ref(), key.ts()); self.map.insert( - KeyBytes::from_bytes_with_ts(Bytes::copy_from_slice(&key.key_ref()), key.ts()), + KeyBytes::from_bytes_with_ts(Bytes::copy_from_slice(key.key_ref()), key.ts()), Bytes::copy_from_slice(value), ); + println!("insertion done!"); self.approximate_size .fetch_add(inc_sizes, std::sync::atomic::Ordering::Relaxed); if let Some(ref wal) = self.wal { @@ -193,8 +195,13 @@ impl MemTable { } } -type SkipMapRangeIter<'a> = - crossbeam_skiplist::map::Range<'a, KeyBytes, (Bound, Bound), KeyBytes, Bytes>; +type SkipMapRangeIter<'a> = crossbeam_skiplist::map::Range< + 'a, + KeyBytes, + (Bound, Bound), + KeyBytes, + Bytes, +>; /// An iterator over a range of `SkipMap`. This is a self-referential structure and please refer to week 1, day 2 /// chapter for more information. From 37e066837edb5c93047da53a52569b252649d265 Mon Sep 17 00:00:00 2001 From: zjw Date: Sun, 19 Jan 2025 16:13:14 +0800 Subject: [PATCH 2/6] fix dead loop when insert same key with different timestamp to skiplist due to incorrect PartialEq implementation --- .vscode/launch.json | 18 +++++ mini-lsm-starter/src/block/iterator.rs | 1 + mini-lsm-starter/src/compact.rs | 2 +- .../src/iterators/merge_iterator.rs | 23 ++++-- .../src/iterators/two_merge_iterator.rs | 18 ++--- mini-lsm-starter/src/key.rs | 2 + mini-lsm-starter/src/key/timestamped_key.rs | 9 ++- mini-lsm-starter/src/lsm_storage.rs | 74 ++++++++++++------- mini-lsm-starter/src/mem_table.rs | 8 +- mini-lsm-starter/src/table/iterator.rs | 7 +- mini-lsm-starter/src/tests/harness.rs | 4 +- 11 files changed, 115 insertions(+), 51 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index bb1b2b0..3ed6680 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -84,6 +84,24 @@ "--exact", "--show-output" ] + }, + { + "type": "lldb-dap", + "request": "launch", + "name": "test-mod tests::week1_day6", + "cwd": "${workspaceFolder}/mini-lsm-starter", + "program": "${workspaceFolder}/target/debug/deps/mini_lsm_starter-430c4aaead80470b", + "env": [ + "RUST_BACKTRACE=short", + "RUSTC_TOOLCHAIN=/home/zjw/.rustup/toolchains/stable-x86_64-unknown-linux-gnu" + ], + "preRunCommands": [ + "command script import ${workspaceFolder}/util/rust_prettifier_for_lldb.py" + ], + "args": [ + "tests::week1_day6", + "--show-output" + ] } ] } \ No newline at end of file diff --git a/mini-lsm-starter/src/block/iterator.rs b/mini-lsm-starter/src/block/iterator.rs index 686044a..18a86c8 100644 --- a/mini-lsm-starter/src/block/iterator.rs +++ b/mini-lsm-starter/src/block/iterator.rs @@ -105,6 +105,7 @@ impl BlockIterator { .append(&self.first_key.key_ref()[..key_overlap_len]); self.key.append(key); let ts = entry.get_u64(); + self.key.set_ts(ts); let value_len = entry.get_u16() as usize; let value_offset_begin = offset + SIZEOF_U16 * 2 + res_len + SIZEOF_U64 + SIZEOF_U16; let value_offset_end = value_offset_begin + value_len; diff --git a/mini-lsm-starter/src/compact.rs b/mini-lsm-starter/src/compact.rs index 46ce8dc..74a3855 100644 --- a/mini-lsm-starter/src/compact.rs +++ b/mini-lsm-starter/src/compact.rs @@ -148,7 +148,7 @@ impl LsmStorageInner { iter.next()?; - if !is_same_as_prevkey { + if !is_same_as_prevkey && iter.is_valid() { prev_key.clear(); prev_key.extend(iter.key().key_ref()); } diff --git a/mini-lsm-starter/src/iterators/merge_iterator.rs b/mini-lsm-starter/src/iterators/merge_iterator.rs index f8f8497..7b2bf11 100644 --- a/mini-lsm-starter/src/iterators/merge_iterator.rs +++ b/mini-lsm-starter/src/iterators/merge_iterator.rs @@ -94,11 +94,17 @@ impl StorageIterator = KeySlice<'a>>> StorageIt while let Some(mut inner_iter) = self.iters.peek_mut() { //cur.1.key is the latest data, when next.1.key==cur.1.key , inner_iter should call inner_iter.next() // eprintln!( - // "cur.1.key is {:?}, cur.1.value is {:?}", + // "cur.1.key is {:?}, ts is {:?}, cur.1.value is {:?}", // cur.1.key(), + // cur.1.key().ts(), // cur.1.value() // ); - // eprintln!("inner_iter.1.key is {:?}", inner_iter.1.key()); + // eprintln!( + // "inner_iter.1.key is {:?}, ts is {:?}, inner_iter.1.value is {:?}", + // inner_iter.1.key(), + // inner_iter.1.key().ts(), + // inner_iter.1.value() + // ); // io::stdout().flush().unwrap(); if cur.1.key() == inner_iter.1.key() { /* @@ -160,9 +166,14 @@ impl StorageIterator = KeySlice<'a>>> StorageIt } fn num_active_iterators(&self) -> usize { - match self.current { - Some(_) => self.iters.len() + 1, - None => self.iters.len(), - } + self.iters + .iter() + .map(|x| x.1.num_active_iterators()) + .sum::() + + self + .current + .as_ref() + .map(|x| x.1.num_active_iterators()) + .unwrap_or(0) } } diff --git a/mini-lsm-starter/src/iterators/two_merge_iterator.rs b/mini-lsm-starter/src/iterators/two_merge_iterator.rs index 01853a2..0ecfb30 100644 --- a/mini-lsm-starter/src/iterators/two_merge_iterator.rs +++ b/mini-lsm-starter/src/iterators/two_merge_iterator.rs @@ -81,16 +81,16 @@ impl< } fn num_active_iterators(&self) -> usize { - if !self.aflag { - if !self.b.is_valid() { - return 0; - } - return self.b.num_active_iterators(); - } + // if !self.aflag { + // if !self.b.is_valid() { + // return 0; + // } + // return self.b.num_active_iterators(); + // } - if !self.b.is_valid() { - return self.a.num_active_iterators(); - } + // if !self.b.is_valid() { + // return self.a.num_active_iterators(); + // } self.a.num_active_iterators() + self.b.num_active_iterators() } diff --git a/mini-lsm-starter/src/key.rs b/mini-lsm-starter/src/key.rs index 438c34a..561a3b9 100644 --- a/mini-lsm-starter/src/key.rs +++ b/mini-lsm-starter/src/key.rs @@ -5,6 +5,8 @@ use timestamped_key as key; pub const TS_ENABLED: bool = key::TS_ENABLED; pub const TS_DEFAULT: u64 = key::TS_DEFAULT; +pub const TS_RANGE_BEGIN: u64 = key::TS_RANGE_BEGIN; +pub const TS_RANGE_END: u64 = key::TS_RANGE_END; pub const TS_MAX: u64 = key::TS_MAX; pub const TS_MIN: u64 = key::TS_MIN; diff --git a/mini-lsm-starter/src/key/timestamped_key.rs b/mini-lsm-starter/src/key/timestamped_key.rs index c282e1f..616dcec 100644 --- a/mini-lsm-starter/src/key/timestamped_key.rs +++ b/mini-lsm-starter/src/key/timestamped_key.rs @@ -12,6 +12,8 @@ pub(super) type KeyVec = Key>; pub(super) type KeyBytes = Key; pub const TS_DEFAULT: u64 = 0; +pub const TS_RANGE_BEGIN: u64 = std::u64::MAX; +pub const TS_RANGE_END: u64 = std::u64::MIN; pub const TS_MAX: u64 = std::u64::MAX; pub const TS_MIN: u64 = std::u64::MIN; @@ -58,6 +60,11 @@ impl Key> { self.0.extend(data) } + /// set ts + pub fn set_ts(&mut self, ts: u64) { + self.1 = ts; + } + /// Set the key from a slice without re-allocating. The signature will change in week 3. pub fn set_from_slice(&mut self, key_slice: KeySlice) { self.0.clear(); @@ -168,7 +175,7 @@ impl + Default> Default for Key { impl + PartialEq> PartialEq for Key { fn eq(&self, other: &Self) -> bool { - self.0.eq(&other.0) + (self.0.as_ref(), self.1).eq(&(other.0.as_ref(), other.1)) } } diff --git a/mini-lsm-starter/src/lsm_storage.rs b/mini-lsm-starter/src/lsm_storage.rs index 0312a27..1fc18f3 100644 --- a/mini-lsm-starter/src/lsm_storage.rs +++ b/mini-lsm-starter/src/lsm_storage.rs @@ -21,7 +21,7 @@ use crate::iterators::concat_iterator::SstConcatIterator; use crate::iterators::merge_iterator::MergeIterator; use crate::iterators::two_merge_iterator::TwoMergeIterator; use crate::iterators::StorageIterator; -use crate::key::{KeySlice, TS_DEFAULT, TS_MAX, TS_MIN}; +use crate::key::{KeySlice, TS_DEFAULT, TS_MAX, TS_MIN, TS_RANGE_BEGIN, TS_RANGE_END}; use crate::lsm_iterator::{FusedIterator, LsmIterator}; use crate::manifest::{Manifest, ManifestRecord}; use crate::mem_table::{map_bound, map_bound_plus_ts, MemTable}; @@ -293,30 +293,32 @@ pub fn range_overlap( user_upper: Bound, table: &Arc, ) -> bool { - /* - [user_lower, ) - */ - let first_key = table.first_key().as_key_slice(); - let last_key = table.last_key().as_key_slice(); + let first_key = table.first_key().key_ref(); + let last_key = table.last_key().key_ref(); match user_lower { - Bound::Included(key) if last_key < key => { + // [user_lower, user_upper ) + // [first_key, last_key ] + Bound::Included(key) if last_key < key.key_ref() => { return false; } - Bound::Excluded(key) if last_key <= key => { + // (user_lower, user_upper ) + // [first_key, last_key ] + Bound::Excluded(key) if last_key <= key.key_ref() => { return false; } _ => {} } - /* - [, user_upper] - */ match user_upper { - Bound::Included(key) if key < first_key => { + // (user_lower, user_upper] + // [first_key, last_key ] + Bound::Included(key) if key.key_ref() < first_key => { return false; } - Bound::Excluded(key) if key <= first_key => { + // (user_lower, user_upper) + // [first_key, last_key ] + Bound::Excluded(key) if key.key_ref() <= first_key => { return false; } _ => {} @@ -760,13 +762,17 @@ impl LsmStorageInner { } let end_bound = upper; - let lower = map_bound_plus_ts(lower, TS_DEFAULT); - let upper = map_bound_plus_ts(upper, TS_DEFAULT); //create merge_mem_iters let mut memtable_iters = Vec::with_capacity(snapshot.imm_memtables.len() + 1); - memtable_iters.push(Box::new(snapshot.memtable.scan(lower, upper))); + memtable_iters.push(Box::new(snapshot.memtable.scan( + map_bound_plus_ts(lower, TS_RANGE_BEGIN), + map_bound_plus_ts(upper, TS_RANGE_END), + ))); for imm_memtable in snapshot.imm_memtables.iter() { - memtable_iters.push(Box::new(imm_memtable.scan(lower, upper))); + memtable_iters.push(Box::new(imm_memtable.scan( + map_bound_plus_ts(lower, TS_RANGE_BEGIN), + map_bound_plus_ts(upper, TS_RANGE_END), + ))); } let merge_mem_iters = MergeIterator::create(memtable_iters); @@ -774,14 +780,25 @@ impl LsmStorageInner { let mut sst_iters = Vec::with_capacity(snapshot.l0_sstables.len()); for sst_idx in snapshot.l0_sstables.iter() { let table = snapshot.sstables[sst_idx].clone(); - if !range_overlap(lower, upper, &table) { + if !range_overlap( + map_bound_plus_ts(lower, TS_RANGE_BEGIN), + map_bound_plus_ts(upper, TS_RANGE_END), + &table, + ) { continue; } let iter = match lower { - Bound::Included(x) => SsTableIterator::create_and_seek_to_key(table, x)?, + Bound::Included(x) => SsTableIterator::create_and_seek_to_key( + table, + KeySlice::from_slice_with_ts(x, TS_RANGE_BEGIN), + )?, Bound::Excluded(x) => { - let mut sst_tmp_iter = SsTableIterator::create_and_seek_to_key(table, x)?; - if sst_tmp_iter.is_valid() && sst_tmp_iter.key() == x { + //TODO : note that excluded(x) should seek to the first keyslice of key_ref() and skip to next different key_ref(), if seek to (x, ts_range_end), we may seek to the last key of the table. + let mut sst_tmp_iter = SsTableIterator::create_and_seek_to_key( + table, + KeySlice::from_slice_with_ts(x, TS_RANGE_BEGIN), + )?; + while sst_tmp_iter.is_valid() && sst_tmp_iter.key().key_ref() == x { sst_tmp_iter.next()?; } sst_tmp_iter @@ -804,13 +821,16 @@ impl LsmStorageInner { } if !li_sstables.is_empty() { let iter = match lower { - Bound::Included(x) => { - SstConcatIterator::create_and_seek_to_key(li_sstables, x)? - } + Bound::Included(x) => SstConcatIterator::create_and_seek_to_key( + li_sstables, + KeySlice::from_slice_with_ts(x, TS_RANGE_BEGIN), + )?, Bound::Excluded(x) => { - let mut sst_tmp_iter = - SstConcatIterator::create_and_seek_to_key(li_sstables, x)?; - if sst_tmp_iter.is_valid() && sst_tmp_iter.key() == x { + let mut sst_tmp_iter = SstConcatIterator::create_and_seek_to_key( + li_sstables, + KeySlice::from_slice_with_ts(x, TS_RANGE_BEGIN), + )?; + while sst_tmp_iter.is_valid() && sst_tmp_iter.key().key_ref() == x { sst_tmp_iter.next()?; } sst_tmp_iter diff --git a/mini-lsm-starter/src/mem_table.rs b/mini-lsm-starter/src/mem_table.rs index aa2e074..e60500b 100644 --- a/mini-lsm-starter/src/mem_table.rs +++ b/mini-lsm-starter/src/mem_table.rs @@ -129,12 +129,10 @@ impl MemTable { /// In week 3, day 5, modify the function to use the batch API. pub fn put(&self, key: KeySlice, value: &[u8]) -> Result<()> { let inc_sizes = key.raw_len() + value.len(); - println!("try to insert key:{:?}, ts:{:?}", key.key_ref(), key.ts()); self.map.insert( KeyBytes::from_bytes_with_ts(Bytes::copy_from_slice(key.key_ref()), key.ts()), Bytes::copy_from_slice(value), ); - println!("insertion done!"); self.approximate_size .fetch_add(inc_sizes, std::sync::atomic::Ordering::Relaxed); if let Some(ref wal) = self.wal { @@ -172,6 +170,12 @@ impl MemTable { /// Flush the mem-table to SSTable. Implement in week 1 day 6. pub fn flush(&self, builder: &mut SsTableBuilder) -> Result<()> { for entry in self.map.iter() { + // println!( + // "add entry key: {:?}, ts: {:?}, value: {:?}", + // entry.key().key_ref(), + // entry.key().ts(), + // entry.value() + // ); builder.add( KeySlice::from_slice_with_ts(entry.key().key_ref(), entry.key().ts()), entry.value(), diff --git a/mini-lsm-starter/src/table/iterator.rs b/mini-lsm-starter/src/table/iterator.rs index fdeace2..f482ba9 100644 --- a/mini-lsm-starter/src/table/iterator.rs +++ b/mini-lsm-starter/src/table/iterator.rs @@ -68,9 +68,9 @@ impl SsTableIterator { /// this function. pub fn seek_to_key(&mut self, key: KeySlice) -> Result<()> { let mut blk_idx = self.table.find_block_idx(key); - if blk_idx == 25 { - blk_idx = self.table.find_block_idx(key); - } + // if blk_idx == 25 { + // blk_idx = self.table.find_block_idx(key); + // } let mut block = self.table.read_block_cached(blk_idx)?; let mut blk_iter = BlockIterator::create_and_seek_to_key(block, key); @@ -92,7 +92,6 @@ impl StorageIterator for SsTableIterator { /// Return the `key` that's held by the underlying block iterator. fn key(&self) -> KeySlice { - // unimplemented!() self.blk_iter.key() } diff --git a/mini-lsm-starter/src/tests/harness.rs b/mini-lsm-starter/src/tests/harness.rs index 0553b03..0e55768 100644 --- a/mini-lsm-starter/src/tests/harness.rs +++ b/mini-lsm-starter/src/tests/harness.rs @@ -98,9 +98,11 @@ where assert_eq!( k, iter.key().for_testing_key_ref(), - "expected key: {:?}, actual key: {:?}", + "expected key: {:?}, actual key: {:?}, expected value: {:?}, actual value: {:?}", k, as_bytes(iter.key().for_testing_key_ref()), + v, + as_bytes(iter.value()), ); assert_eq!( v, From 4448401273f73c999aaeed1d8dbde72d0b7dfd12 Mon Sep 17 00:00:00 2001 From: zjw Date: Sun, 19 Jan 2025 21:11:38 +0800 Subject: [PATCH 3/6] w3d2 : refactor get() in lsm_storage --- mini-lsm-starter/src/lsm_storage.rs | 90 ++++++++++++----------------- 1 file changed, 38 insertions(+), 52 deletions(-) diff --git a/mini-lsm-starter/src/lsm_storage.rs b/mini-lsm-starter/src/lsm_storage.rs index 1fc18f3..ab690c0 100644 --- a/mini-lsm-starter/src/lsm_storage.rs +++ b/mini-lsm-starter/src/lsm_storage.rs @@ -21,7 +21,7 @@ use crate::iterators::concat_iterator::SstConcatIterator; use crate::iterators::merge_iterator::MergeIterator; use crate::iterators::two_merge_iterator::TwoMergeIterator; use crate::iterators::StorageIterator; -use crate::key::{KeySlice, TS_DEFAULT, TS_MAX, TS_MIN, TS_RANGE_BEGIN, TS_RANGE_END}; +use crate::key::{KeySlice, TS_RANGE_BEGIN, TS_RANGE_END}; use crate::lsm_iterator::{FusedIterator, LsmIterator}; use crate::manifest::{Manifest, ManifestRecord}; use crate::mem_table::{map_bound, map_bound_plus_ts, MemTable}; @@ -488,29 +488,17 @@ impl LsmStorageInner { snapshot = Arc::clone(&state_guard); } + let lower = Bound::Included(KeySlice::from_slice_with_ts(key, TS_RANGE_BEGIN)); + let upper = Bound::Included(KeySlice::from_slice_with_ts(key, TS_RANGE_END)); //first search in mem_table - // if let Some(bytes) = snapshot.memtable.get(key) { - // if bytes.is_empty() { - // return Ok(None); - // } else { - // return Ok(Some(bytes)); - // } - // } let mut memtable_iters = Vec::with_capacity(snapshot.imm_memtables.len() + 1); - memtable_iters.push(Box::new(snapshot.memtable.scan( - Bound::Included(KeySlice::from_slice_with_ts(key, TS_MAX)), - Bound::Included(KeySlice::from_slice_with_ts(key, TS_MIN)), - ))); + memtable_iters.push(Box::new(snapshot.memtable.scan(lower, upper))); //then search in immutable mem_table // for imme_table in &storage_guard.imm_memtables { for imme_table in snapshot.imm_memtables.iter() { - memtable_iters.push(Box::new(imme_table.scan( - Bound::Included(KeySlice::from_slice_with_ts(key, TS_MAX)), - Bound::Included(KeySlice::from_slice_with_ts(key, TS_MIN)), - ))); + memtable_iters.push(Box::new(imme_table.scan(lower, upper))); } - let merge_memtb_iter = MergeIterator::create(memtable_iters); while merge_memtb_iter.is_valid() { if !merge_memtb_iter.value().is_empty() { @@ -522,58 +510,56 @@ impl LsmStorageInner { } //search in l0_Sstable - let key = KeySlice::from_slice_with_ts(key, TS_DEFAULT); - let mut sst_iters = Vec::with_capacity(snapshot.l0_sstables.len()); for sst_idx in snapshot.l0_sstables.iter() { let table = snapshot.sstables[sst_idx].clone(); - - if !key_within(&table, key) { + if !range_overlap(lower, upper, &table) { continue; } - if let Some(ref bloom) = table.bloom { - if !bloom.may_contain(fingerprint32(key.key_ref())) { + if !bloom.may_contain(fingerprint32(key)) { continue; } } - let sst_iter = SsTableIterator::create_and_seek_to_key(table, key)?; - sst_iters.push(Box::new(sst_iter)); - } - - // create sst iterators - let mut merge_sst_iter = MergeIterator::create(sst_iters); - while merge_sst_iter.is_valid() && key > merge_sst_iter.key() { - merge_sst_iter.next()?; - //skip deleted key - while merge_sst_iter.is_valid() && merge_sst_iter.value().is_empty() { - merge_sst_iter.next()?; - } + let iter = SsTableIterator::create_and_seek_to_key( + table, + KeySlice::from_slice_with_ts(key, TS_RANGE_BEGIN), + )?; + sst_iters.push(Box::new(iter)); } - if merge_sst_iter.is_valid() && merge_sst_iter.key() == key { - if !merge_sst_iter.value().is_empty() { - return Ok(Some(Bytes::copy_from_slice(merge_sst_iter.value()))); + let merge_l0_sst_iters = MergeIterator::create(sst_iters); + while merge_l0_sst_iters.is_valid() { + if !merge_l0_sst_iters.value().is_empty() { + return Ok(Some(Bytes::copy_from_slice(merge_l0_sst_iters.value()))); } else { return Ok(None); } } - // search in l1-lmax_sstables - for (_, sst_ids) in snapshot.levels.iter() { - let mut sstables = Vec::with_capacity(sst_ids.len()); - for sst_id in sst_ids.iter() { - sstables.push(snapshot.sstables[sst_id].clone()); + // create l1-lmax_sst_iter + let mut l1_to_lmax_sst_concat_iters = Vec::with_capacity(snapshot.levels.len()); + for (_, li_ssts_ids) in snapshot.levels.iter() { + // add sst_ids in each level for the concat_iter + let mut li_sstables = Vec::with_capacity(li_ssts_ids.len()); + for sst_id in li_ssts_ids.iter() { + li_sstables.push(snapshot.sstables[sst_id].clone()); } - if sstables.is_empty() { - continue; + if !li_sstables.is_empty() { + let iter = SstConcatIterator::create_and_seek_to_key( + li_sstables, + KeySlice::from_slice_with_ts(key, TS_RANGE_BEGIN), + )?; + l1_to_lmax_sst_concat_iters.push(Box::new(iter)); } - let iter = SstConcatIterator::create_and_seek_to_key(sstables, key)?; - if iter.is_valid() && iter.key() == key { - if !iter.value().is_empty() { - return Ok(Some(Bytes::copy_from_slice(iter.value()))); - } else { - return Ok(None); - } + } + let merge_l1_to_lmax_sst_iters = MergeIterator::create(l1_to_lmax_sst_concat_iters); + while merge_l1_to_lmax_sst_iters.is_valid() { + if !merge_l0_sst_iters.value().is_empty() { + return Ok(Some(Bytes::copy_from_slice( + merge_l1_to_lmax_sst_iters.value(), + ))); + } else { + return Ok(None); } } Ok(None) From ad5f331028b15909beddde352770acfbb6719f99 Mon Sep 17 00:00:00 2001 From: zjw Date: Tue, 21 Jan 2025 21:22:56 +0800 Subject: [PATCH 4/6] fix timestamped_key for_testing_ts() --- .vscode/launch.json | 57 +++++++++++++++++++++ mini-lsm-starter/src/compact.rs | 6 +++ mini-lsm-starter/src/key/timestamped_key.rs | 2 +- mini-lsm-starter/src/lsm_storage.rs | 6 ++- 4 files changed, 69 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 3ed6680..54e6bef 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -102,6 +102,63 @@ "tests::week1_day6", "--show-output" ] + }, + { + "type": "lldb-dap", + "request": "launch", + "name": "test tests::week2_day1::test_task3_integration", + "cwd": "${workspaceFolder}/mini-lsm-starter", + "program": "${workspaceFolder}/target/debug/deps/mini_lsm_starter-430c4aaead80470b", + "env": [ + "RUST_BACKTRACE=short", + "RUSTC_TOOLCHAIN=/home/zjw/.rustup/toolchains/stable-x86_64-unknown-linux-gnu" + ], + "preRunCommands": [ + "command script import ${workspaceFolder}/util/rust_prettifier_for_lldb.py" + ], + "args": [ + "tests::week2_day1::test_task3_integration", + "--exact", + "--show-output" + ] + }, + { + "type": "lldb-dap", + "request": "launch", + "name": "test tests::week2_day2::test_integration", + "cwd": "${workspaceFolder}/mini-lsm-starter", + "program": "${workspaceFolder}/target/debug/deps/mini_lsm_starter-430c4aaead80470b", + "env": [ + "RUST_BACKTRACE=short", + "RUSTC_TOOLCHAIN=/home/zjw/.rustup/toolchains/stable-x86_64-unknown-linux-gnu" + ], + "preRunCommands": [ + "command script import ${workspaceFolder}/util/rust_prettifier_for_lldb.py" + ], + "args": [ + "tests::week2_day2::test_integration", + "--exact", + "--show-output" + ] + }, + { + "type": "lldb-dap", + "request": "launch", + "name": "test tests::week3_day1::test_sst_build_multi_version_hard", + "cwd": "${workspaceFolder}/mini-lsm-starter", + "program": "${workspaceFolder}/target/debug/deps/mini_lsm_starter-430c4aaead80470b", + "env": [ + "RUST_BACKTRACE=short", + "RUSTC_TOOLCHAIN=/home/zjw/.rustup/toolchains/stable-x86_64-unknown-linux-gnu" + ], + "preRunCommands": [ + "command script import ${workspaceFolder}/util/rust_prettifier_for_lldb.py" + ], + "args": [ + "tests::week3_day1::test_sst_build_multi_version_hard", + "--exact", + "--show-output" + ] } ] } \ No newline at end of file diff --git a/mini-lsm-starter/src/compact.rs b/mini-lsm-starter/src/compact.rs index 74a3855..d971e0b 100644 --- a/mini-lsm-starter/src/compact.rs +++ b/mini-lsm-starter/src/compact.rs @@ -357,6 +357,12 @@ impl LsmStorageInner { } snapshot.levels[0] = (1, level_1); + println!("===== After compaction ====="); + println!("L0 : {:?}", snapshot.l0_sstables); + for (tier, sstables) in snapshot.levels.iter() { + println!("L{:?} : {:?}", tier, sstables); + } + println!(); // lock and update LsmStorageState { let _state_lock = self.state_lock.lock(); diff --git a/mini-lsm-starter/src/key/timestamped_key.rs b/mini-lsm-starter/src/key/timestamped_key.rs index 616dcec..42f7bee 100644 --- a/mini-lsm-starter/src/key/timestamped_key.rs +++ b/mini-lsm-starter/src/key/timestamped_key.rs @@ -35,7 +35,7 @@ impl> Key { } pub fn for_testing_ts(self) -> u64 { - 0 + self.1 } } diff --git a/mini-lsm-starter/src/lsm_storage.rs b/mini-lsm-starter/src/lsm_storage.rs index ab690c0..ef50149 100644 --- a/mini-lsm-starter/src/lsm_storage.rs +++ b/mini-lsm-starter/src/lsm_storage.rs @@ -542,6 +542,10 @@ impl LsmStorageInner { // add sst_ids in each level for the concat_iter let mut li_sstables = Vec::with_capacity(li_ssts_ids.len()); for sst_id in li_ssts_ids.iter() { + let table = snapshot.sstables[sst_id].clone(); + if !range_overlap(lower, upper, &table) { + continue; + } li_sstables.push(snapshot.sstables[sst_id].clone()); } if !li_sstables.is_empty() { @@ -554,7 +558,7 @@ impl LsmStorageInner { } let merge_l1_to_lmax_sst_iters = MergeIterator::create(l1_to_lmax_sst_concat_iters); while merge_l1_to_lmax_sst_iters.is_valid() { - if !merge_l0_sst_iters.value().is_empty() { + if !merge_l1_to_lmax_sst_iters.value().is_empty() { return Ok(Some(Bytes::copy_from_slice( merge_l1_to_lmax_sst_iters.value(), ))); From 8d0201c536af1732b87fb5718090128b66f667bb Mon Sep 17 00:00:00 2001 From: zjw Date: Wed, 22 Jan 2025 13:22:59 +0800 Subject: [PATCH 5/6] fix SstableBuilder.add(), correctly set from slice after call of finish_block() --- mini-lsm-starter/src/block/iterator.rs | 2 ++ mini-lsm-starter/src/key/timestamped_key.rs | 2 +- mini-lsm-starter/src/mem_table.rs | 10 +++------- mini-lsm-starter/src/table/builder.rs | 10 ++++++---- mini-lsm-starter/src/table/iterator.rs | 3 --- mini-lsm-starter/src/tests/harness.rs | 7 ++++++- 6 files changed, 18 insertions(+), 16 deletions(-) diff --git a/mini-lsm-starter/src/block/iterator.rs b/mini-lsm-starter/src/block/iterator.rs index 18a86c8..5ce82f2 100644 --- a/mini-lsm-starter/src/block/iterator.rs +++ b/mini-lsm-starter/src/block/iterator.rs @@ -129,6 +129,7 @@ impl BlockIterator { /// Note: You should assume the key-value pairs in the block are sorted when being added by /// callers. pub fn seek_to_key(&mut self, key: KeySlice) { + // if key doesn't exist, it will seek to empty key let mut low = 0; let mut high = self.block.offsets.len(); while low < high { @@ -144,6 +145,7 @@ impl BlockIterator { self.seek_to(low); // if key doesn't exist, it will seek to the last_key in this block + // O(n) // let mut offset: usize; // for i in 0..self.block.offsets.len() { // offset = self.block.offsets[i] as usize; diff --git a/mini-lsm-starter/src/key/timestamped_key.rs b/mini-lsm-starter/src/key/timestamped_key.rs index 42f7bee..c48da67 100644 --- a/mini-lsm-starter/src/key/timestamped_key.rs +++ b/mini-lsm-starter/src/key/timestamped_key.rs @@ -94,7 +94,7 @@ impl Key> { } pub fn for_testing_from_vec_no_ts(key: Vec) -> Self { - Self(key, u64::default()) + Self(key, TS_DEFAULT) } } diff --git a/mini-lsm-starter/src/mem_table.rs b/mini-lsm-starter/src/mem_table.rs index e60500b..97c4408 100644 --- a/mini-lsm-starter/src/mem_table.rs +++ b/mini-lsm-starter/src/mem_table.rs @@ -117,7 +117,7 @@ impl MemTable { // this is safe because lifetime of keybytes is no longer than key: KeySlice let keybytes = KeyBytes::from_bytes_with_ts( Bytes::from_static(unsafe { std::mem::transmute(key.key_ref()) }), - TS_DEFAULT, + key.ts(), ); self.map.get(&keybytes).map(|entry| entry.value().clone()) } @@ -129,6 +129,7 @@ impl MemTable { /// In week 3, day 5, modify the function to use the batch API. pub fn put(&self, key: KeySlice, value: &[u8]) -> Result<()> { let inc_sizes = key.raw_len() + value.len(); + // assert_ne!(key.ts(), 0, "key is {:?}", key.key_ref()); self.map.insert( KeyBytes::from_bytes_with_ts(Bytes::copy_from_slice(key.key_ref()), key.ts()), Bytes::copy_from_slice(value), @@ -170,12 +171,7 @@ impl MemTable { /// Flush the mem-table to SSTable. Implement in week 1 day 6. pub fn flush(&self, builder: &mut SsTableBuilder) -> Result<()> { for entry in self.map.iter() { - // println!( - // "add entry key: {:?}, ts: {:?}, value: {:?}", - // entry.key().key_ref(), - // entry.key().ts(), - // entry.value() - // ); + // assert_ne!(entry.key().ts(), 0, "key is {:?}", entry.key()); builder.add( KeySlice::from_slice_with_ts(entry.key().key_ref(), entry.key().ts()), entry.value(), diff --git a/mini-lsm-starter/src/table/builder.rs b/mini-lsm-starter/src/table/builder.rs index eede662..31a15b6 100644 --- a/mini-lsm-starter/src/table/builder.rs +++ b/mini-lsm-starter/src/table/builder.rs @@ -63,9 +63,9 @@ impl SsTableBuilder { //insert kv again and update first_key and last_key assert!(self.builder.add(key, value)); self.first_key.clear(); - self.first_key.append(key.into_inner()); + self.first_key.set_from_slice(key); self.last_key.clear(); - self.last_key.append(key.into_inner()); + self.last_key.set_from_slice(key); } pub fn finish_block(&mut self) { @@ -133,10 +133,12 @@ impl SsTableBuilder { let first_key = self.meta.first().unwrap().first_key.clone(); let last_key = self.meta.last().unwrap().last_key.clone(); println!( - "build {:?} , key range from {:?} to {:?}", + "build {:?} , key range from {:?}, ts:{:?} to {:?}, ts: {:?}", id, &first_key.key_ref()[first_key.key_len().saturating_sub(6)..], - &last_key.key_ref()[last_key.key_len().saturating_sub(6)..] + first_key.ts(), + &last_key.key_ref()[last_key.key_len().saturating_sub(6)..], + last_key.ts() ); assert!( diff --git a/mini-lsm-starter/src/table/iterator.rs b/mini-lsm-starter/src/table/iterator.rs index f482ba9..b3b64cb 100644 --- a/mini-lsm-starter/src/table/iterator.rs +++ b/mini-lsm-starter/src/table/iterator.rs @@ -68,9 +68,6 @@ impl SsTableIterator { /// this function. pub fn seek_to_key(&mut self, key: KeySlice) -> Result<()> { let mut blk_idx = self.table.find_block_idx(key); - // if blk_idx == 25 { - // blk_idx = self.table.find_block_idx(key); - // } let mut block = self.table.read_block_cached(blk_idx)?; let mut blk_iter = BlockIterator::create_and_seek_to_key(block, key); diff --git a/mini-lsm-starter/src/tests/harness.rs b/mini-lsm-starter/src/tests/harness.rs index 0e55768..345ecfe 100644 --- a/mini-lsm-starter/src/tests/harness.rs +++ b/mini-lsm-starter/src/tests/harness.rs @@ -264,7 +264,12 @@ pub fn compaction_bench(storage: Arc) { let value = storage.get(key.as_bytes()).unwrap(); if let Some(val) = key_map.get(&i) { let expected_value = gen_value(*val); - assert_eq!(value, Some(Bytes::from(expected_value.clone()))); + assert_eq!( + value, + Some(Bytes::from(expected_value.clone())), + "key is {:?}", + key + ); expected_key_value_pairs.push((Bytes::from(key), Bytes::from(expected_value))); } else { assert!(value.is_none()); From 7663822004e55f244752cf09457af6586d6ab180 Mon Sep 17 00:00:00 2001 From: zjw Date: Wed, 22 Jan 2025 13:23:42 +0800 Subject: [PATCH 6/6] fix find_block_idx() --- mini-lsm-starter/src/table.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mini-lsm-starter/src/table.rs b/mini-lsm-starter/src/table.rs index 09982b7..d5d0950 100644 --- a/mini-lsm-starter/src/table.rs +++ b/mini-lsm-starter/src/table.rs @@ -245,14 +245,21 @@ impl SsTable { /// Note: You may want to make use of the `first_key` stored in `BlockMeta`. /// You may also assume the key-value pairs stored in each consecutive block are sorted. pub fn find_block_idx(&self, key: KeySlice) -> usize { - //self.block_meta.partition_point(|meta| meta.first_key.as_key_slice()<=key).saturating_sub(1); + // self.block_meta + // .partition_point(|meta| meta.first_key.as_key_slice() <= key) + // .saturating_sub(1) let mut final_idx: usize = 0; + let mut prevmeta_lastkey = KeySlice::from(key); for meta in self.block_meta.iter() { - if key >= meta.first_key.as_key_slice() { + if meta.first_key.as_key_slice() <= key { + final_idx += 1; + } else if meta.first_key.key_ref() <= key.key_ref() && prevmeta_lastkey < key { final_idx += 1; } else { + // println!("key is {:?} ts={:?}, meta.first_key is {:?} ts={:?}, meta.last_key is {:?} ts={:?}", key.to_key_vec().into_key_bytes(), key.ts(), meta.first_key, meta.first_key.ts(), meta.last_key, meta.last_key.ts()); break; } + prevmeta_lastkey = meta.last_key.as_key_slice(); } match final_idx { 0 => 0,