scratchpad/sparse_merkle/mod.rs
1// Copyright (c) The Diem Core Contributors
2// SPDX-License-Identifier: Apache-2.0
3
4// Copyright 2021 Conflux Foundation. All rights reserved.
5// Conflux is free software and distributed under GNU General Public License.
6// See http://www.gnu.org/licenses/
7
8//! This module implements an in-memory Sparse Merkle Tree that is similar to
9//! what we use in storage to represent world state. This tree will store only a
10//! small portion of the state -- the part of accounts that have been modified
11//! by uncommitted transactions. For example, if we execute a transaction T_i on
12//! top of committed state and it modified account A, we will end up having the
13//! following tree:
14//!
15//! ```text
16//! S_i
17//! / \
18//! o y
19//! / \
20//! x A
21//! ```
22//! where A has the new state of the account, and y and x are the siblings on
23//! the path from root to A in the tree.
24//!
25//! This Sparse Merkle Tree is immutable once constructed. If the next
26//! transaction T_{i+1} modified another account B that lives in the subtree at
27//! y, a new tree will be constructed and the structure will look like the
28//! following:
29//!
30//! ```text
31//! S_i S_{i+1}
32//! / \ / \
33//! / y / \
34//! / _______/ \
35//! // \
36//! o y'
37//! / \ / \
38//! x A z B
39//! ```
40//!
41//! Using this structure, we are able to query the global state, taking into
42//! account the output of uncommitted transactions. For example, if we want to
43//! execute another transaction T_{i+1}', we can use the tree S_i. If we look
44//! for account A, we can find its new value in the tree. Otherwise, we know the
45//! account does not exist in the tree, and we can fall back to storage. As
46//! another example, if we want to execute transaction T_{i+2}, we can use the
47//! tree S_{i+1} that has updated values for both account A and B.
48//!
49//! Each version of the tree holds a strong reference (an `Arc<Node>`) to its
50//! root as well as one to its base tree (S_i is the base tree of S_{i+1} in the
51//! above example). The root node in turn, recursively holds all descendant
52//! nodes created in the same version, and weak references (a `Weak<Node>`) to
53//! all descendant nodes that was created from previous versions.
54//! With this construction:
55//! 1. Even if a reference to a specific tree is dropped, the nodes
56//! belonging to it won't be dropped as long as trees depending on it still hold
57//! strong references to it via the chain of "base trees".
58//! 2. Even if a tree is not dropped, when nodes it created are persisted to
59//! DB, all of them and those created by its previous versions can be dropped,
60//! which we express by calling "prune()" on it which replaces the strong
61//! references to its root and its base tree with weak references. 3. We can
62//! hold strong references to recently accessed nodes that have already been
63//! persisted in an LRU flavor cache for less DB reads.
64//!
65//! This Sparse Merkle Tree serves a dual purpose. First, to support a leader
66//! based consensus algorithm, we need to build a tree of transactions like the
67//! following:
68//!
69//! ```text
70//! Committed -> T5 -> T6 -> T7
71//! └---> T6' -> T7'
72//! └----> T7"
73//! ```
74//!
75//! Once T5 is executed, we will have a tree that stores the modified portion of
76//! the state. Later when we execute T6 on top of T5, the output of T5 can be
77//! visible to T6.
78//!
79//! Second, given this tree representation it is straightforward to compute the
80//! root hash of S_i once T_i is executed. This allows us to verify the proofs
81//! we need when executing T_{i+1}.
82
83// See https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=e9c4c53eb80b30d09112fcfb07d481e7
84#![allow(clippy::let_and_return)]
85// See https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=795cd4f459f1d4a0005a99650726834b
86#![allow(clippy::while_let_loop)]
87
88mod node;
89
90#[cfg(test)]
91mod sparse_merkle_test;
92
93use crate::sparse_merkle::node::{LeafValue, Node, SubTree};
94use arc_swap::{ArcSwap, ArcSwapOption};
95use diem_crypto::{
96 hash::{CryptoHash, HashValueBitIterator, SPARSE_MERKLE_PLACEHOLDER_HASH},
97 HashValue,
98};
99use diem_types::proof::SparseMerkleProof;
100use std::{borrow::Borrow, sync::Arc};
101
102/// `AccountStatus` describes the result of querying an account from this
103/// SparseMerkleTree.
104#[derive(Debug, Eq, PartialEq)]
105pub enum AccountStatus<V> {
106 /// The account exists in the tree, therefore we can give its value.
107 ExistsInScratchPad(V),
108
109 /// The account does not exist in the tree, but exists in DB. This happens
110 /// when the search reaches a leaf node that has the requested account,
111 /// but the node has only the value hash because it was loaded into
112 /// memory as part of a non-inclusion proof. When we go to DB we
113 /// don't need to traverse the tree to find the same leaf, instead we can
114 /// use the value hash to look up the account content directly.
115 ExistsInDB,
116
117 /// The account does not exist in either the tree or DB. This happens when
118 /// the search reaches an empty node, or a leaf node that has a
119 /// different account.
120 DoesNotExist,
121
122 /// We do not know if this account exists or not and need to go to DB to
123 /// find out. This happens when the search reaches a subtree node.
124 Unknown,
125}
126
127/// The inner content of a sparse merkle tree, we have this so that even if a
128/// tree is dropped, the INNER of it can still live if referenced by a later
129/// version.
130#[derive(Debug)]
131struct Inner<V> {
132 /// Reference to the root node, initially a strong reference, and once
133 /// pruned, becomes a weak reference, allowing nodes created by this
134 /// version to go away.
135 root: ArcSwap<SubTree<V>>,
136 /// Reference to the INNER base tree, needs to be a strong reference if the
137 /// base is speculative itself, so that nodes referenced in this
138 /// version won't go away because the base tree is dropped.
139 base: ArcSwapOption<Inner<V>>,
140}
141
142impl<V: CryptoHash> Inner<V> {
143 fn prune(&self) {
144 // Replace the link to the root node with a weak reference, so all nodes
145 // created by this version can be dropped. A weak link is still
146 // maintained so that if it's cached somehow, we still have
147 // access to it without resorting to the DB.
148 self.root.store(Arc::new(self.root.load().weak()));
149 // Disconnect the base tree, so that nodes created by previous versions
150 // can be dropped.
151 self.base.store(None);
152 }
153}
154
155/// The Sparse Merkle Tree implementation.
156#[derive(Clone, Debug)]
157pub struct SparseMerkleTree<V> {
158 inner: Arc<Inner<V>>,
159}
160
161impl<V> SparseMerkleTree<V>
162where V: Clone + CryptoHash
163{
164 /// Constructs a Sparse Merkle Tree with a root hash. This is often used
165 /// when we restart and the scratch pad and the storage have identical
166 /// state, so we use a single root hash to represent the entire state.
167 pub fn new(root_hash: HashValue) -> Self {
168 Self::new_impl(
169 if root_hash != *SPARSE_MERKLE_PLACEHOLDER_HASH {
170 SubTree::new_unknown(root_hash)
171 } else {
172 SubTree::new_empty()
173 },
174 None,
175 )
176 }
177
178 fn new_with_base(root: SubTree<V>, base: &Self) -> Self {
179 Self::new_impl(root, Some(base.inner.clone()))
180 }
181
182 fn new_impl(root: SubTree<V>, base: Option<Arc<Inner<V>>>) -> Self {
183 let inner = Inner {
184 root: ArcSwap::from_pointee(root),
185 base: ArcSwapOption::new(base),
186 };
187
188 Self {
189 inner: Arc::new(inner),
190 }
191 }
192
193 fn root_weak(&self) -> SubTree<V> { self.inner.root.load().weak() }
194
195 /// Constructs a new Sparse Merkle Tree as if we are updating the existing
196 /// tree. Since the tree is immutable, the existing tree will remain the
197 /// same and may share part of the tree with the new one.
198 pub fn update(
199 &self, updates: Vec<(HashValue, V)>, proof_reader: &impl ProofRead<V>,
200 ) -> Result<Self, UpdateError> {
201 updates
202 .into_iter()
203 .try_fold(self.clone(), |prev, (key, value)| {
204 prev.update_one(key, value, proof_reader)
205 })
206 }
207
208 /// Constructs a new Sparse Merkle Tree as if we are updating the existing
209 /// tree multiple times with `update_batch`. The function will return
210 /// the root hash of each individual update and a Sparse Merkle Tree of
211 /// the final state.
212 ///
213 /// The `update_batch` will take in a reference of value instead of an owned
214 /// instance. This is because it would be nicer for future parallelism.
215 pub fn batch_update(
216 &self, update_batch: Vec<Vec<(HashValue, &V)>>,
217 proof_reader: &impl ProofRead<V>,
218 ) -> Result<(Vec<HashValue>, Self), UpdateError> {
219 let mut current_state_tree = self.clone();
220
221 let mut result_hashes = Vec::with_capacity(update_batch.len());
222 for updates in update_batch {
223 current_state_tree = current_state_tree.update(
224 updates
225 .into_iter()
226 .map(|(hash, v_ref)| (hash, v_ref.clone()))
227 .collect(),
228 proof_reader,
229 )?;
230 result_hashes.push(current_state_tree.root_hash());
231 }
232 Ok((result_hashes, current_state_tree))
233 }
234
235 fn update_one(
236 &self, key: HashValue, new_value: V, proof_reader: &impl ProofRead<V>,
237 ) -> Result<Self, UpdateError> {
238 let mut current_subtree = self.root_weak();
239 let mut bits = key.iter_bits();
240
241 // Starting from root, traverse the tree according to key until we find
242 // a non-internal node. Record all the bits and sibling nodes on
243 // the path.
244 let mut bits_on_path = vec![];
245 let mut siblings_on_path = vec![];
246 loop {
247 if let SubTree::NonEmpty { root, .. } = ¤t_subtree {
248 if let Some(node) = root.get_node_if_in_mem() {
249 if let Node::Internal(internal_node) = node.borrow() {
250 let bit = bits.next().unwrap_or_else(|| {
251 // invariant of HashValueBitIterator
252 unreachable!(
253 "Tree is deeper than {} levels.",
254 HashValue::LENGTH_IN_BITS
255 )
256 });
257 bits_on_path.push(bit);
258 current_subtree = if bit {
259 siblings_on_path.push(internal_node.left.weak());
260 internal_node.right.weak()
261 } else {
262 siblings_on_path.push(internal_node.right.weak());
263 internal_node.left.weak()
264 };
265 continue;
266 }
267 }
268 }
269 break;
270 }
271
272 // Now we are at the bottom of the tree and current_node can be either a
273 // leaf, unknown or empty. We construct a new subtree like we
274 // are inserting the key here.
275 let new_node = Self::construct_subtree_at_bottom(
276 ¤t_subtree,
277 key,
278 new_value,
279 bits,
280 proof_reader,
281 )?;
282
283 // Use the new node and all previous siblings on the path to construct
284 // the final tree.
285 let root = Self::construct_subtree(
286 bits_on_path.into_iter().rev(),
287 siblings_on_path.into_iter().rev(),
288 new_node,
289 );
290
291 Ok(Self::new_with_base(root, self))
292 }
293
294 /// This function is called when we are trying to write (key, new_value) to
295 /// the tree and have traversed the existing tree using some prefix of
296 /// the key. We should have reached the bottom of the existing tree, so
297 /// current_node cannot be an internal node. This function will
298 /// construct a subtree using current_node, the new key-value pair and
299 /// potentially the key-value pair in the proof.
300 fn construct_subtree_at_bottom(
301 current_subtree: &SubTree<V>, key: HashValue, new_value: V,
302 remaining_bits: HashValueBitIterator, proof_reader: &impl ProofRead<V>,
303 ) -> Result<SubTree<V>, UpdateError> {
304 match current_subtree {
305 SubTree::Empty => {
306 // When we reach an empty node, we just place the leaf node at
307 // the same position to replace the empty node.
308 Ok(SubTree::new_leaf_with_value(key, new_value))
309 }
310 SubTree::NonEmpty { root, .. } => {
311 match root.get_node_if_in_mem() {
312 Some(node) => match node.borrow() {
313 Node::Internal(_) => {
314 unreachable!("Reached an internal node at the bottom of the tree.");
315 }
316 Node::Leaf(leaf_node) => {
317 Ok(Self::construct_subtree_with_new_leaf(
318 key,
319 new_value,
320 current_subtree.weak(),
321 leaf_node.key,
322 HashValue::LENGTH_IN_BITS
323 .checked_sub(remaining_bits.len())
324 .expect("shouldn't overflow"),
325 ))
326 }
327 },
328 None => {
329 // When the search reaches an unknown subtree, we need
330 // proof to give us more
331 // information about this part of the tree.
332 let proof = proof_reader
333 .get_proof(key)
334 .ok_or(UpdateError::MissingProof)?;
335
336 // Here the in-memory tree is identical to the tree in
337 // storage (we have only the
338 // root hash of this subtree in memory). So we need to
339 // take into account the leaf in
340 // the proof.
341 let new_subtree = match proof.leaf() {
342 Some(existing_leaf) => {
343 Self::construct_subtree_with_new_leaf(
344 key,
345 new_value,
346 SubTree::new_leaf_with_value_hash(
347 existing_leaf.key(),
348 existing_leaf.value_hash(),
349 ),
350 existing_leaf.key(),
351 proof.siblings().len(),
352 )
353 }
354 None => {
355 SubTree::new_leaf_with_value(key, new_value)
356 }
357 };
358
359 let num_remaining_bits = remaining_bits.len();
360 let proof_length = proof.siblings().len();
361 Ok(Self::construct_subtree(
362 remaining_bits.rev().skip(
363 HashValue::LENGTH_IN_BITS
364 .checked_sub(proof_length)
365 .expect("shouldn't overflow"),
366 ),
367 proof
368 .siblings()
369 .iter()
370 .take(
371 num_remaining_bits
372 .checked_add(proof_length)
373 .expect("shouldn't overflow")
374 .checked_sub(HashValue::LENGTH_IN_BITS)
375 .expect("shouldn't overflow"),
376 )
377 .map(|sibling_hash| {
378 if *sibling_hash
379 != *SPARSE_MERKLE_PLACEHOLDER_HASH
380 {
381 SubTree::new_unknown(*sibling_hash)
382 } else {
383 SubTree::new_empty()
384 }
385 }),
386 new_subtree,
387 ))
388 }
389 }
390 }
391 }
392 }
393
394 /// Given key, new value, existing leaf and the distance from root to the
395 /// existing leaf, constructs a new subtree that has either the new leaf
396 /// or both nodes, depending on whether the key equals the existing
397 /// leaf's key.
398 ///
399 /// 1. If the key equals the existing leaf's key, we simply need to update
400 /// the leaf to the new value and return it. For example, in the
401 /// following case this function will return `new_leaf`.
402 /// ``` text
403 /// o o
404 /// / \ / \
405 /// o o => o o
406 /// / \ / \
407 /// o existing_leaf o new_leaf
408 /// ```
409 ///
410 /// 2. Otherwise, we need to construct an "extension" for the common prefix,
411 /// and at the end of the extension a subtree for both keys. For
412 /// example, in the following case we assume the existing leaf's key
413 /// starts with 010010 and key starts with 010011, and this function
414 /// will return `x`.
415 /// ```text
416 /// o o common_prefix_len = 5
417 /// / \ / \ distance_from_root_to_existing_leaf = 2
418 /// o o o o extension_len = common_prefix_len - distance_from_root_to_existing_leaf = 3
419 /// / \ / \
420 /// o existing_leaf => o x _
421 /// / \ ^
422 /// o placeholder |
423 /// / \ |
424 /// o placeholder extension
425 /// / \ |
426 /// placeholder o -
427 /// / \
428 /// existing_leaf new_leaf
429 /// ```
430 fn construct_subtree_with_new_leaf(
431 key: HashValue, new_value: V, existing_leaf: SubTree<V>,
432 existing_leaf_key: HashValue,
433 distance_from_root_to_existing_leaf: usize,
434 ) -> SubTree<V> {
435 let new_leaf = SubTree::new_leaf_with_value(key, new_value);
436 if key == existing_leaf_key {
437 // This implies that `key` already existed and the proof is an
438 // inclusion proof.
439 return new_leaf;
440 }
441
442 // This implies that `key` did not exist and was just created. The proof
443 // is a non-inclusion proof. See above example for how
444 // extension_len is computed.
445 let common_prefix_len = key.common_prefix_bits_len(existing_leaf_key);
446 assert!(
447 common_prefix_len >= distance_from_root_to_existing_leaf,
448 "common_prefix_len: {}, distance_from_root_to_existing_leaf: {}",
449 common_prefix_len,
450 distance_from_root_to_existing_leaf,
451 );
452 let extension_len =
453 common_prefix_len - distance_from_root_to_existing_leaf;
454 Self::construct_subtree(
455 key.iter_bits()
456 .rev()
457 .skip(HashValue::LENGTH_IN_BITS - common_prefix_len - 1)
458 .take(extension_len + 1),
459 std::iter::once(existing_leaf).chain(
460 std::iter::repeat(SubTree::new_empty()).take(extension_len),
461 ),
462 new_leaf,
463 )
464 }
465
466 /// Constructs a subtree with a list of siblings and a leaf. For example, if
467 /// `bits` are [false, false, true] and `siblings` are [a, b, c], the
468 /// resulting subtree will look like:
469 ///
470 /// ```text
471 /// x
472 /// / \
473 /// c o
474 /// / \
475 /// o b
476 /// / \
477 /// leaf a
478 /// ```
479 /// and this function will return `x`. Both `bits` and `siblings` start from
480 /// the bottom.
481 fn construct_subtree(
482 bits: impl Iterator<Item = bool>,
483 siblings: impl Iterator<Item = SubTree<V>>, leaf: SubTree<V>,
484 ) -> SubTree<V> {
485 itertools::zip_eq(bits, siblings).fold(
486 leaf,
487 |previous_node, (bit, sibling)| {
488 if bit {
489 SubTree::new_internal(sibling, previous_node)
490 } else {
491 SubTree::new_internal(previous_node, sibling)
492 }
493 },
494 )
495 }
496
497 /// Queries a `key` in this `SparseMerkleTree`.
498 pub fn get(&self, key: HashValue) -> AccountStatus<V> {
499 let mut cur = self.root_weak();
500 let mut bits = key.iter_bits();
501
502 loop {
503 if let Some(node) = cur.get_node_if_in_mem() {
504 if let Node::Internal(internal_node) = node.borrow() {
505 match bits.next() {
506 Some(bit) => {
507 cur = if bit {
508 internal_node.right.weak()
509 } else {
510 internal_node.left.weak()
511 };
512 continue;
513 }
514 None => panic!(
515 "Tree is deeper than {} levels.",
516 HashValue::LENGTH_IN_BITS
517 ),
518 }
519 }
520 }
521 break;
522 }
523
524 let ret = match cur {
525 SubTree::Empty => AccountStatus::DoesNotExist,
526 SubTree::NonEmpty { root, .. } => match root.get_node_if_in_mem() {
527 None => AccountStatus::Unknown,
528 Some(node) => match node.borrow() {
529 Node::Internal(_) => unreachable!(
530 "There is an internal node at the bottom of the tree."
531 ),
532 Node::Leaf(leaf_node) => {
533 if leaf_node.key == key {
534 match &leaf_node.value {
535 LeafValue::Value(value) => {
536 AccountStatus::ExistsInScratchPad(
537 value.clone(),
538 )
539 }
540 LeafValue::ValueHash(_) => {
541 AccountStatus::ExistsInDB
542 }
543 }
544 } else {
545 AccountStatus::DoesNotExist
546 }
547 }
548 },
549 },
550 };
551 ret
552 }
553
554 /// Returns the root hash of this tree.
555 pub fn root_hash(&self) -> HashValue { self.inner.root.load().hash() }
556
557 /// Mark that all the nodes created by this tree and its ancestors are
558 /// persisted in the DB.
559 pub fn prune(&self) { self.inner.prune() }
560}
561
562impl<V> Default for SparseMerkleTree<V>
563where V: Clone + CryptoHash
564{
565 fn default() -> Self {
566 SparseMerkleTree::new(*SPARSE_MERKLE_PLACEHOLDER_HASH)
567 }
568}
569
570/// A type that implements `ProofRead` can provide proof for keys in persistent
571/// storage.
572pub trait ProofRead<V> {
573 /// Gets verified proof for this key in persistent storage.
574 fn get_proof(&self, key: HashValue) -> Option<&SparseMerkleProof<V>>;
575}
576
577/// All errors `update` can possibly return.
578#[derive(Debug, Eq, PartialEq)]
579pub enum UpdateError {
580 /// The update intends to insert a key that does not exist in the tree, so
581 /// the operation needs proof to get more information about the tree,
582 /// but no proof is provided.
583 MissingProof,
584}