scratchpad/sparse_merkle/
mod.rs

1// Copyright (c) The Diem Core Contributors
2// SPDX-License-Identifier: Apache-2.0
3
4// Copyright 2021 Conflux Foundation. All rights reserved.
5// Conflux is free software and distributed under GNU General Public License.
6// See http://www.gnu.org/licenses/
7
8//! This module implements an in-memory Sparse Merkle Tree that is similar to
9//! what we use in storage to represent world state. This tree will store only a
10//! small portion of the state -- the part of accounts that have been modified
11//! by uncommitted transactions. For example, if we execute a transaction T_i on
12//! top of committed state and it modified account A, we will end up having the
13//! following tree:
14//!
15//! ```text
16//!              S_i
17//!             /   \
18//!            o     y
19//!           / \
20//!          x   A
21//! ```
22//! where A has the new state of the account, and y and x are the siblings on
23//! the path from root to A in the tree.
24//!
25//! This Sparse Merkle Tree is immutable once constructed. If the next
26//! transaction T_{i+1} modified another account B that lives in the subtree at
27//! y, a new tree will be constructed and the structure will look like the
28//! following:
29//!
30//! ```text
31//!                 S_i        S_{i+1}
32//!                /   \      /       \
33//!               /     y   /          \
34//!              / _______/             \
35//!             //                       \
36//!            o                          y'
37//!           / \                        / \
38//!          x   A                      z   B
39//! ```
40//!
41//! Using this structure, we are able to query the global state, taking into
42//! account the output of uncommitted transactions. For example, if we want to
43//! execute another transaction T_{i+1}', we can use the tree S_i. If we look
44//! for account A, we can find its new value in the tree. Otherwise, we know the
45//! account does not exist in the tree, and we can fall back to storage. As
46//! another example, if we want to execute transaction T_{i+2}, we can use the
47//! tree S_{i+1} that has updated values for both account A and B.
48//!
49//! Each version of the tree holds a strong reference (an `Arc<Node>`) to its
50//! root as well as one to its base tree (S_i is the base tree of S_{i+1} in the
51//! above example). The root node in turn, recursively holds all descendant
52//! nodes created in the same version, and weak references (a `Weak<Node>`) to
53//! all descendant nodes that was created from previous versions.
54//! With this construction:
55//!     1. Even if a reference to a specific tree is dropped, the nodes
56//! belonging to it won't be dropped as long as trees depending on it still hold
57//! strong references to it via the chain of "base trees".
58//!     2. Even if a tree is not dropped, when nodes it created are persisted to
59//! DB, all of them and those created by its previous versions can be dropped,
60//! which we express by calling "prune()" on it which replaces the strong
61//! references to its root and its base tree with weak references.     3. We can
62//! hold strong references to recently accessed nodes that have already been
63//! persisted in an LRU flavor cache for less DB reads.
64//!
65//! This Sparse Merkle Tree serves a dual purpose. First, to support a leader
66//! based consensus algorithm, we need to build a tree of transactions like the
67//! following:
68//!
69//! ```text
70//! Committed -> T5 -> T6  -> T7
71//!              └---> T6' -> T7'
72//!                    └----> T7"
73//! ```
74//!
75//! Once T5 is executed, we will have a tree that stores the modified portion of
76//! the state. Later when we execute T6 on top of T5, the output of T5 can be
77//! visible to T6.
78//!
79//! Second, given this tree representation it is straightforward to compute the
80//! root hash of S_i once T_i is executed. This allows us to verify the proofs
81//! we need when executing T_{i+1}.
82
83// See https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=e9c4c53eb80b30d09112fcfb07d481e7
84#![allow(clippy::let_and_return)]
85// See https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=795cd4f459f1d4a0005a99650726834b
86#![allow(clippy::while_let_loop)]
87
88mod node;
89
90#[cfg(test)]
91mod sparse_merkle_test;
92
93use crate::sparse_merkle::node::{LeafValue, Node, SubTree};
94use arc_swap::{ArcSwap, ArcSwapOption};
95use diem_crypto::{
96    hash::{CryptoHash, HashValueBitIterator, SPARSE_MERKLE_PLACEHOLDER_HASH},
97    HashValue,
98};
99use diem_types::proof::SparseMerkleProof;
100use std::{borrow::Borrow, sync::Arc};
101
102/// `AccountStatus` describes the result of querying an account from this
103/// SparseMerkleTree.
104#[derive(Debug, Eq, PartialEq)]
105pub enum AccountStatus<V> {
106    /// The account exists in the tree, therefore we can give its value.
107    ExistsInScratchPad(V),
108
109    /// The account does not exist in the tree, but exists in DB. This happens
110    /// when the search reaches a leaf node that has the requested account,
111    /// but the node has only the value hash because it was loaded into
112    /// memory as part of a non-inclusion proof. When we go to DB we
113    /// don't need to traverse the tree to find the same leaf, instead we can
114    /// use the value hash to look up the account content directly.
115    ExistsInDB,
116
117    /// The account does not exist in either the tree or DB. This happens when
118    /// the search reaches an empty node, or a leaf node that has a
119    /// different account.
120    DoesNotExist,
121
122    /// We do not know if this account exists or not and need to go to DB to
123    /// find out. This happens when the search reaches a subtree node.
124    Unknown,
125}
126
127/// The inner content of a sparse merkle tree, we have this so that even if a
128/// tree is dropped, the INNER of it can still live if referenced by a later
129/// version.
130#[derive(Debug)]
131struct Inner<V> {
132    /// Reference to the root node, initially a strong reference, and once
133    /// pruned, becomes a weak reference, allowing nodes created by this
134    /// version to go away.
135    root: ArcSwap<SubTree<V>>,
136    /// Reference to the INNER base tree, needs to be a strong reference if the
137    /// base is speculative itself, so that nodes referenced in this
138    /// version won't go away because the base tree is dropped.
139    base: ArcSwapOption<Inner<V>>,
140}
141
142impl<V: CryptoHash> Inner<V> {
143    fn prune(&self) {
144        // Replace the link to the root node with a weak reference, so all nodes
145        // created by this version can be dropped. A weak link is still
146        // maintained so that if it's cached somehow, we still have
147        // access to it without resorting to the DB.
148        self.root.store(Arc::new(self.root.load().weak()));
149        // Disconnect the base tree, so that nodes created by previous versions
150        // can be dropped.
151        self.base.store(None);
152    }
153}
154
155/// The Sparse Merkle Tree implementation.
156#[derive(Clone, Debug)]
157pub struct SparseMerkleTree<V> {
158    inner: Arc<Inner<V>>,
159}
160
161impl<V> SparseMerkleTree<V>
162where V: Clone + CryptoHash
163{
164    /// Constructs a Sparse Merkle Tree with a root hash. This is often used
165    /// when we restart and the scratch pad and the storage have identical
166    /// state, so we use a single root hash to represent the entire state.
167    pub fn new(root_hash: HashValue) -> Self {
168        Self::new_impl(
169            if root_hash != *SPARSE_MERKLE_PLACEHOLDER_HASH {
170                SubTree::new_unknown(root_hash)
171            } else {
172                SubTree::new_empty()
173            },
174            None,
175        )
176    }
177
178    fn new_with_base(root: SubTree<V>, base: &Self) -> Self {
179        Self::new_impl(root, Some(base.inner.clone()))
180    }
181
182    fn new_impl(root: SubTree<V>, base: Option<Arc<Inner<V>>>) -> Self {
183        let inner = Inner {
184            root: ArcSwap::from_pointee(root),
185            base: ArcSwapOption::new(base),
186        };
187
188        Self {
189            inner: Arc::new(inner),
190        }
191    }
192
193    fn root_weak(&self) -> SubTree<V> { self.inner.root.load().weak() }
194
195    /// Constructs a new Sparse Merkle Tree as if we are updating the existing
196    /// tree. Since the tree is immutable, the existing tree will remain the
197    /// same and may share part of the tree with the new one.
198    pub fn update(
199        &self, updates: Vec<(HashValue, V)>, proof_reader: &impl ProofRead<V>,
200    ) -> Result<Self, UpdateError> {
201        updates
202            .into_iter()
203            .try_fold(self.clone(), |prev, (key, value)| {
204                prev.update_one(key, value, proof_reader)
205            })
206    }
207
208    /// Constructs a new Sparse Merkle Tree as if we are updating the existing
209    /// tree multiple times with `update_batch`. The function will return
210    /// the root hash of each individual update and a Sparse Merkle Tree of
211    /// the final state.
212    ///
213    /// The `update_batch` will take in a reference of value instead of an owned
214    /// instance. This is because it would be nicer for future parallelism.
215    pub fn batch_update(
216        &self, update_batch: Vec<Vec<(HashValue, &V)>>,
217        proof_reader: &impl ProofRead<V>,
218    ) -> Result<(Vec<HashValue>, Self), UpdateError> {
219        let mut current_state_tree = self.clone();
220
221        let mut result_hashes = Vec::with_capacity(update_batch.len());
222        for updates in update_batch {
223            current_state_tree = current_state_tree.update(
224                updates
225                    .into_iter()
226                    .map(|(hash, v_ref)| (hash, v_ref.clone()))
227                    .collect(),
228                proof_reader,
229            )?;
230            result_hashes.push(current_state_tree.root_hash());
231        }
232        Ok((result_hashes, current_state_tree))
233    }
234
235    fn update_one(
236        &self, key: HashValue, new_value: V, proof_reader: &impl ProofRead<V>,
237    ) -> Result<Self, UpdateError> {
238        let mut current_subtree = self.root_weak();
239        let mut bits = key.iter_bits();
240
241        // Starting from root, traverse the tree according to key until we find
242        // a non-internal node. Record all the bits and sibling nodes on
243        // the path.
244        let mut bits_on_path = vec![];
245        let mut siblings_on_path = vec![];
246        loop {
247            if let SubTree::NonEmpty { root, .. } = &current_subtree {
248                if let Some(node) = root.get_node_if_in_mem() {
249                    if let Node::Internal(internal_node) = node.borrow() {
250                        let bit = bits.next().unwrap_or_else(|| {
251                            // invariant of HashValueBitIterator
252                            unreachable!(
253                                "Tree is deeper than {} levels.",
254                                HashValue::LENGTH_IN_BITS
255                            )
256                        });
257                        bits_on_path.push(bit);
258                        current_subtree = if bit {
259                            siblings_on_path.push(internal_node.left.weak());
260                            internal_node.right.weak()
261                        } else {
262                            siblings_on_path.push(internal_node.right.weak());
263                            internal_node.left.weak()
264                        };
265                        continue;
266                    }
267                }
268            }
269            break;
270        }
271
272        // Now we are at the bottom of the tree and current_node can be either a
273        // leaf, unknown or empty. We construct a new subtree like we
274        // are inserting the key here.
275        let new_node = Self::construct_subtree_at_bottom(
276            &current_subtree,
277            key,
278            new_value,
279            bits,
280            proof_reader,
281        )?;
282
283        // Use the new node and all previous siblings on the path to construct
284        // the final tree.
285        let root = Self::construct_subtree(
286            bits_on_path.into_iter().rev(),
287            siblings_on_path.into_iter().rev(),
288            new_node,
289        );
290
291        Ok(Self::new_with_base(root, self))
292    }
293
294    /// This function is called when we are trying to write (key, new_value) to
295    /// the tree and have traversed the existing tree using some prefix of
296    /// the key. We should have reached the bottom of the existing tree, so
297    /// current_node cannot be an internal node. This function will
298    /// construct a subtree using current_node, the new key-value pair and
299    /// potentially the key-value pair in the proof.
300    fn construct_subtree_at_bottom(
301        current_subtree: &SubTree<V>, key: HashValue, new_value: V,
302        remaining_bits: HashValueBitIterator, proof_reader: &impl ProofRead<V>,
303    ) -> Result<SubTree<V>, UpdateError> {
304        match current_subtree {
305            SubTree::Empty => {
306                // When we reach an empty node, we just place the leaf node at
307                // the same position to replace the empty node.
308                Ok(SubTree::new_leaf_with_value(key, new_value))
309            }
310            SubTree::NonEmpty { root, .. } => {
311                match root.get_node_if_in_mem() {
312                    Some(node) => match node.borrow() {
313                        Node::Internal(_) => {
314                            unreachable!("Reached an internal node at the bottom of the tree.");
315                        }
316                        Node::Leaf(leaf_node) => {
317                            Ok(Self::construct_subtree_with_new_leaf(
318                                key,
319                                new_value,
320                                current_subtree.weak(),
321                                leaf_node.key,
322                                HashValue::LENGTH_IN_BITS
323                                    .checked_sub(remaining_bits.len())
324                                    .expect("shouldn't overflow"),
325                            ))
326                        }
327                    },
328                    None => {
329                        // When the search reaches an unknown subtree, we need
330                        // proof to give us more
331                        // information about this part of the tree.
332                        let proof = proof_reader
333                            .get_proof(key)
334                            .ok_or(UpdateError::MissingProof)?;
335
336                        // Here the in-memory tree is identical to the tree in
337                        // storage (we have only the
338                        // root hash of this subtree in memory). So we need to
339                        // take into account the leaf in
340                        // the proof.
341                        let new_subtree = match proof.leaf() {
342                            Some(existing_leaf) => {
343                                Self::construct_subtree_with_new_leaf(
344                                    key,
345                                    new_value,
346                                    SubTree::new_leaf_with_value_hash(
347                                        existing_leaf.key(),
348                                        existing_leaf.value_hash(),
349                                    ),
350                                    existing_leaf.key(),
351                                    proof.siblings().len(),
352                                )
353                            }
354                            None => {
355                                SubTree::new_leaf_with_value(key, new_value)
356                            }
357                        };
358
359                        let num_remaining_bits = remaining_bits.len();
360                        let proof_length = proof.siblings().len();
361                        Ok(Self::construct_subtree(
362                            remaining_bits.rev().skip(
363                                HashValue::LENGTH_IN_BITS
364                                    .checked_sub(proof_length)
365                                    .expect("shouldn't overflow"),
366                            ),
367                            proof
368                                .siblings()
369                                .iter()
370                                .take(
371                                    num_remaining_bits
372                                        .checked_add(proof_length)
373                                        .expect("shouldn't overflow")
374                                        .checked_sub(HashValue::LENGTH_IN_BITS)
375                                        .expect("shouldn't overflow"),
376                                )
377                                .map(|sibling_hash| {
378                                    if *sibling_hash
379                                        != *SPARSE_MERKLE_PLACEHOLDER_HASH
380                                    {
381                                        SubTree::new_unknown(*sibling_hash)
382                                    } else {
383                                        SubTree::new_empty()
384                                    }
385                                }),
386                            new_subtree,
387                        ))
388                    }
389                }
390            }
391        }
392    }
393
394    /// Given key, new value, existing leaf and the distance from root to the
395    /// existing leaf, constructs a new subtree that has either the new leaf
396    /// or both nodes, depending on whether the key equals the existing
397    /// leaf's key.
398    ///
399    /// 1. If the key equals the existing leaf's key, we simply need to update
400    /// the leaf to the new    value and return it. For example, in the
401    /// following case this function will return    `new_leaf`.
402    ///  ``` text
403    ///       o                    o
404    ///      / \                  / \
405    ///     o   o       =>       o   o
406    ///    / \                  / \
407    ///   o   existing_leaf    o   new_leaf
408    ///  ```
409    ///
410    /// 2. Otherwise, we need to construct an "extension" for the common prefix,
411    /// and at the end of    the extension a subtree for both keys. For
412    /// example, in the following case we assume the    existing leaf's key
413    /// starts with 010010 and key starts with 010011, and this function
414    ///    will return `x`.
415    /// ```text
416    ///        o                              o             common_prefix_len = 5
417    ///       / \                            / \            distance_from_root_to_existing_leaf = 2
418    ///      o   o                          o   o           extension_len = common_prefix_len - distance_from_root_to_existing_leaf = 3
419    ///     / \                            / \
420    ///    o   existing_leaf    =>        o   x                 _
421    ///                                      / \                ^
422    ///                                     o   placeholder     |
423    ///                                    / \                  |
424    ///                                   o   placeholder   extension
425    ///                                  / \                    |
426    ///                       placeholder   o                   -
427    ///                                    / \
428    ///                       existing_leaf   new_leaf
429    /// ```
430    fn construct_subtree_with_new_leaf(
431        key: HashValue, new_value: V, existing_leaf: SubTree<V>,
432        existing_leaf_key: HashValue,
433        distance_from_root_to_existing_leaf: usize,
434    ) -> SubTree<V> {
435        let new_leaf = SubTree::new_leaf_with_value(key, new_value);
436        if key == existing_leaf_key {
437            // This implies that `key` already existed and the proof is an
438            // inclusion proof.
439            return new_leaf;
440        }
441
442        // This implies that `key` did not exist and was just created. The proof
443        // is a non-inclusion proof. See above example for how
444        // extension_len is computed.
445        let common_prefix_len = key.common_prefix_bits_len(existing_leaf_key);
446        assert!(
447            common_prefix_len >= distance_from_root_to_existing_leaf,
448            "common_prefix_len: {}, distance_from_root_to_existing_leaf: {}",
449            common_prefix_len,
450            distance_from_root_to_existing_leaf,
451        );
452        let extension_len =
453            common_prefix_len - distance_from_root_to_existing_leaf;
454        Self::construct_subtree(
455            key.iter_bits()
456                .rev()
457                .skip(HashValue::LENGTH_IN_BITS - common_prefix_len - 1)
458                .take(extension_len + 1),
459            std::iter::once(existing_leaf).chain(
460                std::iter::repeat(SubTree::new_empty()).take(extension_len),
461            ),
462            new_leaf,
463        )
464    }
465
466    /// Constructs a subtree with a list of siblings and a leaf. For example, if
467    /// `bits` are [false, false, true] and `siblings` are [a, b, c], the
468    /// resulting subtree will look like:
469    ///
470    /// ```text
471    ///          x
472    ///         / \
473    ///        c   o
474    ///           / \
475    ///          o   b
476    ///         / \
477    ///     leaf   a
478    /// ```
479    /// and this function will return `x`. Both `bits` and `siblings` start from
480    /// the bottom.
481    fn construct_subtree(
482        bits: impl Iterator<Item = bool>,
483        siblings: impl Iterator<Item = SubTree<V>>, leaf: SubTree<V>,
484    ) -> SubTree<V> {
485        itertools::zip_eq(bits, siblings).fold(
486            leaf,
487            |previous_node, (bit, sibling)| {
488                if bit {
489                    SubTree::new_internal(sibling, previous_node)
490                } else {
491                    SubTree::new_internal(previous_node, sibling)
492                }
493            },
494        )
495    }
496
497    /// Queries a `key` in this `SparseMerkleTree`.
498    pub fn get(&self, key: HashValue) -> AccountStatus<V> {
499        let mut cur = self.root_weak();
500        let mut bits = key.iter_bits();
501
502        loop {
503            if let Some(node) = cur.get_node_if_in_mem() {
504                if let Node::Internal(internal_node) = node.borrow() {
505                    match bits.next() {
506                        Some(bit) => {
507                            cur = if bit {
508                                internal_node.right.weak()
509                            } else {
510                                internal_node.left.weak()
511                            };
512                            continue;
513                        }
514                        None => panic!(
515                            "Tree is deeper than {} levels.",
516                            HashValue::LENGTH_IN_BITS
517                        ),
518                    }
519                }
520            }
521            break;
522        }
523
524        let ret = match cur {
525            SubTree::Empty => AccountStatus::DoesNotExist,
526            SubTree::NonEmpty { root, .. } => match root.get_node_if_in_mem() {
527                None => AccountStatus::Unknown,
528                Some(node) => match node.borrow() {
529                    Node::Internal(_) => unreachable!(
530                        "There is an internal node at the bottom of the tree."
531                    ),
532                    Node::Leaf(leaf_node) => {
533                        if leaf_node.key == key {
534                            match &leaf_node.value {
535                                LeafValue::Value(value) => {
536                                    AccountStatus::ExistsInScratchPad(
537                                        value.clone(),
538                                    )
539                                }
540                                LeafValue::ValueHash(_) => {
541                                    AccountStatus::ExistsInDB
542                                }
543                            }
544                        } else {
545                            AccountStatus::DoesNotExist
546                        }
547                    }
548                },
549            },
550        };
551        ret
552    }
553
554    /// Returns the root hash of this tree.
555    pub fn root_hash(&self) -> HashValue { self.inner.root.load().hash() }
556
557    /// Mark that all the nodes created by this tree and its ancestors are
558    /// persisted in the DB.
559    pub fn prune(&self) { self.inner.prune() }
560}
561
562impl<V> Default for SparseMerkleTree<V>
563where V: Clone + CryptoHash
564{
565    fn default() -> Self {
566        SparseMerkleTree::new(*SPARSE_MERKLE_PLACEHOLDER_HASH)
567    }
568}
569
570/// A type that implements `ProofRead` can provide proof for keys in persistent
571/// storage.
572pub trait ProofRead<V> {
573    /// Gets verified proof for this key in persistent storage.
574    fn get_proof(&self, key: HashValue) -> Option<&SparseMerkleProof<V>>;
575}
576
577/// All errors `update` can possibly return.
578#[derive(Debug, Eq, PartialEq)]
579pub enum UpdateError {
580    /// The update intends to insert a key that does not exist in the tree, so
581    /// the operation needs proof to get more information about the tree,
582    /// but no proof is provided.
583    MissingProof,
584}