diem_proptest_helpers/
repeat_vec.rs

1// Copyright (c) The Diem Core Contributors
2// SPDX-License-Identifier: Apache-2.0
3
4// Copyright 2021 Conflux Foundation. All rights reserved.
5// Conflux is free software and distributed under GNU General Public License.
6// See http://www.gnu.org/licenses/
7
8use crate::pick_slice_idxs;
9use proptest::sample::Index;
10use std::iter;
11
12/// An efficient representation of a vector with repeated elements inserted.
13///
14/// Internally, this data structure stores one copy of each inserted element,
15/// along with data about how many times each element is repeated.
16///
17/// This data structure does not do any sort of deduplication, so it isn't any
18/// sort of set (or multiset).
19///
20/// This is useful for presenting a large logical vector for picking `proptest`
21/// indexes from.
22///
23/// # Examples
24///
25/// ```
26/// use diem_proptest_helpers::RepeatVec;
27///
28/// let mut repeat_vec = RepeatVec::new();
29/// repeat_vec.extend("a", 10); // logically, insert "a" 10 times
30/// repeat_vec.extend("b", 20); // logically, insert "b" 20 times
31/// assert_eq!(repeat_vec.get(0), Some((&"a", 0))); // returns the "a" at logical position 0
32/// assert_eq!(repeat_vec.get(5), Some((&"a", 5))); // returns the "a" at logical position 5
33/// assert_eq!(repeat_vec.get(10), Some((&"b", 0))); // returns the "b" (offset 0) at logical position 10
34/// assert_eq!(repeat_vec.get(20), Some((&"b", 10))); // returns the "b" (offset 10) at logical position 20
35/// assert_eq!(repeat_vec.get(30), None); // past the end of the logical array
36/// ```
37///
38/// The data structure doesn't care about whether the inserted items are equal
39/// or not.
40///
41/// ```
42/// use diem_proptest_helpers::RepeatVec;
43///
44/// let mut repeat_vec = RepeatVec::new();
45/// repeat_vec.extend("a", 10); // logically, insert "a" 10 times
46/// repeat_vec.extend("a", 20); // logically, insert "a" 20 times
47/// assert_eq!(repeat_vec.get(0), Some((&"a", 0)));
48/// assert_eq!(repeat_vec.get(5), Some((&"a", 5)));
49/// assert_eq!(repeat_vec.get(10), Some((&"a", 0))); // This refers to the second "a".
50/// ```
51#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
52pub struct RepeatVec<T> {
53    // The first element of each tuple is the starting position for this item.
54    // An invariant is that this doesn't have any zero-length elements.
55    items: Vec<(usize, T)>,
56    len: usize,
57}
58
59impl<T> RepeatVec<T> {
60    /// Creates a new, empty `RepeatVec`.
61    pub fn new() -> Self {
62        Self {
63            items: vec![],
64            len: 0,
65        }
66    }
67
68    /// Creates a new, empty `RepeatVec` with the specified capacity to store
69    /// physical elements.
70    pub fn with_capacity(capacity: usize) -> Self {
71        Self {
72            items: Vec::with_capacity(capacity),
73            len: 0,
74        }
75    }
76
77    /// Returns the *logical* number of elements in this `RepeatVec`.
78    #[inline]
79    pub fn len(&self) -> usize { self.len }
80
81    /// Returns `true` if this `RepeatVec` has no *logical* elements.
82    ///
83    /// # Examples
84    ///
85    /// ```
86    /// use diem_proptest_helpers::RepeatVec;
87    ///
88    /// let mut repeat_vec = RepeatVec::new();
89    ///
90    /// // There are no elements in this RepeatVec.
91    /// assert!(repeat_vec.is_empty());
92    ///
93    /// // Adding 0 logical copies of an element still means it's empty.
94    /// repeat_vec.extend("a", 0);
95    /// assert!(repeat_vec.is_empty());
96    ///
97    /// // Adding non-zero logical copies makes this vector not empty.
98    /// repeat_vec.extend("b", 1);
99    /// assert!(!repeat_vec.is_empty());
100    /// ```
101    #[inline]
102    pub fn is_empty(&self) -> bool { self.len == 0 }
103
104    /// Extends this `RepeatVec` by logically adding `size` copies of `item` to
105    /// the end of it.
106    pub fn extend(&mut self, item: T, size: usize) {
107        // Skip over zero-length elements to maintain the invariant on items.
108        if size > 0 {
109            self.items.push((self.len, item));
110            self.len += size;
111        }
112    }
113
114    /// Removes the item specified by the given *logical* index, shifting all
115    /// elements after it to the left by updating start positions.
116    ///
117    /// Out of bounds indexes have no effect.
118    pub fn remove(&mut self, index: usize) {
119        self.remove_all(iter::once(index))
120    }
121
122    /// Removes the items specified by the given *logical* indexes, shifting all
123    /// elements after them to the left by updating start positions.
124    ///
125    /// Ignores any out of bounds indexes.
126    pub fn remove_all(
127        &mut self, logical_indexes: impl IntoIterator<Item = usize>,
128    ) {
129        let mut logical_indexes: Vec<_> = logical_indexes.into_iter().collect();
130        logical_indexes.sort_unstable();
131        logical_indexes.dedup();
132        self.remove_all_impl(logical_indexes)
133    }
134
135    fn remove_all_impl(&mut self, logical_indexes: Vec<usize>) {
136        // # Notes
137        //
138        // * This looks pretty long and complicated, mostly because the logic is
139        //   complex enough to require manual loops and iteration.
140        // * This is unavoidably linear time in the number of physical elements.
141        //   One way to make the constant factors smaller would be to implement
142        //   a ChunkedRepeatVec<T>, which can be done by wrapping a
143        //   RepeatVec<RepeatVec<T>> plus some glue logic. The idea would be
144        //   similar to skip-lists. 2 levels should be enough, but 3 or more
145        //   would work as well.
146
147        // Separate function to minimize the amount of monomorphized code.
148        let first = match logical_indexes.first() {
149            Some(first) => *first,
150            None => {
151                // No indexes to remove, nothing to do.
152                return;
153            }
154        };
155        if first >= self.len() {
156            // First index is out of bounds, nothing to do.
157            return;
158        }
159
160        let first_idx = match self.binary_search(first) {
161            Ok(exact_idx) => {
162                // Logical copy 0 of the element at this position.
163                exact_idx
164            }
165            Err(start_idx) => {
166                // This is the physical index after the logical item. Start
167                // reasoning from the previous index to match
168                // the Ok branch above.
169                start_idx - 1
170            }
171        };
172
173        // This serves two purposes -- it represents the number of elements to
174        // decrease by and the current position in indexes.
175        let mut decrease = 0;
176
177        let new_items = {
178            let mut items = self.items.drain(first_idx..).peekable();
179            let mut new_items = vec![];
180
181            while let Some((current_logical_idx_old, current_elem)) =
182                items.next()
183            {
184                let current_logical_idx_new =
185                    current_logical_idx_old - decrease;
186
187                let next_logical_idx_old =
188                    items.peek().map_or(self.len, |&(idx, _)| idx);
189
190                // Remove all indexes until the next logical index or
191                // sorted_indexes runs out.
192                while let Some(remove_idx) = logical_indexes.get(decrease) {
193                    if *remove_idx < next_logical_idx_old {
194                        decrease += 1;
195                    } else {
196                        break;
197                    }
198                }
199
200                let next_logical_idx_new = next_logical_idx_old - decrease;
201                assert!(
202                    next_logical_idx_new >= current_logical_idx_new,
203                    "too many items removed from next"
204                );
205
206                // Drop zero-length items to maintain invariant.
207                if next_logical_idx_new > current_logical_idx_new {
208                    new_items.push((current_logical_idx_new, current_elem));
209                }
210            }
211            new_items
212        };
213
214        self.items.extend(new_items);
215        self.len -= decrease;
216    }
217
218    /// Returns the item at location `at`. The return value is a reference to
219    /// the stored item, plus the offset from the start (logically, which
220    /// copy of the item is being returned).
221    pub fn get(&self, at: usize) -> Option<(&T, usize)> {
222        if at >= self.len {
223            return None;
224        }
225        match self.binary_search(at) {
226            Ok(exact_idx) => Some((&self.items[exact_idx].1, 0)),
227            Err(start_idx) => {
228                // start_idx can never be 0 because usize starts from 0 and
229                // items[0].0 is always 0. So start_idx is
230                // always at least 1.
231                let start_val = &self.items[start_idx - 1];
232                let offset = at - start_val.0;
233                Some((&start_val.1, offset))
234            }
235        }
236    }
237
238    /// Picks out indexes uniformly randomly from this `RepeatVec`, using the
239    /// provided [`Index`](proptest::sample::Index) instances as sources of
240    /// randomness.
241    pub fn pick_uniform_indexes(
242        &self, indexes: &[impl AsRef<Index>],
243    ) -> Vec<usize> {
244        pick_slice_idxs(self.len(), indexes)
245    }
246
247    /// Picks out elements uniformly randomly from this `RepeatVec`, using the
248    /// provided [`Index`](proptest::sample::Index) instances as sources of
249    /// randomness.
250    pub fn pick_uniform(
251        &self, indexes: &[impl AsRef<Index>],
252    ) -> Vec<(&T, usize)> {
253        pick_slice_idxs(self.len(), indexes)
254            .into_iter()
255            .map(|idx| {
256                self.get(idx)
257                    .expect("indexes picked should always be in range")
258            })
259            .collect()
260    }
261
262    #[inline]
263    fn binary_search(&self, at: usize) -> Result<usize, usize> {
264        self.items.binary_search_by_key(&at, |(start, _)| *start)
265    }
266
267    /// Check and assert the internal invariants for this RepeatVec.
268    #[cfg(test)]
269    pub(crate) fn assert_invariants(&self) {
270        for window in self.items.windows(2) {
271            let (idx1, idx2) = match window {
272                [(idx1, _), (idx2, _)] => (*idx1, *idx2),
273                _ => panic!("wrong window size"),
274            };
275            assert!(idx1 < idx2, "no zero-length elements");
276        }
277        match self.items.last() {
278            Some(&(idx, _)) => {
279                assert!(
280                    idx < self.len,
281                    "length must be greater than last element's start"
282                );
283            }
284            None => {
285                assert_eq!(self.len, 0, "empty RepeatVec");
286            }
287        }
288    }
289}
290
291// Note that RepeatVec cannot implement `std::ops::Index<usize>` because the
292// return type of Index has to be a reference (in this case it would be &(T,
293// usize)). But RepeatVec computes the result of get() (as (&T, usize)) instead
294// of merely returning a reference. This is a subtle but important point.