diem_proptest_helpers/repeat_vec.rs
1// Copyright (c) The Diem Core Contributors
2// SPDX-License-Identifier: Apache-2.0
3
4// Copyright 2021 Conflux Foundation. All rights reserved.
5// Conflux is free software and distributed under GNU General Public License.
6// See http://www.gnu.org/licenses/
7
8use crate::pick_slice_idxs;
9use proptest::sample::Index;
10use std::iter;
11
12/// An efficient representation of a vector with repeated elements inserted.
13///
14/// Internally, this data structure stores one copy of each inserted element,
15/// along with data about how many times each element is repeated.
16///
17/// This data structure does not do any sort of deduplication, so it isn't any
18/// sort of set (or multiset).
19///
20/// This is useful for presenting a large logical vector for picking `proptest`
21/// indexes from.
22///
23/// # Examples
24///
25/// ```
26/// use diem_proptest_helpers::RepeatVec;
27///
28/// let mut repeat_vec = RepeatVec::new();
29/// repeat_vec.extend("a", 10); // logically, insert "a" 10 times
30/// repeat_vec.extend("b", 20); // logically, insert "b" 20 times
31/// assert_eq!(repeat_vec.get(0), Some((&"a", 0))); // returns the "a" at logical position 0
32/// assert_eq!(repeat_vec.get(5), Some((&"a", 5))); // returns the "a" at logical position 5
33/// assert_eq!(repeat_vec.get(10), Some((&"b", 0))); // returns the "b" (offset 0) at logical position 10
34/// assert_eq!(repeat_vec.get(20), Some((&"b", 10))); // returns the "b" (offset 10) at logical position 20
35/// assert_eq!(repeat_vec.get(30), None); // past the end of the logical array
36/// ```
37///
38/// The data structure doesn't care about whether the inserted items are equal
39/// or not.
40///
41/// ```
42/// use diem_proptest_helpers::RepeatVec;
43///
44/// let mut repeat_vec = RepeatVec::new();
45/// repeat_vec.extend("a", 10); // logically, insert "a" 10 times
46/// repeat_vec.extend("a", 20); // logically, insert "a" 20 times
47/// assert_eq!(repeat_vec.get(0), Some((&"a", 0)));
48/// assert_eq!(repeat_vec.get(5), Some((&"a", 5)));
49/// assert_eq!(repeat_vec.get(10), Some((&"a", 0))); // This refers to the second "a".
50/// ```
51#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
52pub struct RepeatVec<T> {
53 // The first element of each tuple is the starting position for this item.
54 // An invariant is that this doesn't have any zero-length elements.
55 items: Vec<(usize, T)>,
56 len: usize,
57}
58
59impl<T> RepeatVec<T> {
60 /// Creates a new, empty `RepeatVec`.
61 pub fn new() -> Self {
62 Self {
63 items: vec![],
64 len: 0,
65 }
66 }
67
68 /// Creates a new, empty `RepeatVec` with the specified capacity to store
69 /// physical elements.
70 pub fn with_capacity(capacity: usize) -> Self {
71 Self {
72 items: Vec::with_capacity(capacity),
73 len: 0,
74 }
75 }
76
77 /// Returns the *logical* number of elements in this `RepeatVec`.
78 #[inline]
79 pub fn len(&self) -> usize { self.len }
80
81 /// Returns `true` if this `RepeatVec` has no *logical* elements.
82 ///
83 /// # Examples
84 ///
85 /// ```
86 /// use diem_proptest_helpers::RepeatVec;
87 ///
88 /// let mut repeat_vec = RepeatVec::new();
89 ///
90 /// // There are no elements in this RepeatVec.
91 /// assert!(repeat_vec.is_empty());
92 ///
93 /// // Adding 0 logical copies of an element still means it's empty.
94 /// repeat_vec.extend("a", 0);
95 /// assert!(repeat_vec.is_empty());
96 ///
97 /// // Adding non-zero logical copies makes this vector not empty.
98 /// repeat_vec.extend("b", 1);
99 /// assert!(!repeat_vec.is_empty());
100 /// ```
101 #[inline]
102 pub fn is_empty(&self) -> bool { self.len == 0 }
103
104 /// Extends this `RepeatVec` by logically adding `size` copies of `item` to
105 /// the end of it.
106 pub fn extend(&mut self, item: T, size: usize) {
107 // Skip over zero-length elements to maintain the invariant on items.
108 if size > 0 {
109 self.items.push((self.len, item));
110 self.len += size;
111 }
112 }
113
114 /// Removes the item specified by the given *logical* index, shifting all
115 /// elements after it to the left by updating start positions.
116 ///
117 /// Out of bounds indexes have no effect.
118 pub fn remove(&mut self, index: usize) {
119 self.remove_all(iter::once(index))
120 }
121
122 /// Removes the items specified by the given *logical* indexes, shifting all
123 /// elements after them to the left by updating start positions.
124 ///
125 /// Ignores any out of bounds indexes.
126 pub fn remove_all(
127 &mut self, logical_indexes: impl IntoIterator<Item = usize>,
128 ) {
129 let mut logical_indexes: Vec<_> = logical_indexes.into_iter().collect();
130 logical_indexes.sort_unstable();
131 logical_indexes.dedup();
132 self.remove_all_impl(logical_indexes)
133 }
134
135 fn remove_all_impl(&mut self, logical_indexes: Vec<usize>) {
136 // # Notes
137 //
138 // * This looks pretty long and complicated, mostly because the logic is
139 // complex enough to require manual loops and iteration.
140 // * This is unavoidably linear time in the number of physical elements.
141 // One way to make the constant factors smaller would be to implement
142 // a ChunkedRepeatVec<T>, which can be done by wrapping a
143 // RepeatVec<RepeatVec<T>> plus some glue logic. The idea would be
144 // similar to skip-lists. 2 levels should be enough, but 3 or more
145 // would work as well.
146
147 // Separate function to minimize the amount of monomorphized code.
148 let first = match logical_indexes.first() {
149 Some(first) => *first,
150 None => {
151 // No indexes to remove, nothing to do.
152 return;
153 }
154 };
155 if first >= self.len() {
156 // First index is out of bounds, nothing to do.
157 return;
158 }
159
160 let first_idx = match self.binary_search(first) {
161 Ok(exact_idx) => {
162 // Logical copy 0 of the element at this position.
163 exact_idx
164 }
165 Err(start_idx) => {
166 // This is the physical index after the logical item. Start
167 // reasoning from the previous index to match
168 // the Ok branch above.
169 start_idx - 1
170 }
171 };
172
173 // This serves two purposes -- it represents the number of elements to
174 // decrease by and the current position in indexes.
175 let mut decrease = 0;
176
177 let new_items = {
178 let mut items = self.items.drain(first_idx..).peekable();
179 let mut new_items = vec![];
180
181 while let Some((current_logical_idx_old, current_elem)) =
182 items.next()
183 {
184 let current_logical_idx_new =
185 current_logical_idx_old - decrease;
186
187 let next_logical_idx_old =
188 items.peek().map_or(self.len, |&(idx, _)| idx);
189
190 // Remove all indexes until the next logical index or
191 // sorted_indexes runs out.
192 while let Some(remove_idx) = logical_indexes.get(decrease) {
193 if *remove_idx < next_logical_idx_old {
194 decrease += 1;
195 } else {
196 break;
197 }
198 }
199
200 let next_logical_idx_new = next_logical_idx_old - decrease;
201 assert!(
202 next_logical_idx_new >= current_logical_idx_new,
203 "too many items removed from next"
204 );
205
206 // Drop zero-length items to maintain invariant.
207 if next_logical_idx_new > current_logical_idx_new {
208 new_items.push((current_logical_idx_new, current_elem));
209 }
210 }
211 new_items
212 };
213
214 self.items.extend(new_items);
215 self.len -= decrease;
216 }
217
218 /// Returns the item at location `at`. The return value is a reference to
219 /// the stored item, plus the offset from the start (logically, which
220 /// copy of the item is being returned).
221 pub fn get(&self, at: usize) -> Option<(&T, usize)> {
222 if at >= self.len {
223 return None;
224 }
225 match self.binary_search(at) {
226 Ok(exact_idx) => Some((&self.items[exact_idx].1, 0)),
227 Err(start_idx) => {
228 // start_idx can never be 0 because usize starts from 0 and
229 // items[0].0 is always 0. So start_idx is
230 // always at least 1.
231 let start_val = &self.items[start_idx - 1];
232 let offset = at - start_val.0;
233 Some((&start_val.1, offset))
234 }
235 }
236 }
237
238 /// Picks out indexes uniformly randomly from this `RepeatVec`, using the
239 /// provided [`Index`](proptest::sample::Index) instances as sources of
240 /// randomness.
241 pub fn pick_uniform_indexes(
242 &self, indexes: &[impl AsRef<Index>],
243 ) -> Vec<usize> {
244 pick_slice_idxs(self.len(), indexes)
245 }
246
247 /// Picks out elements uniformly randomly from this `RepeatVec`, using the
248 /// provided [`Index`](proptest::sample::Index) instances as sources of
249 /// randomness.
250 pub fn pick_uniform(
251 &self, indexes: &[impl AsRef<Index>],
252 ) -> Vec<(&T, usize)> {
253 pick_slice_idxs(self.len(), indexes)
254 .into_iter()
255 .map(|idx| {
256 self.get(idx)
257 .expect("indexes picked should always be in range")
258 })
259 .collect()
260 }
261
262 #[inline]
263 fn binary_search(&self, at: usize) -> Result<usize, usize> {
264 self.items.binary_search_by_key(&at, |(start, _)| *start)
265 }
266
267 /// Check and assert the internal invariants for this RepeatVec.
268 #[cfg(test)]
269 pub(crate) fn assert_invariants(&self) {
270 for window in self.items.windows(2) {
271 let (idx1, idx2) = match window {
272 [(idx1, _), (idx2, _)] => (*idx1, *idx2),
273 _ => panic!("wrong window size"),
274 };
275 assert!(idx1 < idx2, "no zero-length elements");
276 }
277 match self.items.last() {
278 Some(&(idx, _)) => {
279 assert!(
280 idx < self.len,
281 "length must be greater than last element's start"
282 );
283 }
284 None => {
285 assert_eq!(self.len, 0, "empty RepeatVec");
286 }
287 }
288 }
289}
290
291// Note that RepeatVec cannot implement `std::ops::Index<usize>` because the
292// return type of Index has to be a reference (in this case it would be &(T,
293// usize)). But RepeatVec computes the result of get() (as (&T, usize)) instead
294// of merely returning a reference. This is a subtle but important point.