1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
// Copyright (c) The Diem Core Contributors
// SPDX-License-Identifier: Apache-2.0

// Copyright 2021 Conflux Foundation. All rights reserved.
// Conflux is free software and distributed under GNU General Public License.
// See http://www.gnu.org/licenses/

use crate::pick_slice_idxs;
use proptest::sample::Index;
use std::iter;

/// An efficient representation of a vector with repeated elements inserted.
///
/// Internally, this data structure stores one copy of each inserted element,
/// along with data about how many times each element is repeated.
///
/// This data structure does not do any sort of deduplication, so it isn't any
/// sort of set (or multiset).
///
/// This is useful for presenting a large logical vector for picking `proptest`
/// indexes from.
///
/// # Examples
///
/// ```
/// use diem_proptest_helpers::RepeatVec;
///
/// let mut repeat_vec = RepeatVec::new();
/// repeat_vec.extend("a", 10); // logically, insert "a" 10 times
/// repeat_vec.extend("b", 20); // logically, insert "b" 20 times
/// assert_eq!(repeat_vec.get(0), Some((&"a", 0))); // returns the "a" at logical position 0
/// assert_eq!(repeat_vec.get(5), Some((&"a", 5))); // returns the "a" at logical position 5
/// assert_eq!(repeat_vec.get(10), Some((&"b", 0))); // returns the "b" (offset 0) at logical position 10
/// assert_eq!(repeat_vec.get(20), Some((&"b", 10))); // returns the "b" (offset 10) at logical position 20
/// assert_eq!(repeat_vec.get(30), None); // past the end of the logical array
/// ```
///
/// The data structure doesn't care about whether the inserted items are equal
/// or not.
///
/// ```
/// use diem_proptest_helpers::RepeatVec;
///
/// let mut repeat_vec = RepeatVec::new();
/// repeat_vec.extend("a", 10); // logically, insert "a" 10 times
/// repeat_vec.extend("a", 20); // logically, insert "a" 20 times
/// assert_eq!(repeat_vec.get(0), Some((&"a", 0)));
/// assert_eq!(repeat_vec.get(5), Some((&"a", 5)));
/// assert_eq!(repeat_vec.get(10), Some((&"a", 0))); // This refers to the second "a".
/// ```
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
pub struct RepeatVec<T> {
    // The first element of each tuple is the starting position for this item.
    // An invariant is that this doesn't have any zero-length elements.
    items: Vec<(usize, T)>,
    len: usize,
}

impl<T> RepeatVec<T> {
    /// Creates a new, empty `RepeatVec`.
    pub fn new() -> Self {
        Self {
            items: vec![],
            len: 0,
        }
    }

    /// Creates a new, empty `RepeatVec` with the specified capacity to store
    /// physical elements.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            items: Vec::with_capacity(capacity),
            len: 0,
        }
    }

    /// Returns the *logical* number of elements in this `RepeatVec`.
    #[inline]
    pub fn len(&self) -> usize { self.len }

    /// Returns `true` if this `RepeatVec` has no *logical* elements.
    ///
    /// # Examples
    ///
    /// ```
    /// use diem_proptest_helpers::RepeatVec;
    ///
    /// let mut repeat_vec = RepeatVec::new();
    ///
    /// // There are no elements in this RepeatVec.
    /// assert!(repeat_vec.is_empty());
    ///
    /// // Adding 0 logical copies of an element still means it's empty.
    /// repeat_vec.extend("a", 0);
    /// assert!(repeat_vec.is_empty());
    ///
    /// // Adding non-zero logical copies makes this vector not empty.
    /// repeat_vec.extend("b", 1);
    /// assert!(!repeat_vec.is_empty());
    /// ```
    #[inline]
    pub fn is_empty(&self) -> bool { self.len == 0 }

    /// Extends this `RepeatVec` by logically adding `size` copies of `item` to
    /// the end of it.
    pub fn extend(&mut self, item: T, size: usize) {
        // Skip over zero-length elements to maintain the invariant on items.
        if size > 0 {
            self.items.push((self.len, item));
            self.len += size;
        }
    }

    /// Removes the item specified by the given *logical* index, shifting all
    /// elements after it to the left by updating start positions.
    ///
    /// Out of bounds indexes have no effect.
    pub fn remove(&mut self, index: usize) {
        self.remove_all(iter::once(index))
    }

    /// Removes the items specified by the given *logical* indexes, shifting all
    /// elements after them to the left by updating start positions.
    ///
    /// Ignores any out of bounds indexes.
    pub fn remove_all(
        &mut self, logical_indexes: impl IntoIterator<Item = usize>,
    ) {
        let mut logical_indexes: Vec<_> = logical_indexes.into_iter().collect();
        logical_indexes.sort_unstable();
        logical_indexes.dedup();
        self.remove_all_impl(logical_indexes)
    }

    fn remove_all_impl(&mut self, logical_indexes: Vec<usize>) {
        // # Notes
        //
        // * This looks pretty long and complicated, mostly because the logic is
        //   complex enough to require manual loops and iteration.
        // * This is unavoidably linear time in the number of physical elements.
        //   One way to make the constant factors smaller would be to implement
        //   a ChunkedRepeatVec<T>, which can be done by wrapping a
        //   RepeatVec<RepeatVec<T>> plus some glue logic. The idea would be
        //   similar to skip-lists. 2 levels should be enough, but 3 or more
        //   would work as well.

        // Separate function to minimize the amount of monomorphized code.
        let first = match logical_indexes.first() {
            Some(first) => *first,
            None => {
                // No indexes to remove, nothing to do.
                return;
            }
        };
        if first >= self.len() {
            // First index is out of bounds, nothing to do.
            return;
        }

        let first_idx = match self.binary_search(first) {
            Ok(exact_idx) => {
                // Logical copy 0 of the element at this position.
                exact_idx
            }
            Err(start_idx) => {
                // This is the physical index after the logical item. Start
                // reasoning from the previous index to match
                // the Ok branch above.
                start_idx - 1
            }
        };

        // This serves two purposes -- it represents the number of elements to
        // decrease by and the current position in indexes.
        let mut decrease = 0;

        let new_items = {
            let mut items = self.items.drain(first_idx..).peekable();
            let mut new_items = vec![];

            while let Some((current_logical_idx_old, current_elem)) =
                items.next()
            {
                let current_logical_idx_new =
                    current_logical_idx_old - decrease;

                let next_logical_idx_old =
                    items.peek().map_or(self.len, |&(idx, _)| idx);

                // Remove all indexes until the next logical index or
                // sorted_indexes runs out.
                while let Some(remove_idx) = logical_indexes.get(decrease) {
                    if *remove_idx < next_logical_idx_old {
                        decrease += 1;
                    } else {
                        break;
                    }
                }

                let next_logical_idx_new = next_logical_idx_old - decrease;
                assert!(
                    next_logical_idx_new >= current_logical_idx_new,
                    "too many items removed from next"
                );

                // Drop zero-length items to maintain invariant.
                if next_logical_idx_new > current_logical_idx_new {
                    new_items.push((current_logical_idx_new, current_elem));
                }
            }
            new_items
        };

        self.items.extend(new_items);
        self.len -= decrease;
    }

    /// Returns the item at location `at`. The return value is a reference to
    /// the stored item, plus the offset from the start (logically, which
    /// copy of the item is being returned).
    pub fn get(&self, at: usize) -> Option<(&T, usize)> {
        if at >= self.len {
            return None;
        }
        match self.binary_search(at) {
            Ok(exact_idx) => Some((&self.items[exact_idx].1, 0)),
            Err(start_idx) => {
                // start_idx can never be 0 because usize starts from 0 and
                // items[0].0 is always 0. So start_idx is
                // always at least 1.
                let start_val = &self.items[start_idx - 1];
                let offset = at - start_val.0;
                Some((&start_val.1, offset))
            }
        }
    }

    /// Picks out indexes uniformly randomly from this `RepeatVec`, using the
    /// provided [`Index`](proptest::sample::Index) instances as sources of
    /// randomness.
    pub fn pick_uniform_indexes(
        &self, indexes: &[impl AsRef<Index>],
    ) -> Vec<usize> {
        pick_slice_idxs(self.len(), indexes)
    }

    /// Picks out elements uniformly randomly from this `RepeatVec`, using the
    /// provided [`Index`](proptest::sample::Index) instances as sources of
    /// randomness.
    pub fn pick_uniform(
        &self, indexes: &[impl AsRef<Index>],
    ) -> Vec<(&T, usize)> {
        pick_slice_idxs(self.len(), indexes)
            .into_iter()
            .map(|idx| {
                self.get(idx)
                    .expect("indexes picked should always be in range")
            })
            .collect()
    }

    #[inline]
    fn binary_search(&self, at: usize) -> Result<usize, usize> {
        self.items.binary_search_by_key(&at, |(start, _)| *start)
    }

    /// Check and assert the internal invariants for this RepeatVec.
    #[cfg(test)]
    pub(crate) fn assert_invariants(&self) {
        for window in self.items.windows(2) {
            let (idx1, idx2) = match window {
                [(idx1, _), (idx2, _)] => (*idx1, *idx2),
                _ => panic!("wrong window size"),
            };
            assert!(idx1 < idx2, "no zero-length elements");
        }
        match self.items.last() {
            Some(&(idx, _)) => {
                assert!(
                    idx < self.len,
                    "length must be greater than last element's start"
                );
            }
            None => {
                assert_eq!(self.len, 0, "empty RepeatVec");
            }
        }
    }
}

// Note that RepeatVec cannot implement `std::ops::Index<usize>` because the
// return type of Index has to be a reference (in this case it would be &(T,
// usize)). But RepeatVec computes the result of get() (as (&T, usize)) instead
// of merely returning a reference. This is a subtle but important point.