// hibitset/iter/parallel.rs

1use rayon::iter::{
2    plumbing::{
3        bridge_unindexed, Folder, UnindexedConsumer, UnindexedProducer,
4    },
5    ParallelIterator,
6};
7
8use crate::{
9    iter::{BitIter, BitSetLike, Index, BITS, LAYERS},
10    util::average_ones,
11};
12
/// A `ParallelIterator` over a [`BitSetLike`] structure.
///
/// [`BitSetLike`]: ../../trait.BitSetLike.html
#[derive(Debug)]
pub struct BitParIter<T>(T, u8);

impl<T> BitParIter<T> {
    /// Creates a new `BitParIter`. You usually don't call this function
    /// but just [`.par_iter()`] on a bit set.
    ///
    /// Default layer split amount is 3.
    ///
    /// [`.par_iter()`]: ../../trait.BitSetLike.html#method.par_iter
    pub fn new(set: T) -> Self {
        // Three layer splits is the default, i.e. the finest-grained
        // splitting the producer supports.
        Self(set, 3)
    }

    /// Sets how many layers are split when forking.
    ///
    /// # Examples
    ///
    /// ```
    /// # use hibitset::{BitSet, BitSetLike};
    /// # use rayon::iter::ParallelIterator;
    /// # fn main() {
    /// let mut bitset = BitSet::new();
    /// bitset.par_iter().layers_split(2).count();
    /// # }
    /// ```
    ///
    /// The value should be in range [1, 3]
    ///
    /// | splits | largest smallest unit of work |
    /// |--------|-------------------------------|
    /// | 1      | usize_bits<sup>3</sup>        |
    /// | 2      | usize_bits<sup>2</sup>        |
    /// | 3      | usize_bits                    |
    pub fn layers_split(self, layers: u8) -> Self {
        // Only 1..=3 splits make sense: there are only three layers
        // below the top one to descend through.
        assert!(layers >= 1);
        assert!(layers <= 3);
        Self(self.0, layers)
    }
}
55
56impl<T> ParallelIterator for BitParIter<T>
57where T: BitSetLike + Send + Sync
58{
59    type Item = Index;
60
61    fn drive_unindexed<C>(self, consumer: C) -> C::Result
62    where C: UnindexedConsumer<Self::Item> {
63        bridge_unindexed(BitProducer((&self.0).iter(), self.1), consumer)
64    }
65}
66
/// Allows splitting and internally iterating through `BitSet`.
///
/// Usually used internally by `BitParIter`.
///
/// Field `0` is the sequential `BitIter` whose `masks` and `prefix`
/// arrays are carved up by `UnindexedProducer::split`; field `1` is the
/// number of layers that may be split when forking (set by
/// `BitParIter::layers_split`).
#[derive(Debug)]
pub struct BitProducer<'a, T: 'a + Send + Sync>(pub BitIter<&'a T>, pub u8);
72
impl<'a, T: 'a + Send + Sync> UnindexedProducer for BitProducer<'a, T>
where T: BitSetLike
{
    type Item = Index;

    /// How the splitting is done:
    ///
    /// 1) First the highest layer that has at least one set bit is searched.
    ///
    /// 2) If the layer that was found, has only one bit that's set, it's
    ///    cleared. After that the correct prefix for the cleared bit is figured
    ///    out and the descending is continued.
    ///
    /// 3) If the layer that was found, has more than one bit that's set, a mask
    ///    is created that splits it's set bits as close to half as possible.
    ///    After creating the mask the layer is masked by either the mask or
    ///    it's complement constructing two distinct producers which are then
    ///    returned.
    ///
    /// 4) If there isn't any layers that have more than one set bit, splitting
    ///    doesn't happen.
    ///
    /// The actual iteration is performed by the sequential iterator
    /// `BitIter` which internals are modified by this splitting
    ///  algorithm.
    ///
    /// This splitting strategy should split work evenly if the set bits
    /// are distributed close to uniformly random.
    /// As the strategy only looks one layer at the time, if there are subtrees
    /// that have lots of work and sibling subtrees that have little of work,
    /// then it will produce non-optimal splittings.
    fn split(mut self) -> (Self, Option<Self>) {
        let splits = self.1;
        let other = {
            // Tries to split at a single layer. Returns `Some(other)` when
            // the layer had >1 set bit and was forked in two; `None` when
            // the layer was empty (skip) or had exactly one set bit (the
            // iterator descends into it and the caller tries the next,
            // lower, layer).
            let mut handle_level = |level: usize| {
                if self.0.masks[level] == 0 {
                    // Skip the empty layers
                    None
                } else {
                    // Top levels prefix is zero because there is nothing before
                    // it
                    let level_prefix =
                        self.0.prefix.get(level).cloned().unwrap_or(0);
                    let first_bit = self.0.masks[level].trailing_zeros();
                    // `average_ones` picks a bit position that divides the
                    // set bits of the mask as close to half as possible;
                    // it yields nothing when only one bit is set.
                    average_ones(self.0.masks[level])
                        .and_then(|average_bit| {
                            // Bits strictly below `average_bit` stay with
                            // `self`; the rest go to `other`.
                            let mask = (1 << average_bit) - 1;
                            let mut other = BitProducer(
                                BitIter::new(
                                    self.0.set,
                                    [0; LAYERS],
                                    [0; LAYERS - 1],
                                ),
                                splits,
                            );
                            // The `other` is the more significant half of the
                            // mask
                            other.0.masks[level] = self.0.masks[level] & !mask;
                            other.0.prefix[level - 1] =
                                (level_prefix | average_bit as u32) << BITS;
                            // The upper portion of the prefix is maintained,
                            // because the `other`
                            // will iterate the same subtree as the `self` does
                            other.0.prefix[level..]
                                .copy_from_slice(&self.0.prefix[level..]);
                            // And the `self` is the less significant one
                            self.0.masks[level] &= mask;
                            self.0.prefix[level - 1] =
                                (level_prefix | first_bit) << BITS;
                            Some(other)
                        })
                        .or_else(|| {
                            // Because there is only one bit left we descend to
                            // it
                            let idx =
                                level_prefix as usize | first_bit as usize;
                            self.0.prefix[level - 1] = (idx as u32) << BITS;
                            // The level that is descended from doesn't have
                            // anything interesting
                            // so it can be skipped in the future.
                            self.0.masks[level] = 0;
                            self.0.masks[level - 1] =
                                self.0.set.get_from_layer(level - 1, idx);
                            None
                        })
                }
            };
            let top_layer = LAYERS - 1;
            // Try to fork at the top layer first, then walk down at most
            // `splits - 1` further layers until a fork succeeds (or the
            // permitted split depth is exhausted).
            let mut h = handle_level(top_layer);
            for i in 1..splits {
                h = h.or_else(|| handle_level(top_layer - i as usize));
            }
            h
        };
        (self, other)
    }

    fn fold_with<F>(self, folder: F) -> F
    where F: Folder<Self::Item> {
        // Sequentially drain whatever portion of the set this producer
        // still owns into the folder, using the wrapped `BitIter`.
        folder.consume_iter(self.0)
    }
}
175
#[cfg(test)]
mod test_bit_producer {
    use rayon::iter::plumbing::UnindexedProducer;

    use super::BitProducer;
    use crate::{BitSetLike, BITS};

    /// Exhaustively splits a producer over a set with two fully populated
    /// top-level subtrees and checks that the number of unsplittable leaf
    /// producers matches the expected count for `split_levels`.
    fn test_splitting(split_levels: u8) {
        // Recursively split `us`, descending `d` more layers.
        // `i` is how many more sibling splits this layer can still yield,
        // `trail` records the split path for assertion messages, and `c`
        // counts the leaves reached.
        fn visit<T>(
            mut us: BitProducer<T>, d: usize, i: usize, mut trail: String,
            c: &mut usize,
        ) where
            T: Send + Sync + BitSetLike,
        {
            if d == 0 {
                // A leaf must refuse to split any further.
                assert!(us.split().1.is_none(), "{}", trail);
                *c += 1;
            } else {
                // Each split peels off a `them` half; the remaining `us`
                // keeps shrinking until this layer is exhausted.
                for j in 1..(i + 1) {
                    let (new_us, them) = us.split();
                    us = new_us;
                    let them = them.expect(&trail);
                    let mut trail = trail.clone();
                    trail.push_str(&i.to_string());
                    visit(them, d, i - j, trail, c);
                }
                trail.push_str("u");
                visit(us, d - 1, BITS, trail, c);
            }
        }

        let usize_bits = ::std::mem::size_of::<usize>() * 8;

        // Fill enough consecutive indices that the top layer ends up with
        // exactly two set bits (hence the test names), with everything
        // below them saturated.
        let mut c = crate::BitSet::new();
        for i in 0..(usize_bits.pow(3) * 2) {
            assert!(!c.add(i as u32));
        }

        let us = BitProducer((&c).iter(), split_levels);
        let (us, them) = us.split();

        let mut count = 0;
        visit(
            us,
            split_levels as usize - 1,
            BITS,
            "u".to_owned(),
            &mut count,
        );
        visit(
            them.expect("Splitting top level"),
            split_levels as usize - 1,
            BITS,
            "t".to_owned(),
            &mut count,
        );
        // Each extra split level multiplies the leaf count by `usize_bits`;
        // the two top-level subtrees double it.
        assert_eq!(usize_bits.pow(split_levels as u32 - 1) * 2, count);
    }

    #[test]
    fn max_3_splitting_of_two_top_bits() { test_splitting(3); }

    #[test]
    fn max_2_splitting_of_two_top_bits() { test_splitting(2); }

    #[test]
    fn max_1_splitting_of_two_top_bits() { test_splitting(1); }
}