@@ -19,7 +19,7 @@ use crate::unicode;
1919//
2020// Some of the implementation complexity here is a result of me wanting to
2121// preserve the sequential representation without using additional memory.
22- // In some cases, we do use linear extra memory, but it is at most 2x and it
22+ // In many cases, we do use linear extra memory, but it is at most 2x and it
2323// is amortized. If we relaxed the memory requirements, this implementation
2424// could become much simpler. The extra memory is honestly probably OK, but
2525// character classes (especially of the Unicode variety) can become quite
@@ -81,45 +81,14 @@ impl<I: Interval> IntervalSet<I> {
8181
8282 /// Add a new interval to this set.
8383 pub fn push ( & mut self , interval : I ) {
84+ // TODO: This could be faster. e.g., Push the interval such that
85+ // it preserves canonicalization.
86+ self . ranges . push ( interval) ;
87+ self . canonicalize ( ) ;
8488 // We don't know whether the new interval added here is considered
8589 // case folded, so we conservatively assume that the entire set is
8690 // no longer case folded if it was previously.
8791 self . folded = false ;
88-
89- if self . ranges . is_empty ( ) {
90- self . ranges . push ( interval) ;
91- return ;
92- }
93-
94- // Find the first range that is not greater than the new interval.
95- // This is the first range that could possibly be unioned with the
96- // new interval.
97- let mut drain_end = self . ranges . len ( ) ;
98- while drain_end > 0
99- && self . ranges [ drain_end - 1 ] . lower ( ) > interval. upper ( )
100- && !self . ranges [ drain_end - 1 ] . is_contiguous ( & interval)
101- {
102- drain_end -= 1 ;
103- }
104-
105- // Try to union the new interval with old intervals backwards.
106- if drain_end > 0 && self . ranges [ drain_end - 1 ] . is_contiguous ( & interval)
107- {
108- self . ranges [ drain_end - 1 ] =
109- self . ranges [ drain_end - 1 ] . union ( & interval) . unwrap ( ) ;
110- for i in ( 0 ..drain_end - 1 ) . rev ( ) {
111- if let Some ( union) =
112- self . ranges [ drain_end - 1 ] . union ( & self . ranges [ i] )
113- {
114- self . ranges [ drain_end - 1 ] = union;
115- } else {
116- self . ranges . drain ( i + 1 ..drain_end - 1 ) ;
117- break ;
118- }
119- }
120- } else {
121- self . ranges . insert ( drain_end, interval) ;
122- }
12392 }
12493
12594 /// Return an iterator over all intervals in this set.
@@ -223,13 +192,34 @@ impl<I: Interval> IntervalSet<I> {
223192 // Folks seem to suggest interval or segment trees, but I'd like to
224193 // avoid the overhead (both runtime and conceptual) of that.
225194 //
195+ // The following is basically my Shitty First Draft. Therefore, in
196+ // order to grok it, you probably need to read each line carefully.
197+ // Simplifications are most welcome!
198+ //
226199 // Remember, we can assume the canonical format invariant here, which
227200 // says that all ranges are sorted, not overlapping and not adjacent in
228201 // each class.
229202 let drain_end = self . ranges . len ( ) ;
203+ let ( mut a, mut b) = ( 0 , 0 ) ;
204+ ' LOOP : while a < drain_end && b < other. ranges . len ( ) {
205+ // Basically, the easy cases are when neither range overlaps with
206+ // each other. If the `b` range is less than our current `a`
207+ // range, then we can skip it and move on.
208+ if other. ranges [ b] . upper ( ) < self . ranges [ a] . lower ( ) {
209+ b += 1 ;
210+ continue ;
211+ }
212+ // ... similarly for the `a` range. If it's less than the smallest
213+ // `b` range, then we can add it as-is.
214+ if self . ranges [ a] . upper ( ) < other. ranges [ b] . lower ( ) {
215+ let range = self . ranges [ a] ;
216+ self . ranges . push ( range) ;
217+ a += 1 ;
218+ continue ;
219+ }
220+ // Otherwise, we have overlapping ranges.
221+ assert ! ( !self . ranges[ a] . is_intersection_empty( & other. ranges[ b] ) ) ;
230222
231- let mut b = 0 ;
232- for a in 0 ..drain_end {
233223 // This part is tricky and was non-obvious to me without looking
234224 // at explicit examples (see the tests). The trickiness stems from
235225 // two things: 1) subtracting a range from another range could
@@ -241,34 +231,47 @@ impl<I: Interval> IntervalSet<I> {
241231 // For example, if our `a` range is `a-t` and our next three `b`
242232 // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply
243233 // subtraction three times before moving on to the next `a` range.
244- self . ranges . push ( self . ranges [ a] ) ;
245- // Only when `b` is not above `a`, `b` might apply to current
246- // `a` range.
234+ let mut range = self . ranges [ a] ;
247235 while b < other. ranges . len ( )
248- && other. ranges [ b] . lower ( ) <= self . ranges [ a ] . upper ( )
236+ && !range . is_intersection_empty ( & other. ranges [ b] )
249237 {
250- match self . ranges . pop ( ) . unwrap ( ) . difference ( & other. ranges [ b] ) {
251- ( Some ( range1) , None ) | ( None , Some ( range1) ) => {
252- self . ranges . push ( range1) ;
238+ let old_range = range;
239+ range = match range. difference ( & other. ranges [ b] ) {
240+ ( None , None ) => {
241+ // We lost the entire range, so move on to the next
242+ // without adding this one.
243+ a += 1 ;
244+ continue ' LOOP ;
253245 }
246+ ( Some ( range1) , None ) | ( None , Some ( range1) ) => range1,
254247 ( Some ( range1) , Some ( range2) ) => {
255248 self . ranges . push ( range1) ;
256- self . ranges . push ( range2) ;
249+ range2
257250 }
258- ( None , None ) => { }
251+ } ;
252+ // It's possible that the `b` range has more to contribute
253+ // here. In particular, if it is greater than the original
254+ // range, then it might impact the next `a` range *and* it
255+ // has impacted the current `a` range as much as possible,
256+ // so we can quit. We don't bump `b` so that the next `a`
257+ // range can apply it.
258+ if other. ranges [ b] . upper ( ) > old_range. upper ( ) {
259+ break ;
259260 }
260- // The next `b` range might apply to the current
261+ // Otherwise, the next `b` range might apply to the current
261262 // `a` range.
262263 b += 1 ;
263264 }
264- // It's possible that the last `b` range has more to
265- // contribute to the next `a`. We don't bump the last
266- // `b` so that the next `a` range can apply it.
267- b = b. saturating_sub ( 1 ) ;
265+ self . ranges . push ( range) ;
266+ a += 1 ;
267+ }
268+ while a < drain_end {
269+ let range = self . ranges [ a] ;
270+ self . ranges . push ( range) ;
271+ a += 1 ;
268272 }
269-
270273 self . ranges . drain ( ..drain_end) ;
271- self . folded = self . ranges . is_empty ( ) || ( self . folded && other. folded ) ;
274+ self . folded = self . folded && other. folded ;
272275 }
273276
274277 /// Compute the symmetric difference of the two sets, in place.
@@ -279,83 +282,11 @@ impl<I: Interval> IntervalSet<I> {
279282 /// set. That is, the set will contain all elements in either set,
280283 /// but will not contain any elements that are in both sets.
281284 pub fn symmetric_difference ( & mut self , other : & IntervalSet < I > ) {
282- if self . ranges . is_empty ( ) {
283- self . ranges . extend ( & other. ranges ) ;
284- self . folded = other. folded ;
285- return ;
286- }
287- if other. ranges . is_empty ( ) {
288- return ;
289- }
290-
291- // There should be a way to do this in-place with constant memory,
292- // but I couldn't figure out a simple way to do it. So just append
293- // the symmetric difference to the end of this range, and then drain
294- // it before we're done.
295- let drain_end = self . ranges . len ( ) ;
296- let mut b = 0 ;
297- let mut b_range = Some ( other. ranges [ b] ) ;
298- for a in 0 ..drain_end {
299- self . ranges . push ( self . ranges [ a] ) ;
300- while b_range
301- . map_or ( false , |r| r. lower ( ) <= self . ranges [ a] . upper ( ) )
302- {
303- let ( range1, range2) = match self
304- . ranges
305- . pop ( )
306- . unwrap ( )
307- . symmetric_difference ( & b_range. as_ref ( ) . unwrap ( ) )
308- {
309- ( Some ( range1) , None ) | ( None , Some ( range1) ) => {
310- ( Some ( range1) , None )
311- }
312- ( Some ( range1) , Some ( range2) ) => {
313- ( Some ( range1) , Some ( range2) )
314- }
315- ( None , None ) => ( None , None ) ,
316- } ;
317- if let Some ( range) = range1 {
318- if self . ranges . len ( ) > drain_end
319- && self . ranges . last ( ) . unwrap ( ) . is_contiguous ( & range)
320- {
321- self . ranges
322- . last_mut ( )
323- . map ( |last| * last = last. union ( & range) . unwrap ( ) ) ;
324- } else {
325- self . ranges . push ( range) ;
326- }
327- }
328- if let Some ( range) = range2 {
329- self . ranges . push ( range) ;
330- }
331-
332- b_range = if self . ranges . len ( ) > drain_end
333- && self . ranges . last ( ) . unwrap ( ) . upper ( )
334- > self . ranges [ a] . upper ( )
335- {
336- Some ( * self . ranges . last ( ) . unwrap ( ) )
337- } else {
338- b += 1 ;
339- other. ranges . get ( b) . cloned ( )
340- } ;
341- }
342- }
343- while let Some ( range) = b_range {
344- if self . ranges . len ( ) > drain_end
345- && self . ranges . last ( ) . unwrap ( ) . is_contiguous ( & range)
346- {
347- self . ranges
348- . last_mut ( )
349- . map ( |last| * last = last. union ( & range) . unwrap ( ) ) ;
350- } else {
351- self . ranges . push ( range) ;
352- }
353- b += 1 ;
354- b_range = other. ranges . get ( b) . cloned ( ) ;
355- }
356-
357- self . ranges . drain ( ..drain_end) ;
358- self . folded = self . ranges . is_empty ( ) || ( self . folded && other. folded ) ;
285+ // TODO(burntsushi): Fix this so that it amortizes allocation.
286+ let mut intersection = self . clone ( ) ;
287+ intersection. intersect ( other) ;
288+ self . union ( other) ;
289+ self . difference ( & intersection) ;
359290 }
360291
361292 /// Negate this interval set.
@@ -371,44 +302,28 @@ impl<I: Interval> IntervalSet<I> {
371302 return ;
372303 }
373304
305+ // There should be a way to do this in-place with constant memory,
306+ // but I couldn't figure out a simple way to do it. So just append
307+ // the negation to the end of this range, and then drain it before
308+ // we're done.
309+ let drain_end = self . ranges . len ( ) ;
310+
374311 // We do checked arithmetic below because of the canonical ordering
375312 // invariant.
376313 if self . ranges [ 0 ] . lower ( ) > I :: Bound :: min_value ( ) {
377- let mut pre_upper = self . ranges [ 0 ] . upper ( ) ;
378- self . ranges [ 0 ] = I :: create (
379- I :: Bound :: min_value ( ) ,
380- self . ranges [ 0 ] . lower ( ) . decrement ( ) ,
381- ) ;
382- for i in 1 ..self . ranges . len ( ) {
383- let lower = pre_upper. increment ( ) ;
384- pre_upper = self . ranges [ i] . upper ( ) ;
385- self . ranges [ i] =
386- I :: create ( lower, self . ranges [ i] . lower ( ) . decrement ( ) ) ;
387- }
388- if pre_upper < I :: Bound :: max_value ( ) {
389- self . ranges . push ( I :: create (
390- pre_upper. increment ( ) ,
391- I :: Bound :: max_value ( ) ,
392- ) ) ;
393- }
394- } else {
395- for i in 1 ..self . ranges . len ( ) {
396- self . ranges [ i - 1 ] = I :: create (
397- self . ranges [ i - 1 ] . upper ( ) . increment ( ) ,
398- self . ranges [ i] . lower ( ) . decrement ( ) ,
399- ) ;
400- }
401- if self . ranges . last ( ) . unwrap ( ) . upper ( ) < I :: Bound :: max_value ( ) {
402- self . ranges . last_mut ( ) . map ( |range| {
403- * range = I :: create (
404- range. upper ( ) . increment ( ) ,
405- I :: Bound :: max_value ( ) ,
406- )
407- } ) ;
408- } else {
409- self . ranges . pop ( ) ;
410- }
314+ let upper = self . ranges [ 0 ] . lower ( ) . decrement ( ) ;
315+ self . ranges . push ( I :: create ( I :: Bound :: min_value ( ) , upper) ) ;
316+ }
317+ for i in 1 ..drain_end {
318+ let lower = self . ranges [ i - 1 ] . upper ( ) . increment ( ) ;
319+ let upper = self . ranges [ i] . lower ( ) . decrement ( ) ;
320+ self . ranges . push ( I :: create ( lower, upper) ) ;
321+ }
322+ if self . ranges [ drain_end - 1 ] . upper ( ) < I :: Bound :: max_value ( ) {
323+ let lower = self . ranges [ drain_end - 1 ] . upper ( ) . increment ( ) ;
324+ self . ranges . push ( I :: create ( lower, I :: Bound :: max_value ( ) ) ) ;
411325 }
326+ self . ranges . drain ( ..drain_end) ;
412327 // We don't need to update whether this set is folded or not, because
413328 // it is conservatively preserved through negation. Namely, if a set
414329 // is not folded, then it is possible that its negation is folded, for
@@ -422,7 +337,6 @@ impl<I: Interval> IntervalSet<I> {
422337 // of case folded characters. Negating it in turn means that all
423338 // equivalence classes in the set are negated, and any equivalence
424339 // class that was previously not in the set is now entirely in the set.
425- self . folded = self . ranges . is_empty ( ) || self . folded ;
426340 }
427341
428342 /// Converts this set into a canonical ordering.
@@ -433,20 +347,24 @@ impl<I: Interval> IntervalSet<I> {
433347 self . ranges . sort ( ) ;
434348 assert ! ( !self . ranges. is_empty( ) ) ;
435349
436- // We maintain the canonicalization results in-place at `0..newi`.
437- // `newi` will keep track of the end of the canonicalized ranges.
438- let mut newi = 0 ;
439- for oldi in 1 ..self . ranges . len ( ) {
440- // The last new range gets merged with currnet old range when
441- // unionable. If not, we update `newi` and store it as a new range.
442- if let Some ( union) = self . ranges [ newi] . union ( & self . ranges [ oldi] ) {
443- self . ranges [ newi] = union;
444- } else {
445- newi += 1 ;
446- self . ranges [ newi] = self . ranges [ oldi] ;
350+ // Is there a way to do this in-place with constant memory? I couldn't
351+ // figure out a way to do it. So just append the canonicalization to
352+ // the end of this range, and then drain it before we're done.
353+ let drain_end = self . ranges . len ( ) ;
354+ for oldi in 0 ..drain_end {
355+ // If we've added at least one new range, then check if we can
356+ // merge this range in the previously added range.
357+ if self . ranges . len ( ) > drain_end {
358+ let ( last, rest) = self . ranges . split_last_mut ( ) . unwrap ( ) ;
359+ if let Some ( union) = last. union ( & rest[ oldi] ) {
360+ * last = union;
361+ continue ;
362+ }
447363 }
364+ let range = self . ranges [ oldi] ;
365+ self . ranges . push ( range) ;
448366 }
449- self . ranges . truncate ( newi + 1 ) ;
367+ self . ranges . drain ( ..drain_end ) ;
450368 }
451369
452370 /// Returns true if and only if this class is in a canonical ordering.
@@ -568,13 +486,7 @@ pub trait Interval:
568486 other : & Self ,
569487 ) -> ( Option < Self > , Option < Self > ) {
570488 let union = match self . union ( other) {
571- None => {
572- return if self . upper ( ) < other. lower ( ) {
573- ( Some ( self . clone ( ) ) , Some ( other. clone ( ) ) )
574- } else {
575- ( Some ( other. clone ( ) ) , Some ( self . clone ( ) ) )
576- }
577- }
489+ None => return ( Some ( self . clone ( ) ) , Some ( other. clone ( ) ) ) ,
578490 Some ( union) => union,
579491 } ;
580492 let intersection = match self . intersect ( other) {
0 commit comments