Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions src/libraries/System.Linq/src/System/Linq/Distinct.SpeedOpt.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,64 @@ public override bool Contains(TSource value) =>
_comparer is null ? _source.Contains(value) :
base.Contains(value);
}

private sealed partial class PureOrderedDistinctIterator<TSource>
{
    /// <summary>Enumerates this iterator into a segmented builder and returns the collected elements as an array.</summary>
    public override TSource[] ToArray()
    {
        SegmentedArrayBuilder<TSource>.ScratchBuffer scratch = default;
        SegmentedArrayBuilder<TSource> builder = new(scratch);

        builder.AddNonICollectionRangeInlined(this);

        TSource[] result = builder.ToArray();
        builder.Dispose();

        return result;
    }

    /// <summary>Enumerates this iterator into a segmented builder and returns the collected elements as a list.</summary>
    public override List<TSource> ToList()
    {
        SegmentedArrayBuilder<TSource>.ScratchBuffer scratch = default;
        SegmentedArrayBuilder<TSource> builder = new(scratch);

        builder.AddNonICollectionRangeInlined(this);

        List<TSource> result = builder.ToList();
        builder.Dispose();

        return result;
    }

    /// <summary>Counts the distinct elements by enumerating them.</summary>
    /// <param name="onlyIfCheap">When <see langword="true"/>, no enumeration is performed and -1 is returned.</param>
    /// <returns>The number of elements this iterator yields, or -1 when <paramref name="onlyIfCheap"/> is <see langword="true"/>.</returns>
    public override int GetCount(bool onlyIfCheap)
    {
        if (onlyIfCheap)
        {
            return -1;
        }

        int count = 0;

        using Iterator<TSource> enumerator = this.GetEnumerator();

        while (enumerator.MoveNext())
        {
            count++;
        }

        return count;
    }

    /// <summary>Returns the first element of the source; it is also the first element this iterator yields.</summary>
    public override TSource? TryGetFirst(out bool found) => _source.TryGetFirst(out found);

    /// <summary>Determines whether the distinct sequence contains <paramref name="value"/>.</summary>
    public override bool Contains(TSource value) =>
        // This iterator always de-duplicates with the default equality comparer, so Distinct() cannot
        // remove an element that would have made Contains return true. _comparer is only scratch state
        // populated while enumerating, so it must not influence the answer.
        _source.Contains(value);
}
}
}
102 changes: 102 additions & 0 deletions src/libraries/System.Linq/src/System/Linq/Distinct.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> s
return [];
}

if (source is PureOrderedIterator<TSource> pureOrderedIterator && (comparer is null || comparer == EqualityComparer<TSource>.Default))
{
return new PureOrderedDistinctIterator<TSource>(pureOrderedIterator);
}

return new DistinctIterator<TSource>(source, comparer);
}

Expand Down Expand Up @@ -158,5 +163,102 @@ public override void Dispose()
base.Dispose();
}
}

/// <summary>
/// Yields the distinct values of a <see cref="PureOrderedIterator{TSource}"/> by skipping every element
/// that is equal to the element yielded immediately before it.
/// </summary>
/// <typeparam name="TSource">The type of the elements produced by the source iterator.</typeparam>
private sealed partial class PureOrderedDistinctIterator<TSource> : Iterator<TSource>
{
    // States entered once the first element has been produced; state 1 is handled as the start of enumeration.
    private const int ValueTypeState = 2;
    private const int ReferenceTypeState = 3;

    private readonly PureOrderedIterator<TSource> _source;
    private EqualityComparer<TSource>? _comparer;
    private Iterator<TSource>? _enumerator;

    public PureOrderedDistinctIterator(PureOrderedIterator<TSource> source)
    {
        Debug.Assert(source is not null);
        _source = source;
    }

    private protected override Iterator<TSource> Clone() => new PureOrderedDistinctIterator<TSource>(_source);

    /// <summary>Advances to the next element that differs from the current one.</summary>
    /// <returns><see langword="true"/> if such an element was found; otherwise <see langword="false"/> after disposing.</returns>
    public override bool MoveNext()
    {
        if (_state == 1)
        {
            // First call: open the source and yield its first element unconditionally.
            _enumerator = _source.GetEnumerator();
            if (!_enumerator.MoveNext())
            {
                Dispose();
                return false;
            }

            _current = _enumerator.Current;

            if (typeof(TSource).IsValueType)
            {
                _state = ValueTypeState;
            }
            else
            {
                // Reference types read the default comparer once and reuse it from the field.
                _comparer = EqualityComparer<TSource>.Default;
                _state = ReferenceTypeState;
            }

            return true;
        }

        if (_state == ValueTypeState)
        {
            Debug.Assert(_enumerator is not null);
            while (_enumerator.MoveNext())
            {
                TSource candidate = _enumerator.Current;
                if (EqualityComparer<TSource>.Default.Equals(_current, candidate))
                {
                    continue;
                }

                _current = candidate;
                return true;
            }
        }
        else if (_state == ReferenceTypeState)
        {
            Debug.Assert(_enumerator is not null);
            Debug.Assert(_comparer is not null);
            EqualityComparer<TSource> equality = _comparer;
            while (_enumerator.MoveNext())
            {
                TSource candidate = _enumerator.Current;
                if (equality.Equals(_current, candidate))
                {
                    continue;
                }

                _current = candidate;
                return true;
            }
        }

        // Source exhausted, or the iterator is in a state that produces nothing.
        Dispose();
        return false;
    }

    /// <summary>Disposes the source enumerator, if one was opened, and clears the enumeration state.</summary>
    public override void Dispose()
    {
        _enumerator?.Dispose();
        _enumerator = null;
        _comparer = null;

        base.Dispose();
    }
}
}
}
62 changes: 54 additions & 8 deletions src/libraries/System.Linq/src/System/Linq/OrderBy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,17 @@ public static IOrderedEnumerable<T> Order<T>(this IEnumerable<T> source) =>
///
/// If comparer is <see langword="null"/>, the default comparer <see cref="Comparer{T}.Default"/> is used to compare elements.
/// </remarks>
public static IOrderedEnumerable<T> Order<T>(this IEnumerable<T> source, IComparer<T>? comparer) =>
TypeIsImplicitlyStable<T>() && (comparer is null || comparer == Comparer<T>.Default) ?
new ImplicitlyStableOrderedIterator<T>(source, descending: false) :
OrderBy(source, EnumerableSorter<T>.IdentityFunc, comparer);
public static IOrderedEnumerable<T> Order<T>(this IEnumerable<T> source, IComparer<T>? comparer)
{
    // A custom comparer, or a type that does not qualify, goes through the general keyed sort.
    if (!TypeCanBePureOrdered<T>() || (comparer is not null && comparer != Comparer<T>.Default))
    {
        return OrderBy(source, EnumerableSorter<T>.IdentityFunc, comparer);
    }

    if (TypeIsImplicitlyStable<T>())
    {
        return new ImplicitlyStableOrderedIterator<T>(source, descending: false);
    }

    return new PureOrderedIteratorImpl<T>(source, descending: false);
}

public static IOrderedEnumerable<TSource> OrderBy<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector)
=> new OrderedIterator<TSource, TKey>(source, keySelector, null, false, null);
Expand Down Expand Up @@ -87,10 +94,17 @@ public static IOrderedEnumerable<T> OrderDescending<T>(this IEnumerable<T> sourc
///
/// If comparer is <see langword="null"/>, the default comparer <see cref="Comparer{T}.Default"/> is used to compare elements.
/// </remarks>
public static IOrderedEnumerable<T> OrderDescending<T>(this IEnumerable<T> source, IComparer<T>? comparer) =>
TypeIsImplicitlyStable<T>() && (comparer is null || comparer == Comparer<T>.Default) ?
new ImplicitlyStableOrderedIterator<T>(source, descending: true) :
OrderByDescending(source, EnumerableSorter<T>.IdentityFunc, comparer);
public static IOrderedEnumerable<T> OrderDescending<T>(this IEnumerable<T> source, IComparer<T>? comparer)
{
    // A custom comparer, or a type that does not qualify, goes through the general keyed sort.
    if (!TypeCanBePureOrdered<T>() || (comparer is not null && comparer != Comparer<T>.Default))
    {
        return OrderByDescending(source, EnumerableSorter<T>.IdentityFunc, comparer);
    }

    if (TypeIsImplicitlyStable<T>())
    {
        return new ImplicitlyStableOrderedIterator<T>(source, descending: true);
    }

    return new PureOrderedIteratorImpl<T>(source, descending: true);
}

public static IOrderedEnumerable<TSource> OrderByDescending<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector) =>
new OrderedIterator<TSource, TKey>(source, keySelector, null, true, null);
Expand Down Expand Up @@ -148,6 +162,38 @@ internal static bool TypeIsImplicitlyStable<T>()
t = typeof(T).GetEnumUnderlyingType();
}

return NonEnumTypeIsImplicitlyStable(t);
}

/// <summary>A type can be pure ordered when, once sorted, equal elements are always side by side: [ (someVal), (someVal), (otherVal), (otherVal) ]. See: #120125</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool TypeCanBePureOrdered<T>()
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A type can be pure ordered when its equality methods are pure, meaning that the same input always produces the same output.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is a correct application of the term "pure". A struct with two fields implementing IEquatable that only projects to one field is also pure in that sense but it is invalid from the perspective of this optimization. The defining property you seem to be testing for is implicit stability, so this is merely an extension of the existing TypeIsImplicitlyStable to more types.

Could !typeof(IEquatable<T>).IsAssignableFrom(typeof(T)) && !RuntimeHelpers.IsReferenceOrContainsReferences<T>() be an acceptable proxy for widening this test to more types?

Copy link
Member

@eiriktsarpalis eiriktsarpalis Sep 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could !typeof(IEquatable<T>).IsAssignableFrom(typeof(T)) && !RuntimeHelpers.IsReferenceOrContainsReferences<T>() be an acceptable proxy for widening this test to more types?

Answering my own question, probably not because T could contain fields that themselves implement IEquatable<T> in an unstable manner. I don't think we need this method, TypeIsImplicitlyStable is probably good enough, although we could have conversation about how much we can extend its scope.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that "Pure" is not the best word to describe that, but that's the best name that I came up with

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could !typeof(IEquatable<T>).IsAssignableFrom(typeof(T)) && !RuntimeHelpers.IsReferenceOrContainsReferences<T>() be an acceptable proxy for widening this test to more types?

Answering my own question, probably not because T could contain fields that themselves implement IEquatable<T> in an unstable manner. I don't think we need this method, TypeIsImplicitlyStable is probably good enough, although we could have conversation about how much we can extend its scope.

TypeIsImplicitlyStable covers only a subset of the types this optimization can handle.

Example:

class PureOrderableType : IEquatable<PureOrderableType>
{
    public int Value { get; set; }

    public bool Equals(PureOrderableType? other)
    {
        return other?.Value == Value;
    }

    public override bool Equals(object? obj)
    {
        return Equals(obj as PureOrderableType);
    }

    public override int GetHashCode()
    {
        return Value;
    }
}

var firstElement = new PureOrderableType { Value = 1 };
var secondElement = new PureOrderableType { Value = 1 };

List<PureOrderableType> list = [firstElement, secondElement];

Although

firstElement.Equals(secondElement) == true

it needs to be in that order (stable)

Copy link
Author

@henriquewr henriquewr Sep 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is implicit stability important here? If you have a sorted enumerable and equality is compatible with comparison, it follows that equal elements should appear sequentially. By applying your algorithm where the first element from each group of equal values is being yielded, you should obtain behavior that is equivalent to Distinct even when applied to DateTimeOffset.

Implicit stability is not important.
Consider this code:

List<DateTimeOffset> dateTimeOffsets = new List<DateTimeOffset>
{
    DateTimeOffset.Parse("2025-09-29T12:00:00+00:00"),
    DateTimeOffset.Parse("2025-09-29T08:00:00-04:00"),
    DateTimeOffset.Parse("2025-09-29T07:00:00-05:00")
}.Order().ToList();

dateTimeOffsets.Distinct() will return: DateTimeOffset.Parse("2025-09-29T12:00:00+00:00") (the first item).

If the sorting weren't stable, Distinct() could return a different item.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Linq sorting is stable. I guess what I'm trying to hint at is that your change looks promising, but you need to better clarify the conditions for when the optimization kicks in.

Copy link
Author

@henriquewr henriquewr Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Linq sorting is stable. I guess what I'm trying to hint at is that your change looks promising, but you need to better clarify the conditions for when the optimization kicks in.

I think that's the conditions for this optimization

If the type is not implicitly stable, it should use a stable sort; otherwise it can use an unstable one (it doesn't matter).

When looping over the ordered collection, the current element (a run of equal elements like [1,1,1] is treated as a single element [1]) is not equal to any previous element.

To achieve that, the methods GetHashCode, CompareTo, and Equals should be properly implemented.

"properly implemented" means that the methods must agree with each other

Like this example:

class SomeType : IEquatable<SomeType>, IComparable<SomeType>
{
    public int Value { get; set; }

    public int CompareTo(SomeType? other)
    {
        return (Value % 2).CompareTo(other.Value % 2);
    }

    public override int GetHashCode()
    {
        return Value % 2;
    }

    public bool Equals(SomeType? other)
    {
        return (other?.Value % 2) == (Value % 2);
    }
}

Other good example:

class OtherGoodType : IEquatable<OtherGoodType>, IComparable<OtherGoodType>
{
    public int Value { get; set; }

    public int CompareTo(OtherGoodType? other)
    {
        return (Value).CompareTo(other.Value);
    }

    public override int GetHashCode()
    {
        return Value;
    }

    public bool Equals(OtherGoodType? other)
    {
        return other?.Value == Value;
    }
}

This would be invalid:

class InvalidType : IEquatable<InvalidType>, IComparable<InvalidType>
{
    public int Value { get; set; }

    public int CompareTo(InvalidType? other)
    {
        return (Value % 2).CompareTo(other.Value % 2);
    }

    public override int GetHashCode()
    {
        return Value % 2;
    }

    public bool Equals(InvalidType? other)
    {
        return other?.Value == Value; // Does not follow CompareTo
    }
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're in agreement, but I would like you to update the name of your test method to better reflect what is being tested and potentially include even more types.

Copy link
Author

@henriquewr henriquewr Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're in agreement, but I would like you to update the name of your test method to better reflect what is being tested and potentially include even more types.

I added more tests and tried to improve the names; they are better, but I agree that they aren't the best names.
I also remembered two other types to add to this optimization: DateOnly and TimeOnly.

As for the failing tests in CI, I believe they are not related to these changes; all the logs say:
unable to pull image...

{
// Start from T itself; the type checks at the end of this method run against t.
Type t = typeof(T);

// For Nullable<U>, inspect U instead of the nullable wrapper.
Type? nullableUnderlyingType = Nullable.GetUnderlyingType(t);
if (nullableUnderlyingType != null)
{
t = nullableUnderlyingType;
}

// For an enum T, inspect its underlying integral type.
// NOTE(review): IsEnum is tested on typeof(T) rather than on t, so for a nullable enum t stays the enum type itself; confirm this is intended.
if (typeof(T).IsEnum)
{
t = typeof(T).GetEnumUnderlyingType();
}
Comment on lines +174 to +183
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can have negative impact. It may break constant folding for typeof(T) == typeof(X). What does this path handle?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The equality methods on Nullable<T> only add the null check, which is pure; if the value is not null, they fall back to the equality methods of the underlying type.

Basically, only the underlying type of the nullable matters.


// Every type listed here must have a default Comparer<T> and a default EqualityComparer<T> that agree on
// which values are equal; otherwise equal elements are not guaranteed to be adjacent after sorting.
// string is deliberately NOT listed: Comparer<string>.Default orders with culture-sensitive rules while
// EqualityComparer<string>.Default compares ordinally, so strings that sort as equal (for example ones
// that differ only by characters the culture ignores) can be ordinally different and interleaved, which
// would make an adjacent-duplicates Distinct yield the same value more than once.
return NonEnumTypeIsImplicitlyStable(t) ||
    t == typeof(Half) || t == typeof(float) ||
    t == typeof(double) || t == typeof(decimal) ||
    t == typeof(Guid) ||
    t == typeof(DateTime) || t == typeof(DateTimeOffset) ||
    t == typeof(TimeSpan) || t == typeof(TimeOnly) ||
    t == typeof(DateOnly);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool NonEnumTypeIsImplicitlyStable(Type t)
{
// Check for integral primitive types that compare equally iff they have the same bit pattern.
// bool is included because, even though technically it can have 256 different values, anything
// other than 0/1 is only producible using unsafe code. It's tempting to include a type like string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,10 @@ private TElement Last(TElement[] items)
}
}

/// <summary>
/// Base type for ordered iterators that <c>Distinct</c> recognizes through a type check in order to
/// return a <c>PureOrderedDistinctIterator</c> instead of the general distinct iterator.
/// This part of the partial class declares no members.
/// </summary>
private abstract partial class PureOrderedIterator<TElement> : OrderedIterator<TElement>
{
}

private sealed partial class OrderedIterator<TElement, TKey> : OrderedIterator<TElement>
{
// For complicated cases, rely on the base implementation that's more comprehensive.
Expand Down Expand Up @@ -361,7 +365,11 @@ private sealed partial class OrderedIterator<TElement, TKey> : OrderedIterator<T
}
}

private sealed partial class ImplicitlyStableOrderedIterator<TElement> : OrderedIterator<TElement>
/// <summary>
/// Ordered iterator created by <c>Order</c>/<c>OrderDescending</c> with the default comparer when the element
/// type can be pure ordered but is not implicitly stable. This part of the partial class declares no members.
/// </summary>
private sealed partial class PureOrderedIteratorImpl<TElement> : PureOrderedIterator<TElement>
{
}

private sealed partial class ImplicitlyStableOrderedIterator<TElement> : PureOrderedIterator<TElement>
{
public override TElement[] ToArray()
{
Expand Down Expand Up @@ -461,7 +469,7 @@ public override bool MoveNext()
{
int state = _state;

Initialized:
Initialized:
if (state > 1)
{
Debug.Assert(_buffer is not null);
Expand Down
Loading
Loading