Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 54 additions & 115 deletions src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
using System.Globalization;
using System.Numerics;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Text;
using Internal.Runtime.CompilerServices;

Expand Down Expand Up @@ -1493,137 +1491,78 @@ private string[] SplitWithPostProcessing(ReadOnlySpan<int> sepList, ReadOnlySpan
/// <param name="sepListBuilder"><see cref="ValueListBuilder{T}"/> to store indexes</param>
private void MakeSeparatorList(ReadOnlySpan<char> separators, ref ValueListBuilder<int> sepListBuilder)
{
// Special-case no separators to mean any whitespace is a separator.
if (separators.Length == 0)
char sep0, sep1, sep2;

switch (separators.Length)
{
for (int i = 0; i < Length; i++)
{
if (char.IsWhiteSpace(this[i]))
// Special-case no separators to mean any whitespace is a separator.
case 0:
for (int i = 0; i < Length; i++)
{
sepListBuilder.Append(i);
if (char.IsWhiteSpace(this[i]))
{
sepListBuilder.Append(i);
}
}
}
}

// Special-case the common cases of 1, 2, and 3 separators, with manual comparisons against each separator.
else if (separators.Length <= 3)
{
char sep0, sep1, sep2;
sep0 = separators[0];
sep1 = separators.Length > 1 ? separators[1] : sep0;
sep2 = separators.Length > 2 ? separators[2] : sep1;

if (Length >= 16 && Sse41.IsSupported)
{
MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep2);
return;
}
break;

for (int i = 0; i < Length; i++)
{
char c = this[i];
if (c == sep0 || c == sep1 || c == sep2)
// Special-case the common cases of 1, 2, and 3 separators, with manual comparisons against each separator.
case 1:
sep0 = separators[0];
for (int i = 0; i < Length; i++)
{
sepListBuilder.Append(i);
if (this[i] == sep0)
{
sepListBuilder.Append(i);
}
}
}
}

// Handle > 3 separators with a probabilistic map, ala IndexOfAny.
// This optimizes for chars being unlikely to match a separator.
else
{
unsafe
{
ProbabilisticMap map = default;
uint* charMap = (uint*)&map;
InitializeProbabilisticMap(charMap, separators);

break;
case 2:
sep0 = separators[0];
sep1 = separators[1];
for (int i = 0; i < Length; i++)
{
char c = this[i];
if (IsCharBitSet(charMap, (byte)c) && IsCharBitSet(charMap, (byte)(c >> 8)) &&
separators.Contains(c))
if (c == sep0 || c == sep1)
{
sepListBuilder.Append(i);
}
}
}
}
}

private void MakeSeparatorListVectorized(ref ValueListBuilder<int> sepListBuilder, char c, char c2, char c3)
{
// Redundant test so we won't prejit remainder of this method
// on platforms without SSE.
if (!Sse41.IsSupported)
{
throw new PlatformNotSupportedException();
}

// Constant that allows for the truncation of 16-bit (FFFF/0000) values within a register to 4-bit (F/0)
Vector128<byte> shuffleConstant = Vector128.Create(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);

Vector128<ushort> v1 = Vector128.Create(c);
Vector128<ushort> v2 = Vector128.Create(c2);
Vector128<ushort> v3 = Vector128.Create(c3);

ref char c0 = ref MemoryMarshal.GetReference(this.AsSpan());
int cond = Length & -Vector128<ushort>.Count;
int i = 0;

for (; i < cond; i += Vector128<ushort>.Count)
{
Vector128<ushort> charVector = ReadVector(ref c0, i);
Vector128<ushort> cmp = Sse2.CompareEqual(charVector, v1);

cmp = Sse2.Or(Sse2.CompareEqual(charVector, v2), cmp);
cmp = Sse2.Or(Sse2.CompareEqual(charVector, v3), cmp);

if (Sse41.TestZ(cmp, cmp)) { continue; }

Vector128<byte> mask = Sse2.ShiftRightLogical(cmp.AsUInt64(), 4).AsByte();
mask = Ssse3.Shuffle(mask, shuffleConstant);

uint lowBits = Sse2.ConvertToUInt32(mask.AsUInt32());
mask = Sse2.ShiftRightLogical(mask.AsUInt64(), 32).AsByte();
uint highBits = Sse2.ConvertToUInt32(mask.AsUInt32());

for (int idx = i; lowBits != 0; idx++)
{
if ((lowBits & 0xF) != 0)
break;
case 3:
sep0 = separators[0];
sep1 = separators[1];
sep2 = separators[2];
for (int i = 0; i < Length; i++)
{
sepListBuilder.Append(idx);
char c = this[i];
if (c == sep0 || c == sep1 || c == sep2)
{
sepListBuilder.Append(i);
}
}
break;

lowBits >>= 8;
}

for (int idx = i + 4; highBits != 0; idx++)
{
if ((highBits & 0xF) != 0)
// Handle > 3 separators with a probabilistic map, ala IndexOfAny.
// This optimizes for chars being unlikely to match a separator.
default:
unsafe
{
sepListBuilder.Append(idx);
}

highBits >>= 8;
}
}

for (; i < Length; i++)
{
char curr = Unsafe.Add(ref c0, (IntPtr)(uint)i);
if (curr == c || curr == c2 || curr == c3)
{
sepListBuilder.Append(i);
}
}
ProbabilisticMap map = default;
uint* charMap = (uint*)&map;
InitializeProbabilisticMap(charMap, separators);

static Vector128<ushort> ReadVector(ref char c0, int offset)
{
ref char ci = ref Unsafe.Add(ref c0, (IntPtr)(uint)offset);
ref byte b = ref Unsafe.As<char, byte>(ref ci);
return Unsafe.ReadUnaligned<Vector128<ushort>>(ref b);
for (int i = 0; i < Length; i++)
{
char c = this[i];
if (IsCharBitSet(charMap, (byte)c) && IsCharBitSet(charMap, (byte)(c >> 8)) &&
separators.Contains(c))
{
sepListBuilder.Append(i);
}
}
}
break;
}
}

Expand Down