Skip to content

Commit

Permalink
Fixed the benchmark filter
Browse files Browse the repository at this point in the history
  • Loading branch information
Nick-Nuon committed Nov 22, 2023
1 parent ffe05a8 commit e8d2254
Show file tree
Hide file tree
Showing 2 changed files with 181 additions and 45 deletions.
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ cd test
dotnet test
```

To run specific tests, it is helpful to use the filter parameter:

```
dotnet test -c Release --filter Ascii
```

## Running Benchmarks

```
Expand All @@ -52,10 +58,9 @@ sudo dotnet run -c Release
To run specific benchmarks, it is helpful to use the filter parameter:

```
sudo dotnet run -c Release --filter Ascii
sudo dotnet run -c Release --filter *Ascii*
```


## Building the library

```
Expand Down
217 changes: 174 additions & 43 deletions benchmark/Benchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -140,54 +140,126 @@ private List<byte[]> GenerateUtf8Strings(int count, uint length)
return strings;
}

private void IntroduceError(byte[] utf8)
// private void IntroduceError(byte[] utf8)
// {
// Random random = new Random();
// int errorType = random.Next(5); // Randomly select an error type (0-4)
// int position = random.Next(utf8.Length); // Random position in the byte array

// switch (errorType)
// {
// case 0: // Header Bits Error
// if ((utf8[position] & 0b11000000) != 0b10000000)
// {
// utf8[position] = 0b11111000;
// }
// break;

// case 1: // Too Short Error
// if ((utf8[position] & 0b11000000) == 0b10000000)
// {
// utf8[position] = 0b11100000;
// }
// break;

// case 2: // Too Long Error
// if ((utf8[position] & 0b11000000) != 0b10000000)
// {
// utf8[position] = 0b10000000;
// }
// break;

// case 3: // Overlong Error
// if (utf8[position] >= 0b11000000)
// {
// if ((utf8[position] & 0b11100000) == 0b11000000)
// {
// utf8[position] = 0b11000000;
// }
// else if ((utf8[position] & 0b11110000) == 0b11100000)
// {
// utf8[position] = 0b11100000;
// utf8[position + 1] = (byte)(utf8[position + 1] & 0b11011111);
// }
// else if ((utf8[position] & 0b11111000) == 0b11110000)
// {
// utf8[position] = 0b11110000;
// utf8[position + 1] = (byte)(utf8[position + 1] & 0b11001111);
// }
// }
// break;

// case 4: // Surrogate Error
// if ((utf8[position] & 0b11110000) == 0b11100000)
// {
// utf8[position] = 0b11101101; // Leading byte for surrogate
// for (int s = 0x8; s < 0xf; s++)
// {
// utf8[position + 1] = (byte)((utf8[position + 1] & 0b11000011) | (s << 2));
// break; // Just introduce one surrogate error
// }
// }
// break;

// }
// }

private void IntroduceError(byte[] utf8)
{
Random random = new Random();
int errorType = random.Next(5); // Randomly select an error type (0-4)
int position = random.Next(utf8.Length); // Random position in the byte array
bool errorIntroduced = false;

switch (errorType)
while (!errorIntroduced)
{
case 0: // Header Bits Error
if ((utf8[position] & 0b11000000) != 0b10000000)
{
utf8[position] = 0b11111000;
}
break;

case 1: // Too Short Error
if ((utf8[position] & 0b11000000) == 0b10000000)
{
utf8[position] = 0b11100000;
}
break;

case 2: // Too Long Error
if ((utf8[position] & 0b11000000) != 0b10000000)
{
utf8[position] = 0b10000000;
}
break;
int errorType = random.Next(5); // Randomly select an error type (0-4)
int position = random.Next(utf8.Length); // Random position in the byte array

case 3: // Overlong Error
if (utf8[position] >= 0b11000000)
{
if ((utf8[position] & 0b11100000) == 0b11000000)
switch (errorType)
{
case 0: // Header Bits Error
if ((utf8[position] & 0b11000000) != 0b10000000)
{
utf8[position] = 0b11000000;
utf8[position] = 0b11111000;
errorIntroduced = true;
}
else if ((utf8[position] & 0b11110000) == 0b11100000)
break;

case 1: // Too Short Error
if ((utf8[position] & 0b11000000) == 0b10000000)
{
utf8[position] = 0b11100000;
utf8[position + 1] = (byte)(utf8[position + 1] & 0b11011111);
errorIntroduced = true;
}
break;

case 2: // Too Long Error
if ((utf8[position] & 0b11000000) != 0b10000000)
{
utf8[position] = 0b10000000;
errorIntroduced = true;
}
else if ((utf8[position] & 0b11111000) == 0b11110000)
break;

case 3: // Overlong Error
if (utf8[position] >= 0b11000000)
{
utf8[position] = 0b11110000;
utf8[position + 1] = (byte)(utf8[position + 1] & 0b11001111);
if ((utf8[position] & 0b11100000) == 0b11000000)
{
utf8[position] = 0b11000000;
}
else if ((utf8[position] & 0b11110000) == 0b11100000)
{
utf8[position] = 0b11100000;
utf8[position + 1] = (byte)(utf8[position + 1] & 0b11011111);
}
else if ((utf8[position] & 0b11111000) == 0b11110000)
{
utf8[position] = 0b11110000;
utf8[position + 1] = (byte)(utf8[position + 1] & 0b11001111);
}
errorIntroduced = true;
}
}
break;
break;

case 4: // Surrogate Error
if ((utf8[position] & 0b11110000) == 0b11100000)
Expand All @@ -196,16 +268,18 @@ private void IntroduceError(byte[] utf8)
for (int s = 0x8; s < 0xf; s++)

Check warning on line 268 in benchmark/Benchmark.cs

View workflow job for this annotation

GitHub Actions / build

Unreachable code detected

Check warning on line 268 in benchmark/Benchmark.cs

View workflow job for this annotation

GitHub Actions / build

Unreachable code detected
{
utf8[position + 1] = (byte)((utf8[position + 1] & 0b11000011) | (s << 2));
errorIntroduced = true;
break; // Just introduce one surrogate error
}
}
break;

}
}
}


[Benchmark]
[BenchmarkCategory("Ascii", "SIMD")]
public void FastUnicodeIsAscii()
{
int count = 0;
Expand All @@ -217,6 +291,7 @@ public void FastUnicodeIsAscii()
}

[Benchmark]
[BenchmarkCategory("Ascii", "Runtime")]
public void RuntimeIsAscii()
{
int count = 0;
Expand Down Expand Up @@ -353,7 +428,7 @@ public void CompetitionUtf8ValidationValidUtf8()
}
}

[Benchmark(Description = "ScalarUtf8ValidationRealValidData")]
[Benchmark(Description = "ScalarUtf8ValidationValidData")]
public void SimDUnicodeUtf8ValidationRealData()
{
foreach (var line in _linesUtf8) // Assuming _linesUtf8 contains UTF-8 encoded data
Expand All @@ -368,7 +443,7 @@ public void SimDUnicodeUtf8ValidationRealData()
}
}

[Benchmark(Description = "CompetitionUtf8ValidationRealValidData")]
[Benchmark(Description = "CompetitionUtf8ValidationValidData")]
public void CompetitionUtf8ValidationRealData()
{
foreach (var line in _linesUtf8) // Assuming _linesUtf8 contains UTF-8 encoded data
Expand All @@ -384,11 +459,66 @@ public void CompetitionUtf8ValidationRealData()
}
}

/// <summary>
/// Benchmark: calls <c>SimdUnicode.UTF8.GetPointerToFirstInvalidByte</c> once for
/// every entry of <c>utf8ErrorStrings</c>, passing a pinned pointer to the entry's
/// first byte together with the entry's length.
/// </summary>
/// <remarks>
/// The pointer returned by the validator is stored in a local and never read.
/// NOTE(review): <c>utf8ErrorStrings</c> is declared outside this block; presumably it
/// holds buffers that were corrupted on purpose - confirm against the setup code.
/// </remarks>
[Benchmark(Description = "ScalarUtf8ValidationErrorData")]
public void ScalarUtf8ValidationErrorData()
{
    unsafe
    {
        foreach (var corrupted in utf8ErrorStrings)
        {
            // Pin the buffer so a raw pointer can be handed to the validator.
            fixed (byte* start = corrupted)
            {
                byte* firstInvalid = SimdUnicode.UTF8.GetPointerToFirstInvalidByte(
                    start,
                    corrupted.Length);
            }
        }
    }
}

/// <summary>
/// Benchmark: calls <c>Competition.Utf8Utility.GetPointerToFirstInvalidByte</c> once
/// for every entry of <c>utf8ErrorStrings</c>, passing a pinned pointer to the entry's
/// first byte together with the entry's length.
/// </summary>
/// <remarks>
/// The returned pointer and both <c>out</c> adjustment values are received into locals
/// and never read.
/// NOTE(review): <c>utf8ErrorStrings</c> is declared outside this block; presumably it
/// holds buffers that were corrupted on purpose - confirm against the setup code.
/// </remarks>
[Benchmark(Description = "CompetitionUtf8ValidationErrorData")]
public void CompetitionUtf8ValidationErrorData()
{
    unsafe
    {
        foreach (var corrupted in utf8ErrorStrings)
        {
            // Pin the buffer so a raw pointer can be handed to the validator.
            fixed (byte* start = corrupted)
            {
                byte* firstInvalid = Competition.Utf8Utility.GetPointerToFirstInvalidByte(
                    start,
                    corrupted.Length,
                    out int utf16Adjustment,
                    out int scalarAdjustment);
            }
        }
    }
}





}

public class Program
// public class Program
// {
// public static void Main(string[] args)
// {
// if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
// {
// Console.WriteLine("ARM64 system detected.");
// }
// else if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
// {
// Console.WriteLine("X64 system detected (Intel, AMD,...).");

// }
// else
// {
// Console.WriteLine("Unrecognized system.");

// }
// var summary = BenchmarkRunner.Run<Checker>();
// }
// }

public class Program
{
public static void Main(string[] args)
{
Expand All @@ -399,14 +529,15 @@ public static void Main(string[] args)
else if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
{
Console.WriteLine("X64 system detected (Intel, AMD,...).");

}
else
{
Console.WriteLine("Unrecognized system.");

}
var summary = BenchmarkRunner.Run<Checker>();

var switcher = new BenchmarkSwitcher(new[] { typeof(Checker) });
switcher.Run(args);
}
}

}

0 comments on commit e8d2254

Please sign in to comment.