Skip to content

Commit

Permalink
Slight organizing
Browse files Browse the repository at this point in the history
  • Loading branch information
Nick-Nuon committed Nov 19, 2023
1 parent 5a9a6f2 commit 8e7876f
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 17 deletions.
32 changes: 15 additions & 17 deletions test/UTF8ValidationTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,21 @@ public void SurrogateErrorTest()
}
}
}

// Helper kept for testing the SIMD version; to be cleaned up later.

/// <summary>
/// Reports whether <paramref name="utf8"/> passes validation by
/// <c>UTF8.GetPointerToFirstInvalidByte</c>.
/// </summary>
/// <param name="utf8">Bytes to validate.</param>
/// <returns>
/// <c>true</c> when the pointer returned by the validator equals the position
/// one past the last byte of the array; otherwise <c>false</c>.
/// </returns>
private unsafe bool ValidateUtf8(byte[] utf8)
{
    fixed (byte* start = utf8)
    {
        // One past the final byte: the validator returning this address
        // means no invalid byte was reported inside the buffer.
        byte* end = start + utf8.Length;
        return UTF8.GetPointerToFirstInvalidByte(start, utf8.Length) == end;
    }
}

// I save this for when testing the SIMD version
// [Fact]
// public void BruteForceTest()
// {
Expand Down Expand Up @@ -309,19 +322,4 @@ public void SurrogateErrorTest()

// 5. If all tests pass, output "OK".


/// <summary>
/// Runs <c>UTF8.GetPointerToFirstInvalidByte</c> over the whole array and
/// converts its pointer result into a pass/fail flag.
/// </summary>
/// <param name="utf8">Bytes to validate.</param>
/// <returns>
/// <c>true</c> if the returned pointer sits at the end of the array,
/// <c>false</c> if it points somewhere before the end.
/// </returns>
private bool ValidateUtf8(byte[] utf8)
{
    unsafe
    {
        fixed (byte* pStart = utf8)
        {
            int length = utf8.Length;
            byte* firstInvalid = UTF8.GetPointerToFirstInvalidByte(pStart, length);

            // A result at the end of the array means the input is treated as valid.
            bool reachedEnd = firstInvalid == pStart + length;
            return reachedEnd;
        }
    }
}


}
55 changes: 55 additions & 0 deletions test/UTF8ValidationTestsBruteForce.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
namespace tests;
using System.Text;
using SimdUnicode;

/// <summary>
/// Holder for the brute-force UTF-8 validation test. It currently contains only a
/// commented-out draft and the pseudocode the draft is based on.
/// </summary>
/// <remarks>
/// Named after its file (UTF8ValidationTestsBruteForce.cs) so it does not collide with
/// the existing <c>Utf8ValidationTests</c> class in the same namespace, which made the
/// build fail with "The namespace 'tests' already contains a definition for
/// 'Utf8ValidationTests'".
/// </remarks>
public class Utf8ValidationTestsBruteForce
{

    // Saved for when the SIMD version is tested.
    // NOTE(review): the draft relies on NumTrials, generator, rand and ValidateUtf8,
    // none of which are defined in this class yet.
    // [Fact]
    // public void BruteForceTest()
    // {
    //     for (int i = 0; i < NumTrials; i++)
    //     {
    //         byte[] utf8 = generator.Generate(rand.Next(256));
    //         Assert.True(ValidateUtf8(utf8), "UTF-8 validation failed, indicating a bug.");

    //         for (int flip = 0; flip < 1000; flip++)
    //         {
    //             byte[] modifiedUtf8 = (byte[])utf8.Clone();
    //             int byteIndex = rand.Next(modifiedUtf8.Length);
    //             int bitFlip = 1 << rand.Next(8);
    //             modifiedUtf8[byteIndex] ^= (byte)bitFlip;

    //             bool isValid = ValidateUtf8(modifiedUtf8);
    //             // This condition may depend on the specific behavior of your validation method
    //             // and whether or not it should match a reference implementation.
    //             // In this example, we are simply asserting that the modified sequence is still valid.
    //             Assert.True(isValid, "Mismatch in UTF-8 validation detected, indicating a bug.");
    //         }
    //     }
    // }

    // Pseudocode for easier ChatGPT generation:
    // 1. Set a seed value (1234).
    // 2. Create a random UTF-8 generator with equal probabilities for 1, 2, 3, and 4-byte sequences.
    // 3. Set the total number of trials to 1000.

    // 4. For each trial (0 to total - 1):
    //    a. Generate a random UTF-8 sequence with a length between 0 and 255.
    //    b. Validate the UTF-8 sequence. If it's invalid:
    //       - Output "bug" to stderr.
    //       - Fail the test.

    //    c. For 1000 times (bit flipping loop):
    //       i. Generate a random bit position (0 to 7).
    //       ii. Flip exactly one bit at the random position in a random byte of the UTF-8 sequence.
    //       iii. Re-validate the modified UTF-8 sequence.
    //       iv. Compare the result of the validation with a reference validation method.
    //       v. If the results differ:
    //          - Output "bug" to stderr.
    //          - Fail the test.

    // 5. If all tests pass, output "OK".

}

0 comments on commit 8e7876f

Please sign in to comment.