Skip to content

Commit

Permalink
fix #55:
Browse files Browse the repository at this point in the history
- create AVPRIndex.Hash module
- add tests for string/byte array/file hashing
- use this function exclusively in AVPRIndex and API
- remove the API endpoint for direct hash creation
- return http 422 if package content contains any CR characters
- handle PackageContentHash creation for new packages automatically
  • Loading branch information
kMutagene committed Jun 18, 2024
1 parent c6219a4 commit dcc0070
Show file tree
Hide file tree
Showing 16 changed files with 290 additions and 108 deletions.
15 changes: 8 additions & 7 deletions src/AVPRIndex/AVPRIndex.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,32 @@

<PropertyGroup>
<Authors>Kevin Schneider</Authors>
<Description>Type system for the indexing backend of avpr.nfdi4plants.org</Description>
<Summary>Type system for the indexing backend of avpr.nfdi4plants.org</Summary>
<Description>Type system and utils for the indexing backend of avpr.nfdi4plants.org</Description>
<Summary>Type system and utils for the indexing backend of avpr.nfdi4plants.org</Summary>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<PackageTags>C# F# ARC annotated-research-context rdm research-data-management validation</PackageTags>
<PackageProjectUrl>https://github.com/nfdi4plants/arc-validate-package-registry</PackageProjectUrl>
<RepositoryUrl>https://github.com/nfdi4plants/arc-validate-package-registry</RepositoryUrl>
<RepositoryType>git</RepositoryType>
<PackageReleaseNotes>$([System.IO.File]::ReadAllText("$(MSBuildProjectDirectory)/RELEASE_NOTES.md"))</PackageReleaseNotes>
<PackageReadmeFile>README.md</PackageReadmeFile>
<PackageVersion>0.1.1</PackageVersion>
<PackageVersion>0.1.2</PackageVersion>
</PropertyGroup>

<ItemGroup>
<None Include="README.md" Pack="true" PackagePath="\"/>
<None Include="README.md" Pack="true" PackagePath="\" />
<None Include="RELEASE_NOTES.md" />
<Compile Include="Globals.fs" />
<Compile Include="Domain.fs" />
<Compile Include="MD5Hash.fs" />
<Compile Include="Utils.fs" />
<Compile Include="Domain.fs" />
<Compile Include="Frontmatter.fs" />
<Compile Include="AVPRRepo.fs" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="YamlDotNet" Version="15.1.2" />
<PackageReference Include="FsHttp" Version="14.5.0" />
<PackageReference Include="YamlDotNet" Version="[15.1.2, 16.0.0)" />
<PackageReference Include="FsHttp" Version="[14.5.0, 15.0.0)" />
</ItemGroup>

</Project>
12 changes: 1 addition & 11 deletions src/AVPRIndex/Domain.fs
Original file line number Diff line number Diff line change
Expand Up @@ -212,21 +212,11 @@ module Domain =
lastUpdated: System.DateTimeOffset,
metadata: ValidationPackageMetadata
) =

let md5 = MD5.Create()

ValidationPackageIndex.create(
repoPath = repoPath,
fileName = Path.GetFileName(repoPath),
lastUpdated = lastUpdated,
contentHash = (
repoPath
|> File.ReadAllText
|> fun s -> s.ReplaceLineEndings("\n")
|> Encoding.UTF8.GetBytes
|> md5.ComputeHash
|> Convert.ToHexString
),
contentHash = Hash.hashFile repoPath,
metadata = metadata
)

Expand Down
32 changes: 32 additions & 0 deletions src/AVPRIndex/MD5Hash.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
namespace AVPRIndex

open System
open System.IO
open System.Text
open System.Security.Cryptography

/// Static helpers for calculating MD5 content hashes of validation packages.
///
/// NOTE: `hashString` and `hashFile` unify line endings to `\n` before hashing.
/// This is crucial to ensure that the hash is the same on all platforms, as these members
/// are the first point of entry when parsing packages that do not exist in the production DB.
/// `hashContent` hashes the given bytes exactly as they are.
type Hash =

    /// calculates a md5 hash of the given byte array and returns it as a hex string.
    /// The bytes are hashed as-is, no line ending unification is performed here.
    static member hashContent (content: byte array) =
        // MD5 implements IDisposable: bind it with `use` so it is released when this member returns
        use md5 = MD5.Create()
        content
        |> md5.ComputeHash
        |> Convert.ToHexString

    /// calculates a md5 hash of the given string with line endings unified to `\n` and returns it as a hex string.
    /// The unified string is encoded as UTF8 before hashing.
    static member hashString (content: string) =
        content
        |> fun s -> s.ReplaceLineEndings("\n")
        |> Encoding.UTF8.GetBytes
        |> Hash.hashContent

    /// calculates a md5 hash of the file at the given path with line endings unified to `\n` and returns it as a hex string.
    /// The whole file is read as text and then hashed via `Hash.hashString`.
    static member hashFile (path: string) =
        path
        |> File.ReadAllText
        |> Hash.hashString

4 changes: 4 additions & 0 deletions src/AVPRIndex/RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## v0.1.2

Add `Hash` module to unify package content hash calculation across downstream libraries

## v0.1.1

Add `CQCHookEndpoint` field to `ValidationPackageMetadata`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ public static RouteGroupBuilder MapVerificationApiV1(this RouteGroupBuilder grou
.WithOpenApi()
.WithName("VerifyPackageContent");

group.MapPost("/hashes", VerificationHandlers.CreateContentHash)
.WithOpenApi()
.WithName("CreatePackageContentHash")
.AddEndpointFilter<APIKeyEndpointFilter>(); // creating hashes via post requests requires an API key!
// removed, as it is safer to create hash entries automatically on posted packages
//group.MapPost("/hashes", VerificationHandlers.CreateContentHash)
// .WithOpenApi()
// .WithName("CreatePackageContentHash")
// .AddEndpointFilter<APIKeyEndpointFilter>(); // creating hashes via post requests requires an API key!

return group.WithTags("Content Verification");
}
Expand Down
8 changes: 7 additions & 1 deletion src/PackageRegistryService/API/Handlers/PackageHandlers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,20 @@ public static async Task<Results<BadRequest<string>, NotFound<string>, Conflict<
return TypedResults.Ok(package);
}

public static async Task<Results<Ok<ValidationPackage>, Conflict, UnauthorizedHttpResult>> CreatePackage(ValidationPackage package, ValidationPackageDb database)
public static async Task<Results<Ok<ValidationPackage>, Conflict, UnauthorizedHttpResult, UnprocessableEntity<string>>> CreatePackage(ValidationPackage package, ValidationPackageDb database)
{
var existing = await database.ValidationPackages.FindAsync(package.Name, package.MajorVersion, package.MinorVersion, package.PatchVersion);
if (existing != null)
{
return TypedResults.Conflict();
}

if (package.ContentContainsCarriageReturn())
{
return TypedResults.UnprocessableEntity("package content contained non-LF line endings");
}

ValidationPackageDb.CreatePackageContentHash(package, database);
database.ValidationPackages.Add(package);
await database.SaveChangesAsync();

Expand Down
34 changes: 17 additions & 17 deletions src/PackageRegistryService/API/Handlers/VerificationHandlers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,27 +38,27 @@ public static async Task<Results<Ok, UnprocessableEntity, NotFound>> Verify(Pack
return TypedResults.Ok();
}

public static async Task<Results<Ok<PackageContentHash>, Conflict, UnauthorizedHttpResult>> CreateContentHash(PackageContentHash hashedPackage, ValidationPackageDb database)
{
//public static async Task<Results<Ok<PackageContentHash>, Conflict, UnauthorizedHttpResult>> CreateContentHash(PackageContentHash hashedPackage, ValidationPackageDb database)
//{

var existing = await
database.Hashes.FindAsync(
hashedPackage.PackageName,
hashedPackage.PackageMajorVersion,
hashedPackage.PackageMinorVersion,
hashedPackage.PackagePatchVersion
);
// var existing = await
// database.Hashes.FindAsync(
// hashedPackage.PackageName,
// hashedPackage.PackageMajorVersion,
// hashedPackage.PackageMinorVersion,
// hashedPackage.PackagePatchVersion
// );

if (existing != null)
{
return TypedResults.Conflict();
}
// if (existing != null)
// {
// return TypedResults.Conflict();
// }

database.Hashes.Add(hashedPackage);
await database.SaveChangesAsync();
// database.Hashes.Add(hashedPackage);
// await database.SaveChangesAsync();

return TypedResults.Ok(hashedPackage);
// return TypedResults.Ok(hashedPackage);

}
//}
}
}
8 changes: 2 additions & 6 deletions src/PackageRegistryService/Data/DataInitializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ public class DataInitializer
{
public static void SeedData(ValidationPackageDb context)
{
MD5 md5 = MD5.Create();

if (!context.ValidationPackages.Any())
{
var index = AVPRRepo.getPreviewIndex();
Expand Down Expand Up @@ -67,11 +65,9 @@ public static void SeedData(ValidationPackageDb context)
Path.GetDirectoryName(Assembly.GetEntryAssembly().Location),

Check warning on line 65 in src/PackageRegistryService/Data/DataInitializer.cs

View workflow job for this annotation

GitHub Actions / Build and test projects / build-and-test (windows-latest)

Dereference of a possibly null reference.

Check warning on line 65 in src/PackageRegistryService/Data/DataInitializer.cs

View workflow job for this annotation

GitHub Actions / Build and test projects / build-and-test (windows-latest)

Possible null reference argument for parameter 'path1' in 'string Path.Combine(string path1, string path2)'.

Check warning on line 65 in src/PackageRegistryService/Data/DataInitializer.cs

View workflow job for this annotation

GitHub Actions / Build and test projects / build-and-test (ubuntu-latest)

Dereference of a possibly null reference.

Check warning on line 65 in src/PackageRegistryService/Data/DataInitializer.cs

View workflow job for this annotation

GitHub Actions / Build and test projects / build-and-test (ubuntu-latest)

Possible null reference argument for parameter 'path1' in 'string Path.Combine(string path1, string path2)'.

Check warning on line 65 in src/PackageRegistryService/Data/DataInitializer.cs

View workflow job for this annotation

GitHub Actions / Build and test projects / build-and-test (macos-latest)

Dereference of a possibly null reference.

Check warning on line 65 in src/PackageRegistryService/Data/DataInitializer.cs

View workflow job for this annotation

GitHub Actions / Build and test projects / build-and-test (macos-latest)

Possible null reference argument for parameter 'path1' in 'string Path.Combine(string path1, string path2)'.
$"StagingArea/{i.Metadata.Name}/{i.FileName}"
);
var content =
File.ReadAllText(path)
.ReplaceLineEndings("\n");

var hash = Convert.ToHexString(md5.ComputeHash(Encoding.UTF8.GetBytes(content)));
var hash = AVPRIndex.Hash.hashFile(path);

if (hash != i.ContentHash)
{
throw new Exception($"Hash collision for indexed hash vs content hash: {$"StagingArea/{i.Metadata.Name}/{i.FileName}"}");
Expand Down
13 changes: 9 additions & 4 deletions src/PackageRegistryService/Models/ValidationPackage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,15 @@ public class ValidationPackage
/// <returns>A string containing the package content</returns>
public string GetPackageContentHash()
{
using (var md5 = MD5.Create())
{
return Convert.ToHexString(md5.ComputeHash(PackageContent));
}
return AVPRIndex.Hash.hashContent(PackageContent);
}
/// <summary>
/// Checks whether the package script content contains any carriage return (CR, <c>\r</c>) characters,
/// meaning its line endings have not been unified to only use LF.
/// </summary>
/// <returns>true if the script content contains at least one CR character, otherwise false</returns>
public bool ContentContainsCarriageReturn() => GetPackageScriptContent().Contains("\r");
}
}
21 changes: 21 additions & 0 deletions src/PackageRegistryService/Models/ValidationPackageDb.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,28 @@ public static bool ValidatePackageContent(ValidationPackage package, ValidationP
var packageHash = package.GetPackageContentHash();
return hash.Hash == packageHash;
}
/// <summary>
/// Adds a content hash entry for the given package to the database context,
/// unless an entry with the same package name and version already exists.
/// The entry is only added to the context; this method does not call SaveChanges itself.
/// </summary>
/// <param name="package">The package to create the content hash entry for.</param>
/// <param name="database">The database context holding the hash entries.</param>
/// <returns>true if a new hash entry was added, false if there already was one for this package.</returns>
public static bool CreatePackageContentHash(ValidationPackage package, ValidationPackageDb database)
{
    var existingHash = database.Hashes.SingleOrDefault(entry =>
        entry.PackageName == package.Name
        && entry.PackageMajorVersion == package.MajorVersion
        && entry.PackageMinorVersion == package.MinorVersion
        && entry.PackagePatchVersion == package.PatchVersion);

    // there is an existing hash! Leave it untouched and report that nothing was created.
    if (existingHash != null)
    {
        return false;
    }

    database.Hashes.Add(new PackageContentHash
    {
        PackageName = package.Name,
        PackageMajorVersion = package.MajorVersion,
        PackageMinorVersion = package.MinorVersion,
        PackagePatchVersion = package.PatchVersion,
        Hash = package.GetPackageContentHash()
    });
    return true;
}
public static void IncrementDownloadCount(ValidationPackage package, ValidationPackageDb database)
{
var result = database.Downloads.SingleOrDefault(d => d.PackageName == package.Name && d.PackageMajorVersion == package.MajorVersion && d.PackageMinorVersion == package.MinorVersion && d.PackagePatchVersion == package.PatchVersion);
Expand Down
14 changes: 7 additions & 7 deletions src/PackageRegistryService/OpenAPI/OperationMetadataProcessor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ public class OperationMetadataProcessor : IOperationProcessor
{ "Description", "Verify a content hash for a given package. Hashes are MD5 hex fingerprints." }
}
},
{
"CreatePackageContentHash", new Dictionary<string, string>
{
{ "Summary", "Create a content hash for a given package." },
{ "Description", "Create a content hash for a given package. Hashes are MD5 hex fingerprints. This Endpoint requires API Key authentication." }
}
},
//{
// "CreatePackageContentHash", new Dictionary<string, string>
// {
// { "Summary", "Create a content hash for a given package." },
// { "Description", "Create a content hash for a given package. Hashes are MD5 hex fingerprints. This Endpoint requires API Key authentication." }
// }
//},
{
"GetAllDownloads", new Dictionary<string, string>
{
Expand Down
Loading

0 comments on commit dcc0070

Please sign in to comment.