Skip to content

Commit

Permalink
Merge pull request #47 from sanctuuary/benchmarks_biotools_OEB
Browse files Browse the repository at this point in the history
Improve design-time benchmarks
  • Loading branch information
kretep authored Jan 29, 2024
2 parents 3b0b5c1 + 745afeb commit dc4e756
Show file tree
Hide file tree
Showing 10 changed files with 580 additions and 137 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,37 @@ public JSONObject getTitleJson() {
benchmarkJson.put("benchmark_description", benchmarkDescription);
return benchmarkJson;
}

/**
 * Render a count relative to a total as a {@code "count/length"} string.
 *
 * @param count  number shown before the slash
 * @param length number shown after the slash
 * @return both numbers joined by a forward slash, e.g. {@code "3/5"}
 */
static String ratioString(int count, int length) {
    StringBuilder ratio = new StringBuilder();
    ratio.append(count);
    ratio.append("/");
    ratio.append(length);
    return ratio.toString();
}

/**
 * Calculate the desirability value for the given workflow, assuming that it
 * increases linearly with the number of tools that satisfy the benchmark.
 *
 * <p>
 * A workflow without steps yields {@code 0} instead of the {@code NaN} or
 * infinite result of a division by zero, because non-finite numbers cannot be
 * represented in JSON.
 *
 * @param count          number of tools that satisfy the benchmark
 * @param workflowLength length of the workflow
 * @return Desirability value for the given workflow, or {@code 0} when
 *         {@code workflowLength} is not positive.
 */
static double normalDesirabilityDistribution(int count, int workflowLength) {
    if (workflowLength <= 0) {
        // No steps to evaluate: avoid dividing by zero.
        return 0.0;
    }
    return (double) count / workflowLength;
}

/**
 * Calculate a strict desirability value for the given workflow: the full score
 * is awarded only when every tool satisfies the benchmark. Any other workflow
 * receives a tenth of the linear {@code count / workflowLength} ratio.
 *
 * @param count          number of tools that satisfy the benchmark
 * @param workflowLength length of the workflow
 * @return {@code 1} when {@code count} equals {@code workflowLength},
 *         otherwise the ratio divided by 10.
 */
static double strictDesirabilityDistribution(int count, int workflowLength) {
    final boolean allToolsSatisfy = count == workflowLength;
    // Same evaluation order as a plain ratio followed by the penalty factor.
    return allToolsSatisfy ? 1 : (double) count / workflowLength / 10;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,35 +25,15 @@ public class BioToolsBenchmark {
private double desirabilityValue;
private List<WorkflowStepBench> workflow;

/**
 * Tell whether the given tool annotation lacks a {@code biotoolsID} key.
 *
 * @param toolAnnot JSON annotation of a single tool
 * @return {@code true} when the {@code biotoolsID} key is absent
 */
private static boolean emptyToolAnnotation(JSONObject toolAnnot) {
    final boolean hasBiotoolsId = toolAnnot.has("biotoolsID");
    return !hasBiotoolsId;
}

/**
 * Render a count relative to a total as a {@code "count/length"} string.
 *
 * @param count  number shown before the slash
 * @param length number shown after the slash
 * @return both numbers joined by a forward slash
 */
private static String ratioString(int count, int length) {
    final String numerator = Integer.toString(count);
    final String denominator = Integer.toString(length);
    return numerator + "/" + denominator;
}

/**
 * Calculate a desirability value that grows linearly with the number of tools
 * that satisfy the benchmark.
 *
 * <p>
 * A workflow without steps yields {@code 0} instead of the {@code NaN} or
 * infinite result of a division by zero, because non-finite numbers cannot be
 * represented in JSON.
 *
 * @param count          number of tools that satisfy the benchmark
 * @param workflowLength length of the workflow
 * @return {@code count / workflowLength}, or {@code 0} when
 *         {@code workflowLength} is not positive
 */
private static double normalDistribution(int count, int workflowLength) {
    if (workflowLength <= 0) {
        // No steps to evaluate: avoid dividing by zero.
        return 0.0;
    }
    return (double) count / workflowLength;
}

/**
 * Calculate a strict desirability value: the full score is awarded only when
 * every tool satisfies the benchmark; otherwise a tenth of the linear ratio is
 * returned.
 *
 * @param count          number of tools that satisfy the benchmark
 * @param workflowLength length of the workflow
 * @return {@code 1} when {@code count} equals {@code workflowLength},
 *         otherwise {@code count / workflowLength / 10}
 */
private static double strictDistribution(int count, int workflowLength) {
    if (count != workflowLength) {
        final double linearRatio = (double) count / workflowLength;
        return linearRatio / 10;
    }
    return 1;
}

/**
 * Build the benchmark for the given bio.tools annotations: each annotation is
 * evaluated into a workflow step, and the overall score is computed from the
 * number of steps with a positive desirability value.
 *
 * @param biotoolsAnnotations bio.tools annotations, one per workflow step
 * @param benchmarkTitle      title information of the benchmark
 * @return benchmark holding the per-step results, the {@code count/length}
 *         ratio and the strict desirability value
 */
public static BioToolsBenchmark countEntries(List<JSONObject> biotoolsAnnotations, BenchmarkBase benchmarkTitle) {
    BioToolsBenchmark benchmark = new BioToolsBenchmark(benchmarkTitle);
    int workflowLength = biotoolsAnnotations.size();

    benchmark.workflow = countEntries(biotoolsAnnotations);
    // A step satisfies the benchmark when its desirability value is positive.
    int count = (int) benchmark.workflow.stream().filter(tool -> tool.getDesirabilityValue() > 0).count();

    // Assign each field once, using the shared BenchmarkBase helpers.
    benchmark.desirabilityValue = BenchmarkBase.strictDesirabilityDistribution(count, workflowLength);
    benchmark.value = BenchmarkBase.ratioString(count, workflowLength);

    return benchmark;
}
Expand All @@ -66,8 +46,8 @@ public static BioToolsBenchmark countLicencedEntries(List<JSONObject> biotoolsAn
benchmark.workflow = countField(biotoolsAnnotations, benchmarkTitle.getExpectedField());
int count = (int) benchmark.workflow.stream().filter(tool -> tool.getDesirabilityValue() > 0).count();

benchmark.desirabilityValue = strictDistribution(count, workflowLength);
benchmark.value = ratioString(count, workflowLength);
benchmark.desirabilityValue = BenchmarkBase.strictDesirabilityDistribution(count, workflowLength);
benchmark.value = BenchmarkBase.ratioString(count, workflowLength);

return benchmark;
}
Expand All @@ -81,9 +61,9 @@ public static BioToolsBenchmark countOSEntries(List<JSONObject> biotoolsAnnotati
benchmarkTitle.getExpectedValue());
int count = (int) benchmark.workflow.stream().filter(tool -> tool.getDesirabilityValue() > 0).count();

benchmark.desirabilityValue = normalDistribution(count, workflowLength);
benchmark.desirabilityValue = BenchmarkBase.normalDesirabilityDistribution(count, workflowLength);

benchmark.value = ratioString(count, workflowLength);
benchmark.value = BenchmarkBase.ratioString(count, workflowLength);

return benchmark;
}
Expand Down Expand Up @@ -183,6 +163,10 @@ private static int countExistanceOfFields(List<JSONObject> biotoolsAnnotations,
return (int) count;
}

/**
 * Check whether a tool annotation is missing the {@code biotoolsID} key.
 *
 * @param toolAnnot JSON annotation of a single tool
 * @return {@code true} if the annotation has no {@code biotoolsID} key,
 *         {@code false} otherwise
 */
private static boolean emptyToolAnnotation(JSONObject toolAnnot) {
    if (toolAnnot.has("biotoolsID")) {
        return false;
    }
    return true;
}

/**
* Check whether the given value is in the set of given values.
*/
Expand Down
182 changes: 181 additions & 1 deletion src/main/java/nl/esciencecenter/models/benchmarks/OpenEBenchmark.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package nl.esciencecenter.models.benchmarks;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.json.JSONArray;
Expand All @@ -9,6 +10,7 @@

import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import nl.esciencecenter.restape.LicenseType;
import nl.esciencecenter.restape.ToolBenchmarkingAPIs;

@RequiredArgsConstructor
Expand All @@ -24,12 +26,190 @@ public class OpenEBenchmark {
private double desirabilityValue;
private List<WorkflowStepBench> workflow;

/**
 * Build the license benchmark for a workflow from the OpenEBench metrics of its
 * tools. The overall value is the {@code count/length} ratio of steps with a
 * positive desirability value, scored with the strict distribution.
 *
 * NOTE(review): steps classified as "closed" carry a desirability of 0.1 and
 * are therefore counted as well - confirm that this is intended.
 *
 * @param openEBenchBiotoolsMetrics OpenEBench metrics, one JSON object per tool
 * @param benchmarkTitle            title information of the benchmark
 * @return benchmark holding the per-step results and the aggregated values
 */
public static OpenEBenchmark countLicenseOpenness(List<JSONObject> openEBenchBiotoolsMetrics,
        BenchmarkBase benchmarkTitle) {
    OpenEBenchmark result = new OpenEBenchmark(benchmarkTitle);
    final int totalSteps = openEBenchBiotoolsMetrics.size();

    result.workflow = evaluateLicenseBenchmark(openEBenchBiotoolsMetrics);

    long satisfyingSteps = 0;
    for (WorkflowStepBench step : result.workflow) {
        if (step.getDesirabilityValue() > 0) {
            satisfyingSteps++;
        }
    }
    final int satisfied = (int) satisfyingSteps;

    result.desirabilityValue = BenchmarkBase.strictDesirabilityDistribution(satisfied, totalSteps);
    result.value = BenchmarkBase.ratioString(satisfied, totalSteps);
    return result;
}

/**
 * Evaluate the license of every tool and turn it into a workflow step
 * benchmark. The desirability grows with the openness of the license:
 * unknown (0), closed (0.1), open (0.8) and OSI approved (1).
 *
 * @param biotoolsAnnotations tool metrics, one JSON object per workflow step
 * @return one benchmark entry per tool, in the order of the input list
 * @throws IllegalArgumentException if a tool resolves to a license type that
 *                                  is not handled here
 */
private static List<WorkflowStepBench> evaluateLicenseBenchmark(List<JSONObject> biotoolsAnnotations) {
    List<WorkflowStepBench> biotoolsEntries = new ArrayList<>(biotoolsAnnotations.size());

    for (JSONObject toolAnnot : biotoolsAnnotations) {
        LicenseType license = isOSIFromOEBMetrics(toolAnnot);
        switch (license) {
            case Unknown:
                biotoolsEntries.add(createLicenseStep(0, "unknown", "Unknown"));
                break;
            case Closed:
                biotoolsEntries.add(createLicenseStep(0.1, "closed", "Closed"));
                break;
            case Open:
                biotoolsEntries.add(createLicenseStep(0.8, "open", "Open"));
                break;
            case OSI_Approved:
                biotoolsEntries.add(createLicenseStep(1, "osi", "OSI approved"));
                break;
            default:
                // Guards against enum constants added later without a score.
                throw new IllegalArgumentException("Unsupported license type: " + license);
        }
    }

    return biotoolsEntries;
}

/**
 * Create a workflow step benchmark with the given score and labels.
 */
private static WorkflowStepBench createLicenseStep(double desirability, String value, String description) {
    WorkflowStepBench step = new WorkflowStepBench();
    step.setDesirabilityValue(desirability);
    step.setValue(value);
    step.setDescription(description);
    return step;
}

/**
 * Build the citation benchmark for a workflow: the citations of every tool are
 * counted, and the median over all tools is used as the benchmark value and as
 * the basis for the overall desirability.
 *
 * <p>
 * Tools whose citation count could not be determined (non-numeric value such
 * as "Unknown") contribute {@code 0} citations to the median, in line with the
 * desirability of 0 assigned to those steps.
 *
 * @param openEBenchBiotoolsMetrics OpenEBench metrics, one JSON object per tool
 * @param benchmarkTitle            title information of the benchmark
 * @return benchmark holding the per-step citation counts and the median
 */
public static OpenEBenchmark countCitationsBenchmark(List<JSONObject> openEBenchBiotoolsMetrics,
        BenchmarkBase benchmarkTitle) {
    OpenEBenchmark benchmark = new OpenEBenchmark(benchmarkTitle);

    benchmark.workflow = countCitationPerTool(openEBenchBiotoolsMetrics);
    List<Integer> counts = new ArrayList<>(benchmark.workflow.size());
    for (WorkflowStepBench tool : benchmark.workflow) {
        counts.add(parseCitationCount(tool.getValue()));
    }
    int median = findMedian(counts);

    benchmark.value = String.valueOf(median);
    benchmark.desirabilityValue = computeCitationDesirability(median);

    return benchmark;
}

/**
 * Parse the citation count stored in a step value, mapping missing or
 * non-numeric values to 0.
 */
private static int parseCitationCount(String value) {
    try {
        return Integer.parseInt(value);
    } catch (NumberFormatException e) {
        // Deliberate fallback: the step has no usable citation count.
        return 0;
    }
}

/**
 * Calculates the median of the given List of Integers. The input list is left
 * unmodified; a sorted copy is used internally.
 *
 * @param counts the List of Integer values
 * @return the median value; for an even number of elements the average of the
 *         two middle elements, truncated toward zero
 * @throws IllegalArgumentException if the input list is null or empty
 */
private static int findMedian(List<Integer> counts) {
    if (counts == null || counts.isEmpty()) {
        throw new IllegalArgumentException("List of counts cannot be null or empty");
    }

    // Sort a copy so the caller's list keeps its order.
    List<Integer> sorted = new ArrayList<>(counts);
    Collections.sort(sorted);

    int size = sorted.size();
    int upperMiddle = sorted.get(size / 2);
    if (size % 2 == 1) {
        // Odd size: the middle element is the median.
        return upperMiddle;
    }

    // Even size: average the two middle elements; long arithmetic cannot overflow.
    long lowerMiddle = sorted.get(size / 2 - 1);
    return (int) ((lowerMiddle + upperMiddle) / 2);
}

/**
 * Sum the citation counts of all publications of every tool and turn the total
 * into a workflow step benchmark. For each publication the {@code cit_count}
 * of its first entry is used. Tools whose metrics cannot be read are reported
 * as "Unknown" with a desirability of 0.
 *
 * @param openEBenchBiotoolsMetrics OpenEBench metrics, one JSON object per tool
 * @return one benchmark entry per tool, in the order of the input list
 */
private static List<WorkflowStepBench> countCitationPerTool(List<JSONObject> openEBenchBiotoolsMetrics) {
    List<WorkflowStepBench> steps = new ArrayList<>();

    for (JSONObject toolMetrics : openEBenchBiotoolsMetrics) {
        WorkflowStepBench step = new WorkflowStepBench();
        try {
            JSONArray publications = toolMetrics.getJSONObject("project").getJSONArray("publications");
            int totalCitations = 0;
            int index = 0;
            while (index < publications.length()) {
                JSONArray entries = publications.getJSONObject(index).getJSONArray("entries");
                totalCitations += entries.getJSONObject(0).getInt("cit_count");
                index++;
            }
            // The total serves both as the step value and as its description.
            String totalText = String.valueOf(totalCitations);
            step.setDesirabilityValue(computeCitationDesirability(totalCitations));
            step.setValue(totalText);
            step.setDescription(totalText);
        } catch (JSONException e) {
            e.printStackTrace();
            // Expected structure is missing: report the citation count as unknown.
            step.setDesirabilityValue(0);
            step.setValue("Unknown");
            step.setDescription("Unknown");
        }
        steps.add(step);
    }

    return steps;
}

/**
 * Compute the citation desirability according to a predefined set of
 * thresholds: no citations score 0, fewer than 10 score 0.25, fewer than 30
 * score 0.5, fewer than 50 score 0.75, and 50 or more score 1.
 *
 * @param count number of citations; negative values are treated as 0 citations
 * @return desirability value between 0 and 1
 */
private static double computeCitationDesirability(int count) {
    if (count <= 0) {
        // A negative count is not a valid citation number and earns no score.
        return 0;
    } else if (count < 10) {
        return 0.25;
    } else if (count < 30) {
        return 0.5;
    } else if (count < 50) {
        return 0.75;
    } else {
        return 1;
    }
}

/**
 * Serialize this benchmark to JSON. The title JSON is extended with the
 * benchmark value, the desirability value, the raw workflow list under
 * {@code "workflow"} and the JSON form of every step under {@code "steps"}.
 *
 * @return JSON representation of this benchmark
 */
public JSONObject getJson() {
    JSONObject json = this.benchmarkTitle.getTitleJson();

    json.put("value", value);
    json.put("desirability_value", desirabilityValue);
    json.put("workflow", workflow);

    JSONArray stepsJson = new JSONArray();
    workflow.forEach(step -> stepsJson.put(step.toJSON()));
    json.put("steps", stepsJson);

    return json;
}

/**
 * Parse the JSON object returned by OpenEBench API describing the tool metrics
 * and classify the license of the tool.
 *
 * @param toolMetrics - JSON object returned by OpenEBench API describing the
 *                    tool metrics.
 * @return {@link LicenseType#OSI_Approved} if the {@code osi} flag is set,
 *         {@link LicenseType#Open} if only the {@code open_source} flag is
 *         set, {@link LicenseType#Closed} if neither is set, and
 *         {@link LicenseType#Unknown} if the license information is missing
 *         or lacks a flag needed for the classification.
 */
public static LicenseType isOSIFromOEBMetrics(JSONObject toolMetrics) throws JSONException {
    try {
        JSONObject licenseJson = toolMetrics.getJSONObject("project").getJSONObject("license");

        if (licenseJson.getBoolean("osi")) {
            return LicenseType.OSI_Approved;
        }
        return licenseJson.getBoolean("open_source") ? LicenseType.Open : LicenseType.Closed;
    } catch (JSONException e) {
        // Missing or malformed license data: the type cannot be determined.
        return LicenseType.Unknown;
    }
}

}
34 changes: 34 additions & 0 deletions src/main/java/nl/esciencecenter/restape/LicenseType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package nl.esciencecenter.restape;

/**
 * Categories of software licenses, distinguished by how open the license is
 * and whether it is approved by the Open Source Initiative (OSI).
 */
public enum LicenseType {

    /** The license type could not be determined. */
    Unknown,

    /** Closed-source (proprietary) license: the source code is not publicly available. */
    Closed,

    /** Open-source license: the source code is publicly available, but the license is not necessarily OSI-approved. */
    Open,

    /** License approved by the Open Source Initiative, i.e. it adheres to the OSI definition of open source. */
    OSI_Approved
}
4 changes: 4 additions & 0 deletions src/main/java/nl/esciencecenter/restape/RestApeUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.function.Consumer;

import org.json.JSONObject;

import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import lombok.AccessLevel;
import lombok.Getter;

@Slf4j
@NoArgsConstructor(access = AccessLevel.PRIVATE)
public class RestApeUtils {

Expand Down Expand Up @@ -126,4 +129,5 @@ public static JSONObject combineJSONObjects(JSONObject... jsonObjects) {
}
return combinedJson;
}

}
Loading

0 comments on commit dc4e756

Please sign in to comment.