Commit 6a1ac43
Merge branch 'develop' into 10104-get-dataset-citation-deaccessioned #…
pdurbin committed Nov 21, 2023
2 parents be631af + 9186b06, commit 6a1ac43
Showing 7 changed files with 84 additions and 45 deletions.
3 changes: 3 additions & 0 deletions doc/release-notes/10060-api-changelog.md
@@ -0,0 +1,3 @@
We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html

See also #10060.
13 changes: 13 additions & 0 deletions doc/sphinx-guides/source/api/changelog.rst
@@ -0,0 +1,13 @@
API Changelog
=============

.. contents:: |toctitle|
   :local:
   :depth: 1

6.0
-----

Changes
~~~~~~~
- **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result in a ``401`` error response. Previously, the download was allowed (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch.
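
For illustration (not part of this commit), a minimal Java sketch of the behavior change using the JDK's built-in HTTP client; the server URL, file id, and token value are placeholders:

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class DownloadStatusCheck {
        public static void main(String[] args) throws Exception {
            HttpClient client = HttpClient.newHttpClient();
            // Public (non-restricted) file, requested with an *invalid* API token;
            // the URL and file id are placeholders, not values from this commit.
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create("https://demo.dataverse.org/api/access/datafile/42"))
                    .header("X-Dataverse-key", "not-a-real-token")
                    .build();
            HttpResponse<Void> response = client.send(request, HttpResponse.BodyHandlers.discarding());
            System.out.println(response.statusCode()); // 401 on 6.0 and later; 200 on earlier releases
        }
    }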
1 change: 1 addition & 0 deletions doc/sphinx-guides/source/api/index.rst
@@ -24,3 +24,4 @@ API Guide
    linkeddatanotification
    apps
    faq
+   changelog
6 changes: 0 additions & 6 deletions mdc-logs/raw-mdc-2019-01-07.log

This file was deleted.

61 changes: 27 additions & 34 deletions src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
@@ -1449,6 +1449,17 @@ public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) {
        return s3io;
    }

+   private static InputStream getOriginalFileInputStream(StorageIO<DataFile> storage, boolean isTabularData) throws IOException {
+       storage.open(DataAccessOption.READ_ACCESS);
+       if (!isTabularData) {
+           return storage.getInputStream();
+       } else {
+           // if this is a tabular file, read the preserved original "auxiliary file"
+           // instead:
+           return storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
+       }
+   }
+
    public static void validateDataFileChecksum(DataFile dataFile) throws IOException {
        DataFile.ChecksumType checksumType = dataFile.getChecksumType();
        if (checksumType == null) {
@@ -1458,35 +1469,24 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOException {
        }

        StorageIO<DataFile> storage = dataFile.getStorageIO();
-       InputStream in = null;
-
-       try {
-           storage.open(DataAccessOption.READ_ACCESS);
-           if (!dataFile.isTabularData()) {
-               in = storage.getInputStream();
-           } else {
-               // if this is a tabular file, read the preserved original "auxiliary file"
-               // instead:
-               in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
-           }
-       } catch (IOException ioex) {
-           in = null;
-       }
-
-       if (in == null) {
-           String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString()));
-           logger.log(Level.INFO, info);
-           throw new IOException(info);
-       }
-
-       String recalculatedChecksum = null;
-       try {
-           recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType);
-       } catch (RuntimeException rte) {
-           logger.log(Level.SEVERE, "failed to calculated checksum, one retry", rte);
-           recalculatedChecksum = null;
-       } finally {
-           IOUtils.closeQuietly(in);
-       }
+       String recalculatedChecksum = null;
+
+       try (InputStream inputStream = getOriginalFileInputStream(storage, dataFile.isTabularData())) {
+           recalculatedChecksum = FileUtil.calculateChecksum(inputStream, checksumType);
+       } catch (IOException ioex) {
+           String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString()));
+           logger.log(Level.INFO, info);
+           throw new IOException(info);
+       } catch (RuntimeException rte) {
+           logger.log(Level.SEVERE, "failed to calculated checksum, one retry", rte);
+           recalculatedChecksum = null;
+       }
+
+       if (recalculatedChecksum == null) { //retry once
+           storage = dataFile.getStorageIO();
+           try (InputStream inputStream = getOriginalFileInputStream(storage, dataFile.isTabularData())) {
+               recalculatedChecksum = FileUtil.calculateChecksum(inputStream, checksumType);
+           }
+       }

        if (recalculatedChecksum == null) {
@@ -1504,19 +1504,12 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOException {
        boolean fixed = false;
        if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) {
            // try again, see if the .orig file happens to be there:
-           try {
-               in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
-           } catch (IOException ioex) {
-               in = null;
-           }
-
-           if (in != null) {
-               try {
-                   recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType);
-               } catch (RuntimeException rte) {
-                   recalculatedChecksum = null;
-               } finally {
-                   IOUtils.closeQuietly(in);
-               }
+           try (InputStream in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION)) {
+               recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType);
+           } catch (RuntimeException rte) {
+               recalculatedChecksum = null;
+           }
            if (recalculatedChecksum != null) {
                // try again:
                if (recalculatedChecksum.equals(dataFile.getChecksumValue())) {
                    fixed = true;
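
The refactor's net effect: both read paths now go through the new getOriginalFileInputStream() helper inside try-with-resources, so the manual IOUtils.closeQuietly() bookkeeping disappears, and a failed first pass gets exactly one retry against freshly opened storage. A self-contained sketch of that pattern (not the commit's code; StreamSource and the MD5 choice are stand-ins):

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.security.DigestInputStream;
    import java.security.MessageDigest;
    import java.security.NoSuchAlgorithmException;
    import java.util.HexFormat;

    public class ChecksumRetryDemo {

        // Stand-in for StorageIO: anything that can (re)open a fresh stream.
        interface StreamSource {
            InputStream open() throws IOException;
        }

        static String computeChecksum(InputStream in, String algorithm) throws IOException {
            try {
                MessageDigest md = MessageDigest.getInstance(algorithm);
                try (DigestInputStream dis = new DigestInputStream(in, md)) {
                    dis.transferTo(OutputStream.nullOutputStream()); // digest while draining
                }
                return HexFormat.of().formatHex(md.digest());
            } catch (NoSuchAlgorithmException e) {
                throw new IllegalStateException(e);
            }
        }

        static String checksumWithOneRetry(StreamSource source, String algorithm) throws IOException {
            String checksum = null;
            try (InputStream in = source.open()) { // stream is closed automatically
                checksum = computeChecksum(in, algorithm);
            } catch (RuntimeException rte) {
                checksum = null; // fall through to the single retry below
            }
            if (checksum == null) { // retry once against a freshly opened source
                try (InputStream in = source.open()) {
                    checksum = computeChecksum(in, algorithm);
                }
            }
            return checksum;
        }

        public static void main(String[] args) throws IOException {
            StreamSource source = () -> new ByteArrayInputStream("hello".getBytes());
            System.out.println(checksumWithOneRetry(source, "MD5"));
        }
    }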
34 changes: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
-- This creates a function that ESTIMATES the size of the
-- GuestbookResponse table (for the metrics display), instead
-- of relying on straight "SELECT COUNT(*) ..."
-- It uses statistics to estimate the number of guestbook entries
-- and the fraction of them related to downloads,
-- i.e. those that weren't created for 'AccessRequest' events.
-- Significant potential savings for an active installation.
-- See https://github.com/IQSS/dataverse/issues/8840 and
-- https://github.com/IQSS/dataverse/pull/8972 for more details
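-- Illustrative arithmetic (made-up numbers, not from this commit): with
-- reltuples=900000 and relpages=9000 in pg_class, and a physical relation of
-- 90000 blocks, the row estimate is (900000/9000) * 90000 = 9,000,000;
-- if pg_stats reports that 30% of eventtype values are 'AccessRequest',
-- the download estimate becomes 9,000,000 * (1 - 0.3) = 6,300,000.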

CREATE OR REPLACE FUNCTION estimateGuestBookResponseTableSize()
RETURNS bigint AS $$
DECLARE
estimatedsize bigint;
BEGIN
SELECT CASE WHEN relpages<10 THEN 0
ELSE ((reltuples / relpages)
* (pg_relation_size('public.guestbookresponse') / current_setting('block_size')::int))::bigint
* (SELECT CASE WHEN ((select count(*) from pg_stats where tablename='guestbookresponse') = 0
OR (select array_position(most_common_vals::text::text[], 'AccessRequest')
FROM pg_stats WHERE tablename='guestbookresponse' AND attname='eventtype') IS NULL) THEN 1
ELSE 1 - (SELECT (most_common_freqs::text::text[])[array_position(most_common_vals::text::text[], 'AccessRequest')]::float
FROM pg_stats WHERE tablename='guestbookresponse' and attname='eventtype') END)
END
FROM pg_class
WHERE oid = 'public.guestbookresponse'::regclass INTO estimatedsize;

IF estimatedsize = 0 THEN
SELECT COUNT(id) FROM guestbookresponse WHERE eventtype != 'AccessRequest' INTO estimatedsize;
END IF;

RETURN estimatedsize;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
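
For context (not part of this commit), a hypothetical Java call site showing how the function can be invoked through a JPA native query; the class and method names are illustrative:

    import jakarta.persistence.EntityManager;

    public class GuestbookMetricsSketch {
        // Hypothetical helper; PostgreSQL returns the function's bigint result as a Number.
        public static long estimatedGuestbookResponseCount(EntityManager em) {
            Number estimate = (Number) em.createNativeQuery(
                    "SELECT estimateGuestBookResponseTableSize()")
                    .getSingleResult();
            return estimate.longValue();
        }
    }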
11 changes: 6 additions & 5 deletions src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java
@@ -198,6 +198,8 @@ public void testDownloadSingleFile() {
        //Not logged in non-restricted
        Response anonDownloadOriginal = UtilIT.downloadFileOriginal(tabFile1Id);
        Response anonDownloadConverted = UtilIT.downloadFile(tabFile1Id);
+       Response anonDownloadConvertedNullKey = UtilIT.downloadFile(tabFile1Id, null);
+
        // ... and download the same tabular data file, but without the variable name header added:
        Response anonDownloadTabularNoHeader = UtilIT.downloadTabularFileNoVarHeader(tabFile1Id);
        // ... and download the same tabular file, this time requesting the "format=tab" explicitly:
@@ -206,6 +208,8 @@ public void testDownloadSingleFile() {
        assertEquals(OK.getStatusCode(), anonDownloadConverted.getStatusCode());
        assertEquals(OK.getStatusCode(), anonDownloadTabularNoHeader.getStatusCode());
        assertEquals(OK.getStatusCode(), anonDownloadTabularWithFormatName.getStatusCode());
+       assertEquals(UNAUTHORIZED.getStatusCode(), anonDownloadConvertedNullKey.getStatusCode());
+
        int origSizeAnon = anonDownloadOriginal.getBody().asByteArray().length;
        int convertSizeAnon = anonDownloadConverted.getBody().asByteArray().length;
        int tabularSizeNoVarHeader = anonDownloadTabularNoHeader.getBody().asByteArray().length;
@@ -423,10 +427,7 @@ private HashMap<String,ByteArrayOutputStream> readZipResponse(InputStream iStream) {
        }

        String name = entry.getName();
-       // String s = String.format("Entry: %s len %d added %TD",
-       //         entry.getName(), entry.getSize(),
-       //         new Date(entry.getTime()));
-       // System.out.println(s);
+

        // Once we get the entry from the zStream, the zStream is
        // positioned read to read the raw data, and we keep
@@ -466,7 +467,7 @@ private HashMap<String,ByteArrayOutputStream> readZipResponse(InputStream iStream) {

@Test
public void testRequestAccess() throws InterruptedException {

String pathToJsonFile = "scripts/api/data/dataset-create-new.json";
Response createDatasetResponse = UtilIT.createDatasetViaNativeApi(dataverseAlias, pathToJsonFile, apiToken);
createDatasetResponse.prettyPrint();