diff --git a/.github/actions/setup-maven/action.yml b/.github/actions/setup-maven/action.yml new file mode 100644 index 00000000000..4cf09f34231 --- /dev/null +++ b/.github/actions/setup-maven/action.yml @@ -0,0 +1,37 @@ +--- +name: "Setup Maven and Caches" +description: "Determine Java version and setup Maven, including necessary caches." +inputs: + git-reference: + description: 'The git reference (branch/tag) to check out' + required: false + default: '${{ github.ref }}' + pom-paths: + description: "List of paths to Maven POM(s) for cache dependency setup" + required: false + default: 'pom.xml' +runs: + using: composite + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + ref: ${{ inputs.git-reference }} + - name: Determine Java version by reading the Maven property + shell: bash + run: | + echo "JAVA_VERSION=$(grep '<target.java.version>' ${GITHUB_WORKSPACE}/modules/dataverse-parent/pom.xml | cut -f2 -d'>' | cut -f1 -d'<')" | tee -a ${GITHUB_ENV} + - name: Set up JDK ${{ env.JAVA_VERSION }} + id: setup-java + uses: actions/setup-java@v4 + with: + java-version: ${{ env.JAVA_VERSION }} + distribution: 'temurin' + cache: 'maven' + cache-dependency-path: ${{ inputs.pom-paths }} + - name: Download common cache on branch cache miss + if: ${{ steps.setup-java.outputs.cache-hit != 'true' }} + uses: actions/cache/restore@v4 + with: + key: dataverse-maven-cache + path: ~/.m2/repository diff --git a/.github/workflows/container_app_push.yml b/.github/workflows/container_app_push.yml index b3e247e376c..3b7ce066d73 100644 --- a/.github/workflows/container_app_push.yml +++ b/.github/workflows/container_app_push.yml @@ -5,6 +5,12 @@ on: # We are deliberately *not* running on push events here to avoid double runs. # Instead, push events will trigger from the base image and maven unit tests via workflow_call. workflow_call: + inputs: + base-image-ref: + type: string + description: "Reference of the base image to build on in fully qualified form [<registry>/]<namespace>/<repository>:<tag>" + required: false + default: "gdcc/base:unstable" pull_request: branches: - develop @@ -16,7 +22,6 @@ on: env: IMAGE_TAG: unstable - BASE_IMAGE_TAG: unstable REGISTRY: "" # Empty means default to Docker Hub PLATFORMS: "linux/amd64,linux/arm64" MASTER_BRANCH_TAG: alpha @@ -33,20 +38,24 @@ jobs: if: ${{ github.repository_owner == 'IQSS' }} steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Set up JDK - uses: actions/setup-java@v3 + - name: Checkout and Setup Maven + uses: IQSS/dataverse/.github/actions/setup-maven@develop with: - java-version: "17" - distribution: temurin - cache: maven + pom-paths: | + pom.xml + modules/container-configbaker/pom.xml + modules/dataverse-parent/pom.xml + + # TODO: Add a filter step here that avoids building the image if this is a PR and files other than those declared above are touched. + # Use https://github.com/dorny/paths-filter to solve this. This will ensure we do not run this twice if this workflow + # is already triggered by the other workflows (base image or Java changes). + # To become a part of #10618.
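For clarity, here is a minimal, hedged sketch of what the "Determine Java version" step's grep/cut pipeline does; the `<target.java.version>` property name and the temporary snippet file are illustrative assumptions, not part of the action itself.

```bash
#!/bin/bash
# Stand-in for modules/dataverse-parent/pom.xml (assumed property name, for illustration only)
cat > /tmp/pom-snippet.xml << 'EOF'
<properties>
    <target.java.version>17</target.java.version>
</properties>
EOF

# Same pipeline as in the composite action: keep the text between '>' and '<'
JAVA_VERSION="$(grep '<target.java.version>' /tmp/pom-snippet.xml | cut -f2 -d'>' | cut -f1 -d'<')"
echo "JAVA_VERSION=${JAVA_VERSION}"   # -> JAVA_VERSION=17
```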
- name: Build app and configbaker container image with local architecture and submodules (profile will skip tests) run: > mvn -B -f modules/dataverse-parent -P ct -pl edu.harvard.iq:dataverse -am + $( [[ -n "${{ inputs.base-image-ref }}" ]] && echo "-Dbase.image=${{ inputs.base-image-ref }}" ) install # TODO: add smoke / integration testing here (add "-Pct -DskipIntegrationTests=false") @@ -106,11 +115,13 @@ jobs: if: needs.check-secrets.outputs.available == 'true' && ( github.event_name != 'push' || ( github.event_name == 'push' && contains(fromJSON('["develop", "master"]'), github.ref_name))) steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 + - name: Checkout and Setup Maven + uses: IQSS/dataverse/.github/actions/setup-maven@develop with: - java-version: "17" - distribution: temurin + pom-paths: | + pom.xml + modules/container-configbaker/pom.xml + modules/dataverse-parent/pom.xml # Depending on context, we push to different targets. Login accordingly. - if: github.event_name != 'pull_request' @@ -146,11 +157,13 @@ jobs: run: > mvn -B -f modules/dataverse-parent -P ct -pl edu.harvard.iq:dataverse -am + $( [[ -n "${{ inputs.base-image-ref }}" ]] && echo "-Dbase.image=${{ inputs.base-image-ref }}" ) install - name: Deploy multi-arch application and configbaker container image run: > mvn - -Dapp.image.tag=${{ env.IMAGE_TAG }} -Dbase.image.tag=${{ env.BASE_IMAGE_TAG }} + -Dapp.image.tag=${{ env.IMAGE_TAG }} + $( [[ -n "${{ inputs.base-image-ref }}" ]] && echo "-Dbase.image=${{ inputs.base-image-ref }}" ) ${{ env.REGISTRY }} -Ddocker.platforms=${{ env.PLATFORMS }} -P ct deploy diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index b938851f816..c2340576c78 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -1,99 +1,130 @@ --- -name: Base Container Image +name: Container Images Releasing on: push: + tags: + - 'v[6-9].**' branches: - 'develop' - - 'master' + # "Path filters are not evaluated for pushes of tags" https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#onpushpull_requestpull_request_targetpathspaths-ignore paths: - 'modules/container-base/**' + - '!modules/container-base/src/backports/**' + - '!modules/container-base/README.md' - 'modules/dataverse-parent/pom.xml' - '.github/workflows/container_base_push.yml' - pull_request: - branches: - - 'develop' - - 'master' - paths: - - 'modules/container-base/**' - - 'modules/dataverse-parent/pom.xml' - - '.github/workflows/container_base_push.yml' - schedule: - - cron: '23 3 * * 0' # Run for 'develop' every Sunday at 03:23 UTC + + # These TODOs are left for #10618 + # TODO: we are missing a workflow_call option here, so we can trigger this flow from pr comments and maven tests (keep the secrets availability in mind!) + # TODO: we are missing a pull_request option here (filter for stuff that would trigger the maven runs!) so we can trigger preview builds for them when coming from the main repo (keep the secrets availability in mind!) 
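The `$( [[ -n ... ]] && echo ... )` fragment in the mvn calls above appends `-Dbase.image` only when the `base-image-ref` input is non-empty. A small shell sketch of that pattern (the function and variable names are illustrative only):

```bash
#!/bin/bash
# Echo the Maven command that would be run, with the optional -Dbase.image flag.
print_build_cmd() {
  local BASE_IMAGE_REF="$1"   # stand-in for ${{ inputs.base-image-ref }}
  echo mvn -B -f modules/dataverse-parent -P ct -pl edu.harvard.iq:dataverse -am \
    $( [[ -n "${BASE_IMAGE_REF}" ]] && echo "-Dbase.image=${BASE_IMAGE_REF}" ) \
    install
}

print_build_cmd ""                   # no ref given -> no -Dbase.image, Maven default applies
print_build_cmd "gdcc/base:unstable" # -> ... -Dbase.image=gdcc/base:unstable install
```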
env: - IMAGE_TAG: unstable PLATFORMS: linux/amd64,linux/arm64 + DEVELOPMENT_BRANCH: develop jobs: build: - name: Build image + name: Base Image runs-on: ubuntu-latest permissions: contents: read packages: read - strategy: - matrix: - jdk: [ '17' ] # Only run in upstream repo - avoid unnecessary runs in forks if: ${{ github.repository_owner == 'IQSS' }} + outputs: + base-image-ref: ${{ steps.finalize.outputs.base-image-ref }} steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Set up JDK ${{ matrix.jdk }} - uses: actions/setup-java@v3 + - name: Checkout and Setup Maven + uses: IQSS/dataverse/.github/actions/setup-maven@develop with: - java-version: ${{ matrix.jdk }} - distribution: 'adopt' - - name: Cache Maven packages - uses: actions/cache@v3 - with: - path: ~/.m2 - key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - restore-keys: ${{ runner.os }}-m2 - - - name: Build base container image with local architecture - run: mvn -f modules/container-base -Pct package + pom-paths: modules/container-base/pom.xml - # Run anything below only if this is not a pull request. - # Accessing, pushing tags etc. to DockerHub will only succeed in upstream because secrets. - - - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} - name: Push description to DockerHub - uses: peter-evans/dockerhub-description@v3 + # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and + # on events in context of upstream because secrets. PRs run in context of forks by default! + - name: Log in to the Container registry + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - repository: gdcc/base - short-description: "Dataverse Base Container image providing Payara application server and optimized configuration" - readme-filepath: ./modules/container-base/README.md - - if: ${{ github.event_name != 'pull_request' }} - name: Log in to the Container registry - uses: docker/login-action@v2 + # In case this is a push to develop, we care about buildtime. + # Configure a remote ARM64 build host in addition to the local AMD64 in two steps. 
+ - name: Setup SSH agent + if: ${{ github.event_name != 'schedule' }} + uses: webfactory/ssh-agent@v0.9.0 with: - registry: ${{ env.REGISTRY }} - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - if: ${{ github.event_name != 'pull_request' }} - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v2 - - name: Re-set image tag based on branch - if: ${{ github.ref_name == 'master' }} - run: echo "IMAGE_TAG=alpha" >> $GITHUB_ENV - - if: ${{ github.event_name != 'pull_request' }} - name: Deploy multi-arch base container image to Docker Hub - run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }} -Ddocker.platforms=${{ env.PLATFORMS }} + ssh-private-key: ${{ secrets.BUILDER_ARM64_SSH_PRIVATE_KEY }} + - name: Provide the known hosts key and the builder config + if: ${{ github.event_name != 'schedule' }} + run: | + echo "${{ secrets.BUILDER_ARM64_SSH_HOST_KEY }}" > ~/.ssh/known_hosts + mkdir -p modules/container-base/target/buildx-state/buildx/instances + cat > modules/container-base/target/buildx-state/buildx/instances/maven << EOF + { "Name": "maven", + "Driver": "docker-container", + "Dynamic": false, + "Nodes": [{"Name": "maven0", + "Endpoint": "unix:///var/run/docker.sock", + "Platforms": [{"os": "linux", "architecture": "amd64"}], + "DriverOpts": null, + "Flags": ["--allow-insecure-entitlement=network.host"], + "Files": null}, + {"Name": "maven1", + "Endpoint": "ssh://${{ secrets.BUILDER_ARM64_SSH_CONNECTION }}", + "Platforms": [{"os": "linux", "architecture": "arm64"}], + "DriverOpts": null, + "Flags": ["--allow-insecure-entitlement=network.host"], + "Files": null}]} + EOF + + # Determine the base image name we are going to use from here on + - name: Determine base image name + run: | + if [[ "${{ github.ref_name }}" = "${{ env.DEVELOPMENT_BRANCH }}" ]]; then + echo "BASE_IMAGE=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -q -DforceStdout )" | tee -a "${GITHUB_ENV}" + echo "BASE_IMAGE_UPCOMING=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -Dbase.image.tag.suffix="" -q -DforceStdout )" | tee -a "${GITHUB_ENV}" + else + echo "BASE_IMAGE=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -Dbase.image.tag.suffix="" -q -DforceStdout )" | tee -a "${GITHUB_ENV}" + fi + - name: Calculate revision number for immutable tag (on release branches only) + if: ${{ github.ref_name != env.DEVELOPMENT_BRANCH }} + id: revision-tag + uses: ./.github/actions/get-image-revision + with: + image-ref: ${{ env.BASE_IMAGE }} + tag-options-prefix: "-Dbase.image.tag.suffix='' -Ddocker.tags.revision=" + - name: Configure update of "latest" tag for development branch + id: develop-tag + if: ${{ github.ref_name == env.DEVELOPMENT_BRANCH }} + run: | + echo "tag-options=-Ddocker.tags.develop=unstable -Ddocker.tags.upcoming=${BASE_IMAGE_UPCOMING#*:}" | tee -a "${GITHUB_OUTPUT}" + + - name: Deploy multi-arch base container image to Docker Hub + id: build + run: | + mvn -f modules/container-base -Pct deploy -Ddocker.noCache -Ddocker.platforms=${{ env.PLATFORMS }} \ + -Ddocker.imagePropertyConfiguration=override ${{ steps.develop-tag.outputs.tag-options }} ${{ steps.revision-tag.outputs.tag-options }} + + - name: Determine appropriate base image ref for app image + id: finalize + run: | + if [[ "${{ github.ref_name }}" = "${{ env.DEVELOPMENT_BRANCH }}" ]]; then + echo "base-image-ref=${BASE_IMAGE_UPCOMING}" | tee -a 
"$GITHUB_OUTPUT" + else + echo "base-image-ref=gdcc/base:${{ steps.revision-tag.outputs.revision-tag }}" | tee -a "$GITHUB_OUTPUT" + fi + push-app-img: name: "Rebase & Publish App Image" permissions: contents: read packages: write pull-requests: write - needs: build - # We do not release a new base image for pull requests, so do not trigger. - if: ${{ github.event_name != 'pull_request' }} - uses: ./.github/workflows/container_app_push.yml secrets: inherit + needs: + - build + uses: ./.github/workflows/container_app_push.yml + with: + base-image-ref: ${{ needs.build.outputs.base-image-ref }} diff --git a/.github/workflows/container_maintenance.yml b/.github/workflows/container_maintenance.yml new file mode 100644 index 00000000000..986fe25cdf5 --- /dev/null +++ b/.github/workflows/container_maintenance.yml @@ -0,0 +1,119 @@ +--- +name: Container Images Scheduled Maintenance + +on: + # TODO: think about adding a (filtered) push event trigger here in case we change the patches + # --- + # Allow manual workflow triggers in case we need to repair images on Docker Hub (build and replace) + workflow_dispatch: + inputs: + force_build: + type: boolean + required: false + default: false + description: "Build and deploy even if no newer Java images or package updates are found." + schedule: + - cron: '23 3 * * 0' # Run for 'develop' every Sunday at 03:23 UTC + +env: + PLATFORMS: linux/amd64,linux/arm64 + NUM_PAST_RELEASES: 3 + +jobs: + build: + name: Base Image Matrix Build + runs-on: ubuntu-latest + permissions: + contents: read + packages: read + # Only run in upstream repo - avoid unnecessary runs in forks + if: ${{ github.repository_owner == 'IQSS' }} + outputs: + supported_tag_matrix: ${{ steps.execute.outputs.supported_tag_matrix }} + rebuilt_base_images: ${{ steps.execute.outputs.rebuilt_base_images }} + + steps: + - name: Checkout and Setup Maven + uses: IQSS/dataverse/.github/actions/setup-maven@develop + with: + pom-paths: modules/container-base/pom.xml + + # Note: Accessing, pushing tags etc. to DockerHub will only succeed in upstream and + # on events in context of upstream because secrets. PRs run in context of forks by default! + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Set up QEMU for multi-arch builds + uses: docker/setup-qemu-action@v3 + with: + platforms: ${{ env.PLATFORMS }} + + # Discover the releases we want to maintain + - name: Discover maintained releases + id: discover + run: | + echo "FORCE_BUILD=$( [[ "${{ inputs.force_build }}" = "true" ]] && echo 1 || echo 0 )" | tee -a "$GITHUB_ENV" + DEVELOPMENT_BRANCH=$( curl -f -sS https://api.github.com/repos/${{ github.repository }} | jq -r '.default_branch' ) + echo "DEVELOPMENT_BRANCH=$DEVELOPMENT_BRANCH" | tee -a "$GITHUB_ENV" + echo "branches=$( curl -f -sS https://api.github.com/repos/IQSS/dataverse/releases | jq -r " .[0:${{ env.NUM_PAST_RELEASES }}] | .[].tag_name, \"${DEVELOPMENT_BRANCH}\" " | tr "\n" " " )" | tee -a "${GITHUB_OUTPUT}" + + # Execute matrix build for the discovered branches + - name: Execute build matrix script + id: execute + run: | + .github/workflows/scripts/maintenance-job.sh ${{ steps.discover.outputs.branches }} + + # TODO: Use the needs.build.outputs.rebuilt_base_images with fromJSON() to create a matrix job. 
+ # Must be a single rank matrix (vector), the branch and base image tag information ships as "branch=tag" string + # Will be part of working on #10618, app image versioned tags. + #push-app-img: + # name: "Rebase & Publish App Image" + # permissions: + # contents: read + # packages: write + # pull-requests: write + # secrets: inherit + # needs: + # - build + # strategy: + # fail-fast: false + # matrix: + # branch: ${{ fromJson(needs.discover.outputs.branches) }} + # uses: ./.github/workflows/container_app_push.yml + # with: + # branch: ${{ matrix.branch }} + + hub-description: + name: Push description to DockerHub + runs-on: ubuntu-latest + permissions: + contents: read + packages: read + needs: build + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Render README + id: render + run: | + TAGS_JSON='${{ needs.build.outputs.supported_tag_matrix }}' + echo "$TAGS_JSON" | jq -r 'keys | sort | reverse | .[]' | + while IFS= read -r branch; do + echo \ + "- \`$( echo "$TAGS_JSON" | jq --arg v "$branch" -r '.[$v] | join("`, `")' )\`" \ + "([Dockerfile](https://github.com/IQSS/dataverse/blob/${branch}/modules/container-base/src/main/docker/Dockerfile)," \ + "[Patches](https://github.com/IQSS/dataverse/blob/develop/modules/container-base/src/backports/${branch}))" \ + | tee -a "${GITHUB_WORKSPACE}/tags.md" + done + sed -i -e "/<\!-- TAG BLOCK HERE -->/r ${GITHUB_WORKSPACE}/tags.md" "./modules/container-base/README.md" + + - name: Push description to DockerHub + uses: peter-evans/dockerhub-description@v4 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + repository: gdcc/base + short-description: "Dataverse Base Container image providing Payara application server and optimized configuration" + readme-filepath: ./modules/container-base/README.md \ No newline at end of file diff --git a/.github/workflows/maven_unit_test.yml b/.github/workflows/maven_unit_test.yml index 4ad4798bc64..a94b17a67ba 100644 --- a/.github/workflows/maven_unit_test.yml +++ b/.github/workflows/maven_unit_test.yml @@ -30,6 +30,7 @@ jobs: continue-on-error: ${{ matrix.experimental }} runs-on: ubuntu-latest steps: + # TODO: As part of #10618 change to setup-maven custom action # Basic setup chores - uses: actions/checkout@v3 - name: Set up JDK ${{ matrix.jdk }} @@ -95,6 +96,7 @@ jobs: # status: "Experimental" continue-on-error: ${{ matrix.experimental }} steps: + # TODO: As part of #10618 change to setup-maven custom action # Basic setup chores - uses: actions/checkout@v3 - name: Set up JDK ${{ matrix.jdk }} @@ -128,6 +130,7 @@ jobs: needs: integration-test name: Coverage Report Submission steps: + # TODO: As part of #10618 change to setup-maven custom action # Basic setup chores - uses: actions/checkout@v3 - uses: actions/setup-java@v3 @@ -156,6 +159,11 @@ jobs: # NOTE: this may be extended with adding a report to the build output, leave a comment, send to Sonarcloud, ... + # TODO: Add a filter step here, that avoids calling the app image release workflow if there are changes to the base image. + # Use https://github.com/dorny/paths-filter to solve this. Will require and additional job or adding to integration-test job. + # This way we ensure that we're not running the app image flow with a non-matching base image. + # To become a part of #10618. 
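To make the "Render README" step of the `hub-description` job above easier to follow, here is a hedged sketch with a made-up `supported_tag_matrix` value; the real keys and tag lists come from the maintenance script's job output.

```bash
#!/bin/bash
# Made-up matrix: branch/tag name -> list of supported rolling/immutable tags
TAGS_JSON='{"v6.4": ["latest", "6.4", "6.4-r1"], "develop": ["unstable", "6.5"]}'

echo "$TAGS_JSON" | jq -r 'keys | sort | reverse | .[]' |
while IFS= read -r branch; do
  echo "- \`$( echo "$TAGS_JSON" | jq --arg v "$branch" -r '.[$v] | join("`, `")' )\` (branch: ${branch})"
done
# One Markdown bullet per maintained branch, in reverse-sorted order:
# - `latest`, `6.4`, `6.4-r1` (branch: v6.4)
# - `unstable`, `6.5` (branch: develop)
```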
+ push-app-img: name: Publish App Image permissions: diff --git a/.github/workflows/scripts/maintenance-job.sh b/.github/workflows/scripts/maintenance-job.sh new file mode 100755 index 00000000000..370988b9812 --- /dev/null +++ b/.github/workflows/scripts/maintenance-job.sh @@ -0,0 +1,180 @@ +#!/bin/bash + +# A matrix-like job to maintain a number of releases as well as the latest snapshot of Dataverse. + +# PREREQUISITES: +# - You have Java, Maven, QEMU and Docker all set up and ready to go +# - You obviously checked out the develop branch, otherwise you'd not be executing this script +# - You added all the branch names you want to run maintenance for as arguments +# Optional, but recommended: +# - You added a DEVELOPMENT_BRANCH env var to your runner/job env with the name of the development branch +# - You added a FORCE_BUILD=0|1 env var to indicate if the base image build should be forced +# - You added a PLATFORMS env var with all the target platforms you want to build for + +# NOTE: +# This script consolidates a number of GitHub Action steps into a single script. +# The reason to put all of this in here is the complexity of the GitHub Action and the limitations of the +# matrix support in GitHub Actions, where outputs cannot be aggregated or otherwise used further. + +set -euo pipefail + +# Get all the inputs +# If not within a runner, just print to stdout (duplicating the output in case of tee usage, but that's ok for testing) +GITHUB_OUTPUT=${GITHUB_OUTPUT:-"/proc/self/fd/1"} +GITHUB_ENV=${GITHUB_ENV:-"/proc/self/fd/1"} +GITHUB_WORKSPACE=${GITHUB_WORKSPACE:-"$(pwd)"} +GITHUB_SERVER_URL=${GITHUB_SERVER_URL:-"https://github.com"} +GITHUB_REPOSITORY=${GITHUB_REPOSITORY:-"IQSS/dataverse"} + +MAINTENANCE_WORKSPACE="${GITHUB_WORKSPACE}/maintenance-job" + +DEVELOPMENT_BRANCH="${DEVELOPMENT_BRANCH:-"develop"}" +FORCE_BUILD="${FORCE_BUILD:-"0"}" +PLATFORMS="${PLATFORMS:-"linux/amd64,linux/arm64"}" + +# Setup and validation +if [[ -z "$*" ]]; then + >&2 echo "You must give a list of branch names as arguments" + exit 1; +fi + +source "$( dirname "$0" )/utils.sh" + +# Delete old stuff if present +rm -rf "$MAINTENANCE_WORKSPACE" +mkdir -p "$MAINTENANCE_WORKSPACE" + +# Store the image tags we maintain in this array (same order as branches array!) +# This list will be used to build the support matrix within the Docker Hub image description +SUPPORTED_ROLLING_TAGS=() +# Store the tags of base images we are actually rebuilding to base new app images upon +# Takes the form "branch-name=base-image-ref" +REBUILT_BASE_IMAGES=() + +for BRANCH in "$@"; do + echo "::group::Running maintenance for $BRANCH" + + # 0. Determine if this is a development branch and the most current release + IS_DEV=0 + if [[ "$BRANCH" = "$DEVELOPMENT_BRANCH" ]]; then + IS_DEV=1 + fi + IS_CURRENT_RELEASE=0 + if [[ "$BRANCH" = $( curl -f -sS "https://api.github.com/repos/$GITHUB_REPOSITORY/releases" | jq -r '.[0].tag_name' ) ]]; then + IS_CURRENT_RELEASE=1 + fi + + # 1. Let's get the maintained sources + git clone -c advice.detachedHead=false --depth 1 --branch "$BRANCH" "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}" "$MAINTENANCE_WORKSPACE/$BRANCH" + # Switch context + cd "$MAINTENANCE_WORKSPACE/$BRANCH" + + # 2. Now let's apply the patches (we have them checked out in $GITHUB_WORKSPACE, not necessarily in this local checkout) + echo "Checking for patches..." + if [[ -d ${GITHUB_WORKSPACE}/modules/container-base/src/backports/$BRANCH ]]; then + echo "Applying patches now."
+ find "${GITHUB_WORKSPACE}/modules/container-base/src/backports/$BRANCH" -type f -name '*.patch' -print0 | xargs -0 -n1 patch -p1 -s -i + fi + + # 3. Determine the base image ref (/:) + BASE_IMAGE_REF="" + # For the dev branch we want to full flexi stack tag, to detect stack upgrades requiring new build + if (( IS_DEV )); then + BASE_IMAGE_REF=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -q -DforceStdout ) + else + BASE_IMAGE_REF=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image -Dbase.image.tag.suffix="" -q -DforceStdout ) + fi + echo "Determined BASE_IMAGE_REF=$BASE_IMAGE_REF from Maven" + + # 4. Check for Temurin image updates + JAVA_IMAGE_REF=$( mvn help:evaluate -Pct -f modules/container-base -Dexpression=java.image -q -DforceStdout ) + echo "Determined JAVA_IMAGE_REF=$JAVA_IMAGE_REF from Maven" + NEWER_JAVA_IMAGE=0 + if check_newer_parent "$JAVA_IMAGE_REF" "$BASE_IMAGE_REF"; then + NEWER_JAVA_IMAGE=1 + fi + + # 5. Check for package updates in base image + PKGS="$( grep "ARG PKGS" modules/container-base/src/main/docker/Dockerfile | cut -f2 -d= | tr -d '"' )" + echo "Determined installed packages=\"$PKGS\" from Maven" + NEWER_PKGS=0 + # Don't bother with package checks if the java image is newer already + if ! (( NEWER_JAVA_IMAGE )); then + if check_newer_pkgs "$BASE_IMAGE_REF" "$PKGS"; then + NEWER_PKGS=1 + fi + fi + + # 6. Get current immutable revision tag if not on the dev branch + REV=$( current_revision "$BASE_IMAGE_REF" ) + CURRENT_REV_TAG="${BASE_IMAGE_REF#*:}-r$REV" + NEXT_REV_TAG="${BASE_IMAGE_REF#*:}-r$(( REV + 1 ))" + + # 7. Let's put together what tags we want added to this build run + TAG_OPTIONS="" + if ! (( IS_DEV )); then + TAG_OPTIONS="-Dbase.image=$BASE_IMAGE_REF -Ddocker.tags.revision=$NEXT_REV_TAG" + # In case of the current release, add the "latest" tag as well. + if (( IS_CURRENT_RELEASE )); then + TAG_OPTIONS="$TAG_OPTIONS -Ddocker.tags.latest=latest" + fi + else + UPCOMING_TAG=$( mvn initialize help:evaluate -Pct -f modules/container-base -Dexpression=base.image.tag -Dbase.image.tag.suffix="" -q -DforceStdout ) + TAG_OPTIONS="-Ddocker.tags.develop=unstable -Ddocker.tags.upcoming=$UPCOMING_TAG" + + # For the dev branch we only have rolling tags and can add them now already + SUPPORTED_ROLLING_TAGS+=("[\"unstable\", \"$UPCOMING_TAG\", \"${BASE_IMAGE_REF#*:}\"]") + fi + echo "Determined these additional Maven tag options: $TAG_OPTIONS" + + # 8. Let's build the base image if necessary + NEWER_IMAGE=0 + if (( NEWER_JAVA_IMAGE + NEWER_PKGS + FORCE_BUILD > 0 )); then + mvn -Pct -f modules/container-base deploy -Ddocker.noCache -Ddocker.platforms="${PLATFORMS}" \ + -Ddocker.imagePropertyConfiguration=override $TAG_OPTIONS + NEWER_IMAGE=1 + # Save the information about the immutable or rolling tag we just built + if ! (( IS_DEV )); then + REBUILT_BASE_IMAGES+=("$BRANCH=${BASE_IMAGE_REF%:*}:$NEXT_REV_TAG") + else + REBUILT_BASE_IMAGES+=("$BRANCH=$BASE_IMAGE_REF") + fi + else + echo "No rebuild necessary, we're done here." + fi + + # 9. Add list of rolling and immutable tags for release builds + if ! 
(( IS_DEV )); then + RELEASE_TAGS_LIST="[" + if (( IS_CURRENT_RELEASE )); then + RELEASE_TAGS_LIST+="\"latest\", " + fi + RELEASE_TAGS_LIST+="\"${BASE_IMAGE_REF#*:}\", " + if (( NEWER_IMAGE )); then + RELEASE_TAGS_LIST+="\"$NEXT_REV_TAG\"]" + else + RELEASE_TAGS_LIST+="\"$CURRENT_REV_TAG\"]" + fi + SUPPORTED_ROLLING_TAGS+=("${RELEASE_TAGS_LIST}") + fi + + echo "::endgroup::" +done + +# Build the JSON output listing which base images have actually been rebuilt +REBUILT_IMAGES="[" +for IMAGE in "${REBUILT_BASE_IMAGES[@]}"; do + REBUILT_IMAGES+=" \"$IMAGE\" " +done +REBUILT_IMAGES+="]" +echo "rebuilt_base_images=${REBUILT_IMAGES// /, }" | tee -a "${GITHUB_OUTPUT}" + +# Build the supported rolling tags matrix as JSON +SUPPORTED_TAGS="{" +for (( i=0; i < ${#SUPPORTED_ROLLING_TAGS[@]} ; i++ )); do + j=$((i+1)) + SUPPORTED_TAGS+="\"${!j}\": ${SUPPORTED_ROLLING_TAGS[$i]}" + (( i < ${#SUPPORTED_ROLLING_TAGS[@]}-1 )) && SUPPORTED_TAGS+=", " +done +SUPPORTED_TAGS+="}" +echo "supported_tag_matrix=$SUPPORTED_TAGS" | tee -a "$GITHUB_OUTPUT" diff --git a/.github/workflows/scripts/utils.sh b/.github/workflows/scripts/utils.sh new file mode 100644 index 00000000000..987b58d8bb5 --- /dev/null +++ b/.github/workflows/scripts/utils.sh @@ -0,0 +1,108 @@ +#!/bin/bash + +set -euo pipefail + +function check_newer_parent() { + PARENT_IMAGE="$1" + # Get namespace, default to "library" if not found + PARENT_IMAGE_NS="${PARENT_IMAGE%/*}" + if [[ "$PARENT_IMAGE_NS" = "${PARENT_IMAGE}" ]]; then + PARENT_IMAGE_NS="library" + fi + PARENT_IMAGE_REPO="${PARENT_IMAGE%:*}" + PARENT_IMAGE_TAG="${PARENT_IMAGE#*:}" + + PARENT_IMAGE_LAST_UPDATE="$( curl -sS "https://hub.docker.com/v2/namespaces/${PARENT_IMAGE_NS}/repositories/${PARENT_IMAGE_REPO}/tags/${PARENT_IMAGE_TAG}" | jq -r .last_updated )" + if [[ "$PARENT_IMAGE_LAST_UPDATE" = "null" ]]; then + echo "::error title='Invalid PARENT Image'::Could not find ${PARENT_IMAGE} in the registry" + exit 1 + fi + + DERIVED_IMAGE="$2" + # Get namespace, default to "library" if not found + DERIVED_IMAGE_NS="${DERIVED_IMAGE%/*}" + if [[ "${DERIVED_IMAGE_NS}" = "${DERIVED_IMAGE}" ]]; then + DERIVED_IMAGE_NS="library" + fi + DERIVED_IMAGE_REPO="$( echo "${DERIVED_IMAGE%:*}" | cut -f2 -d/ )" + DERIVED_IMAGE_TAG="${DERIVED_IMAGE#*:}" + + DERIVED_IMAGE_LAST_UPDATE="$( curl -sS "https://hub.docker.com/v2/namespaces/${DERIVED_IMAGE_NS}/repositories/${DERIVED_IMAGE_REPO}/tags/${DERIVED_IMAGE_TAG}" | jq -r .last_updated )" + if [[ "$DERIVED_IMAGE_LAST_UPDATE" = "null" || "$DERIVED_IMAGE_LAST_UPDATE" < "$PARENT_IMAGE_LAST_UPDATE" ]]; then + echo "Parent image $PARENT_IMAGE has a newer release ($PARENT_IMAGE_LAST_UPDATE), which is more recent than $DERIVED_IMAGE ($DERIVED_IMAGE_LAST_UPDATE)" + return 0 + else + echo "Parent image $PARENT_IMAGE ($PARENT_IMAGE_LAST_UPDATE) is older than $DERIVED_IMAGE ($DERIVED_IMAGE_LAST_UPDATE)" + return 1 + fi +} + +function check_newer_pkgs() { + IMAGE="$1" + PKGS="$2" + + docker run --rm -u 0 "${IMAGE}" sh -c "apt update >/dev/null 2>&1 && apt install -s ${PKGS}" | tee /proc/self/fd/2 | grep -q "0 upgraded" + STATUS=$? + + if [[ $STATUS -eq 0 ]]; then + echo "Base image $IMAGE has no updates for our custom installed packages" + return 1 + else + echo "Base image $IMAGE needs updates for our custom installed packages" + return 0 + fi + + # TODO: In a future version of this script, we might want to include checking for other security updates, + # not just updates to the packages we installed.
+ # grep security /etc/apt/sources.list > /tmp/security.list + # apt-get update -oDir::Etc::Sourcelist=/tmp/security.list + # apt-get dist-upgrade -y -oDir::Etc::Sourcelist=/tmp/security.list -oDir::Etc::SourceParts=/bin/false -s + +} + +function current_revision() { + IMAGE="$1" + IMAGE_NS_REPO="${IMAGE%:*}" + IMAGE_TAG="${IMAGE#*:}" + + if [[ "$IMAGE_TAG" = "$IMAGE_NS_REPO" ]]; then + >&2 echo "You must provide an image reference in the format [/]:" + exit 1 + fi + + case "$IMAGE_NS_REPO" in + */*) :;; # namespace/repository syntax, leave as is + *) IMAGE_NS_REPO="library/$IMAGE_NS_REPO";; # bare repository name (docker official image); must convert to namespace/repository syntax + esac + + # Without such a token we may run into rate limits + # OB 2024-09-16: for some reason using this token stopped working. Let's go without and see if we really fall into rate limits. + # token=$( curl -s "https://auth.docker.io/token?service=registry.docker.io&scope=repository:$IMAGE_NS_REPO:pull" ) + + ALL_TAGS="$( + i=0 + while [ $? == 0 ]; do + i=$((i+1)) + # OB 2024-09-16: for some reason using this token stopped working. Let's go without and see if we really fall into rate limits. + # RESULT=$( curl -s -H "Authorization: Bearer $token" "https://registry.hub.docker.com/v2/repositories/$IMAGE_NS_REPO/tags/?page=$i&page_size=100" ) + RESULT=$( curl -s "https://registry.hub.docker.com/v2/repositories/$IMAGE_NS_REPO/tags/?page=$i&page_size=100" ) + if [[ $( echo "$RESULT" | jq '.message' ) != "null" ]]; then + # If we run into an error on the first attempt, that means we have a problem. + if [[ "$i" == "1" ]]; then + >&2 echo "Error when retrieving tag data: $( echo "$RESULT" | jq '.message' )" + exit 2 + # Otherwise it will just mean we reached the last page already + else + break + fi + else + echo "$RESULT" | jq -r '."results"[]["name"]' + # DEBUG: + #echo "$RESULT" | >&2 jq -r '."results"[]["name"]' + fi + done + )" + + # Note: if a former tag could not be found, it just might not exist already. Start new series with rev 0 + echo "$ALL_TAGS" | grep "${IMAGE_TAG}-r" | sed -e "s#${IMAGE_TAG}-r##" | sort -h | tail -n1 || echo "-1" +} diff --git a/conf/solr/schema.xml b/conf/solr/schema.xml index 1773837e39d..2aed50e9998 100644 --- a/conf/solr/schema.xml +++ b/conf/solr/schema.xml @@ -352,6 +352,7 @@ + @@ -593,6 +594,7 @@ + diff --git a/doc/release-notes/10169-JSON-schema-validation.md b/doc/release-notes/10169-JSON-schema-validation.md deleted file mode 100644 index 92ff4a917d5..00000000000 --- a/doc/release-notes/10169-JSON-schema-validation.md +++ /dev/null @@ -1,3 +0,0 @@ -### Improved JSON Schema validation for datasets - -Enhanced JSON schema validation with checks for required and allowed child objects, type checking for field types including `primitive`, `compound` and `controlledVocabulary`. More user-friendly error messages to help pinpoint the issues in the dataset JSON. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.3/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10543. 
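As an illustration of how the maintenance job derives the immutable revision tag from `current_revision` (see `utils.sh` above), here is a short sketch with a made-up image reference and tag list:

```bash
#!/bin/bash
set -euo pipefail

BASE_IMAGE_REF="gdcc/base:6.2024.09"   # made-up image reference
ALL_TAGS="6.2024.09
6.2024.09-r0
6.2024.09-r1
unstable"                              # made-up list of existing Docker Hub tags

IMAGE_TAG="${BASE_IMAGE_REF#*:}"
# Highest existing revision for this tag (or -1 if no revision tag exists yet)
REV=$( echo "$ALL_TAGS" | grep "${IMAGE_TAG}-r" | sed -e "s#${IMAGE_TAG}-r##" | sort -h | tail -n1 || echo "-1" )
CURRENT_REV_TAG="${IMAGE_TAG}-r${REV}"
NEXT_REV_TAG="${IMAGE_TAG}-r$(( REV + 1 ))"
echo "current=${CURRENT_REV_TAG} next=${NEXT_REV_TAG}"   # current=6.2024.09-r1 next=6.2024.09-r2
```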
diff --git a/doc/release-notes/10287-use-support-address-in-system-email-text.md b/doc/release-notes/10287-use-support-address-in-system-email-text.md deleted file mode 100644 index 4c294404298..00000000000 --- a/doc/release-notes/10287-use-support-address-in-system-email-text.md +++ /dev/null @@ -1,4 +0,0 @@ -### Notification Email Improvement - -The system email text has been improved to use the support email address (`dataverse.mail.support-email`) in the text where it states; "contact us for support at", instead of the default system email address. -Using the system email address here was particularly problematic when it was a 'noreply' address. diff --git a/doc/release-notes/10341-croissant.md b/doc/release-notes/10341-croissant.md deleted file mode 100644 index 15bc7029099..00000000000 --- a/doc/release-notes/10341-croissant.md +++ /dev/null @@ -1,9 +0,0 @@ -A new metadata export format called Croissant is now available as an external metadata exporter. It is oriented toward making datasets consumable by machine learning. - -When enabled, Croissant replaces the Schema.org JSON-LD format in the `` of dataset landing pages. For details, see the [Schema.org JSON-LD/Croissant Metadata](https://dataverse-guide--10533.org.readthedocs.build/en/10533/admin/discoverability.html#schema-org-head) under the discoverability section of the Admin Guide. - -For more about the Croissant exporter, see https://github.com/gdcc/exporter-croissant - -For installation instructions, see [Enabling External Exporters](https://dataverse-guide--10533.org.readthedocs.build/en/10533/installation/advanced.html#enabling-external-exporters) in the Installation Guide. - -See also Issue #10341 and PR #10533. diff --git a/doc/release-notes/10433-add-thumbnail-for-featured-dataverses.md b/doc/release-notes/10433-add-thumbnail-for-featured-dataverses.md deleted file mode 100644 index 0ebb84a8eb0..00000000000 --- a/doc/release-notes/10433-add-thumbnail-for-featured-dataverses.md +++ /dev/null @@ -1,5 +0,0 @@ -Add the ability to configure a thumbnail logo that is displayed for a collection when the collection is configured as a featured collection. If present, this thumbnail logo is shown. Otherwise, the collection logo is shown. Configuration is done under the "Theme" for a collection. - -The HTML preview of the documentation can be found [here](https://dataverse-guide--10433.org.readthedocs.build/en/10433/user/dataverse-management.html#theme). - -For more information, see [#10291](https://github.com/IQSS/dataverse/issues/10291). 
diff --git a/doc/release-notes/10508-base-image-fixes.md b/doc/release-notes/10508-base-image-fixes.md deleted file mode 100644 index 148066435e8..00000000000 --- a/doc/release-notes/10508-base-image-fixes.md +++ /dev/null @@ -1,12 +0,0 @@ -# Security and Compatibility Fixes to the Container Base Image - -- Switch "wait-for" to "wait4x", aligned with the Configbaker Image -- Update "jattach" to v2.2 -- Install AMD64 / ARM64 versions of tools as necessary -- Run base image as unprivileged user by default instead of `root` - this was an oversight from OpenShift changes -- Linux User, Payara Admin and Domain Master passwords: - - Print hints about default, public knowledge passwords in place for - - Enable replacing these passwords at container boot time -- Enable building with updates Temurin JRE image based on Ubuntu 24.04 LTS -- Fix entrypoint script troubles with pre- and postboot script files -- Unify location of files at CONFIG_DIR=/opt/payara/config, avoid writing to other places \ No newline at end of file diff --git a/doc/release-notes/10517-datasetType.md b/doc/release-notes/10517-datasetType.md deleted file mode 100644 index 2e3aff940c7..00000000000 --- a/doc/release-notes/10517-datasetType.md +++ /dev/null @@ -1,10 +0,0 @@ -### Initial Support for Dataset Types - -Out of the box, all datasets have the type "dataset" but superusers can add additional types. At this time the type can only be set at creation time via API. The types "dataset", "software", and "workflow" will be sent to DataCite when the dataset is published. - -For details see and #10517. Please note that this feature is highly experimental and is expected to evolve. - -Upgrade instructions --------------------- - -Update your Solr schema.xml file to pick up the "datasetType" additions and do a full reindex. diff --git a/doc/release-notes/10583-dataset-unlink-functionality-same-permission-as-link.md b/doc/release-notes/10583-dataset-unlink-functionality-same-permission-as-link.md deleted file mode 100644 index f97bd252db3..00000000000 --- a/doc/release-notes/10583-dataset-unlink-functionality-same-permission-as-link.md +++ /dev/null @@ -1,2 +0,0 @@ -New "Unlink Dataset" button has been added to the Dataset Page to allow a user to unlink a dataset from a collection that was previously linked with the "Link Dataset" button. The user must possess the same permissions needed to unlink the Dataset as they would to link the Dataset. -The [existing API](https://guides.dataverse.org/en/6.3/admin/dataverses-datasets.html#unlink-a-dataset) for unlinking datasets has been updated to no longer require superuser access. The "Publish Dataset" permission is now enough. diff --git a/doc/release-notes/10606-dataverse-in-windows-wsl.md b/doc/release-notes/10606-dataverse-in-windows-wsl.md deleted file mode 100644 index 9501d6e3090..00000000000 --- a/doc/release-notes/10606-dataverse-in-windows-wsl.md +++ /dev/null @@ -1 +0,0 @@ -New instructions have been added for developers on Windows trying to run a Dataverse development environment using Windows Subsystem for Linux (WSL). See https://dataverse-guide--10608.org.readthedocs.build/en/10608/developers/windows.html #10606 and #10608. 
diff --git a/doc/release-notes/10633-add-dataverse-api-extension.md b/doc/release-notes/10633-add-dataverse-api-extension.md deleted file mode 100644 index f5d8030e8ac..00000000000 --- a/doc/release-notes/10633-add-dataverse-api-extension.md +++ /dev/null @@ -1 +0,0 @@ -The addDataverse (/api/dataverses/{identifier}) API endpoint has been extended to allow adding metadata blocks, input levels and facet ids at creation time, as the Dataverse page in create mode does in JSF. diff --git a/doc/release-notes/10726-dataverse-facets-api-extension.md b/doc/release-notes/10726-dataverse-facets-api-extension.md deleted file mode 100644 index baf6f798e35..00000000000 --- a/doc/release-notes/10726-dataverse-facets-api-extension.md +++ /dev/null @@ -1,3 +0,0 @@ -New optional query parameter "returnDetails" added to "dataverses/{identifier}/facets/" endpoint to include detailed information of each DataverseFacet. - -New endpoint "datasetfields/facetables" that lists all facetable dataset fields defined in the installation. diff --git a/doc/release-notes/10733-add-publication-status-to-search-api-results.md b/doc/release-notes/10733-add-publication-status-to-search-api-results.md deleted file mode 100644 index d015a50a00d..00000000000 --- a/doc/release-notes/10733-add-publication-status-to-search-api-results.md +++ /dev/null @@ -1,14 +0,0 @@ -Search API (/api/search) response will now include publicationStatuses in the Json response as long as the list is not empty - -Example: -```javascript -"items": [ - { - "name": "Darwin's Finches", - ... - "publicationStatuses": [ - "Unpublished", - "Draft" - ], -(etc, etc) -``` diff --git a/doc/release-notes/10741-list-metadatablocks-display-on-create-fix.md b/doc/release-notes/10741-list-metadatablocks-display-on-create-fix.md deleted file mode 100644 index 4edadcaa1fc..00000000000 --- a/doc/release-notes/10741-list-metadatablocks-display-on-create-fix.md +++ /dev/null @@ -1 +0,0 @@ -Fixed dataverses/{identifier}/metadatablocks endpoint to not return fields marked as displayOnCreate=true if there is an input level with include=false, when query parameters returnDatasetFieldTypes=true and onlyDisplayedOnCreate=true are set. diff --git a/doc/release-notes/10744-ro-crate-docs.md b/doc/release-notes/10744-ro-crate-docs.md deleted file mode 100644 index 9d52b4578b4..00000000000 --- a/doc/release-notes/10744-ro-crate-docs.md +++ /dev/null @@ -1,3 +0,0 @@ -## RO-Crate Support (Metadata Export) - -Dataverse now supports [RO-Crate](https://www.researchobject.org/ro-crate/) in the sense that dataset metadata can be exported in that format. This functionality is not available out of the box but you can enable one or more RO-Crate exporters from the [list of external exporters](https://preview.guides.gdcc.io/en/develop/installation/advanced.html#inventory-of-external-exporters). See also #10744. diff --git a/doc/release-notes/10749-dataverse-user-permissions-api-extension.md b/doc/release-notes/10749-dataverse-user-permissions-api-extension.md deleted file mode 100644 index 706b1f42641..00000000000 --- a/doc/release-notes/10749-dataverse-user-permissions-api-extension.md +++ /dev/null @@ -1 +0,0 @@ -New API endpoint "dataverses/{identifier}/userPermissions" for obtaining the user permissions on a dataverse. 
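The removed note above describes the new `dataverses/{identifier}/userPermissions` endpoint; a minimal call could look roughly like this (server URL, API token, and the `root` alias are placeholders):

```bash
#!/bin/bash
export SERVER_URL=https://demo.dataverse.org
export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx

# Returns the calling user's permissions on the given collection ("dataverse")
curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/root/userPermissions"
```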
diff --git a/doc/release-notes/10758-rust-client.md b/doc/release-notes/10758-rust-client.md deleted file mode 100644 index e206f27ce65..00000000000 --- a/doc/release-notes/10758-rust-client.md +++ /dev/null @@ -1,3 +0,0 @@ -### Rust API client library - -An API client library for the Rust programming language is now available at https://github.com/gdcc/rust-dataverse and has been added to the [list of client libraries](https://dataverse-guide--10758.org.readthedocs.build/en/10758/api/client-libraries.html) in the API Guide. See also #10758. diff --git a/doc/release-notes/10797-update-current-version-bug-fix.md b/doc/release-notes/10797-update-current-version-bug-fix.md deleted file mode 100644 index 2cfaf69cad3..00000000000 --- a/doc/release-notes/10797-update-current-version-bug-fix.md +++ /dev/null @@ -1,11 +0,0 @@ -A significant bug in the superuser-only "Update-Current-Version" publication was found and fixed in this release. If the Update-Current-Version option was used when changes were made to the dataset Terms (rather than to dataset metadata), or if the PID provider service was down/returned an error, the update would fail and render the dataset unusable and require restoration from a backup. The fix in this release allows the update to succeed in both of these cases and redesigns the functionality such that any unknown issues should not make the dataset unusable (i.e. the error would be reported and the dataset would remain in its current state with the last-published version as it was and changes still in the draft version.) - -Users of earlier Dataverse releases are encouraged to alert their superusers to this issue. Those who wish to disable this functionality have two options: -* Change the dataset.updateRelease entry in the Bundle.properties file (or local language version) to "Do Not Use" or similar (doesn't disable but alerts superusers to the issue), or -* Edit the dataset.xhtml file to remove the lines - - - - - -, delete the contents of the generated and osgi-cache directories in the Dataverse Payara domain, and restart the Payara server. diff --git a/doc/release-notes/10800-add-dataverse-request-json-fix.md b/doc/release-notes/10800-add-dataverse-request-json-fix.md deleted file mode 100644 index ddd6c388ec6..00000000000 --- a/doc/release-notes/10800-add-dataverse-request-json-fix.md +++ /dev/null @@ -1 +0,0 @@ -Fixed the "addDataverse" API endpoint (/dataverses/{id} POST) expected request JSON structure to parse facetIds as described in the docs. \ No newline at end of file diff --git a/doc/release-notes/10810-search-api-payload-extensions.md b/doc/release-notes/10810-search-api-payload-extensions.md deleted file mode 100644 index 5112d9f62ee..00000000000 --- a/doc/release-notes/10810-search-api-payload-extensions.md +++ /dev/null @@ -1,52 +0,0 @@ -Search API (/api/search) response will now include new fields for the different entities. - -For Dataverse: - -- "affiliation" -- "parentDataverseName" -- "parentDataverseIdentifier" -- "image_url" (optional) - -```javascript -"items": [ - { - "name": "Darwin's Finches", - ... - "affiliation": "Dataverse.org", - "parentDataverseName": "Root", - "parentDataverseIdentifier": "root", - "image_url":"data:image/png;base64,iVBORw0..." -(etc, etc) -``` - -For DataFile: - -- "releaseOrCreateDate" -- "image_url" (optional) - -```javascript -"items": [ - { - "name": "test.txt", - ... - "releaseOrCreateDate": "2016-05-10T12:53:39Z", - "image_url":"data:image/png;base64,iVBORw0..." 
-(etc, etc) -``` - -For Dataset: - -- "image_url" (optional) - -```javascript -"items": [ - { - ... - "image_url": "http://localhost:8080/api/datasets/2/logo" - ... -(etc, etc) -``` - -The image_url field was already part of the SolrSearchResult JSON (and incorrectly appeared in Search API documentation), but it wasn’t returned by the API because it was appended only after the Solr query was executed in the SearchIncludeFragment of JSF. Now, the field is set in SearchServiceBean, ensuring it is always returned by the API when an image is available. - -The schema.xml file for Solr has been updated to include a new field called dvParentAlias for supporting the new response field "parentDataverseIdentifier". So for the next Dataverse released version, a Solr reindex will be necessary to apply the new schema.xml version. diff --git a/doc/release-notes/10819-publish-thumbnail-bug.md b/doc/release-notes/10819-publish-thumbnail-bug.md deleted file mode 100644 index 46c9875a6ef..00000000000 --- a/doc/release-notes/10819-publish-thumbnail-bug.md +++ /dev/null @@ -1,6 +0,0 @@ -The initial release of the Dataverse v6.3 introduced a bug where publishing would break the dataset thumbnail, which in turn broke the rendering of the parent Collection ("dataverse") page. This problem was fixed in the PR 10820. - -This bug fix will prevent this from happening in the future, but does not fix any existing broken links. To restore any broken thumbnails caused by this bug, you can call the http://localhost:8080/api/admin/clearThumbnailFailureFlag API, which will attempt to clear the flag on all files (regardless of whether caused by this bug or some other problem with the file) or the http://localhost:8080/api/admin/clearThumbnailFailureFlag/id to clear the flag for individual files. Calling the former, batch API is recommended. - -Additionally, the same PR made it possible to turn off the feature that automatically selects of one of the image datafiles to serve as the thumbnail of the parent dataset. An admin can turn it off by raising the feature flag `-Ddataverse.feature.disable-dataset-thumbnail-autoselect=true`. When the feature is disabled, a user can still manually pick a thumbnail image, or upload a dedicated thumbnail image. - diff --git a/doc/release-notes/10857-add-expiration-date-to-recreate-token-api.md b/doc/release-notes/10857-add-expiration-date-to-recreate-token-api.md new file mode 100644 index 00000000000..b450867c630 --- /dev/null +++ b/doc/release-notes/10857-add-expiration-date-to-recreate-token-api.md @@ -0,0 +1 @@ +An optional query parameter called 'returnExpiration' has been added to the 'users/token/recreate' endpoint, which, if set to true, returns the expiration time in the response message. diff --git a/doc/release-notes/6.4-release-notes.md b/doc/release-notes/6.4-release-notes.md new file mode 100644 index 00000000000..979fd16bf9e --- /dev/null +++ b/doc/release-notes/6.4-release-notes.md @@ -0,0 +1,526 @@ +# Dataverse 6.4 + +Please note: To read these instructions in full, please go to https://github.com/IQSS/dataverse/releases/tag/v6.4 rather than the list of releases, which will cut them off. + +This release brings new features, enhancements, and bug fixes to Dataverse. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. 
+ +## Release Highlights + +New features in Dataverse 6.4: + +- Enhanced DataCite Metadata, including "Relation Type" +- All ISO 639-3 languages are now supported +- There is now a button for "Unlink Dataset" +- Users will have DOIs/PIDs reserved for their files as part of file upload instead of at publication time +- Datasets can now have types such as "software" or "workflow" +- Croissant support +- RO-Crate support +- and more! Please see below. + +New client library: + +- Rust + +This release also fixes two important bugs described below and in [a post](https://groups.google.com/g/dataverse-community/c/evn5C-pyrS8/m/JrH9vp47DwAJ) on the mailing list: + +- "Update Current Version" can cause metadata loss +- Publishing breaks designated dataset thumbnail, messes up collection page + +Additional details on the above as well as many more features and bug fixes included in the release are described below. Read on! + +## Features Added + +### Enhanced DataCite Metadata, Including "Relation Type" + +Within the "Related Publication" field, a new subfield has been added called "Relation Type" that allows for the most common [values](https://datacite-metadata-schema.readthedocs.io/en/4.5/appendices/appendix-1/relationType/) recommended by DataCite: isCitedBy, Cites, IsSupplementTo, IsSupplementedBy, IsReferencedBy, and References. For existing datasets where no "Relation Type" has been specified, "IsSupplementTo" is assumed. + +Dataverse now supports the [DataCite v4.5 schema](http://schema.datacite.org/meta/kernel-4/). Additional metadata is now being sent to DataCite including metadata about related publications and files in the dataset. Improved metadata is being sent including how PIDs (ORCID, ROR, DOIs, etc.), license/terms, geospatial, and other metadata are represented. The enhanced metadata will automatically be sent to DataCite when datasets are created and published. Additionally, after publication, you can inspect what was sent by looking at the DataCite XML export. + +The additions are in rough alignment with the OpenAIRE XML export, but there are some minor differences in addition to the Relation Type addition, including an update to the DataCite 4.5 schema. For details see #10632, #10615 and the [design document](https://docs.google.com/document/d/1JzDo9UOIy9dVvaHvtIbOI8tFU6bWdfDfuQvWWpC0tkA/edit?usp=sharing) referenced there. 
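As mentioned above, the enhanced metadata sent to DataCite can be inspected via the DataCite XML export after publication. A hedged sketch of such a request (the server URL and DOI are placeholders, and `Datacite` is assumed to be the exporter name):

```bash
#!/bin/bash
export SERVER_URL=https://demo.dataverse.org
export PERSISTENT_ID=doi:10.70122/FK2/EXAMPLE

# Fetch the DataCite XML export for a published dataset
curl "$SERVER_URL/api/datasets/export?exporter=Datacite&persistentId=$PERSISTENT_ID"
```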
+ +Multiple backward incompatible changes and bug fixes have been made to API calls (three of four of which were not documented) related to updating PID target URLs and metadata at the provider service: +- [Update Target URL for a Published Dataset at the PID provider](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#update-target-url-for-a-published-dataset-at-the-pid-provider) +- [Update Target URL for all Published Datasets at the PID provider](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#update-target-url-for-all-published-datasets-at-the-pid-provider) +- [Update Metadata for a Published Dataset at the PID provider](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#update-metadata-for-a-published-dataset-at-the-pid-provider) +- [Update Metadata for all Published Datasets at the PID provider](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#update-metadata-for-all-published-datasets-at-the-pid-provider) + +### Full List of ISO 639-3 Languages Now Supported + +The controlled vocabulary values list for the metadata field "Language" in the citation block has now been extended to include roughly 7920 ISO 639-3 values. + +Some of the language entries in the pre-6.4 list correspond to "macro languages" in ISO-639-3 and admins/users may wish to update to use the corresponding individual language entries from ISO-639-3. As these cases are expected to be rare (they do not involve major world languages), finding them is not covered in the release notes. Anyone who desires help in this area is encouraged to reach out to the Dataverse community via any of the standard communication channels. + +ISO 639-3 codes were downloaded from [sil.org](https://iso639-3.sil.org/code_tables/download_tables#Complete%20Code%20Tables:~:text=iso%2D639%2D3_Code_Tables_20240415.zip) and the file used for merging with the existing citation.tsv was "iso-639-3.tab". See also #8578 and #10762. + +### Unlink Dataset Button + +A new "Unlink Dataset" button has been added to the dataset page to allow a user to unlink a dataset from a collection. To unlink a dataset the user must have permission to link the dataset. Additionally, the [existing API](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#unlink-a-dataset) for unlinking datasets has been updated to no longer require superuser access as the "Publish Dataset" permission is now enough. See also #10583 and #10689. + +### Pre-Publish File DOI Reservation + +Dataverse installations using DataCite as a persistent identifier (PID) provider (or other providers that support reserving PIDs) will be able to reserve PIDs for files when they are uploaded (rather than at publication time). Note that reserving file DOIs can slow uploads with large numbers of files so administrators may need to adjust timeouts (specifically any Apache "``ProxyPass / ajp://localhost:8009/ timeout=``" setting in the recommended Dataverse configuration). + +### Initial Support for Dataset Types + +Out of the box, all datasets now have the type "dataset" but superusers can add additional types. At this time the type of a dataset can only be set at creation time via API. The types "dataset", "software", and "workflow" (just those three, for now) will be sent to DataCite (as `resourceTypeGeneral`) when the dataset is published. + +For details see [the guides](https://guides.dataverse.org/en/6.4/user/dataset-management.html#dataset-types), #10517 and #10694. 
Please note that this feature is highly experimental and is expected to [evolve](https://github.com/IQSS/dataverse-pm/issues/307). + +### Croissant Support (Metadata Export) + +A new metadata export format called [Croissant](https://github.com/mlcommons/croissant) is now available as an external metadata exporter. It is oriented toward making datasets consumable by machine learning. + +For more about the Croissant exporter, including installation instructions, see https://github.com/gdcc/exporter-croissant. See also #10341, #10533, and [discussion](https://groups.google.com/g/dataverse-community/c/JI8HPgGarr8/m/DqEIkiwlAgAJ) on the mailing list. + +Please note: the Croissant exporter works best with Dataverse 6.2 and higher (where it updates the content of `<head>` as [described](https://guides.dataverse.org/en/6.4/admin/discoverability.html#schema-org-head) in the guides) but can be used with 6.0 and higher (to get the export functionality). + +### RO-Crate Support (Metadata Export) + +Dataverse now supports [RO-Crate](https://www.researchobject.org/ro-crate/) as a metadata export format. This functionality is not available out of the box, but you can enable one or more RO-Crate exporters from the [list of external exporters](https://guides.dataverse.org/en/6.4/installation/advanced.html#inventory-of-external-exporters). See also #10744 and #10796. + +### Rust API Client Library + +A Dataverse API client library for the Rust programming language is now available at https://github.com/gdcc/rust-dataverse and has been added to the [list of client libraries](https://guides.dataverse.org/en/6.4/api/client-libraries.html) in the API Guide. See also #10758. + +### Collection Thumbnail Logo for Featured Collections + +Collections can now have a thumbnail logo that is displayed when the collection is configured as a featured collection. If present, this thumbnail logo is shown. Otherwise, the collection logo is shown. Configuration is done under the "Theme" for a collection as explained in [the guides](https://guides.dataverse.org/en/6.4/user/dataverse-management.html#theme). See also #10291 and #10433. + +### Saved Searches Can Be Deleted + +Saved searches can now be deleted via API. See the [Saved Search](https://guides.dataverse.org/en/6.4/api/native-api.html#saved-search) section of the API Guide, #9317 and #10198. + +### Notification Email Improvement + +When notification emails are sent, the part of the closing that says "contact us for support at" will now show the support email address (`dataverse.mail.support-email`), when configured, instead of the default system email address. Using the system email address here was particularly problematic when it was a "noreply" address. See also #10287 and #10504. + +### Ability to Disable Automatic Thumbnail Selection + +It is now possible to turn off the feature that automatically selects one of the image datafiles to serve as the thumbnail of the parent dataset. An admin can turn it off by enabling the [feature flag](https://guides.dataverse.org/en/6.4/installation/config.html#feature-flags) `dataverse.feature.disable-dataset-thumbnail-autoselect`. When the feature is disabled, a user can still manually pick a thumbnail image, or upload a dedicated thumbnail image. See also #10820. + +### More Flexible PermaLinks + +The configuration setting `dataverse.pid.*.permalink.base-url`, which is used for PermaLinks, has been updated to support greater flexibility. Previously, the string `/citation?persistentId=` was automatically appended to the configured base URL.
With this update, the base URL will now be used exactly as configured, without any automatic additions. See also #10775. + +### Globus Async Framework + +A new alternative implementation of Globus polling during upload data transfers has been added in this release. This experimental framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. See `globus-use-experimental-async-framework` under [Feature Flags](https://guides.dataverse.org/en/6.4/installation/config.html#feature-flags) and [dataverse.files.globus-monitoring-server](https://guides.dataverse.org/en/6.4/installation/config.html#dataverse-files-globus-monitoring-server) in the Installation Guide. See also #10623 and #10781. + +### CVoc (Controlled Vocabulary): Allow ORCID and ROR to Be Used Together in Author Field + +Changes in Dataverse and updates to the ORCID and ROR external vocabulary scripts support deploying these for the citation block author field (and others). See also #10711, #10712, and . + +### Development on Windows + +New instructions have been added for developers on Windows trying to run a Dataverse development environment using Windows Subsystem for Linux (WSL). See [the guides](https://guides.dataverse.org/en/6.4/developers/windows.html), #10606, and #10608. + +### Experimental Crossref PID (DOI) Provider + +Crossref can now be used as a PID (DOI) provider, but this feature is experimental. Please provide feedback through the usual channels. See also the [guides](https://guides.dataverse.org/en/6.4/installation/config.html#crossref-specific-settings), #8581, and #10806. + +### Improved JSON Schema Validation for Datasets + +JSON Schema validation has been enhanced with checks for required and allowed child objects as well as type checking for field types including `primitive`, `compound` and `controlledVocabulary`. More user-friendly error messages help pinpoint the issues in the dataset JSON. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.4/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide, #10169, and #10543. + +### Counter Processor 1.05 Support (Make Data Count) + +Counter Processor 1.05 is now supported for use with Make Data Count. If you are running Counter Processor, you should reinstall/reconfigure it as described in the latest guides. Note that Counter Processor 1.05 requires Python 3, so you will need to follow the full Counter Processor install. Also note that if you configure the new version the same way, it will reprocess the days in the current month when it is first run. This is normal and will not affect the metrics in Dataverse. See also #10479. + +### Version Tags for Container Base Images + +With this release we introduce a detailed maintenance workflow for our container images. As output of the [Containerization Working Group](https://ct.gdcc.io), the community takes another step towards production ready containers available directly from the core project. + +The maintenance workflow regularly updates the [Container Base Image](https://guides.dataverse.org/en/6.4/container/base-image.html), which contains the operating system, Java, Payara, and tools and libraries required by the Dataverse application. 
Shipping these rolling releases as well as immutable revisions is the foundation for secure and reliable [Dataverse Application Container](https://guides.dataverse.org/en/6.4/container/app-image.html) images. See also #10478 and #10827. + +## Bugs Fixed + +### Update Current Version + +A significant bug in the superuser-only [Update Current Version](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#make-metadata-updates-without-changing-dataset-version) publication option was fixed. If the "Update Current Version" option was used when changes were made to the dataset terms (rather than to dataset metadata), or if the PID provider service was down or returned an error, the update would fail, rendering the dataset unusable and requiring restoration from a backup. The fix in this release allows the update to succeed in both of these cases and redesigns the functionality such that any unknown issues should not make the dataset unusable (i.e., the error would be reported and the dataset would remain in its current state, with the last-published version as it was and the changes still in the draft version). + +If you do not plan to upgrade to Dataverse 6.4 right away, you are encouraged to alert your superusers to this issue (see [this post](https://groups.google.com/g/dataverse-community/c/evn5C-pyrS8/m/JrH9vp47DwAJ)). Here are some workarounds for pre-6.4 versions: + +* Change the "dataset.updateRelease" entry in the Bundle.properties file (or local language version) to "Do Not Use" or similar (this doesn't disable the button but alerts superusers to the issue), or +* Edit the dataset.xhtml file to remove the lines below, delete the contents of the generated and osgi-cache directories in the Dataverse Payara domain, and restart the Payara server. This will remove the "Update Current Version" option from the UI. + +``` + + + +``` + +Again, the workarounds above are only for pre-6.4 versions. The bug has been fixed in Dataverse 6.4. See also #10797. + +### Broken Thumbnails + +Dataverse 6.3 introduced a bug where publishing would break the dataset thumbnail, which in turn broke the rendering of the parent collection (dataverse) page. + +This bug has been fixed, but any existing broken thumbnails must be fixed manually. See "clearThumbnailFailureFlag" in the upgrade instructions below. + +Additionally, it is now possible to turn off the feature that automatically selects one of the image datafiles to serve as the thumbnail of the parent dataset. An admin can turn it off by setting the feature flag `-Ddataverse.feature.disable-dataset-thumbnail-autoselect=true`. When the feature is disabled, a user can still manually pick a thumbnail image or upload a dedicated thumbnail image. + +See also #10819, #10820, and [the post](https://groups.google.com/g/dataverse-community/c/evn5C-pyrS8/m/JrH9vp47DwAJ) on the mailing list. + +### No License, No Terms of Use + +When datasets have neither a license nor custom terms of use, the dataset page will now indicate this. Also, these datasets will no longer be indexed as having custom terms. See also #8796, #10513, and #10614. + +### CC0 License Bug Fix + +At a high level, some datasets have been mislabeled as "Custom License" when they should have been "CC0 1.0". This has been corrected. + +In Dataverse 5.10, datasets with only "CC0 Waiver" in the "termsofuse" field were converted to "Custom License" (instead of the CC0 1.0 license) through a SQL migration script (see #10634).
On deployment of Dataverse 6.4, a new SQL migration script will be run automatically to correct this, changing these datasets to CC0. You can review the script in #10634; it only affects datasets that meet all of the following conditions: + +- The existing "Terms of Use" must be equal to "This dataset is made available under a Creative Commons CC0 license with the following additional/modified terms and conditions: CC0 Waiver" (this was set in #10634). +- The following terms fields must be empty: Confidentiality Declaration, Special Permissions, Restrictions, Citation Requirements, Depositor Requirements, Conditions, and Disclaimer. +- The license ID must not be assigned. + +The script will set the license ID to that of the CC0 1.0 license and remove the contents of the "termsofuse" field. See also #9081 and #10634. + +### Remap oai_dc Export and Harvesting Format Fields: dc:type and dc:date + +The `oai_dc` export and harvesting format has had the following fields remapped: + +- dc:type was mapped to the field "Kind of Data". Now it is hard-coded to the word "Dataset". +- dc:date was mapped to the field "Production Date" when available and otherwise to "Publication Date". Now it is mapped to the field "Publication Date" or the field used for the citation date, if set (see [Set Citation Date Field Type for a Dataset](https://guides.dataverse.org/en/6.4/api/native-api.html#set-citation-date-field-type-for-a-dataset)). + +In order for these changes to be reflected in existing datasets, a [reexport all](https://guides.dataverse.org/en/6.4/admin/metadataexport.html#batch-exports-through-the-api) should be run (mentioned below). See #8129 and #10737. + +### Zip File No Longer Misdetected as Shapefile (Hidden Directories) + +When detecting file types, Dataverse would previously detect a zip file as a shapefile if it contained [markers of a shapefile](https://guides.dataverse.org/en/6.4/developers/geospatial.html) in hidden directories. These hidden directories are now ignored when deciding if a zip file is a shapefile or not. See also #8945 and #10627. + +### External Controlled Vocabulary + +This release fixes a bug (introduced in v6.3) in the external controlled vocabulary mechanism that could cause indexing to fail (with a NullPointerException) when a script is configured for one child field and no other child fields are managed. See also #10869 and #10870. + +### Valid JSON in Error Response + +When any `ApiBlockingFilter` policy applies to a request, the JSON in the body of the error response is now valid JSON. See also #10085. + +### Docker Container Base Image Security and Compatibility + +- Switch "wait-for" to "wait4x", aligned with the Configbaker Image +- Update "jattach" to v2.2 +- Install AMD64 / ARM64 versions of tools as necessary +- Run the base image as an unprivileged user by default instead of `root` - this was an oversight from the OpenShift changes +- Linux User, Payara Admin and Domain Master passwords: + - Print hints about default, public knowledge passwords in place for + - Enable replacing these passwords at container boot time +- Enable building with an updated Temurin JRE image based on Ubuntu 24.04 LTS +- Fix entrypoint script troubles with pre- and postboot script files +- Unify location of files at CONFIG_DIR=/opt/payara/config and avoid writing to other places + +See also #10508, #10672, and #10722. + +### Cleanup of Temp Directories + +In this release we addressed an issue where copies of files uploaded via the UI were left in one specific temp directory (`.../domain1/uploads` by default).
We would like to remind all the installation admins that it is strongly recommended to have some automated (and aggressive) cleanup mechanisms in place for all the temp directories used by Dataverse. For example, at Harvard/IQSS we have the following configuration for the PrimeFaces uploads directory above: (note that, even with this fix in place, PrimeFaces will be leaving a large number of small log files in that location) + +Instead of the default location (`.../domain1/uploads`) we use a directory on a dedicated partition, outside of the filesystem where Dataverse is installed, via the following JVM option: + +``` +-Ddataverse.files.uploads=/uploads/web +``` + +and we have a dedicated cronjob that runs every 30 minutes and deletes everything older than 2 hours in that directory: + +``` +15,45 * * * * /bin/find /uploads/web/ -mmin +119 -type f -name "upload*" -exec rm -f {} \; > /dev/null 2>&1 +``` + +### Trailing Commas in Author Name Now Permitted + +When an author name ended in a comma (e.g. `Smith,` or `Smith, `), the dataset page was broken after publishing (a "500" error page was presented to the user). The underlying issue causing the JSON-LD Schema.org output on the page to break was fixed. See #10343 and #10776. + +## API Updates + +### Search API: affiliation, parentDataverseName, image_url, etc. + +The Search API (`/api/search`) response now includes additional fields, depending on the type. + +For collections (dataverses): + +- "affiliation" +- "parentDataverseName" +- "parentDataverseIdentifier" +- "image_url" (optional) + +```javascript +"items": [ + { + "name": "Darwin's Finches", + ... + "affiliation": "Dataverse.org", + "parentDataverseName": "Root", + "parentDataverseIdentifier": "root", + "image_url":"/api/access/dvCardImage/{identifier}" +(etc, etc) +``` + +For datasets: + +- "image_url" (optional) + +```javascript +"items": [ + { + ... + "image_url": "http://localhost:8080/api/datasets/2/logo" + ... +(etc, etc) +``` + +For files: + +- "releaseOrCreateDate" +- "image_url" (optional) + +```javascript +"items": [ + { + "name": "test.png", + ... + "releaseOrCreateDate": "2016-05-10T12:53:39Z", + "image_url":"/api/access/datafile/42?imageThumb=true" +(etc, etc) +``` + +These examples are also shown in the [Search API](https://guides.dataverse.org/en/6.4/api/search.html) section of the API Guide. + +The image_url field was already part of the SolrSearchResult JSON (and incorrectly appeared in Search API documentation), but it wasn't returned by the API because it was appended only after the Solr query was executed in the SearchIncludeFragment of JSF (the old/current UI framework). Now, the field is set in SearchServiceBean, ensuring it is always returned by the API when an image is available. + +The Solr schema.xml file has been updated to include a new field called "dvParentAlias" for supporting the new response field "parentDataverseIdentifier". See upgrade instructions below. + +See also #10810 and #10811. + +### Search API: publicationStatuses + +The Search API (`/api/search`) response will now include publicationStatuses in the JSON response as long as the list is not empty. + +Example: + +```javascript +"items": [ + { + "name": "Darwin's Finches", + ... + "publicationStatuses": [ + "Unpublished", + "Draft" + ], +(etc, etc) +``` + +See also #10733 and #10738. 
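As a quick way to see these new fields after upgrading, a plain Search API query can be inspected for `publicationStatuses`, `image_url`, and the new collection fields (the hostname below is just an example, and `jq` is optional):

```shell
# Example only: fetch a single collection (dataverse) result and pretty-print it.
curl -s "https://demo.dataverse.org/api/search?q=finches&type=dataverse&per_page=1" | jq '.data.items'
```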
+ +### Search Facet Information Exposed + +A new endpoint `/api/datasetfields/facetables` lists all facetable dataset fields defined in the installation, as described in [the guides](https://guides.dataverse.org/en/6.4/api/native-api.html#list-all-facetable-dataset-fields). + +A new optional query parameter "returnDetails" has been added to the `/api/dataverses/{identifier}/facets/` endpoint to include detailed information about each DataverseFacet, as described in [the guides](https://guides.dataverse.org/en/6.4/api/native-api.html#list-facets-configured-for-a-dataverse-collection). See also #10726 and #10727. + +### User Permissions on Collections + +A new endpoint at `/api/dataverses/{identifier}/userPermissions` for obtaining the user permissions on a collection (dataverse) has been added. See also [the guides](https://guides.dataverse.org/en/6.4/api/native-api.html#get-user-permissions-on-a-dataverse), #10749 and #10751. + +### addDataverse Extended + +The addDataverse (`/api/dataverses/{identifier}`) API endpoint has been extended to allow adding metadata blocks, input levels, and facet IDs at creation time, as the Dataverse page in create mode does in JSF. See also [the guides](https://guides.dataverse.org/en/6.4/api/native-api.html#create-a-dataverse-collection), #10633 and #10644. + +### Metadata Blocks and Display on Create + +The `/api/dataverses/{identifier}/metadatablocks` endpoint has been fixed to not return fields marked as displayOnCreate=true if there is an input level with include=false, when query parameters returnDatasetFieldTypes=true and onlyDisplayedOnCreate=true are set. See also #10741 and #10767. + +The fields "depositor" and "dateOfDeposit" in the citation.tsv metadata block file have been updated to have the property "displayOnCreate" set to TRUE. In practice, only the API is affected because the UI has special logic that already shows these fields when datasets are created. See also #10850 and #10884. + +### Feature Flags Can Be Listed + +It is now possible to list all feature flags and see whether they are enabled. See also [the guides](https://guides.dataverse.org/en/6.4/api/native-api.html#list-all-feature-flags) and #10732. + +## Settings Added + +The following settings have been added: + +- dataverse.feature.disable-dataset-thumbnail-autoselect +- dataverse.feature.globus-use-experimental-async-framework +- dataverse.files.globus-monitoring-server +- dataverse.pid.*.crossref.url +- dataverse.pid.*.crossref.rest-api-url +- dataverse.pid.*.crossref.username +- dataverse.pid.*.crossref.password +- dataverse.pid.*.crossref.depositor +- dataverse.pid.*.crossref.depositor-email + +## Backward Incompatible Changes + +- The oai_dc export format has changed. See the "Remap oai_dc" section above. +- Several APIs related to DataCite have changed. See "More and Better Data Sent to DataCite" above. + +## Complete List of Changes + +For the complete list of code changes in this release, see the [6.4 milestone](https://github.com/IQSS/dataverse/issues?q=milestone%3A6.4+is%3Aclosed) in GitHub. + +## Getting Help + +For help with upgrading, installing, or general questions, please post to the [Dataverse Community Google Group](https://groups.google.com/g/dataverse-community) or email support@dataverse.org. + +## Installation + +If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/).
Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! + +Once you are in production, we would be delighted to update our [map of Dataverse installations](https://dataverse.org/installations) around the world to include yours! Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! + +You are also very welcome to join the [Global Dataverse Community Consortium](https://www.gdcc.io/) (GDCC). + +## Upgrade Instructions + +Upgrading requires a maintenance window and downtime. Please plan accordingly, create backups of your database, etc. + +These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.3. + +0\. These instructions assume that you are upgrading from the immediate previous version. If you are running an earlier version, the only supported way to upgrade is to progress through the upgrades to all the releases in between before attempting the upgrade to this version. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. Use `sudo` to change to that user first. For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +In the following commands, we assume that Payara 6 is installed in `/usr/local/payara6`. If not, adjust as needed. + +```shell +export PAYARA=/usr/local/payara6 +``` + +(or `setenv PAYARA /usr/local/payara6` if you are using a `csh`-like shell) + +1\. Undeploy the previous version + +```shell +$PAYARA/bin/asadmin undeploy dataverse-6.3 +``` + +2\. Stop and start Payara + +```shell +service payara stop +service payara start +``` + +3\. Deploy this version + +```shell +$PAYARA/bin/asadmin deploy dataverse-6.4.war +``` + +Note: if you have any trouble deploying, stop Payara, remove the following directories, start Payara, and try to deploy again. + +```shell +service payara stop +rm -rf $PAYARA/glassfish/domains/domain1/generated +rm -rf $PAYARA/glassfish/domains/domain1/osgi-cache +rm -rf $PAYARA/glassfish/domains/domain1/lib/databases +``` + +4\. For installations with internationalization: + +Please remember to update translations via [Dataverse language packs](https://github.com/GlobalDataverseCommunityConsortium/dataverse-language-packs). + +5\. Restart Payara + +```shell +service payara stop +service payara start +``` + +6\. Update metadata blocks + +These changes reflect incremental improvements made to the handling of core metadata fields. + +```shell +wget https://raw.githubusercontent.com/IQSS/dataverse/v6.4/scripts/api/data/metadatablocks/citation.tsv + +curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file citation.tsv +``` + +7\. Update the Solr schema.xml file. Start with the standard v6.4 schema.xml, then, if your installation uses any custom or experimental metadata blocks, update it to include the extra fields (step 7a). + +Stop Solr (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/6.4/installation/prerequisites.html#solr-init-script)).
+ +```shell +service solr stop +``` + +Replace schema.xml + +```shell +wget https://raw.githubusercontent.com/IQSS/dataverse/v6.4/conf/solr/schema.xml +cp schema.xml /usr/local/solr/solr-9.4.1/server/solr/collection1/conf +``` + +Start Solr (but if you use any custom metadata blocks, perform the next step, 7a, first). + +```shell +service solr start +``` + +7a\. For installations with custom or experimental metadata blocks: + +Before starting Solr, update the schema to include all the extra metadata fields that your installation uses. We do this by collecting the output of the Dataverse schema API and feeding it to the `update-fields.sh` script that we supply, as in the example below (modify the command lines as needed to reflect the names of the directories, if different): + +```shell +wget https://raw.githubusercontent.com/IQSS/dataverse/v6.4/conf/solr/update-fields.sh +chmod +x update-fields.sh +curl "http://localhost:8080/api/admin/index/solr/schema" | ./update-fields.sh /usr/local/solr/solr-9.4.1/server/solr/collection1/conf/schema.xml +``` + +Now start Solr. + +8\. Reindex Solr + +Below is the simplest way to reindex Solr: + +```shell +curl http://localhost:8080/api/admin/index +``` + +The API above rebuilds the existing index "in place". If you want to be absolutely sure that your index is up-to-date and consistent, you may consider wiping it clean and reindexing everything from scratch (see [the guides](https://guides.dataverse.org/en/latest/admin/solr-search-index.html)). Just note that, depending on the size of your database, a full reindex may take a while, and users will see incomplete search results during that window. + +9\. Run reExportAll to update dataset metadata exports + +This step is necessary because of changes described above for the `Datacite` and `oai_dc` export formats. + +Below is the simplest way to reexport all dataset metadata. For more advanced usage, please see [the guides](http://guides.dataverse.org/en/6.4/admin/metadataexport.html#batch-exports-through-the-api). + +```shell +curl http://localhost:8080/api/admin/metadata/reExportAll +``` + +10\. Push updated metadata to DataCite + +(If you don't use DataCite, you can skip this.) + +Above you updated the citation metadata block and Solr with the new "relationType" field. With these two changes, the "Relation Type" fields will be available and creation/publication of datasets will result in the expanded XML being sent to DataCite. You've also already run "reExportAll" to update the `Datacite` metadata export format. + +Entries at DataCite for published datasets can be updated by a superuser using an API call (newly [documented](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#update-metadata-for-all-published-datasets-at-the-pid-provider)): + +`curl -X POST -H 'X-Dataverse-key:' http://localhost:8080/api/datasets/modifyRegistrationPIDMetadataAll` + +This will loop through all published datasets (and released files with PIDs). As long as the loop completes, the call will return a 200/OK response. Any PIDs for which the update fails can be found using the following command: + +`grep 'Failure for id' server.log` + +Failures may occur if PIDs were never registered, or if they were never made findable. Any such cases can be fixed manually in DataCite Fabrica or using the [Reserve a PID](https://guides.dataverse.org/en/6.4/api/native-api.html#reserve-a-pid) API call and the newly documented `/api/datasets//modifyRegistration` call, respectively.
See https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#send-dataset-metadata-to-pid-provider. Please reach out with any questions. + +PIDs can also be updated by a superuser on a per-dataset basis using + +`curl -X POST -H 'X-Dataverse-key:' http://localhost:8080/api/datasets//modifyRegistrationMetadata` + +### Additional Upgrade Steps + +11\. If there are broken thumbnails + +To restore any broken thumbnails caused by the bug described above, you can call the `http://localhost:8080/api/admin/clearThumbnailFailureFlag` API, which will attempt to clear the flag on all files (regardless of whether caused by this bug or some other problem with the file) or the `http://localhost:8080/api/admin/clearThumbnailFailureFlag/$FILE_ID` to clear the flag for individual files. Calling the former, batch API is recommended. + +12\. PermaLinks with custom base-url + +If you currently use PermaLinks with a custom `base-url`: You must manually append `/citation?persistentId=` to the base URL to maintain functionality. + +If you use a PermaLinks without a configured `base-url`, no changes are required. diff --git a/doc/release-notes/7068-reserve-file-pids.md b/doc/release-notes/7068-reserve-file-pids.md deleted file mode 100644 index 182a0d7f67b..00000000000 --- a/doc/release-notes/7068-reserve-file-pids.md +++ /dev/null @@ -1,9 +0,0 @@ -## Release Highlights - -### Pre-Publish File DOI Reservation with DataCite - -Dataverse installations using DataCite (or other persistent identifier (PID) Providers that support reserving PIDs) will be able to reserve PIDs for files when they are uploaded (rather than at publication time). Note that reserving file DOIs can slow uploads with large numbers of files so administrators may need to adjust timeouts (specifically any Apache "``ProxyPass / ajp://localhost:8009/ timeout=``" setting in the recommended Dataverse configuration). - -## Major Use Cases - -- Users will have DOIs/PIDs reserved for their files as part of file upload instead of at publication time. (Issue #7068, PR #7334) diff --git a/doc/release-notes/8129-harvesting.md b/doc/release-notes/8129-harvesting.md deleted file mode 100644 index 63ca8744941..00000000000 --- a/doc/release-notes/8129-harvesting.md +++ /dev/null @@ -1,18 +0,0 @@ -### Remap oai_dc export and harvesting format fields: dc:type and dc:date - -The `oai_dc` export and harvesting format has had the following fields remapped: - -- dc:type was mapped to the field "Kind of Data". Now it is hard-coded to the word "Dataset". -- dc:date was mapped to the field "Production Date" when available and otherwise to "Publication Date". Now it is mapped the field "Publication Date" or the field used for the citation date, if set (see [Set Citation Date Field Type for a Dataset](https://guides.dataverse.org/en/6.3/api/native-api.html#set-citation-date-field-type-for-a-dataset)). - -In order for these changes to be reflected in existing datasets, a [reexport all](https://guides.dataverse.org/en/6.3/admin/metadataexport.html#batch-exports-through-the-api) should be run. - -For more information, please see #8129 and #10737. - -### Backward incompatible changes - -See the "Remap oai_dc export" section above. - -### Upgrade instructions - -In order for changes to the `oai_dc` metadata export format to be reflected in existing datasets, a [reexport all](https://guides.dataverse.org/en/6.3/admin/metadataexport.html#batch-exports-through-the-api) should be run. 
diff --git a/doc/release-notes/8578-support-for-iso-639-3-languages.md b/doc/release-notes/8578-support-for-iso-639-3-languages.md deleted file mode 100644 index c702b6b8a59..00000000000 --- a/doc/release-notes/8578-support-for-iso-639-3-languages.md +++ /dev/null @@ -1,17 +0,0 @@ -The Controlled Vocabulary Values list for the metadata field Language in the Citation block has now been extended to include roughly 7920 ISO 639-3 values. -- Some of the language entries in the pre-v.6.4 list correspond to "macro languages" in ISO-639-3 and admins/users may wish to update to use the corresponding individual language entries from ISO-639-3. As these cases are expected to be rare (they do not involve major world languages), finding them is not covered in the release notes. Anyone who desires help in this area is encouraged to reach out to the Dataverse community via any of the standard communication channels. -- ISO 639-3 codes were downloaded from: -``` -https://iso639-3.sil.org/code_tables/download_tables#Complete%20Code%20Tables:~:text=iso%2D639%2D3_Code_Tables_20240415.zip -``` -- The file used for merging with the existing citation.tsv was iso-639-3.tab - -To be added to the 6.4 release instructions: - -### Additional Upgrade Steps -6\. Update the Citation metadata block: - -``` -- `wget https://github.com/IQSS/dataverse/releases/download/v6.4/citation.tsv` -- `curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @citation.tsv -H "Content-type: text/tab-separated-values"` -``` diff --git a/doc/release-notes/8581-add-crossref-pid-provider.md b/doc/release-notes/8581-add-crossref-pid-provider.md deleted file mode 100644 index 3610aa6d7cc..00000000000 --- a/doc/release-notes/8581-add-crossref-pid-provider.md +++ /dev/null @@ -1,3 +0,0 @@ -Added CrossRef DOI Pid Provider - -See Installation Configuration document for JVM Settings to enable CrossRef as a Pid Provider diff --git a/doc/release-notes/8796-fix-license-display-indexing.md b/doc/release-notes/8796-fix-license-display-indexing.md deleted file mode 100644 index ebded088875..00000000000 --- a/doc/release-notes/8796-fix-license-display-indexing.md +++ /dev/null @@ -1 +0,0 @@ -When datasets have neither a license nor custom terms of use the display will indicate this. Also, these datasets will no longer be indexed as having custom terms. diff --git a/doc/release-notes/8945-ignore-shapefiles-under-hidden-directories-in-zip.md b/doc/release-notes/8945-ignore-shapefiles-under-hidden-directories-in-zip.md deleted file mode 100644 index 145ae5f6d55..00000000000 --- a/doc/release-notes/8945-ignore-shapefiles-under-hidden-directories-in-zip.md +++ /dev/null @@ -1,5 +0,0 @@ -### Shapefile Handling will now ignore files under a hidden directory within the zip file - -Directories that are hidden will be ignored when determining if a zip file contains Shapefile files. - -For more information, see #8945. \ No newline at end of file diff --git a/doc/release-notes/9081-CC0-waiver-turned-into-custom-license.md b/doc/release-notes/9081-CC0-waiver-turned-into-custom-license.md deleted file mode 100644 index 042b2ec39fd..00000000000 --- a/doc/release-notes/9081-CC0-waiver-turned-into-custom-license.md +++ /dev/null @@ -1,6 +0,0 @@ -In an earlier Dataverse release, Datasets with only 'CC0 Waiver' in termsofuse field were converted to 'Custom License' instead of CC0 1.0 licenses during an automated process. A new process was added to correct this. 
Only Datasets with no terms other than the one create by the previous process will be modified. -- The existing 'Terms of Use' must be equal to 'This dataset is made available under a Creative Commons CC0 license with the following additional/modified terms and conditions: CC0 Waiver' -- The following terms fields must be empty: Confidentiality Declaration, Special Permissions, Restrictions, Citation Requirements, Depositor Requirements, Conditions, and Disclaimer. -- The License ID must not be assigned. - -This process will set the License ID to that of the CC0 1.0 license and remove the contents of termsofuse field. diff --git a/doc/release-notes/9317-delete-saved-search.md b/doc/release-notes/9317-delete-saved-search.md deleted file mode 100644 index 34723801036..00000000000 --- a/doc/release-notes/9317-delete-saved-search.md +++ /dev/null @@ -1,4 +0,0 @@ -### Saved search deletion - -Saved searches can now be removed using API `/api/admin/savedsearches/$id`. See PR #10198. -This is reflected in the [Saved Search Native API section](https://dataverse-guide--10198.org.readthedocs.build/en/10198/api/native-api.html#saved-search) of the Guide. \ No newline at end of file diff --git a/doc/release-notes/9650-5-improve-list-linked-dataverses-API.md b/doc/release-notes/9650-5-improve-list-linked-dataverses-API.md new file mode 100644 index 00000000000..8c79955891b --- /dev/null +++ b/doc/release-notes/9650-5-improve-list-linked-dataverses-API.md @@ -0,0 +1,5 @@ +The following API have been added: + +/api/datasets/{datasetId}/links + +It lists the linked dataverses to a dataset. It can be executed only by administrators. \ No newline at end of file diff --git a/doc/release-notes/api-blocking-filter-json.md b/doc/release-notes/api-blocking-filter-json.md deleted file mode 100644 index 337ff82dd8b..00000000000 --- a/doc/release-notes/api-blocking-filter-json.md +++ /dev/null @@ -1,3 +0,0 @@ -* When any `ApiBlockingFilter` policy applies to a request, the JSON in the body of the error response is now valid JSON. - In case an API client did any special processing to allow it to parse the body, that is no longer necessary. - The status code of such responses has not changed. diff --git a/doc/release-notes/make-data-count-.md b/doc/release-notes/make-data-count-.md deleted file mode 100644 index 9022582dddb..00000000000 --- a/doc/release-notes/make-data-count-.md +++ /dev/null @@ -1,3 +0,0 @@ -### Counter Processor 1.05 Support - -This release includes support for counter-processor-1.05 for processing Make Data Count metrics. If you are running Make Data Counts support, you should reinstall/reconfigure counter-processor as described in the latest Guides. (For existing installations, note that counter-processor-1.05 requires a Python3, so you will need to follow the full counter-processor install. Also note that if you configure the new version the same way, it will reprocess the days in the current month when it is first run. This is normal and will not affect the metrics in Dataverse.) diff --git a/doc/release-notes/permalink-base-urls.md b/doc/release-notes/permalink-base-urls.md deleted file mode 100644 index 1dd74057351..00000000000 --- a/doc/release-notes/permalink-base-urls.md +++ /dev/null @@ -1,10 +0,0 @@ -The configuration setting `dataverse.pid.*.permalink.base-url`, which is used for PermaLinks, has been updated to -support greater flexibility. Previously, the string "/citation?persistentId=" was automatically appended to the -configured base URL. 
With this update, the base URL will now be used exactly as configured, without any automatic -additions. - -**Upgrade instructions:** - -- If you currently use a PermaLink provider with a configured `base-url`: You must manually append - "/citation?persistentId=" to the existing base URL to maintain functionality. -- If you use a PermaLink provider without a configured `base-url`: No changes are required. \ No newline at end of file diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index c6f15968f00..7c03a6f80c0 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -129,15 +129,21 @@ Lists the link(s) created between a dataset and a Dataverse collection (see the curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/datasets/$linked-dataset-id/links -It returns a list in the following format: +It returns a list in the following format (new format as of v6.4): .. code-block:: json { "status": "OK", "data": { - "dataverses that link to dataset id 56782": [ - "crc990 (id 18802)" + "id": 5, + "identifier": "FK2/OTCWMM", + "linked-dataverses": [ + { + "id": 2, + "alias": "dataverse1", + "displayName": "Lab experiments 2023 June" + } ] } } @@ -195,12 +201,41 @@ Mints a new identifier for a dataset previously registered with a handle. Only a .. _send-metadata-to-pid-provider: -Send Dataset metadata to PID provider -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Update Target URL for a Published Dataset at the PID provider +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Forces update to metadata provided to the PID provider of a published dataset. Only accessible to superusers. :: +Forces update to the target URL provided to the PID provider of a published dataset and assures the PID is findable. +Only accessible to superusers. :: + + curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/datasets/$dataset-id/modifyRegistration + +Update Target URL for all Published Datasets at the PID provider +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Forces update to the target URL provided to the PID provider of all published datasets and assures the PID is findable. +Only accessible to superusers. :: + + curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/datasets/modifyRegistrationAll + +Update Metadata for a Published Dataset at the PID provider +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Checks to see that the PID metadata for a published dataset (and any released files in it using file PIDs) +is up-to-date at the provider and updates the metadata if necessary. +Only accessible to superusers. :: curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/datasets/$dataset-id/modifyRegistrationMetadata + +Update Metadata for all Published Datasets at the PID provider +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Checks to see that the PID metadata is up-to-date at the provider for all published datasets +(and any released files in them using file PIDs) and updates the metadata if necessary. +Only accessible to superusers. :: + + curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/datasets/modifyRegistrationPIDMetadataAll + +The call returns 200/OK as long as the call completes. Any errors for individual datasets are reported in the log. 
Check for Unreserved PIDs and Reserve Them ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index a7af3e84b28..92cd4fc941b 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -7,6 +7,13 @@ This API changelog is experimental and we would love feedback on its usefulness. :local: :depth: 1 +v6.4 +---- + +- **/api/datasets/$dataset-id/modifyRegistration**: Changed from GET to POST +- **/api/datasets/modifyRegistrationPIDMetadataAll**: Changed from GET to POST +- **/api/datasets/{identifier}/links**: The GET endpoint returns a list of Dataverses linked to the given Dataset. The format of the response has changed for v6.4, making it backward incompatible. + v6.3 ---- diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 117aceb141d..f8b8620f121 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -887,7 +887,7 @@ Before calling the API, make sure the data files referenced by the ``POST``\ ed * This API does not cover staging files (with correct contents, checksums, sizes, etc.) in the corresponding places in the Dataverse installation's filestore. * This API endpoint does not support importing *files'* persistent identifiers. - * A Dataverse installation can import datasets with a valid PID that uses a different protocol or authority than said server is configured for. However, the server will not update the PID metadata on subsequent update and publish actions. + * A Dataverse installation can only import datasets with a valid PID that is managed by one of the PID providers that said installation is configured for. .. _import-dataset-with-type: @@ -935,7 +935,7 @@ Note that DDI XML does not have a field that corresponds to the "Subject" field .. warning:: * This API does not handle files related to the DDI file. - * A Dataverse installation can import datasets with a valid PID that uses a different protocol or authority than said server is configured for. However, the server will not update the PID metadata on subsequent update and publish actions. + * A Dataverse installation can only import datasets with a valid PID that is managed by one of the PID providers that said installation is configured for. .. _publish-dataverse-api: @@ -4412,6 +4412,12 @@ In order to obtain a new token use:: curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/users/token/recreate" +By default, this endpoint returns a response message indicating the user identifier and the new token.
+ +To also include the expiration time in the response message, the query parameter ``returnExpiration`` must be set to true:: + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/users/token/recreate?returnExpiration=true" + Delete a Token ~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst index 359313ce1b5..7ca9a5abca6 100755 --- a/doc/sphinx-guides/source/api/search.rst +++ b/doc/sphinx-guides/source/api/search.rst @@ -61,7 +61,7 @@ https://demo.dataverse.org/api/search?q=trees "name":"Trees", "type":"dataverse", "url":"https://demo.dataverse.org/dataverse/trees", - "image_url":"data:image/png;base64,iVBORw0...", + "image_url":"https://demo.dataverse.org/api/access/dvCardImage/1", "identifier":"trees", "description":"A tree dataverse with some birds", "published_at":"2016-05-10T12:53:38Z", @@ -76,7 +76,7 @@ https://demo.dataverse.org/api/search?q=trees "name":"Chestnut Trees", "type":"dataverse", "url":"https://demo.dataverse.org/dataverse/chestnuttrees", - "image_url":"data:image/png;base64,iVBORw0...", + "image_url":"https://demo.dataverse.org/api/access/dvCardImage/2", "identifier":"chestnuttrees", "description":"A dataverse with chestnut trees and an oriole", "published_at":"2016-05-10T12:52:38Z", @@ -91,7 +91,7 @@ https://demo.dataverse.org/api/search?q=trees "name":"trees.png", "type":"file", "url":"https://demo.dataverse.org/api/access/datafile/12", - "image_url":"data:image/png;base64,iVBORw0...", + "image_url":"https://demo.dataverse.org/api/access/datafile/12?imageThumb=true", "file_id":"12", "description":"", "published_at":"2016-05-10T12:53:39Z", @@ -113,7 +113,7 @@ https://demo.dataverse.org/api/search?q=trees "name":"Birds", "type":"dataverse", "url":"https://demo.dataverse.org/dataverse/birds", - "image_url":"data:image/png;base64,iVBORw0...", + "image_url":"https://demo.dataverse.org/api/access/dvCardImage/3", "identifier":"birds", "description":"A bird Dataverse collection with some trees", "published_at":"2016-05-10T12:57:27Z", @@ -173,8 +173,6 @@ https://demo.dataverse.org/api/search?q=trees } } -Note that the image_url field, if exists, will be returned as a regular URL for Datasets, while for Files and Dataverses, it will be returned as a Base64 URL. We plan to standardize this behavior so that the field always returns a regular URL. (See: https://github.com/IQSS/dataverse/issues/10831) - .. _advancedsearch-example: Advanced Search Examples @@ -202,7 +200,7 @@ In this example, ``show_relevance=true`` matches per field are shown. Available "name":"Finches", "type":"dataverse", "url":"https://demo.dataverse.org/dataverse/finches", - "image_url":"data:image/png;base64,iVBORw0...", + "image_url":"https://demo.dataverse.org/api/access/dvCardImage/2", "identifier":"finches", "description":"A Dataverse collection with finches", "published_at":"2016-05-10T12:57:38Z", diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index c719fb05e3c..7ee355302d8 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -68,9 +68,9 @@ # built documents. # # The short X.Y version. -version = '6.3' +version = '6.4' # The full version, including alpha/beta/rc tags. -release = '6.3' +release = '6.4' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 0005265fb1c..a0852a5465f 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -21,20 +21,70 @@ IQSS will not offer you support how to deploy or run it, please reach out to the You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) efforts. +.. _base-supported-image-tags: + Supported Image Tags ++++++++++++++++++++ This image is sourced from the main upstream code `repository of the Dataverse software `_. Development and maintenance of the `image's code `_ -happens there (again, by the community). Community-supported image tags are based on the two most important -upstream branches: - -- The ``unstable`` tag corresponds to the ``develop`` branch, where pull requests are merged. - (`Dockerfile `__) - -- The ``alpha`` tag corresponds to the ``master`` branch, where releases are cut from. - (`Dockerfile `__) - - +happens there (again, by the community). + +Our tagging is inspired by `Bitnami `_ and we offer two categories of tags: + +- rolling: images change over time +- immutable: images are fixed and never change + +In the tags below you'll see the term "flavor". This refers to the flavor of Linux the container is built on. We use Ubuntu as the basis for our images and, for the time being, the only operating system flavors we use and support are ``noble`` (6.4+) and ``jammy`` (pre-6.4). + +You can find all the tags at https://hub.docker.com/r/gdcc/base/tags + +Tags for Production Use +^^^^^^^^^^^^^^^^^^^^^^^ + +The images of the three latest releases of the Dataverse project will receive updates such as security patches for the underlying operating system. +Content will be fairly stable as disruptive changes like Payara or Java upgrades will be handled in a new major or minor upgrade to Dataverse (a new ``.`` tag). +Expect disruptive changes in case of high-risk security threats. + +- | **Latest** + | Definition: ``latest`` + | Summary: Rolling tag, always pointing to the latest revision of the most current Dataverse release. +- | **Rolling Production** + | Definition: ``.-`` + | Example: ``6.4-noble`` + | Summary: Rolling tag, pointing to the latest revision of an immutable production image for released versions of Dataverse. +- | **Immutable Production** + | Definition: ``.--r`` + | Example: ``6.4-noble-r1`` + | Summary: An **immutable tag** where the revision is incremented for rebuilds of the image. + | This image should be especially attractive if you want explicit control over when your images are updated. + +Tags for Development Use +^^^^^^^^^^^^^^^^^^^^^^^^ + +All of the tags below are strongly recommended for development purposes only due to their fast-changing nature. +In addition to updates due to PR merges, the most recent tags undergo scheduled maintenance to ensure timely security fixes. +When a development cycle of the Dataverse project finishes, maintenance ceases for any tags carrying version numbers. +For now, stale images will be kept on Docker Hub indefinitely. + +- | **Unstable** + | Definition: ``unstable`` + | Summary: Rolling tag, tracking the ``develop`` branch (see also :ref:`develop-branch`). (`Dockerfile `__) + | Please expect abrupt changes like new Payara or Java versions as well as OS updates or flavor switches when using this tag.
+- | **Upcoming** + | Definition: ``.-`` + | Example: ``6.5-noble`` + | Summary: Rolling tag, equivalent to ``unstable`` for current development cycle. + Will roll over to the rolling production tag after a Dataverse release. +- | **Flexible Stack** + | Definition: ``.--p-j`` + | Example: ``6.5-noble-p6.2024.6-j17`` + | Summary: Rolling tag during a development cycle of the Dataverse software (`Dockerfile `__). + +**NOTE**: In these tags for development usage, the version number will always be 1 minor version ahead of existing Dataverse releases. +Example: Assume Dataverse ``6.x`` is released, ``6.(x+1)`` is underway. +The rolling tag in use during the cycle will be ``6.(x+1)-FFF`` and ``6.(x+1)-FFF-p6.202P.P-jJJ``. +See also: :doc:`/developers/making-releases`. Image Contents ++++++++++++++ @@ -387,4 +437,4 @@ from `run-java-sh recommendations`_. .. _MicroProfile Config Sources: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html .. _run-java-sh recommendations: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations .. _Domain Master Password: https://docs.payara.fish/community/docs/Technical%20Documentation/Payara%20Server%20Documentation/Security%20Guide/Administering%20System%20Security.html#to-change-the-master-password -.. _Payara Admin Console: https://docs.payara.fish/community/docs/Technical%20Documentation/Payara%20Server%20Documentation/General%20Administration/Overview.html#administration-console \ No newline at end of file +.. _Payara Admin Console: https://docs.payara.fish/community/docs/Technical%20Documentation/Payara%20Server%20Documentation/General%20Administration/Overview.html#administration-console diff --git a/doc/sphinx-guides/source/contributor/documentation.md b/doc/sphinx-guides/source/contributor/documentation.md index 96277c3b373..2a8d6794921 100644 --- a/doc/sphinx-guides/source/contributor/documentation.md +++ b/doc/sphinx-guides/source/contributor/documentation.md @@ -56,7 +56,7 @@ In case you decide to use a Sphinx Docker container to build the guides, you can ### Installing Sphinx -First, make a fork of https://github.com/IQSS/dataverse and clone your fork locally. Then change to the ``doc/sphinx-guides`` directory. +First, make a fork of and clone your fork locally. Then change to the ``doc/sphinx-guides`` directory. ``cd doc/sphinx-guides`` @@ -83,7 +83,7 @@ On a Mac we recommend installing GraphViz through [Homebrew](). To edit the existing documentation: -- Create a branch (see :ref:`how-to-make-a-pull-request`). +- Create a branch (see {ref}`how-to-make-a-pull-request`). - In ``doc/sphinx-guides/source`` you will find the .rst files that correspond to https://guides.dataverse.org. - Using your preferred text editor, open and edit the necessary files, or create new ones. diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 4aaed10512e..759dd40413b 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -187,3 +187,5 @@ As described in that document, Globus transfers can be initiated by choosing the An overview of the control and data transfer interactions between components was presented at the 2022 Dataverse Community Meeting and can be viewed in the `Integrations and Tools Session Video `_ around the 1 hr 28 min mark. See also :ref:`Globus settings <:GlobusSettings>`. 
+ +An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. This framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag (see :ref:`feature-flags`) and the JVM option :ref:`dataverse.files.globus-monitoring-server`. diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 902fc9db2ee..43c237546be 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -185,6 +185,8 @@ As the transfer can take significant time and the API call is asynchronous, the Once the transfer completes, Dataverse will remove the write permission for the principal. +An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. This new framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag (see :ref:`feature-flags`) and the JVM option :ref:`dataverse.files.globus-monitoring-server`. + Note that when using a managed endpoint that uses the Globus S3 Connector, the checksum should be correct as Dataverse can validate it. For file-based endpoints, the checksum should be included if available but Dataverse cannot verify it. In the remote/reference case, where there is no transfer to monitor, the standard /addFiles API call (see :ref:`direct-add-to-dataset-api`) is used instead. There are no changes for the Globus case. diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index e436ba9e9d2..4936e942389 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -40,8 +40,8 @@ Use the GitHub issue number and the release tag for the name of the branch. (e.g Make the following changes in the release branch. -Bump Version Numbers --------------------- +Bump Version Numbers and Prepare Container Tags +----------------------------------------------- Increment the version number to the milestone (e.g. 5.10.1) in the following two files: @@ -52,6 +52,12 @@ Add the version being released to the lists in the following file: - doc/sphinx-guides/source/versions.rst (e.g. `versions.rst commit `_) +Return to the parent pom and make the following change, which is necessary for proper tagging of images: + +- modules/dataverse-parent/pom.xml -> ```` -> profile "ct" -> ```` -> Set ```` to ``${revision}`` + +(Before you make this change the value should be ``${parsedVersion.majorVersion}.${parsedVersion.nextMinorVersion}``. Later on, after cutting a release, we'll change it back to that value.) + Check in the Changes Above into a Release Branch and Merge It ------------------------------------------------------------- @@ -213,6 +219,17 @@ Now that we've published the release, close the milestone and create a new one. Note that for milestones we use just the number without the "v" (e.g. "5.10.1"). 
+Update the Container Base Image Version Property +------------------------------------------------ + +Create a new branch (any name is fine but ``prepare-next-iteration`` is suggested) and update the following files to prepare for the next development cycle: + +- modules/dataverse-parent/pom.xml -> ```` -> profile "ct" -> ```` -> Set ```` to ``${parsedVersion.majorVersion}.${parsedVersion.nextMinorVersion}`` + +Now create a pull request and merge it. + +For more background, see :ref:`base-supported-image-tags`. + Add the Release to the Dataverse Roadmap ---------------------------------------- diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 127955a44ea..ecd2db6214d 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -97,10 +97,12 @@ In the issue you can simply leave a comment to say you're working on it. If you tell us your GitHub username we are happy to add you to the "read only" team at https://github.com/orgs/IQSS/teams/dataverse-readonly/members so that we can assign the issue to you while you're working on it. You can also tell us if you'd like to be added to the `Dataverse Community Contributors spreadsheet `_. +.. _create-branch-for-pr: + Create a New Branch Off the develop Branch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Always create your feature branch from the latest code in develop, pulling the latest code if necessary. As mentioned above, your branch should have a name like "3728-doc-apipolicy-fix" that starts with the issue number you are addressing (e.g. `#3728 `_) and ends with a short, descriptive name. Dashes ("-") and underscores ("_") in your branch name are ok, but please try to avoid other special characters such as ampersands ("&") that have special meaning in Unix shells. +Always create your feature branch from the latest code in develop, pulling the latest code if necessary. As mentioned above, your branch should have a name like "3728-doc-apipolicy-fix" that starts with the issue number you are addressing (e.g. `#3728 `_) and ends with a short, descriptive name. Dashes ("-") and underscores ("_") in your branch name are ok, but please try to avoid other special characters such as ampersands ("&") that have special meaning in Unix shells. Please do not call your branch "develop" as it can cause maintainers :ref:`trouble `. Commit Your Change to Your New Branch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -299,3 +301,40 @@ GitHub documents how to make changes to a fork at https://help.github.com/articl vim path/to/file.txt git commit git push OdumInstitute 4709-postgresql_96 + +.. _develop-into-develop: + +Handling a Pull Request from a "develop" Branch +----------------------------------------------- + +Note: this is something only maintainers of Dataverse need to worry about, typically. + +From time to time a pull request comes in from a fork of Dataverse that uses "develop" as the branch behind the PR. (We've started asking contributors not to do this. See :ref:`create-branch-for-pr`.) This is problematic because the "develop" branch is the main integration branch for the project. (See :ref:`develop-branch`.) + +If the PR is perfect and can be merged as-is, no problem. Just merge it. However, if you would like to push commits to the PR, you are likely to run into trouble with multiple "develop" branches locally.
+ +The following is a list of commands oriented toward the simple task of merging the latest from the "develop" branch into the PR but the same technique can be used to push other commits to the PR as well. In this example the PR is coming from username "coder123" on GitHub. At a high level, what we're doing is working in a safe place (/tmp) away from our normal copy of the repo. We clone the main repo from IQSS, check out coder123's version of "develop" (called "dev2" or "false develop"), merge the real "develop" into it, and push to the PR. + +If there's a better way to do this, please get in touch! + +.. code-block:: bash + + # do all this in /tmp away from your normal code + cd /tmp + git clone git@github.com:IQSS/dataverse.git + cd dataverse + git remote add coder123 git@github.com:coder123/dataverse.git + git fetch coder123 + # check out coder123's "develop" to a branch with a different name ("dev2") + git checkout coder123/develop -b dev2 + # merge IQSS "develop" into coder123's "develop" ("dev2") + git merge origin/develop + # delete the IQSS "develop" branch locally (!) + git branch -d develop + # checkout "dev2" (false "develop") as "develop" for now + git checkout -b develop + # push the false "develop" to coder123's fork (to the PR) + git push coder123 develop + cd .. + # delete the tmp space (done! \o/) + rm -rf /tmp/dataverse diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a3d0743216e..e98ed8f5189 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -232,6 +232,10 @@ Dataverse can be configured with one or more PID providers, each of which can mi to manage an authority/shoulder combination, aka a "prefix" (PermaLinks also support custom separator characters as part of the prefix), along with an optional list of individual PIDs (with different authority/shoulders) than can be managed with that account. +Dataverse automatically manages assigning PIDs and making them findable when datasets are published. There are also :ref:`API calls that +allow updating the PID target URLs and metadata of already-published datasets manually if needed `, e.g. if a Dataverse instance is +moved to a new URL or when the software is updated to generate additional metadata or address schema changes at the PID service. + Testing PID Providers +++++++++++++++++++++ @@ -246,11 +250,11 @@ configure the credentials as described below. Alternately, you may wish to configure other providers for testing: - - EZID is available to University of California scholars and researchers. Testing can be done using the authority 10.5072 and shoulder FK2 with the "apitest" account (contact EZID for credentials) or an institutional account. Configuration in Dataverse is then analogous to using DataCite. +- EZID is available to University of California scholars and researchers. Testing can be done using the authority 10.5072 and shoulder FK2 with the "apitest" account (contact EZID for credentials) or an institutional account. Configuration in Dataverse is then analogous to using DataCite. - - The PermaLink provider, like the FAKE DOI provider, does not involve an external account. - Unlike the Fake DOI provider, the PermaLink provider creates PIDs that begin with "perma:", making it clearer that they are not DOIs, - and that do resolve to the local dataset/file page in Dataverse, making them useful for some production use cases. 
See :ref:`permalinks` and (for the FAKE DOI provider) the :doc:`/developers/dev-environment` section of the Developer Guide. +- The PermaLink provider, like the FAKE DOI provider, does not involve an external account. + Unlike the Fake DOI provider, the PermaLink provider creates PIDs that begin with "perma:", making it clearer that they are not DOIs, + and that do resolve to the local dataset/file page in Dataverse, making them useful for some production use cases. See :ref:`permalinks` and (for the FAKE DOI provider) the :doc:`/developers/dev-environment` section of the Developer Guide. Provider-specific configuration is described below. @@ -3308,6 +3312,13 @@ The email for your institution that you'd like to appear in bag-info.txt. See :r Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_EMAIL``. +.. _dataverse.files.globus-monitoring-server: + +dataverse.files.globus-monitoring-server +++++++++++++++++++++++++++++++++++++++++ + +This setting is required in conjunction with the ``globus-use-experimental-async-framework`` feature flag (see :ref:`feature-flags`). Setting it to true designates the Dataverse instance to serve as the dedicated polling server. It is needed so that the new framework can be used in a multi-node installation. + .. _feature-flags: Feature Flags @@ -3343,6 +3354,12 @@ please find all known feature flags below. Any of these flags can be activated u * - disable-return-to-author-reason - Removes the reason field in the `Publish/Return To Author` dialog that was added as a required field in v6.2 and makes the reason an optional parameter in the :ref:`return-a-dataset` API call. - ``Off`` + * - disable-dataset-thumbnail-autoselect + - Turns off automatic selection of a dataset thumbnail from image files in that dataset. When set to ``On``, a user can still manually pick a thumbnail image or upload a dedicated thumbnail image. + - ``Off`` + * - globus-use-experimental-async-framework + - Activates a new experimental implementation of Globus polling of ongoing remote data transfers that does not rely on the instance staying up continuously for the duration of the transfers and saves the state information about Globus upload requests in the database. Added in v6.4. Affects :ref:`:GlobusPollingInterval`. Note that the JVM option :ref:`dataverse.files.globus-monitoring-server` described above must also be enabled on one (and only one, in a multi-node installation) Dataverse instance. + - ``Off`` **Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_FEATURE_XXX`` (e.g. ``DATAVERSE_FEATURE_API_SESSION_AUTH=1``). These environment variables can be set in your shell before starting Payara. If you are using :doc:`Docker for development `, you can set them in the `docker compose `_ file. @@ -4821,10 +4838,12 @@ The list of parent dataset field names for which the LDN Announce workflow step The URL where the `dataverse-globus `_ "transfer" app has been deployed to support Globus integration. See :ref:`globus-support` for details. +.. _:GlobusPollingInterval: + :GlobusPollingInterval ++++++++++++++++++++++ -The interval in seconds between Dataverse calls to Globus to check on upload progress. Defaults to 50 seconds. See :ref:`globus-support` for details. +The interval in seconds between Dataverse calls to Globus to check on upload progress. 
Defaults to 50 seconds (or to 10 minutes, when the ``globus-use-experimental-async-framework`` feature flag is enabled). See :ref:`globus-support` for details. :GlobusSingleFileTransfer +++++++++++++++++++++++++ diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index 952eba72616..800bdc6e0f9 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -7,7 +7,8 @@ Dataverse Software Documentation Versions This list provides a way to refer to the documentation for previous and future versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. - pre-release `HTML (not final!) `__ and `PDF (experimental!) `__ built from the :doc:`develop ` branch :doc:`(how to contribute!) ` -- 6.3 +- 6.4 +- `6.3 `__ - `6.2 `__ - `6.1 `__ - `6.0 `__ diff --git a/modules/container-base/README.md b/modules/container-base/README.md index dc4d185bbb5..0598d709eac 100644 --- a/modules/container-base/README.md +++ b/modules/container-base/README.md @@ -31,17 +31,23 @@ to ask for help and guidance. ## Supported Image Tags This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). -Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-base) -happens there (again, by the community). Community-supported image tags are based on the two most important branches: +Development and maintenance of the [image's code](https://github.com/IQSS/dataverse/tree/develop/modules/container-base) happens there (again, by the community). +Community-supported image tags are based on the two most important branches: -- The `unstable` tag corresponds to the `develop` branch, where pull requests are merged. - ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-base/src/main/docker/Dockerfile)) -- The `alpha` tag corresponds to the `master` branch, where releases are cut from. - ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-base/src/main/docker/Dockerfile)) +Our tagging is inspired by [Bitnami](https://docs.vmware.com/en/VMware-Tanzu-Application-Catalog/services/tutorials/GUID-understand-rolling-tags-containers-index.html). +For more detailed information about our tagging policy, please read about our [base image tags](https://guides.dataverse.org/en/latest/container/base-image.html#supported-image-tags) in the Dataverse Containers Guide. + +For ease of use, here is a list of images that are currently maintained. + + + +All of them are rolling tags, except those ending with `-r`, which are the most recent immutable tags. +The `unstable` tags are the current development branch snapshot. +We strongly recommend using only immutable tags for production use cases. Within the main repository, you may find the base image files at `/modules/container-base`. This Maven module uses the [Maven Docker Plugin](https://dmp.fabric8.io) to build and ship the image. -You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to under the terms of the Apache 2.0 license. 
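As a rough illustration of the tagging policy above (the immutable tag shown is only a placeholder derived from the POM properties, not a promise of what is published; check the maintained tag list for real values):

```bash
# Rolling tag: tracks the develop branch and will change underneath you
docker pull gdcc/base:unstable

# Production deployments should pin an immutable tag from the list above instead, e.g.:
# docker pull gdcc/base:<version>-<flavor>-p<payara>-j<java>-r<revision>   # placeholder format only
```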
**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index fc672696df4..6417b5b91fa 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -39,9 +39,13 @@ ct docker-build + gdcc/base:${base.image.tag} - unstable - eclipse-temurin:${target.java.version}-jre + + ${base.image.version}-${java.image.flavor}${base.image.tag.suffix} + -p${payara.version}-j${target.java.version} + eclipse-temurin:${target.java.version}-jre-${java.image.flavor} + noble 1000 1000 linux/amd64,linux/arm64 @@ -95,7 +99,6 @@ - ${docker.platforms} ${project.build.directory}/buildx-state diff --git a/modules/container-base/src/backports/v6.1/001-pom.xml.patch b/modules/container-base/src/backports/v6.1/001-pom.xml.patch new file mode 100644 index 00000000000..6498f972889 --- /dev/null +++ b/modules/container-base/src/backports/v6.1/001-pom.xml.patch @@ -0,0 +1,26 @@ +--- a/modules/container-base/pom.xml 2024-08-26 21:53:55.985584815 +0200 ++++ b/modules/container-base/pom.xml 2024-08-26 21:38:09.925910314 +0200 +@@ -40,8 +42,11 @@ + + docker-build + gdcc/base:${base.image.tag} +- unstable +- eclipse-temurin:${target.java.version}-jre ++ gdcc/base:${base.image.tag} ++ ${base.image.version}-${java.image.flavor}${base.image.tag.suffix} ++ -p${payara.version}-j${target.java.version} ++ eclipse-temurin:${target.java.version}-jre-${java.image.flavor} ++ jammy + 1000 + 1000 + linux/amd64,linux/arm64 +@@ -110,6 +113,9 @@ + + assembly.xml + ++ ++ ${base.image.tag.revision} ++ + + + diff --git a/modules/container-base/src/backports/v6.1/002-Dockerfile.patch b/modules/container-base/src/backports/v6.1/002-Dockerfile.patch new file mode 100644 index 00000000000..4bb7a1eac91 --- /dev/null +++ b/modules/container-base/src/backports/v6.1/002-Dockerfile.patch @@ -0,0 +1,10 @@ +--- a/modules/container-base/src/main/docker/Dockerfile ++++ b/modules/container-base/src/main/docker/Dockerfile +@@ -233,4 +233,6 @@ LABEL org.opencontainers.image.created="@git.build.time@" \ + org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.title="Dataverse Base Image" \ +- org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" ++ org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" \ ++ org.opencontainers.image.base.name="@java.image@" \ ++ org.dataverse.deps.payara.version="@payara.version@" diff --git a/modules/container-base/src/backports/v6.1/003-parent-pom.xml.patch b/modules/container-base/src/backports/v6.1/003-parent-pom.xml.patch new file mode 100644 index 00000000000..a69cfd43ea7 --- /dev/null +++ b/modules/container-base/src/backports/v6.1/003-parent-pom.xml.patch @@ -0,0 +1,11 @@ +--- a/modules/dataverse-parent/pom.xml ++++ b/modules/dataverse-parent/pom.xml +@@ -457,7 +457,8 @@ + + ++ ${revision} + + + diff --git a/modules/container-base/src/backports/v6.2/001-pom.xml.patch b/modules/container-base/src/backports/v6.2/001-pom.xml.patch new file mode 100644 index 00000000000..6498f972889 --- /dev/null +++ b/modules/container-base/src/backports/v6.2/001-pom.xml.patch @@ -0,0 +1,26 @@ +--- a/modules/container-base/pom.xml 2024-08-26 21:53:55.985584815 +0200 ++++ 
b/modules/container-base/pom.xml 2024-08-26 21:38:09.925910314 +0200 +@@ -40,8 +42,11 @@ + + docker-build + gdcc/base:${base.image.tag} +- unstable +- eclipse-temurin:${target.java.version}-jre ++ gdcc/base:${base.image.tag} ++ ${base.image.version}-${java.image.flavor}${base.image.tag.suffix} ++ -p${payara.version}-j${target.java.version} ++ eclipse-temurin:${target.java.version}-jre-${java.image.flavor} ++ jammy + 1000 + 1000 + linux/amd64,linux/arm64 +@@ -110,6 +113,9 @@ + + assembly.xml + ++ ++ ${base.image.tag.revision} ++ + + + diff --git a/modules/container-base/src/backports/v6.2/002-Dockerfile.labels.patch b/modules/container-base/src/backports/v6.2/002-Dockerfile.labels.patch new file mode 100644 index 00000000000..fbb7f80c4ce --- /dev/null +++ b/modules/container-base/src/backports/v6.2/002-Dockerfile.labels.patch @@ -0,0 +1,10 @@ +--- a/modules/container-base/src/main/docker/Dockerfile ++++ b/modules/container-base/src/main/docker/Dockerfile +@@ -242,4 +242,6 @@ LABEL org.opencontainers.image.created="@git.build.time@" \ + org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.title="Dataverse Base Image" \ +- org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" ++ org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" \ ++ org.opencontainers.image.base.name="@java.image@" \ ++ org.dataverse.deps.payara.version="@payara.version@" diff --git a/modules/container-base/src/backports/v6.2/003-Dockerfile.security.patch b/modules/container-base/src/backports/v6.2/003-Dockerfile.security.patch new file mode 100644 index 00000000000..1ab4c3a980a --- /dev/null +++ b/modules/container-base/src/backports/v6.2/003-Dockerfile.security.patch @@ -0,0 +1,10 @@ +--- a/modules/container-base/src/main/docker/Dockerfile ++++ b/modules/container-base/src/main/docker/Dockerfile +@@ -226,6 +226,7 @@ USER root + RUN true && \ + chgrp -R 0 "${DOMAIN_DIR}" && \ + chmod -R g=u "${DOMAIN_DIR}" ++USER payara + + # Set the entrypoint to tini (as a process supervisor) + ENTRYPOINT ["/usr/bin/dumb-init", "--"] diff --git a/modules/container-base/src/backports/v6.2/004-parent-pom.xml.patch b/modules/container-base/src/backports/v6.2/004-parent-pom.xml.patch new file mode 100644 index 00000000000..a69cfd43ea7 --- /dev/null +++ b/modules/container-base/src/backports/v6.2/004-parent-pom.xml.patch @@ -0,0 +1,11 @@ +--- a/modules/dataverse-parent/pom.xml ++++ b/modules/dataverse-parent/pom.xml +@@ -457,7 +457,8 @@ + + ++ ${revision} + + + diff --git a/modules/container-base/src/backports/v6.3/001-pom.xml.patch b/modules/container-base/src/backports/v6.3/001-pom.xml.patch new file mode 100644 index 00000000000..6498f972889 --- /dev/null +++ b/modules/container-base/src/backports/v6.3/001-pom.xml.patch @@ -0,0 +1,26 @@ +--- a/modules/container-base/pom.xml 2024-08-26 21:53:55.985584815 +0200 ++++ b/modules/container-base/pom.xml 2024-08-26 21:38:09.925910314 +0200 +@@ -40,8 +42,11 @@ + + docker-build + gdcc/base:${base.image.tag} +- unstable +- eclipse-temurin:${target.java.version}-jre ++ gdcc/base:${base.image.tag} ++ ${base.image.version}-${java.image.flavor}${base.image.tag.suffix} ++ -p${payara.version}-j${target.java.version} ++ eclipse-temurin:${target.java.version}-jre-${java.image.flavor} ++ jammy + 1000 + 1000 + linux/amd64,linux/arm64 +@@ -110,6 +113,9 @@ + + assembly.xml + ++ ++ 
${base.image.tag.revision} ++ + + + diff --git a/modules/container-base/src/backports/v6.3/002-Dockerfile.labels.patch b/modules/container-base/src/backports/v6.3/002-Dockerfile.labels.patch new file mode 100644 index 00000000000..c7744882153 --- /dev/null +++ b/modules/container-base/src/backports/v6.3/002-Dockerfile.labels.patch @@ -0,0 +1,10 @@ +--- a/modules/container-base/src/main/docker/Dockerfile ++++ b/modules/container-base/src/main/docker/Dockerfile +@@ -240,4 +241,6 @@ LABEL org.opencontainers.image.created="@git.build.time@" \ + org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.title="Dataverse Base Image" \ +- org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" ++ org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" \ ++ org.opencontainers.image.base.name="@java.image@" \ ++ org.dataverse.deps.payara.version="@payara.version@" diff --git a/modules/container-base/src/backports/v6.3/003-Dockerfile.security.patch b/modules/container-base/src/backports/v6.3/003-Dockerfile.security.patch new file mode 100644 index 00000000000..d8487b3aacc --- /dev/null +++ b/modules/container-base/src/backports/v6.3/003-Dockerfile.security.patch @@ -0,0 +1,7 @@ +--- a/modules/container-base/src/main/docker/Dockerfile ++++ b/modules/container-base/src/main/docker/Dockerfile +@@ -224,6 +224,7 @@ USER root + RUN true && \ + chgrp -R 0 "${DOMAIN_DIR}" && \ + chmod -R g=u "${DOMAIN_DIR}" ++USER payara diff --git a/modules/container-base/src/backports/v6.3/004-parent-pom.xml.patch b/modules/container-base/src/backports/v6.3/004-parent-pom.xml.patch new file mode 100644 index 00000000000..a69cfd43ea7 --- /dev/null +++ b/modules/container-base/src/backports/v6.3/004-parent-pom.xml.patch @@ -0,0 +1,11 @@ +--- a/modules/dataverse-parent/pom.xml ++++ b/modules/dataverse-parent/pom.xml +@@ -457,7 +457,8 @@ + + ++ ${revision} + + + diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 57ab5ac5787..802db62e5e4 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -222,7 +222,7 @@ RUN < - 6.3 + 6.4 17 UTF-8 @@ -441,10 +441,13 @@ ct - - + ${parsedVersion.majorVersion}.${parsedVersion.nextMinorVersion} + @@ -470,6 +473,21 @@ false + + + org.codehaus.mojo + build-helper-maven-plugin + 3.6.0 + + + parse-version + + parse-version + + initialize + + + diff --git a/pom.xml b/pom.xml index 76a8f61444f..edf72067976 100644 --- a/pom.xml +++ b/pom.xml @@ -999,7 +999,10 @@ unstable false gdcc/base:${base.image.tag} - unstable + + noble + + ${base.image.version}-${base.image.flavor}-p${payara.version}-j${target.java.version} gdcc/configbaker:${conf.image.tag} ${app.image.tag} diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json index cc856c6372f..151c4732ad7 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -331,6 +331,12 @@ "typeClass": "compound", "value": [ { + "publicationRelationType" : { + "typeName" : "publicationRelationType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "IsSupplementTo" + }, "publicationCitation": { "typeName": "publicationCitation", 
"multiple": false, diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index b7c265f7244..abc09465603 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -31,55 +31,56 @@ topicClassVocab Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 27 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 28 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation publication Related Publication The article or report that uses the data in the Dataset. The full list of related publications will be displayed on the metadata tab none 29 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy - publicationCitation Citation The full bibliographic citation for the related publication textbox 30 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation - publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 31 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme - publicationIDNumber Identifier The identifier for a related publication text 32 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier - publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage https:// url 33 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution - notesText Notes Additional information about the Dataset textbox 34 FALSE FALSE FALSE FALSE TRUE FALSE citation - language Language A language that the Dataset's files is written in text 35 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language - producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 36 FALSE FALSE TRUE FALSE FALSE FALSE citation - producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 37 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation - producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 39 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerURL URL The URL of the producer's website https:// url 40 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerLogoURL Logo URL The URL of the producer's logo https:// url 41
FALSE FALSE FALSE FALSE FALSE FALSE producer citation - productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 42 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 43 TRUE FALSE TRUE TRUE FALSE FALSE citation - contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 44 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor - contributorType Type Indicates the type of contribution made to the dataset text 45 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation - contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 46 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation - grantNumber Funding Information Information about the Dataset's financial support none 47 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor - grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 48 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 49 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 50 FALSE FALSE TRUE FALSE FALSE FALSE citation - distributorName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 51 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation - distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 53 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorURL URL The URL of the distributor's webpage https:// url 54 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 55
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 56 TRUE FALSE FALSE TRUE FALSE FALSE citation - depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 57 FALSE FALSE FALSE FALSE FALSE FALSE citation - dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 58 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted - timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 59 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage - timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 60 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - dateOfCollection Date of Collection The dates when the data were collected or generated none 62 ; FALSE FALSE TRUE FALSE FALSE FALSE citation - dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 64 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - kindOfData Data Type The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) text 65 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData - series Series Information about the dataset series to which the Dataset belong none 66 : FALSE FALSE TRUE FALSE FALSE FALSE citation - seriesName Name The name of the dataset series text 67 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation - seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 68 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation - software Software Information about the software used to generate the Dataset none 69 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy - softwareName Name The name of software used to generate the Dataset text 70 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - softwareVersion Version The version of the software used to generate the Dataset, e.g. 
4.11 text 71 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation - relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 73 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation - otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 74 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references - dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. a book, article, serial, or machine-readable data file) textbox 75 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom - originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation - characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation - accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 78 FALSE FALSE FALSE FALSE FALSE FALSE citation + publicationRelationType Relation Type The nature of the relationship between this Dataset and the related publication text 30 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://datacite.org/schema/kernel-4/simpleTypes#relationType + publicationCitation Citation The full bibliographic citation for the related publication textbox 31 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation + publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 32 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme + publicationIDNumber Identifier The identifier for a related publication text 33 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier + publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. 
a journal article webpage https:// url 34 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution + notesText Notes Additional information about the Dataset textbox 35 FALSE FALSE FALSE FALSE TRUE FALSE citation + language Language A language that the Dataset's files is written in text 36 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language + producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 37 FALSE FALSE TRUE FALSE FALSE FALSE citation + producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 38 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation + producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 39 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 40 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerURL URL The URL of the producer's website https:// url 41 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerLogoURL Logo URL The URL of the producer's logo https:// url 42
FALSE FALSE FALSE FALSE FALSE FALSE producer citation + productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 43 TRUE FALSE FALSE TRUE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 44 TRUE FALSE TRUE TRUE FALSE FALSE citation + contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 45 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor + contributorType Type Indicates the type of contribution made to the dataset text 46 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation + contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 47 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation + grantNumber Funding Information Information about the Dataset's financial support none 48 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor + grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 49 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 50 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 51 FALSE FALSE TRUE FALSE FALSE FALSE citation + distributorName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 52 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation + distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 53 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 54 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorURL URL The URL of the distributor's webpage https:// url 55 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 56
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 57 TRUE FALSE FALSE TRUE FALSE FALSE citation + depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 58 FALSE FALSE FALSE FALSE TRUE FALSE citation + dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 59 FALSE FALSE FALSE TRUE TRUE FALSE citation http://purl.org/dc/terms/dateSubmitted + timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 60 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage + timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 62 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + dateOfCollection Date of Collection The dates when the data were collected or generated none 63 ; FALSE FALSE TRUE FALSE FALSE FALSE citation + dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 64 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 65 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + kindOfData Data Type The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) text 66 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData + series Series Information about the dataset series to which the Dataset belong none 67 : FALSE FALSE TRUE FALSE FALSE FALSE citation + seriesName Name The name of the dataset series text 68 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation + seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 69 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation + software Software Information about the software used to generate the Dataset none 70 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy + softwareName Name The name of software used to generate the Dataset text 71 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + softwareVersion Version The version of the software used to generate the Dataset, e.g. 
4.11 text 72 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 73 FALSE FALSE TRUE FALSE FALSE FALSE citation + relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 74 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation + otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 75 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references + dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. a book, article, serial, or machine-readable data file) textbox 76 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom + originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation + characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 78 FALSE FALSE FALSE FALSE FALSE FALSE citation + accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 79 FALSE FALSE FALSE FALSE FALSE FALSE citation #controlledVocabulary DatasetField Value identifier displayOrder subject Agricultural Sciences D01 0 subject Arts and Humanities D0 1 @@ -172,14 +173,13 @@ language Acatepec Me'phaa tpx 30 tpx language Achagua aca 31 aca language Achang acn 32 acn - language Ache yif 33 yif + language Ache yif 33 yif guq Aché language Acheron acz 34 acz language Achi acr 35 acr language Achinese ace 36 ace language Achterhoeks act 37 act language Achuar-Shiwiar acu 38 acu language Achumawi acv 39 acv - language Aché guq 40 guq language Acoli ach 41 ach language Acroá acs 42 acs language Adai xad 43 xad @@ -295,7 +295,7 @@ language Akwa akw 153 akw language Akyaung Ari Naga nqy 154 nqy language Al-Sayyid Bedouin Sign Language syy 155 syy - language Alaba-K’abeena alw 156 alw + language Alaba-K'abeena alw 156 alw language Alabama akz 157 akz language Alabat Island Agta dul 158 dul language Alacatlatzala Mixtec mim 159 mim @@ -796,7 +796,7 @@ language Bardi bcj 654 bcj language Barein bva 655 bva language Bargam mlp 656 mlp - language Bari bfa 657 bfa + language Bari bfa 657 bfa mot Barí language Bariai bch 658 bch language Bariji bjc 659 bjc language Barikanchi bxo 660 bxo @@ -810,7 +810,6 @@ language Barwe bwg 668 bwg language Barzani Jewish Neo-Aramaic bjf 669 bjf language Baré bae 670 bae - language Barí mot 671 mot language Basa (Cameroon) bas 672 bas language Basa (Nigeria) bzw 673 bzw language Basa-Gumna bsl 674 bsl @@ -2410,7 +2409,7 @@ language Gweda grw 2268 grw language Gweno gwe 2269 gwe language Gwere gwr 2270 gwr - language Gwichʼin gwi 2271 gwi + language Gwich'in gwi 2271 gwi language Gyalsumdo gyo 2272 gyo language Gyele gyi 2273 gyi language Gyem gye 2274 gye @@ -2887,7 +2886,7 @@ language Jutish jut 2745 jut language Juwal mwb 2746 mwb language Juxtlahuaca Mixtec vmc 2747 vmc - language Juǀʼhoan ktz 2748 ktz + language Juǀ'hoan ktz 2748 ktz 
language Jwira-Pepesa jwi 2749 jwi language Jèrriais nrf 2750 nrf language Júma jua 2751 jua @@ -5401,7 +5400,7 @@ language Old Burmese obr 5259 obr language Old Cham ocm 5260 ocm language Old Chinese och 5261 och - language Old Church Slavonic, Church Slavonic, Old Bulgarian chu 5262 chu cu Church Slavonic Church Slavic Old Church Slavonic Old Bulgarian + language Old Church Slavonic chu 5262 chu cu Church Slavonic Church Slavic Old Church Slavonic Old Bulgarian language Old Cornish oco 5263 oco language Old Dutch odt 5264 odt language Old English (ca. 450-1100) ang 5265 ang @@ -6068,7 +6067,7 @@ language Salasaca Highland Quichua qxl 5926 qxl language Salawati xmx 5927 xmx language Saleman sau 5928 sau - language Saliba sbe 5929 sbe + language Saliba sbe 5929 sbe slc Sáliba language Salinan sln 5930 sln language Sallands sdz 5931 sdz language Salt-Yui sll 5932 sll @@ -6692,7 +6691,6 @@ language Syenara Senoufo shz 6550 shz language Sylheti syl 6551 syl language Syriac syr 6552 syr - language Sáliba slc 6553 slc language São Paulo Kaingáng zkp 6554 zkp language Sãotomense cri 6555 cri language Sìcìté Sénoufo sep 6556 sep @@ -7439,7 +7437,7 @@ language Volapük vol 7297 vol vo language Volscian xvo 7298 xvo language Vono kch 7299 kch - language Voro vor 7300 vor + language Voro vor 7300 vor vro Võro language Votic vot 7301 vot language Vumbu vum 7302 vum language Vunapu vnp 7303 vnp @@ -7447,7 +7445,6 @@ language Vurës msn 7305 msn language Vute vut 7306 vut language Vwanji wbi 7307 wbi - language Võro vro 7308 vro language Wa wbm 7309 wbm language Wa'ema wag 7310 wag language Waama wwa 7311 wwa @@ -7544,7 +7541,7 @@ language Wapishana wap 7402 wap language Wappo wao 7403 wao language War-Jaintia aml 7404 aml - language Wara wbf 7405 wbf + language Wara wbf 7405 wbf tci Wára language Warao wba 7406 wba language Waray (Australia) wrz 7407 wrz language Waray (Philippines) war 7408 war @@ -7747,7 +7744,6 @@ language Wuzlam udl 7605 udl language Wyandot wyn 7606 wyn language Wymysorys wym 7607 wym - language Wára tci 7608 tci language Wãpha juw 7609 juw language Wè Northern wob 7610 wob language Wè Southern gxx 7611 gxx @@ -8060,3 +8056,9 @@ language ǂUngkue gku 7918 gku language ǃXóõ nmn 7919 nmn language Not applicable 7920 + publicationRelationType IsCitedBy RT1 1 + publicationRelationType Cites RT2 2 + publicationRelationType IsSupplementTo RT3 3 + publicationRelationType IsSupplementedBy RT4 4 + publicationRelationType IsReferencedBy RT5 5 + publicationRelationType References RT6 6 \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 29a4a14c021..1a610d9ea6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -1123,4 +1123,23 @@ private boolean tagExists(String tagLabel) { } return false; } + + public boolean isDeaccessioned() { + // return true, if all published versions were deaccessioned + boolean inDeaccessionedVersions = false; + for (FileMetadata fmd : getFileMetadatas()) { + DatasetVersion testDsv = fmd.getDatasetVersion(); + if (testDsv.isReleased()) { + return false; + } + // Also check for draft version + if (testDsv.isDraft()) { + return false; + } + if (testDsv.isDeaccessioned()) { + inDeaccessionedVersions = true; + } + } + return inDeaccessionedVersions; // since any published version would have already returned + } } // end of class diff --git 
a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java index c679cd7edad..c03baec73af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java @@ -225,6 +225,15 @@ public Pair getLinkComponents() { return linkComponents.get(parentDatasetField.getDatasetFieldType().getName()); } + public boolean hasChildOfType(String name) { + for (DatasetField child : childDatasetFields) { + if (child.getDatasetFieldType().getName().equals(name)) { + return true; + } + } + return false; + } + private Map removeLastComma(Map mapIn) { Iterator> itr = mapIn.entrySet().iterator(); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index d91aa101eb5..abb812d1ba3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -40,6 +40,7 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String note = "note"; + public final static String publicationRelationType = "publicationRelationType"; public final static String publicationCitation = "publicationCitation"; public final static String publicationIDType = "publicationIDType"; public final static String publicationIDNumber = "publicationIDNumber"; @@ -157,6 +158,8 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String confidentialityDeclaration="confidentialityDeclaration"; public final static String specialPermissions="specialPermissions"; public final static String restrictions="restrictions"; + @Deprecated + //Doesn't appear to be used and is not datasetContact public final static String contact="contact"; public final static String citationRequirements="citationRequirements"; public final static String depositorRequirements="depositorRequirements"; @@ -487,6 +490,8 @@ public String getRestrictions() { return restrictions; } + @Deprecated + //Appears to not be used public String getContact() { return contact; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index 8cc86626c6a..ff78b0c83ec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -85,6 +85,9 @@ public class DatasetFieldServiceBean implements java.io.Serializable { //Note that for primitive fields, the prent and term-uri-field are the same and these maps have the same entry Map cvocMapByTermUri = null; + //Flat list of cvoc term-uri and managed fields by Id + Set cvocFieldSet = null; + //The hash of the existing CVocConf setting. 
Used to determine when the setting has changed and it needs to be re-parsed to recreate the cvocMaps String oldHash = null; @@ -278,6 +281,10 @@ public Map getCVocConf(boolean byTermUriField){ String cvocSetting = settingsService.getValueForKey(SettingsServiceBean.Key.CVocConf); if (cvocSetting == null || cvocSetting.isEmpty()) { oldHash=null; + //Release old maps + cvocMap=null; + cvocMapByTermUri=null; + cvocFieldSet = null; return new HashMap<>(); } String newHash = DigestUtils.md5Hex(cvocSetting); @@ -287,6 +294,7 @@ public Map getCVocConf(boolean byTermUriField){ oldHash=newHash; cvocMap=new HashMap<>(); cvocMapByTermUri=new HashMap<>(); + cvocFieldSet = new HashSet<>(); try (JsonReader jsonReader = Json.createReader(new StringReader(settingsService.getValueForKey(SettingsServiceBean.Key.CVocConf)))) { JsonArray cvocConfJsonArray = jsonReader.readArray(); @@ -303,11 +311,13 @@ public Map getCVocConf(boolean byTermUriField){ if (termUriField.equals(dft.getName())) { logger.fine("Found primitive field for term uri : " + dft.getName() + ": " + dft.getId()); cvocMapByTermUri.put(dft.getId(), jo); + cvocFieldSet.add(dft.getId()); } } else { DatasetFieldType childdft = findByNameOpt(jo.getString("term-uri-field")); logger.fine("Found term child field: " + childdft.getName()+ ": " + childdft.getId()); cvocMapByTermUri.put(childdft.getId(), jo); + cvocFieldSet.add(childdft.getId()); if (childdft.getParentDatasetFieldType() != dft) { logger.warning("Term URI field (" + childdft.getDisplayName() + ") not a child of parent: " + dft.getDisplayName()); @@ -327,6 +337,7 @@ public Map getCVocConf(boolean byTermUriField){ + managedFields.getString(s)); } else { logger.fine("Found: " + dft.getName()); + cvocFieldSet.add(dft.getId()); } } } @@ -338,6 +349,10 @@ public Map getCVocConf(boolean byTermUriField){ return byTermUriField ? cvocMapByTermUri : cvocMap; } + public Set getCvocFieldSet() { + return cvocFieldSet; + } + /** * Adds information about the external vocabulary term being used in this DatasetField to the ExternalVocabularyValue table if it doesn't already exist. 
* @param df - the primitive/parent compound field containing a newly saved value @@ -468,7 +483,8 @@ public JsonObject getExternalVocabularyValue(String termUri) { logger.warning("Problem parsing external vocab value for uri: " + termUri + " : " + e.getMessage()); } } catch (NoResultException nre) { - logger.warning("No external vocab value for uri: " + termUri); + //Could just be a plain text value + logger.fine("No external vocab value for uri: " + termUri); } return null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java b/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java index 7680ebc16db..a0696ab38d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java @@ -6,7 +6,6 @@ package edu.harvard.iq.dataverse; - /** * * @author skraffmiller @@ -25,10 +24,12 @@ public class DatasetRelPublication { private String description; private boolean replicationData; private int displayOrder; + private String relationType; public int getDisplayOrder() { return displayOrder; } + public void setDisplayOrder(int displayOrder) { this.displayOrder = displayOrder; } @@ -64,8 +65,7 @@ public String getUrl() { public void setUrl(String url) { this.url = url; } - - + public String getTitle() { return title; } @@ -82,12 +82,21 @@ public void setDescription(String description) { this.description = description; } - public boolean isEmpty() { - return ((text==null || text.trim().equals("")) - && (!replicationData) - && (idType==null || idType.trim().equals("")) - && (idNumber==null || idNumber.trim().equals("")) - && (url==null || url.trim().equals(""))); - } + public void setRelationType(String type) { + relationType = type; + + } + + public String getRelationType() { + return relationType; + } + + public boolean isEmpty() { + return ((text == null || text.trim().equals("")) + && (!replicationData) + && (idType == null || idType.trim().equals("")) + && (idNumber == null || idNumber.trim().equals("")) + && (url == null || url.trim().equals(""))); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 8b81800ba39..e519614ba55 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -412,12 +412,20 @@ public boolean checkDatasetLock(Long datasetId) { List lock = lockCounter.getResultList(); return lock.size()>0; } - + + public List getLocksByDatasetId(Long datasetId) { + TypedQuery locksQuery = em.createNamedQuery("DatasetLock.getLocksByDatasetId", DatasetLock.class); + locksQuery.setParameter("datasetId", datasetId); + return locksQuery.getResultList(); + } + public List getDatasetLocksByUser( AuthenticatedUser user) { return listLocks(null, user); } + // @todo: we'll be better off getting rid of this method and using the other + // version of addDatasetLock() (that uses datasetId instead of Dataset). @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public DatasetLock addDatasetLock(Dataset dataset, DatasetLock lock) { lock.setDataset(dataset); @@ -467,6 +475,7 @@ public DatasetLock addDatasetLock(Long datasetId, DatasetLock.Reason reason, Lon * is {@code aReason}. * @param dataset the dataset whose locks (for {@code aReason}) will be removed. * @param aReason The reason of the locks that will be removed. 
+ * @todo this should probably take dataset_id, not a dataset */ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void removeDatasetLocks(Dataset dataset, DatasetLock.Reason aReason) { @@ -947,7 +956,7 @@ public void callFinalizePublishCommandAsynchronously(Long datasetId, CommandCont try { Thread.sleep(1000); } catch (Exception ex) { - logger.warning("Failed to sleep for a second."); + logger.warning("Failed to sleep for one second."); } logger.fine("Running FinalizeDatasetPublicationCommand, asynchronously"); Dataset theDataset = find(datasetId); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 943693355a3..0433c425fd2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1342,7 +1342,7 @@ public List getGeographicCoverage() { } geoCoverages.add(coverageItem); } - + break; } } return geoCoverages; @@ -1356,24 +1356,42 @@ public List getRelatedPublications() { for (DatasetFieldCompoundValue publication : dsf.getDatasetFieldCompoundValues()) { DatasetRelPublication relatedPublication = new DatasetRelPublication(); for (DatasetField subField : publication.getChildDatasetFields()) { - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationCitation)) { - String citation = subField.getDisplayValue(); - relatedPublication.setText(citation); - } - - - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) { - // We have to avoid using subField.getDisplayValue() here - because the DisplayFormatType - // for this url metadata field is likely set up so that the display value is automatically - // turned into a clickable HTML HREF block, which we don't want to end in our Schema.org JSON-LD output. - // So we want to use the raw value of the field instead, with - // minimal HTML sanitation, just in case (this would be done on all URLs in getDisplayValue()). - String url = subField.getValue(); - if (StringUtils.isBlank(url) || DatasetField.NA_VALUE.equals(url)) { - relatedPublication.setUrl(""); - } else { - relatedPublication.setUrl(MarkupChecker.sanitizeBasicHTML(url)); - } + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.publicationCitation: + relatedPublication.setText(subField.getDisplayValue()); + break; + case DatasetFieldConstant.publicationURL: + // We have to avoid using subField.getDisplayValue() here - because the + // DisplayFormatType + // for this url metadata field is likely set up so that the display value is + // automatically + // turned into a clickable HTML HREF block, which we don't want to end in our + // Schema.org + // JSON-LD output. So we want to use the raw value of the field instead, with + // minimal HTML + // sanitation, just in case (this would be done on all URLs in + // getDisplayValue()). 
+ String url = subField.getValue(); + if (StringUtils.isBlank(url) || DatasetField.NA_VALUE.equals(url)) { + relatedPublication.setUrl(""); + } else { + relatedPublication.setUrl(MarkupChecker.sanitizeBasicHTML(url)); + } + break; + case DatasetFieldConstant.publicationIDType: + // QDR idType has a trailing : now (Aug 2021) + // Get value without any display modifications + subField.getDatasetFieldType().setDisplayFormat("#VALUE"); + relatedPublication.setIdType(subField.getDisplayValue()); + break; + case DatasetFieldConstant.publicationIDNumber: + // Get sanitized value without any display modifications + subField.getDatasetFieldType().setDisplayFormat("#VALUE"); + relatedPublication.setIdNumber(subField.getDisplayValue()); + break; + case DatasetFieldConstant.publicationRelationType: + relatedPublication.setRelationType(subField.getDisplayValue()); + break; } } relatedPublications.add(relatedPublication); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java index 55b98c178bb..f1ddf2304b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java @@ -16,6 +16,8 @@ import java.util.List; import java.util.TreeMap; +import org.apache.commons.lang3.StringUtils; + import jakarta.ejb.EJB; import jakarta.faces.view.ViewScoped; import jakarta.inject.Inject; @@ -62,14 +64,14 @@ public void setMetadataBlocksForEdit(TreeMap> public DatasetVersionUI initDatasetVersionUI(DatasetVersion datasetVersion, boolean createBlanks) { /*takes in the values of a dataset version and apportions them into lists for - viewing and editng in the dataset page. + viewing and editing in the dataset page. */ setDatasetVersion(datasetVersion); //this.setDatasetAuthors(new ArrayList()); this.setDatasetRelPublications(new ArrayList<>()); - // loop through vaues to get fields for view mode + // loop through values to get fields for view mode for (DatasetField dsf : datasetVersion.getDatasetFields()) { //Special Handling for various fields displayed above tabs in dataset page view. 
if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.title)) { @@ -114,17 +116,23 @@ public DatasetVersionUI initDatasetVersionUI(DatasetVersion datasetVersion, boo datasetRelPublication.setTitle(dsf.getDatasetFieldType().getLocaleTitle()); datasetRelPublication.setDescription(dsf.getDatasetFieldType().getLocaleDescription()); for (DatasetField subField : relPubVal.getChildDatasetFields()) { - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationCitation)) { - datasetRelPublication.setText(subField.getValue()); - } - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationIDNumber)) { - datasetRelPublication.setIdNumber(subField.getValue()); - } - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationIDType)) { - datasetRelPublication.setIdType(subField.getValue()); - } - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) { - datasetRelPublication.setUrl(subField.getValue()); + String value = subField.getValue(); + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.publicationCitation: + datasetRelPublication.setText(subField.getValue()); + break; + case DatasetFieldConstant.publicationIDNumber: + datasetRelPublication.setIdNumber(subField.getValue()); + break; + case DatasetFieldConstant.publicationIDType: + datasetRelPublication.setIdType(subField.getValue()); + break; + case DatasetFieldConstant.publicationURL: + datasetRelPublication.setUrl(subField.getValue()); + break; + case DatasetFieldConstant.publicationRelationType: + datasetRelPublication.setRelationType(subField.getValue()); + break; } } this.getDatasetRelPublications().add(datasetRelPublication); @@ -263,6 +271,18 @@ public String getRelPublicationUrl() { } } + public String getRelPublicationRelationType() { + if (!this.datasetRelPublications.isEmpty()) { + //Add ': ' formatting if relationType exists + String relationType = this.getDatasetRelPublications().get(0).getRelationType(); + if (!StringUtils.isBlank(relationType)) { + return relationType + ": "; + } + } + return ""; + + } + public String getUNF() { //todo get UNF to calculate and display here. 
return ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 31fd775ffdf..91b15f77111 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -361,7 +361,15 @@ public String getDataverseLogoThumbnailAsBase64ById(Long dvId) { } return null; } - + + public String getDataverseLogoThumbnailAsUrl(Long dvId) { + File dataverseLogoFile = getLogoById(dvId); + if (dataverseLogoFile != null && dataverseLogoFile.exists()) { + return SystemConfig.getDataverseSiteUrlStatic() + "/api/access/dvCardImage/" + dvId; + } + return null; + } + private File getLogo(Dataverse dataverse) { if (dataverse.getId() == null) { return null; diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 993cb02b66b..87997731642 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -2092,6 +2092,12 @@ public void handleFileUpload(FileUploadEvent event) throws IOException { errorMessages.add(cex.getMessage()); uploadComponentId = event.getComponent().getClientId(); return; + } finally { + try { + uFile.delete(); + } catch (IOException ioex) { + logger.warning("Failed to delete temp file uploaded via PrimeFaces " + uFile.getFileName()); + } } /*catch (FileExceedsMaxSizeException ex) { logger.warning("Failed to process and/or save the file " + uFile.getFileName() + "; " + ex.getMessage()); @@ -2121,8 +2127,12 @@ public void handleFileUpload(FileUploadEvent event) throws IOException { } /** - * Using information from the DropBox choose, ingest the chosen files - * https://www.dropbox.com/developers/dropins/chooser/js + * External, aka "Direct" Upload. + * The file(s) have been uploaded to physical storage (such as S3) directly, + * this call is to create and add the DataFiles to the Dataset on the Dataverse + * side. The method does NOT finalize saving the datafiles in the database - + * that will happen when the user clicks 'Save', similar to how the "normal" + * uploads are handled. * * @param event */ diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java new file mode 100644 index 00000000000..c90fdc6edc2 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java @@ -0,0 +1,110 @@ +package edu.harvard.iq.dataverse; + +import jakarta.persistence.Column; +import jakarta.persistence.Index; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.Table; +import java.io.Serializable; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; + +/** + * + * @author landreev + * + * The name of the class is provisional. I'm open to better-sounding alternatives, + * if anyone can think of any. + * But I wanted to avoid having the word "Globus" in the entity name. I'm adding + * it specifically for the Globus use case. But I'm guessing there's a chance + * this setup may come in handy for other types of datafile uploads that happen + * externally. (?) 
+ */ +@NamedQueries({ + @NamedQuery(name = "ExternalFileUploadInProgress.deleteByTaskId", + query = "DELETE FROM ExternalFileUploadInProgress f WHERE f.taskId=:taskId"), + @NamedQuery(name = "ExternalFileUploadInProgress.findByTaskId", + query = "SELECT f FROM ExternalFileUploadInProgress f WHERE f.taskId=:taskId")}) +@Entity +@Table(indexes = {@Index(columnList="taskid")}) +public class ExternalFileUploadInProgress implements Serializable { + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + /** + * Rather than saving various individual fields defining the datafile, + * which would essentially replicate the DataFile table, we are simply + * storing the full json record as passed to the API here. + */ + @Column(columnDefinition = "TEXT", nullable=false) + private String fileInfo; + + /** + * This is Globus-specific task id associated with the upload in progress + */ + @Column(nullable=false) + private String taskId; + + public ExternalFileUploadInProgress() { + } + + public ExternalFileUploadInProgress(String taskId, String fileInfo) { + this.taskId = taskId; + this.fileInfo = fileInfo; + } + + public String getFileInfo() { + return fileInfo; + } + + public void setFileInfo(String fileInfo) { + this.fileInfo = fileInfo; + } + + public String getTaskId() { + return taskId; + } + + public void setTaskId(String taskId) { + this.taskId = taskId; + } + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof ExternalFileUploadInProgress)) { + return false; + } + ExternalFileUploadInProgress other = (ExternalFileUploadInProgress) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.ExternalFileUploadInProgress[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java b/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java index 0b7285c017e..8c4fb6b1325 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java @@ -12,7 +12,9 @@ public enum ExternalIdentifier { GND("GND", "https://d-nb.info/gnd/%s", "^1[01]?\\d{7}[0-9X]|[47]\\d{6}-\\d|[1-9]\\d{0,7}-[0-9X]|3\\d{7}[0-9X]$"), // note: DAI is missing from this list, because it doesn't have resolvable URL ResearcherID("ResearcherID", "https://publons.com/researcher/%s/", "^[A-Z\\d][A-Z\\d-]+[A-Z\\d]$"), - ScopusID("ScopusID", "https://www.scopus.com/authid/detail.uri?authorId=%s", "^\\d*$"); + ScopusID("ScopusID", "https://www.scopus.com/authid/detail.uri?authorId=%s", "^\\d*$"), + //Requiring ROR to be URL form as we use it where there is no id type field and matching any 9 digit number starting with 0 seems a bit aggressive + ROR("ROR", "https://ror.org/%s", "^(https:\\/\\/ror.org\\/)0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"); private String name; private String template; diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index a542cb52ac0..1c8783c5bd5 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -100,6 +100,13 @@ public String asURL() { } return null; } + + public String asRawIdentifier() { + if (protocol == null || authority == null || identifier == null) { + return ""; + } + return authority + separator + identifier; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index aee51ed573a..2995c0c5f47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -624,6 +624,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio comment )) ; return downloadCompletedMessage; + case GLOBUSUPLOADCOMPLETEDWITHERRORS: dataset = (Dataset) targetObject; messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); @@ -634,8 +635,30 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio comment )) ; return uploadCompletedWithErrorsMessage; + + case GLOBUSUPLOADREMOTEFAILURE: + dataset = (Dataset) targetObject; + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String uploadFailedRemotelyMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.upload.failedRemotely", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalId().asString(), + dataset.getDisplayName(), + comment + )) ; + return uploadFailedRemotelyMessage; - case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: + case GLOBUSUPLOADLOCALFAILURE: + dataset = (Dataset) targetObject; + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String uploadFailedLocallyMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.upload.failedLocally", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalId().asString(), + dataset.getDisplayName(), + comment + )) ; + return uploadFailedLocallyMessage; + + case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: dataset = (Dataset) targetObject; messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); String downloadCompletedWithErrorsMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.download.completedWithErrors", Arrays.asList( @@ -764,6 +787,8 @@ public Object getObjectOfNotification (UserNotification userNotification){ return versionService.find(userNotification.getObjectId()); case GLOBUSUPLOADCOMPLETED: case GLOBUSUPLOADCOMPLETEDWITHERRORS: + case GLOBUSUPLOADREMOTEFAILURE: + case GLOBUSUPLOADLOCALFAILURE: case GLOBUSDOWNLOADCOMPLETED: case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: return datasetService.find(userNotification.getObjectId()); diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 48196591b19..222d2881cd2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -98,6 +98,7 @@ public class SettingsWrapper implements java.io.Serializable { //External Vocabulary support private Map cachedCvocMap = null; private Map cachedCvocByTermFieldMap = null; + private Set cvocFieldSet; private Long zipDownloadLimit = null; @@ -806,6 +807,17 @@ public Map getCVocConf(boolean byTermField) { } } + public boolean isCvocField(Long fieldId) { + + if(cvocFieldSet == null) { + cvocFieldSet = 
fieldService.getCvocFieldSet(); + } + if(cvocFieldSet == null) { + return false; + } + return cvocFieldSet.contains(fieldId); + } + public String getMetricsUrl() { if (metricsUrl == null) { metricsUrl = getValueForKey(SettingsServiceBean.Key.MetricsUrl); diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index c1127079da4..542cf39cfbe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.search.SolrSearchResult; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; @@ -48,6 +49,19 @@ public class ThumbnailServiceWrapper implements java.io.Serializable { private Map dvobjectViewMap = new HashMap<>(); private Map hasThumbMap = new HashMap<>(); + public String getFileCardImageAsUrl(SolrSearchResult result) { + DataFile dataFile = result != null && result.getEntity() != null ? ((DataFile) result.getEntity()) : null; + if (dataFile == null || result.isHarvested() + || !isThumbnailAvailable(dataFile) + || dataFile.isRestricted() + || !dataFile.isReleased() + || FileUtil.isActivelyEmbargoed(dataFile) + || FileUtil.isRetentionExpired(dataFile)) { + return null; + } + return SystemConfig.getDataverseSiteUrlStatic() + "/api/access/datafile/" + dataFile.getId() + "?imageThumb=true"; + } + // it's the responsibility of the user - to make sure the search result // passed to this method is of the Datafile type! public String getFileCardImageAsBase64Url(SolrSearchResult result) { @@ -208,7 +222,13 @@ public String getDatasetCardImageAsUrl(Dataset dataset, Long versionId, boolean public String getDataverseCardImageAsBase64Url(SolrSearchResult result) { return dataverseService.getDataverseLogoThumbnailAsBase64ById(result.getEntityId()); } - + + // it's the responsibility of the user - to make sure the search result + // passed to this method is of the Dataverse type! + public String getDataverseCardImageAsUrl(SolrSearchResult result) { + return dataverseService.getDataverseLogoThumbnailAsUrl(result.getEntityId()); + } + public void resetObjectMaps() { dvobjectThumbnailsMap = new HashMap<>(); dvobjectViewMap = new HashMap<>(); diff --git a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java index 280c2075494..2d37540fab3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java +++ b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java @@ -39,7 +39,8 @@ public enum Type { CHECKSUMIMPORT, CHECKSUMFAIL, CONFIRMEMAIL, APIGENERATED, INGESTCOMPLETED, INGESTCOMPLETEDWITHERRORS, PUBLISHFAILED_PIDREG, WORKFLOW_SUCCESS, WORKFLOW_FAILURE, STATUSUPDATED, DATASETCREATED, DATASETMENTIONED, GLOBUSUPLOADCOMPLETED, GLOBUSUPLOADCOMPLETEDWITHERRORS, - GLOBUSDOWNLOADCOMPLETED, GLOBUSDOWNLOADCOMPLETEDWITHERRORS, REQUESTEDFILEACCESS; + GLOBUSDOWNLOADCOMPLETED, GLOBUSDOWNLOADCOMPLETEDWITHERRORS, REQUESTEDFILEACCESS, + GLOBUSUPLOADREMOTEFAILURE, GLOBUSUPLOADLOCALFAILURE; public String getDescription() { return BundleUtil.getStringFromBundle("notification.typeDescription." 
+ this.name()); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java index 347a8946a46..15114085c21 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java @@ -17,4 +17,8 @@ private ApiConstants() { public static final String DS_VERSION_LATEST = ":latest"; public static final String DS_VERSION_DRAFT = ":draft"; public static final String DS_VERSION_LATEST_PUBLISHED = ":latest-published"; + + // addFiles call + public static final String API_ADD_FILES_COUNT_PROCESSED = "Total number of files"; + public static final String API_ADD_FILES_COUNT_SUCCESSFUL = "Number of files successfully added"; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 034ba4536a1..369a22fe8d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -98,6 +98,7 @@ import java.util.stream.Collectors; import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.dataset.DatasetType; import edu.harvard.iq.dataverse.dataset.DatasetTypeServiceBean; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; @@ -658,7 +659,7 @@ public Response getLinkset(@Context ContainerRequestContext crc, @PathParam("id" } } - @GET + @POST @AuthRequired @Path("{id}/modifyRegistration") public Response updateDatasetTargetURL(@Context ContainerRequestContext crc, @PathParam("id") String id ) { @@ -706,7 +707,7 @@ public Response updateDatasetPIDMetadata(@Context ContainerRequestContext crc, @ }, getRequestUser(crc)); } - @GET + @POST @AuthRequired @Path("/modifyRegistrationPIDMetadataAll") public Response updateDatasetPIDMetadataAll(@Context ContainerRequestContext crc) { @@ -2076,10 +2077,16 @@ public Response getLinks(@Context ContainerRequestContext crc, @PathParam("id") List dvsThatLinkToThisDatasetId = dataverseSvc.findDataversesThatLinkToThisDatasetId(datasetId); JsonArrayBuilder dataversesThatLinkToThisDatasetIdBuilder = Json.createArrayBuilder(); for (Dataverse dataverse : dvsThatLinkToThisDatasetId) { - dataversesThatLinkToThisDatasetIdBuilder.add(dataverse.getAlias() + " (id " + dataverse.getId() + ")"); + JsonObjectBuilder datasetBuilder = Json.createObjectBuilder(); + datasetBuilder.add("id", dataverse.getId()); + datasetBuilder.add("alias", dataverse.getAlias()); + datasetBuilder.add("displayName", dataverse.getDisplayName()); + dataversesThatLinkToThisDatasetIdBuilder.add(datasetBuilder.build()); } JsonObjectBuilder response = Json.createObjectBuilder(); - response.add("dataverses that link to dataset id " + datasetId, dataversesThatLinkToThisDatasetIdBuilder); + response.add("id", datasetId); + response.add("identifier", dataset.getIdentifier()); + response.add("linked-dataverses", dataversesThatLinkToThisDatasetIdBuilder); return ok(response); } catch (WrappedResponse wr) { return wr.getResponse(); @@ -3913,7 +3920,7 @@ public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathP if (!systemConfig.isGlobusUpload()) { return error(Response.Status.SERVICE_UNAVAILABLE, - BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); + BundleUtil.getStringFromBundle("file.api.globusUploadDisabled")); } // ------------------------------------- @@ -4013,10 
+4020,6 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); - if (!systemConfig.isHTTPUpload()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); - } - // ------------------------------------- // (1) Get the user from the API key // ------------------------------------- @@ -4039,6 +4042,32 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, return wr.getResponse(); } + // Is Globus upload service available? + + // ... on this Dataverse instance? + if (!systemConfig.isGlobusUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.globusUploadDisabled")); + } + + // ... and on this specific Dataset? + String storeId = dataset.getEffectiveStorageDriverId(); + // acceptsGlobusTransfers should only be true for an S3 or globus store + if (!GlobusAccessibleStore.acceptsGlobusTransfers(storeId) + && !GlobusAccessibleStore.allowsGlobusReferences(storeId)) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); + } + + // Check if the dataset is already locked + // We are reusing the code and logic used by various command to determine + // if there are any locks on the dataset that would prevent the current + // users from modifying it: + try { + DataverseRequest dataverseRequest = createDataverseRequest(authUser); + permissionService.checkEditDatasetLock(dataset, dataverseRequest, null); + } catch (IllegalCommandException icex) { + return error(Response.Status.FORBIDDEN, "Dataset " + datasetId + " is locked: " + icex.getLocalizedMessage()); + } + JsonObject jsonObject = null; try { jsonObject = JsonUtil.getJsonObject(jsonData); @@ -4069,18 +4098,18 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); } - - ApiToken token = authSvc.findApiTokenByUser(authUser); - if(uriInfo != null) { logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); } - String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); // Async Call - globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); + try { + globusService.globusUpload(jsonObject, dataset, requestUrl, authUser); + } catch (IllegalArgumentException ex) { + return badRequest("Invalid parameters: "+ex.getMessage()); + } return ok("Async call to Globus Upload started "); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index 17e3086f184..0ee146ed99b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -407,6 +407,12 @@ public Response importDataset(@Context ContainerRequestContext crc, String jsonB if (ds.getIdentifier() == null) { return badRequest("Please provide a persistent identifier, either by including it in the JSON, or by using the pid query parameter."); } + + PidProvider pidProvider = PidUtil.getPidProvider(ds.getGlobalId().getProviderId()); + if (pidProvider == null || !pidProvider.canManagePID()) { + return badRequest("Cannot import a dataset that has a PID that doesn't match the server's settings"); + } + boolean shouldRelease = StringUtil.isTrue(releaseParam); DataverseRequest request = createDataverseRequest(u); 
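A note on the two registration endpoints earlier in this Datasets.java hunk: updateDatasetTargetURL ({id}/modifyRegistration) and updateDatasetPIDMetadataAll now answer to POST instead of GET, so clients that previously issued GET requests need to switch verbs. Below is a minimal client-side sketch of the adjusted call, assuming the conventional /api/datasets path prefix and the X-Dataverse-key header; the host, dataset id, and token are placeholders.

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class ModifyRegistrationClient {
        public static void main(String[] args) throws Exception {
            // Placeholder values; substitute a real installation URL, dataset id and API token.
            String baseUrl = "https://dataverse.example.edu";
            String datasetId = "123";
            String apiToken = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";

            HttpClient client = HttpClient.newHttpClient();

            // The target-URL re-registration call is now an HTTP POST (it previously responded to GET).
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create(baseUrl + "/api/datasets/" + datasetId + "/modifyRegistration"))
                    .header("X-Dataverse-key", apiToken)
                    .POST(HttpRequest.BodyPublishers.noBody())
                    .build();

            HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
            System.out.println(response.statusCode() + " " + response.body());
        }
    }
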
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Users.java b/src/main/java/edu/harvard/iq/dataverse/api/Users.java index 1f5430340c2..c1a7c95dbff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Users.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Users.java @@ -24,13 +24,7 @@ import jakarta.ejb.Stateless; import jakarta.json.JsonArray; import jakarta.json.JsonObjectBuilder; -import jakarta.ws.rs.BadRequestException; -import jakarta.ws.rs.DELETE; -import jakarta.ws.rs.GET; -import jakarta.ws.rs.POST; -import jakarta.ws.rs.Path; -import jakarta.ws.rs.PathParam; -import jakarta.ws.rs.Produces; +import jakarta.ws.rs.*; import jakarta.ws.rs.container.ContainerRequestContext; import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.MediaType; @@ -157,7 +151,7 @@ public Response getTokenExpirationDate() { @Path("token/recreate") @AuthRequired @POST - public Response recreateToken(@Context ContainerRequestContext crc) { + public Response recreateToken(@Context ContainerRequestContext crc, @QueryParam("returnExpiration") boolean returnExpiration) { User u = getRequestUser(crc); AuthenticatedUser au; @@ -174,8 +168,12 @@ public Response recreateToken(@Context ContainerRequestContext crc) { ApiToken newToken = authSvc.generateApiTokenForUser(au); authSvc.save(newToken); - return ok("New token for " + au.getUserIdentifier() + " is " + newToken.getTokenString()); + String message = "New token for " + au.getUserIdentifier() + " is " + newToken.getTokenString(); + if (returnExpiration) { + message += " and expires on " + newToken.getExpireTime(); + } + return ok(message); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java index a0e3f899443..48afb2b830a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java @@ -528,6 +528,8 @@ public void displayNotification() { case GLOBUSUPLOADCOMPLETEDWITHERRORS: case GLOBUSDOWNLOADCOMPLETED: case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: + case GLOBUSUPLOADREMOTEFAILURE: + case GLOBUSUPLOADLOCALFAILURE: userNotification.setTheObject(datasetService.find(userNotification.getObjectId())); break; diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 0143fced87c..a470f08f736 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -2139,9 +2139,9 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { logger.log(Level.WARNING, "Dataset not locked for EditInProgress "); } else { datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); - logger.log(Level.INFO, "Removed EditInProgress lock "); + logger.log(Level.FINE, "Removed EditInProgress lock"); } - + try { Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); @@ -2167,8 +2167,8 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } JsonObjectBuilder result = Json.createObjectBuilder() - .add("Total number of files", totalNumberofFiles) - .add("Number of files successfully added", 
successNumberofFiles); + .add(ApiConstants.API_ADD_FILES_COUNT_PROCESSED, totalNumberofFiles) + .add(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFUL, successNumberofFiles); return Response.ok().entity(Json.createObjectBuilder() @@ -2306,7 +2306,7 @@ public Response replaceFiles(String jsonData, Dataset ds, User authUser) { logger.warning("Dataset not locked for EditInProgress "); } else { datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); - logger.info("Removed EditInProgress lock "); + logger.fine("Removed EditInProgress lock "); } try { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index 7b7c5fd0e93..db9dc142506 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -118,9 +118,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { pidProvider.generatePid(theDataset); } - // Attempt the registration if importing dataset through the API, or the app (but not harvest) - handlePid(theDataset, ctxt); - DatasetType defaultDatasetType = ctxt.datasetTypes().getByName(DatasetType.DEFAULT_DATASET_TYPE); DatasetType existingDatasetType = theDataset.getDatasetType(); logger.fine("existing dataset type: " + existingDatasetType); @@ -130,6 +127,11 @@ public Dataset execute(CommandContext ctxt) throws CommandException { } else { theDataset.setDatasetType(defaultDatasetType); } + + // Attempt the registration if importing dataset through the API, or the app (but not harvest) + handlePid(theDataset, ctxt); + + ctxt.em().persist(theDataset); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 69ebe6feed8..fa8cfeb810a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -211,7 +211,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { if (theDataset.getLatestVersion().getVersionState() != RELEASED) { // some imported datasets may already be released. - + theDataset.getLatestVersion().setVersionState(RELEASED); if (!datasetExternallyReleased) { publicizeExternalIdentifier(theDataset, ctxt); // Will throw a CommandException, unless successful. @@ -220,7 +220,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // a failure - it will remove any locks, and it will send a // proper notification to the user(s). 
} - theDataset.getLatestVersion().setVersionState(RELEASED); } final Dataset ds = ctxt.em().merge(theDataset); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java index 5a0ae7cbf5d..8cf2d0109d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java @@ -46,11 +46,13 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { ctxt.em().merge(target); ctxt.em().flush(); for (DataFile df : target.getFiles()) { - doiRetString = pidProvider.modifyIdentifierTargetURL(df); - if (doiRetString != null && doiRetString.contains(df.getIdentifier())) { - df.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); - ctxt.em().merge(df); - ctxt.em().flush(); + if (df.isReleased()) { + doiRetString = pidProvider.modifyIdentifierTargetURL(df); + if (doiRetString != null && doiRetString.contains(df.getIdentifier())) { + df.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); + ctxt.em().merge(df); + ctxt.em().flush(); + } } } } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 768bb88fd43..bb5f5a71e24 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -298,5 +298,5 @@ public boolean onSuccess(CommandContext ctxt, Object r) { ctxt.index().asyncIndexDataset((Dataset) r, true); return true; } - + } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java index 5bf54ac1ec1..14d17dcd900 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java @@ -69,7 +69,8 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { for (DataFile df : target.getFiles()) { if (isFilePIDsEnabled && // using file PIDs and (!(df.getIdentifier() == null || df.getIdentifier().isEmpty()) || // identifier exists, or - canCreatePidsForFiles) // we can create PIDs for files + canCreatePidsForFiles) && // we can create PIDs for files and + df.isReleased() // the file is not a draft ) { doiRetString = pidProvider.updateIdentifier(df); if (doiRetString) { diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java index 8caf32b2df0..c21d6b5cd1a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java @@ -7,6 +7,7 @@ import io.gdcc.spi.export.ExportException; import io.gdcc.spi.export.Exporter; import io.gdcc.spi.export.XMLExporter; +import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.IOException; import java.io.OutputStream; @@ -20,11 +21,7 @@ */ @AutoService(Exporter.class) public class DataCiteExporter implements 
XMLExporter { - - private static String DEFAULT_XML_NAMESPACE = "http://datacite.org/schema/kernel-3"; - private static String DEFAULT_XML_SCHEMALOCATION = "http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd"; - private static String DEFAULT_XML_VERSION = "3.0"; - + public static final String NAME = "Datacite"; @Override @@ -60,17 +57,17 @@ public Boolean isAvailableToUsers() { @Override public String getXMLNameSpace() { - return DataCiteExporter.DEFAULT_XML_NAMESPACE; + return XmlMetadataTemplate.XML_NAMESPACE; } @Override public String getXMLSchemaLocation() { - return DataCiteExporter.DEFAULT_XML_SCHEMALOCATION; + return XmlMetadataTemplate.XML_SCHEMA_LOCATION; } @Override public String getXMLSchemaVersion() { - return DataCiteExporter.DEFAULT_XML_VERSION; + return XmlMetadataTemplate.XML_SCHEMA_VERSION; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 9a689f7a4ed..f5efc448090 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -24,6 +24,8 @@ import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.xml.XmlPrinter; +import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -111,9 +113,9 @@ private static void dtoddi(DatasetDTO datasetDto, OutputStream outputStream) thr xmlw.writeDefaultNamespace("ddi:codebook:2_5"); xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION); - writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION); + xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION); if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { - writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage()); + xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage()); } createStdyDscr(xmlw, datasetDto); createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles()); @@ -133,9 +135,9 @@ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, JsonArray fileDe xmlw.writeDefaultNamespace("ddi:codebook:2_5"); xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION); - writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION); + xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION); if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { - writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage()); + xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage()); } createStdyDscr(xmlw, datasetDto); createFileDscr(xmlw, fileDetails); @@ -186,15 +188,15 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); - writeFullElement(xmlw, "subTitl", dto2Primitive(version, DatasetFieldConstant.subTitle)); + XmlWriterUtil.writeFullElement(xmlw, "titl", 
XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); + XmlWriterUtil.writeFullElement(xmlw, "subTitl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.subTitle)); FieldDTO altField = dto2FieldDTO( version, DatasetFieldConstant.alternativeTitle, "citation" ); if (altField != null) { writeMultipleElement(xmlw, "altTitl", altField, datasetDto.getMetadataLanguage()); } xmlw.writeStartElement("IDNo"); - writeAttribute(xmlw, "agency", persistentAgency); + XmlWriterUtil.writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(pid); @@ -218,23 +220,23 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) boolean excludeRepository = settingsService.isTrueForKey(SettingsServiceBean.Key.ExportInstallationAsDistributorOnlyWhenNotSet, false); if (!StringUtils.isEmpty(datasetDto.getPublisher()) && !(excludeRepository && distributorSet)) { xmlw.writeStartElement("distrbtr"); - writeAttribute(xmlw, "source", "archive"); + xmlw.writeAttribute("source", "archive"); xmlw.writeCharacters(datasetDto.getPublisher()); xmlw.writeEndElement(); //distrbtr } writeDistributorsElement(xmlw, version, datasetDto.getMetadataLanguage()); writeContactsElement(xmlw, version); /* per SCHEMA, depositr comes before depDate! - L.A. */ - writeFullElement(xmlw, "depositr", dto2Primitive(version, DatasetFieldConstant.depositor)); + XmlWriterUtil.writeFullElement(xmlw, "depositr", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.depositor)); /* ... and depDate comes before distDate - L.A. */ - writeFullElement(xmlw, "depDate", dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); - writeFullElement(xmlw, "distDate", dto2Primitive(version, DatasetFieldConstant.distributionDate)); + XmlWriterUtil.writeFullElement(xmlw, "depDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); + XmlWriterUtil.writeFullElement(xmlw, "distDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.distributionDate)); xmlw.writeEndElement(); // diststmt writeSeriesElement(xmlw, version); xmlw.writeStartElement("holdings"); - writeAttribute(xmlw, "URI", pidUri); + XmlWriterUtil.writeAttribute(xmlw, "URI", pidUri); xmlw.writeEndElement(); //holdings xmlw.writeEndElement(); // citation @@ -247,7 +249,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeSubjectElement(xmlw, version, datasetDto.getMetadataLanguage()); //Subject and Keywords writeAbstractElement(xmlw, version, datasetDto.getMetadataLanguage()); // Description writeSummaryDescriptionElement(xmlw, version, datasetDto.getMetadataLanguage()); - writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.notesText)); + XmlWriterUtil.writeFullElement(xmlw, "notes", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.notesText)); //////// xmlw.writeEndElement(); // stdyInfo @@ -255,7 +257,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeDataAccess(xmlw , version); writeOtherStudyMaterial(xmlw , version); - writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); + XmlWriterUtil.writeFullElement(xmlw, "notes", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); xmlw.writeEndElement(); // stdyDscr @@ -274,10 +276,10 @@ private static void writeOtherStudyMaterial(XMLStreamWriter xmlw , DatasetVersio return; } xmlw.writeStartElement("othrStdyMat"); - writeFullElementList(xmlw, 
"relMat", relMaterials); - writeFullElementList(xmlw, "relStdy", relDatasets); + XmlWriterUtil.writeFullElementList(xmlw, "relMat", relMaterials); + XmlWriterUtil.writeFullElementList(xmlw, "relStdy", relDatasets); writeRelPublElement(xmlw, version); - writeFullElementList(xmlw, "othRefs", relReferences); + XmlWriterUtil.writeFullElementList(xmlw, "othRefs", relReferences); xmlw.writeEndElement(); //othrStdyMat } @@ -292,29 +294,29 @@ private static void writeDataAccess(XMLStreamWriter xmlw , DatasetVersionDTO ver xmlw.writeStartElement("dataAccs"); xmlw.writeStartElement("setAvail"); - writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); - writeFullElement(xmlw, "origArch", version.getOriginalArchive()); - writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); - writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); - writeFullElement(xmlw, "complete", version.getStudyCompletion()); + XmlWriterUtil.writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); + XmlWriterUtil.writeFullElement(xmlw, "origArch", version.getOriginalArchive()); + XmlWriterUtil.writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); + XmlWriterUtil.writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); + XmlWriterUtil.writeFullElement(xmlw, "complete", version.getStudyCompletion()); xmlw.writeEndElement(); //setAvail xmlw.writeStartElement("useStmt"); - writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); - writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); - writeFullElement(xmlw, "restrctn", version.getRestrictions()); - writeFullElement(xmlw, "contact", version.getContactForAccess()); - writeFullElement(xmlw, "citReq", version.getCitationRequirements()); - writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); - writeFullElement(xmlw, "conditions", version.getConditions()); - writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); + XmlWriterUtil.writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); + XmlWriterUtil.writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); + XmlWriterUtil.writeFullElement(xmlw, "restrctn", version.getRestrictions()); + XmlWriterUtil.writeFullElement(xmlw, "contact", version.getContactForAccess()); + XmlWriterUtil.writeFullElement(xmlw, "citReq", version.getCitationRequirements()); + XmlWriterUtil.writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); + XmlWriterUtil.writeFullElement(xmlw, "conditions", version.getConditions()); + XmlWriterUtil.writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); xmlw.writeEndElement(); //useStmt /* any s: */ if (version.getTermsOfAccess() != null && !version.getTermsOfAccess().trim().equals("")) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "type", NOTE_TYPE_TERMS_OF_ACCESS); - writeAttribute(xmlw, "level", LEVEL_DV); + xmlw.writeAttribute("type", NOTE_TYPE_TERMS_OF_ACCESS); + xmlw.writeAttribute("level", LEVEL_DV); xmlw.writeCharacters(version.getTermsOfAccess()); xmlw.writeEndElement(); //notes } @@ -341,9 +343,9 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase xmlw.writeStartElement("docDscr"); xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); + XmlWriterUtil.writeFullElement(xmlw, "titl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.title), 
datasetDto.getMetadataLanguage()); xmlw.writeStartElement("IDNo"); - writeAttribute(xmlw, "agency", persistentAgency); + XmlWriterUtil.writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(persistentProtocol + ":" + persistentAuthority + "/" + persistentId); xmlw.writeEndElement(); // IDNo xmlw.writeEndElement(); // titlStmt @@ -351,11 +353,11 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase //The doc is always published by the Dataverse Repository if (!StringUtils.isEmpty(datasetDto.getPublisher())) { xmlw.writeStartElement("distrbtr"); - writeAttribute(xmlw, "source", "archive"); + xmlw.writeAttribute("source", "archive"); xmlw.writeCharacters(datasetDto.getPublisher()); xmlw.writeEndElement(); // distrbtr } - writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); + XmlWriterUtil.writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); xmlw.writeEndElement(); // diststmt writeVersionStatement(xmlw, version); @@ -369,10 +371,10 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase private static void writeVersionStatement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException{ xmlw.writeStartElement("verStmt"); - writeAttribute(xmlw,"source","archive"); + xmlw.writeAttribute("source","archive"); xmlw.writeStartElement("version"); - writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); - writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); + XmlWriterUtil.writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); + XmlWriterUtil.writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); xmlw.writeCharacters(datasetVersionDTO.getVersionNumber().toString()); xmlw.writeEndElement(); // version xmlw.writeEndElement(); // verStmt @@ -523,14 +525,14 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset * "" entries, then all the "" ones: */ for (String nationEntry : nationList) { - writeFullElement(xmlw, "nation", nationEntry); + XmlWriterUtil.writeFullElement(xmlw, "nation", nationEntry); } for (String geogCoverEntry : geogCoverList) { - writeFullElement(xmlw, "geogCover", geogCoverEntry); + XmlWriterUtil.writeFullElement(xmlw, "geogCover", geogCoverEntry); } } - writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); + XmlWriterUtil.writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); /* Only 1 geoBndBox is allowed in the DDI. So, I'm just going to arbitrarily use the first one, and ignore the rest! -L.A. 
*/ @@ -563,16 +565,16 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset */ if (geoBndBoxMap.get("westBL") != null) { - writeFullElement(xmlw, "westBL", geoBndBoxMap.get("westBL")); + XmlWriterUtil.writeFullElement(xmlw, "westBL", geoBndBoxMap.get("westBL")); } if (geoBndBoxMap.get("eastBL") != null) { - writeFullElement(xmlw, "eastBL", geoBndBoxMap.get("eastBL")); + XmlWriterUtil.writeFullElement(xmlw, "eastBL", geoBndBoxMap.get("eastBL")); } if (geoBndBoxMap.get("southBL") != null) { - writeFullElement(xmlw, "southBL", geoBndBoxMap.get("southBL")); + XmlWriterUtil.writeFullElement(xmlw, "southBL", geoBndBoxMap.get("southBL")); } if (geoBndBoxMap.get("northBL") != null) { - writeFullElement(xmlw, "northBL", geoBndBoxMap.get("northBL")); + XmlWriterUtil.writeFullElement(xmlw, "northBL", geoBndBoxMap.get("northBL")); } xmlw.writeEndElement(); @@ -580,7 +582,7 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset /* analyUnit: */ if (unitOfAnalysisDTO != null) { - writeI18NElementList(xmlw, "anlyUnit", unitOfAnalysisDTO.getMultipleVocab(), "unitOfAnalysis", unitOfAnalysisDTO.getTypeClass(), "socialscience", lang); + XmlWriterUtil.writeI18NElementList(xmlw, "anlyUnit", unitOfAnalysisDTO.getMultipleVocab(), "unitOfAnalysis", unitOfAnalysisDTO.getTypeClass(), "socialscience", lang); } @@ -600,16 +602,16 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset private static void writeMultipleElement(XMLStreamWriter xmlw, String element, FieldDTO fieldDTO, String lang) throws XMLStreamException { for (String value : fieldDTO.getMultiplePrimitive()) { //Write multiple lang vals for controlled vocab, otherwise don't include any lang tag - writeFullElement(xmlw, element, value, fieldDTO.isControlledVocabularyField() ? lang : null); + XmlWriterUtil.writeFullElement(xmlw, element, value, fieldDTO.isControlledVocabularyField() ? 
lang : null); } } private static void writeDateElement(XMLStreamWriter xmlw, String element, String cycle, String event, String dateIn) throws XMLStreamException { xmlw.writeStartElement(element); - writeAttribute(xmlw, "cycle", cycle); - writeAttribute(xmlw, "event", event); - writeAttribute(xmlw, "date", dateIn); + XmlWriterUtil.writeAttribute(xmlw, "cycle", cycle); + XmlWriterUtil.writeAttribute(xmlw, "event", event); + XmlWriterUtil.writeAttribute(xmlw, "date", dateIn); xmlw.writeCharacters(dateIn); xmlw.writeEndElement(); @@ -641,15 +643,15 @@ private static void writeDateElement(XMLStreamWriter xmlw, String element, Strin private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO version, String lang) throws XMLStreamException{ xmlw.writeStartElement("method"); xmlw.writeStartElement("dataColl"); - writeI18NElement(xmlw, "timeMeth", version, DatasetFieldConstant.timeMethod,lang); - writeI18NElement(xmlw, "dataCollector", version, DatasetFieldConstant.dataCollector, lang); - writeI18NElement(xmlw, "collectorTraining", version, DatasetFieldConstant.collectorTraining, lang); - writeI18NElement(xmlw, "frequenc", version, DatasetFieldConstant.frequencyOfDataCollection, lang); - writeI18NElement(xmlw, "sampProc", version, DatasetFieldConstant.samplingProcedure, lang); + XmlWriterUtil.writeI18NElement(xmlw, "timeMeth", version, DatasetFieldConstant.timeMethod,lang); + XmlWriterUtil.writeI18NElement(xmlw, "dataCollector", version, DatasetFieldConstant.dataCollector, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collectorTraining", version, DatasetFieldConstant.collectorTraining, lang); + XmlWriterUtil.writeI18NElement(xmlw, "frequenc", version, DatasetFieldConstant.frequencyOfDataCollection, lang); + XmlWriterUtil.writeI18NElement(xmlw, "sampProc", version, DatasetFieldConstant.samplingProcedure, lang); writeTargetSampleElement(xmlw, version); - writeI18NElement(xmlw, "deviat", version, DatasetFieldConstant.deviationsFromSampleDesign, lang); + XmlWriterUtil.writeI18NElement(xmlw, "deviat", version, DatasetFieldConstant.deviationsFromSampleDesign, lang); /* comes before : */ FieldDTO collModeFieldDTO = dto2FieldDTO(version, DatasetFieldConstant.collectionMode, "socialscience"); @@ -658,37 +660,37 @@ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO // Below is a backward compatibility check allowing export to work in // an instance where the metadata block has not been updated yet. 
if (collModeFieldDTO.getMultiple()) { - writeI18NElementList(xmlw, "collMode", collModeFieldDTO.getMultipleVocab(), DatasetFieldConstant.collectionMode, collModeFieldDTO.getTypeClass(), "socialscience", lang); + XmlWriterUtil.writeI18NElementList(xmlw, "collMode", collModeFieldDTO.getMultipleVocab(), DatasetFieldConstant.collectionMode, collModeFieldDTO.getTypeClass(), "socialscience", lang); } else { - writeI18NElement(xmlw, "collMode", version, DatasetFieldConstant.collectionMode, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collMode", version, DatasetFieldConstant.collectionMode, lang); } } /* and so does : */ - writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); + XmlWriterUtil.writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); xmlw.writeStartElement("sources"); - writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); - writeI18NElement(xmlw, "srcOrig", version, DatasetFieldConstant.originOfSources, lang); - writeI18NElement(xmlw, "srcChar", version, DatasetFieldConstant.characteristicOfSources, lang); - writeI18NElement(xmlw, "srcDocu", version, DatasetFieldConstant.accessToSources, lang); + XmlWriterUtil.writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); + XmlWriterUtil.writeI18NElement(xmlw, "srcOrig", version, DatasetFieldConstant.originOfSources, lang); + XmlWriterUtil.writeI18NElement(xmlw, "srcChar", version, DatasetFieldConstant.characteristicOfSources, lang); + XmlWriterUtil.writeI18NElement(xmlw, "srcDocu", version, DatasetFieldConstant.accessToSources, lang); xmlw.writeEndElement(); //sources - writeI18NElement(xmlw, "collSitu", version, DatasetFieldConstant.dataCollectionSituation, lang); - writeI18NElement(xmlw, "actMin", version, DatasetFieldConstant.actionsToMinimizeLoss, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collSitu", version, DatasetFieldConstant.dataCollectionSituation, lang); + XmlWriterUtil.writeI18NElement(xmlw, "actMin", version, DatasetFieldConstant.actionsToMinimizeLoss, lang); /* "" has the uppercase C: */ - writeI18NElement(xmlw, "ConOps", version, DatasetFieldConstant.controlOperations, lang); - writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); - writeI18NElement(xmlw, "cleanOps", version, DatasetFieldConstant.cleaningOperations, lang); + XmlWriterUtil.writeI18NElement(xmlw, "ConOps", version, DatasetFieldConstant.controlOperations, lang); + XmlWriterUtil.writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); + XmlWriterUtil.writeI18NElement(xmlw, "cleanOps", version, DatasetFieldConstant.cleaningOperations, lang); xmlw.writeEndElement(); //dataColl /* before : */ writeNotesElement(xmlw, version); xmlw.writeStartElement("anlyInfo"); - //writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); - writeI18NElement(xmlw, "respRate", version, DatasetFieldConstant.responseRate, lang); - writeI18NElement(xmlw, "EstSmpErr", version, DatasetFieldConstant.samplingErrorEstimates, lang); - writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); + //XmlWriterUtil.writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); + XmlWriterUtil.writeI18NElement(xmlw, "respRate", version, DatasetFieldConstant.responseRate, lang); + XmlWriterUtil.writeI18NElement(xmlw, "EstSmpErr", version, 
DatasetFieldConstant.samplingErrorEstimates, lang); + XmlWriterUtil.writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); xmlw.writeEndElement(); //anlyInfo xmlw.writeEndElement();//method @@ -705,7 +707,7 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO if (CITATION_BLOCK_NAME.equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.subject.equals(fieldDTO.getTypeName())) { - writeI18NElementList(xmlw, "keyword", fieldDTO.getMultipleVocab(), "subject", + XmlWriterUtil.writeI18NElementList(xmlw, "keyword", fieldDTO.getMultipleVocab(), "subject", fieldDTO.getTypeClass(), "citation", lang); } @@ -732,14 +734,10 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!keywordValue.isEmpty()) { xmlw.writeStartElement("keyword"); - if (!keywordVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", keywordVocab); - } - if (!keywordURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", keywordURI); - } + XmlWriterUtil.writeAttribute(xmlw, "vocab", keywordVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", keywordURI); if (lang != null && isCVV) { - writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); xmlw.writeCharacters(ControlledVocabularyValue.getLocaleStrValue(keywordValue, DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, defaultLocale, true)); @@ -753,13 +751,9 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), false); if (translatedValue != null) { xmlw.writeStartElement("keyword"); - if (!keywordVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", keywordVocab); - } - if (!keywordURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", keywordURI); - } - writeAttribute(xmlw, "xml:lang", lang); + XmlWriterUtil.writeAttribute(xmlw, "vocab", keywordVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", keywordURI); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", lang); xmlw.writeCharacters(translatedValue); xmlw.writeEndElement(); // Keyword } @@ -792,14 +786,10 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), } if (!topicClassificationValue.isEmpty()) { xmlw.writeStartElement("topcClas"); - if (!topicClassificationVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", topicClassificationVocab); - } - if (!topicClassificationURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", topicClassificationURI); - } + XmlWriterUtil.writeAttribute(xmlw, "vocab", topicClassificationVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", topicClassificationURI); if (lang != null && isCVV) { - writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); xmlw.writeCharacters(ControlledVocabularyValue.getLocaleStrValue( topicClassificationValue, DatasetFieldConstant.topicClassValue, CITATION_BLOCK_NAME, defaultLocale, true)); @@ -813,13 +803,9 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), CITATION_BLOCK_NAME, new Locale(lang), false); if (translatedValue != null) { xmlw.writeStartElement("topcClas"); - if (!topicClassificationVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", topicClassificationVocab); - } - if (!topicClassificationURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", topicClassificationURI); - } - writeAttribute(xmlw, "xml:lang", lang); + XmlWriterUtil.writeAttribute(xmlw, "vocab", topicClassificationVocab); + 
XmlWriterUtil.writeAttribute(xmlw, "vocabURI", topicClassificationURI); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", lang); xmlw.writeCharacters(translatedValue); xmlw.writeEndElement(); // topcClas } @@ -856,9 +842,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!authorName.isEmpty()){ xmlw.writeStartElement("AuthEnty"); - if(!authorAffiliation.isEmpty()){ - writeAttribute(xmlw,"affiliation",authorAffiliation); - } + XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); xmlw.writeCharacters(authorName); xmlw.writeEndElement(); //AuthEnty } @@ -879,9 +863,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!contributorName.isEmpty()){ xmlw.writeStartElement("othId"); - if(!contributorType.isEmpty()){ - writeAttribute(xmlw,"role", contributorType); - } + XmlWriterUtil.writeAttribute(xmlw,"role", contributorType); xmlw.writeCharacters(contributorName); xmlw.writeEndElement(); //othId } @@ -921,12 +903,8 @@ private static void writeContactsElement(XMLStreamWriter xmlw, DatasetVersionDTO // TODO: Since datasetContactEmail is a required field but datasetContactName is not consider not checking if datasetContactName is empty so we can write out datasetContactEmail. if (!datasetContactName.isEmpty()){ xmlw.writeStartElement("contact"); - if(!datasetContactAffiliation.isEmpty()){ - writeAttribute(xmlw,"affiliation",datasetContactAffiliation); - } - if(!datasetContactEmail.isEmpty()){ - writeAttribute(xmlw,"email",datasetContactEmail); - } + XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); xmlw.writeCharacters(datasetContactName); xmlw.writeEndElement(); //AuthEnty } @@ -969,15 +947,9 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } if (!producerName.isEmpty()) { xmlw.writeStartElement("producer"); - if (!producerAffiliation.isEmpty()) { - writeAttribute(xmlw, "affiliation", producerAffiliation); - } - if (!producerAbbreviation.isEmpty()) { - writeAttribute(xmlw, "abbr", producerAbbreviation); - } - /*if (!producerLogo.isEmpty()) { - writeAttribute(xmlw, "role", producerLogo); - }*/ + XmlWriterUtil.writeAttribute(xmlw, "affiliation", producerAffiliation); + XmlWriterUtil.writeAttribute(xmlw, "abbr", producerAbbreviation); + //XmlWriterUtil.writeAttribute(xmlw, "role", producerLogo); xmlw.writeCharacters(producerName); xmlw.writeEndElement(); //AuthEnty } @@ -987,7 +959,7 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } } } - writeFullElement(xmlw, "prodDate", dto2Primitive(version, DatasetFieldConstant.productionDate)); + XmlWriterUtil.writeFullElement(xmlw, "prodDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.productionDate)); // productionPlace was made multiple as of 5.14: // (a quick backward compatibility check was added to dto2PrimitiveList(), // see the method for details) @@ -1033,17 +1005,11 @@ private static void writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersio if (!distributorName.isEmpty()) { xmlw.writeStartElement("distrbtr"); if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); - } - if (!distributorAffiliation.isEmpty()) { - writeAttribute(xmlw, "affiliation", distributorAffiliation); - } - if (!distributorAbbreviation.isEmpty()) { - writeAttribute(xmlw, "abbr", distributorAbbreviation); - } - if (!distributorURL.isEmpty()) { - writeAttribute(xmlw, 
"URI", distributorURL); + xmlw.writeAttribute("xml:lang", lang); } + XmlWriterUtil.writeAttribute(xmlw, "affiliation", distributorAffiliation); + XmlWriterUtil.writeAttribute(xmlw, "abbr", distributorAbbreviation); + XmlWriterUtil.writeAttribute(xmlw, "URI", distributorURL); xmlw.writeCharacters(distributorName); xmlw.writeEndElement(); //AuthEnty } @@ -1102,7 +1068,7 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO (In other words - titlStmt is mandatory! -L.A.) */ xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", citation); + XmlWriterUtil.writeFullElement(xmlw, "titl", citation); if (IDNo != null && !IDNo.trim().equals("")) { xmlw.writeStartElement("IDNo"); @@ -1115,7 +1081,7 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO xmlw.writeEndElement(); // titlStmt - writeFullElement(xmlw,"biblCit",citation); + XmlWriterUtil.writeFullElement(xmlw,"biblCit",citation); xmlw.writeEndElement(); //citation if (url != null && !url.trim().equals("") ) { xmlw.writeStartElement("ExtLink"); @@ -1163,11 +1129,9 @@ private static void writeAbstractElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!descriptionText.isEmpty()){ xmlw.writeStartElement("abstract"); - if(!descriptionDate.isEmpty()){ - writeAttribute(xmlw,"date",descriptionDate); - } + XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); + xmlw.writeAttribute("xml:lang", lang); } xmlw.writeCharacters(descriptionText); xmlw.writeEndElement(); //abstract @@ -1200,9 +1164,7 @@ private static void writeGrantElement(XMLStreamWriter xmlw, DatasetVersionDTO da } if (!grantNumber.isEmpty()){ xmlw.writeStartElement("grantNo"); - if(!grantAgency.isEmpty()){ - writeAttribute(xmlw,"agency",grantAgency); - } + XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); xmlw.writeCharacters(grantNumber); xmlw.writeEndElement(); //grantno } @@ -1234,9 +1196,7 @@ private static void writeOtherIdElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!otherId.isEmpty()){ xmlw.writeStartElement("IDNo"); - if(!otherIdAgency.isEmpty()){ - writeAttribute(xmlw,"agency",otherIdAgency); - } + XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); xmlw.writeCharacters(otherId); xmlw.writeEndElement(); //IDNo } @@ -1268,9 +1228,7 @@ private static void writeSoftwareElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!softwareName.isEmpty()){ xmlw.writeStartElement("software"); - if(!softwareVersion.isEmpty()){ - writeAttribute(xmlw,"version",softwareVersion); - } + XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); xmlw.writeCharacters(softwareName); xmlw.writeEndElement(); //software } @@ -1383,12 +1341,8 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da } if (!notesText.isEmpty()) { xmlw.writeStartElement("notes"); - if(!notesType.isEmpty()){ - writeAttribute(xmlw,"type",notesType); - } - if(!notesSubject.isEmpty()){ - writeAttribute(xmlw,"subject",notesSubject); - } + XmlWriterUtil.writeAttribute(xmlw,"type",notesType); + XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); xmlw.writeCharacters(notesText); xmlw.writeEndElement(); } @@ -1412,14 +1366,14 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos // and observations, etc.) 
if (fileDTo.getDataFile().getDataTables() == null || fileDTo.getDataFile().getDataTables().isEmpty()) { xmlw.writeStartElement("otherMat"); - writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId()); + XmlWriterUtil.writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId()); String pidURL = fileDTo.getDataFile().getPidURL(); if (pidURL != null && !pidURL.isEmpty()){ - writeAttribute(xmlw, "URI", pidURL); + xmlw.writeAttribute("URI", pidURL); } else { - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileDTo.getDataFile().getId()); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileDTo.getDataFile().getId()); } - writeAttribute(xmlw, "level", "datafile"); + xmlw.writeAttribute("level", "datafile"); xmlw.writeStartElement("labl"); xmlw.writeCharacters(fileDTo.getDataFile().getFilename()); xmlw.writeEndElement(); // labl @@ -1430,9 +1384,9 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos String contentType = fileDTo.getDataFile().getContentType(); if (!StringUtilisEmpty(contentType)) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_CONTENTTYPE); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_CONTENTTYPE); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_CONTENTTYPE); + xmlw.writeAttribute("subject", NOTE_SUBJECT_CONTENTTYPE); xmlw.writeCharacters(contentType); xmlw.writeEndElement(); // notes } @@ -1460,14 +1414,14 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA // and observations, etc.) if (!fileJson.containsKey("dataTables")) { xmlw.writeStartElement("otherMat"); - writeAttribute(xmlw, "ID", "f" + fileJson.getJsonNumber(("id").toString())); + xmlw.writeAttribute("ID", "f" + fileJson.getJsonNumber(("id").toString())); if (fileJson.containsKey("pidUrl")){ - writeAttribute(xmlw, "URI", fileJson.getString("pidUrl")); + XmlWriterUtil.writeAttribute(xmlw, "URI", fileJson.getString("pidUrl")); } else { - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileJson.getJsonNumber("id").toString()); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileJson.getJsonNumber("id").toString()); } - writeAttribute(xmlw, "level", "datafile"); + xmlw.writeAttribute("level", "datafile"); xmlw.writeStartElement("labl"); xmlw.writeCharacters(fileJson.getString("filename")); xmlw.writeEndElement(); // labl @@ -1482,9 +1436,9 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA // specially formatted notes section: if (fileJson.containsKey("contentType")) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_CONTENTTYPE); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_CONTENTTYPE); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_CONTENTTYPE); + xmlw.writeAttribute("subject", NOTE_SUBJECT_CONTENTTYPE); xmlw.writeCharacters(fileJson.getString("contentType")); xmlw.writeEndElement(); // notes } @@ -1502,33 +1456,7 @@ private static void writeFileDescription(XMLStreamWriter xmlw, FileDTO fileDTo) xmlw.writeEndElement(); // txt } - private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { - for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { - MetadataBlockDTO value = entry.getValue(); - for (FieldDTO fieldDTO : value.getFields()) { - if 
(datasetFieldTypeName.equals(fieldDTO.getTypeName())) { - return fieldDTO.getSinglePrimitive(); - } - } - } - return null; - } - - private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName, Locale locale) { - for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { - MetadataBlockDTO value = entry.getValue(); - for (FieldDTO fieldDTO : value.getFields()) { - if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { - String rawVal = fieldDTO.getSinglePrimitive(); - if (fieldDTO.isControlledVocabularyField()) { - return ControlledVocabularyValue.getLocaleStrValue(rawVal, datasetFieldTypeName, value.getName(), - locale, false); - } - } - } - } - return null; - } + private static List dto2PrimitiveList(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { @@ -1562,104 +1490,6 @@ private static FieldDTO dto2FieldDTO(DatasetVersionDTO datasetVersionDTO, String return null; } - private static void writeFullElementList(XMLStreamWriter xmlw, String name, List values) throws XMLStreamException { - //For the simplest Elements we can - if (values != null && !values.isEmpty()) { - for (String value : values) { - xmlw.writeStartElement(name); - xmlw.writeCharacters(value); - xmlw.writeEndElement(); // labl - } - } - } - - private static void writeI18NElementList(XMLStreamWriter xmlw, String name, List values, - String fieldTypeName, String fieldTypeClass, String metadataBlockName, String lang) - throws XMLStreamException { - - if (values != null && !values.isEmpty()) { - Locale defaultLocale = Locale.getDefault(); - for (String value : values) { - if (fieldTypeClass.equals("controlledVocabulary")) { - String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, defaultLocale, false); - if (localeVal != null) { - - value = localeVal; - writeFullElement(xmlw, name, value, defaultLocale.getLanguage()); - } else { - writeFullElement(xmlw, name, value); - } - } else { - writeFullElement(xmlw, name, value); - } - } - if (lang != null && !defaultLocale.getLanguage().equals(lang)) { - // Get values in dataset metadata language - // Loop before testing fieldTypeClass to be ready for external CVV - for (String value : values) { - if (fieldTypeClass.equals("controlledVocabulary")) { - String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, new Locale(lang), false); - if (localeVal != null) { - writeFullElement(xmlw, name, localeVal, lang); - } - } - } - } - } - } - - private static void writeI18NElement(XMLStreamWriter xmlw, String name, DatasetVersionDTO version, - String fieldTypeName, String lang) throws XMLStreamException { - // Get the default value - String val = dto2Primitive(version, fieldTypeName); - Locale defaultLocale = Locale.getDefault(); - // Get the language-specific value for the default language - // A null value is returned if this is not a CVV field - String localeVal = dto2Primitive(version, fieldTypeName, defaultLocale); - String requestedLocaleVal = null; - if (lang != null && localeVal != null && !defaultLocale.getLanguage().equals(lang)) { - // Also get the value in the requested locale/lang if that's not the default - // lang. 
- requestedLocaleVal = dto2Primitive(version, fieldTypeName, new Locale(lang)); - } - // FWIW locale-specific vals will only be non-null for CVV values (at present) - if (localeVal == null && requestedLocaleVal == null) { - // Not CVV/no translations so print without lang tag - writeFullElement(xmlw, name, val); - } else { - // Print in either/both languages if we have values - if (localeVal != null) { - // Print the value for the default locale with it's own lang tag - writeFullElement(xmlw, name, localeVal, defaultLocale.getLanguage()); - } - // Also print in the request lang (i.e. the metadata language for the dataset) if a value exists, print it with a lang tag - if (requestedLocaleVal != null) { - writeFullElement(xmlw, name, requestedLocaleVal, lang); - } - } - } - - private static void writeFullElement(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { - writeFullElement(xmlw, name, value, null); - } - - private static void writeFullElement (XMLStreamWriter xmlw, String name, String value, String lang) throws XMLStreamException { - //For the simplest Elements we can - if (!StringUtilisEmpty(value)) { - xmlw.writeStartElement(name); - if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); - } - xmlw.writeCharacters(value); - xmlw.writeEndElement(); // labl - } - } - - private static void writeAttribute(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { - if (!StringUtilisEmpty(value)) { - xmlw.writeAttribute(name, value); - } - } private static boolean StringUtilisEmpty(String str) { if (str == null || str.trim().equals("")) { @@ -1747,14 +1577,14 @@ public static void createDataDscr(XMLStreamWriter xmlw, JsonArray fileDetails) t } private static void createVarGroupDDI(XMLStreamWriter xmlw, JsonObject varGrp) throws XMLStreamException { xmlw.writeStartElement("varGrp"); - writeAttribute(xmlw, "ID", "VG" + varGrp.getJsonNumber("id").toString()); + xmlw.writeAttribute("ID", "VG" + varGrp.getJsonNumber("id").toString()); String vars = ""; JsonArray varsInGroup = varGrp.getJsonArray("dataVariableIds"); for (int j=0;j sumStat : dvar.getJsonObject("summaryStatistics").entrySet()) { xmlw.writeStartElement("sumStat"); - writeAttribute(xmlw, "type", sumStat.getKey()); + XmlWriterUtil.writeAttribute(xmlw, "type", sumStat.getKey()); xmlw.writeCharacters(((JsonString)sumStat.getValue()).getString()); xmlw.writeEndElement(); // sumStat } @@ -1917,7 +1747,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f JsonObject varCat = varCats.getJsonObject(i); xmlw.writeStartElement("catgry"); if (varCat.getBoolean("isMissing")) { - writeAttribute(xmlw, "missing", "Y"); + xmlw.writeAttribute("missing", "Y"); } // catValu @@ -1928,7 +1758,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // label if (varCat.containsKey("label")) { xmlw.writeStartElement("labl"); - writeAttribute(xmlw, "level", "category"); + xmlw.writeAttribute("level", "category"); xmlw.writeCharacters(varCat.getString("label")); xmlw.writeEndElement(); // labl } @@ -1936,7 +1766,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // catStat if (varCat.containsKey("frequency")) { xmlw.writeStartElement("catStat"); - writeAttribute(xmlw, "type", "freq"); + xmlw.writeAttribute("type", "freq"); Double freq = varCat.getJsonNumber("frequency").doubleValue(); // if frequency is actually a long value, we want to write "100" instead of // "100.0" @@ 
-1955,8 +1785,8 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f JsonObject cm = catMetas.getJsonObject(j); if (cm.getString("categoryValue").equals(varCat.getString("value"))) { xmlw.writeStartElement("catStat"); - writeAttribute(xmlw, "wgtd", "wgtd"); - writeAttribute(xmlw, "type", "freq"); + xmlw.writeAttribute("wgtd", "wgtd"); + xmlw.writeAttribute("type", "freq"); xmlw.writeCharacters(cm.getJsonNumber("wFreq").toString()); xmlw.writeEndElement(); // catStat break; @@ -1972,24 +1802,24 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // varFormat xmlw.writeEmptyElement("varFormat"); if(dvar.containsKey("variableFormatType")) { - writeAttribute(xmlw, "type", dvar.getString("variableFormatType").toLowerCase()); + XmlWriterUtil.writeAttribute(xmlw, "type", dvar.getString("variableFormatType").toLowerCase()); } else { throw new XMLStreamException("Illegal Variable Format Type!"); } if(dvar.containsKey("format")) { - writeAttribute(xmlw, "formatname", dvar.getString("format")); + XmlWriterUtil.writeAttribute(xmlw, "formatname", dvar.getString("format")); } //experiment writeAttribute(xmlw, "schema", dv.getFormatSchema()); if(dvar.containsKey("formatCategory")) { - writeAttribute(xmlw, "category", dvar.getString("formatCategory")); + XmlWriterUtil.writeAttribute(xmlw, "category", dvar.getString("formatCategory")); } // notes if (dvar.containsKey("UNF") && !dvar.getString("UNF").isBlank()) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "subject", "Universal Numeric Fingerprint"); - writeAttribute(xmlw, "level", "variable"); - writeAttribute(xmlw, "type", "Dataverse:UNF"); + xmlw.writeAttribute("subject", "Universal Numeric Fingerprint"); + xmlw.writeAttribute("level", "variable"); + xmlw.writeAttribute("type", "Dataverse:UNF"); xmlw.writeCharacters(dvar.getString("UNF")); xmlw.writeEndElement(); //notes } @@ -2020,8 +1850,8 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) } xmlw.writeStartElement("fileDscr"); String fileId = fileJson.getJsonNumber("id").toString(); - writeAttribute(xmlw, "ID", "f" + fileId); - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileId); + xmlw.writeAttribute("ID", "f" + fileId); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileId); xmlw.writeStartElement("fileTxt"); xmlw.writeStartElement("fileName"); @@ -2064,9 +1894,9 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) // (Universal Numeric Fingerprint) signature: if ((dt!=null) && (dt.containsKey("UNF") && !dt.getString("UNF").isBlank())) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_UNF); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_UNF); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_UNF); + xmlw.writeAttribute("subject", NOTE_SUBJECT_UNF); xmlw.writeCharacters(dt.getString("UNF")); xmlw.writeEndElement(); // notes } @@ -2075,9 +1905,9 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) JsonArray tags = fileJson.getJsonArray("tabularTags"); for (int j = 0; j < tags.size(); j++) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_TAG); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_TAG); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_TAG); + xmlw.writeAttribute("subject", 
NOTE_SUBJECT_TAG); xmlw.writeCharacters(tags.getString(j)); xmlw.writeEndElement(); // notes } @@ -2091,13 +1921,7 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) - private static boolean checkParentElement(XMLStreamWriter xmlw, String elementName, boolean elementAdded) throws XMLStreamException { - if (!elementAdded) { - xmlw.writeStartElement(elementName); - } - return true; - } public static void datasetHtmlDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java index 9a2c3085d2d..d201801bc45 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java @@ -30,6 +30,8 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; +import org.apache.commons.lang3.StringUtils; + /** * * @author skraffmi @@ -322,26 +324,35 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO String IDType = ""; String IDNo = ""; String url = ""; + String relationType = null; for (Iterator iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); - if (DatasetFieldConstant.publicationCitation.equals(next.getTypeName())) { - citation = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.publicationIDType.equals(next.getTypeName())) { - IDType = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.publicationIDNumber.equals(next.getTypeName())) { - IDNo = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.publicationURL.equals(next.getTypeName())) { - url = next.getSinglePrimitive(); + switch (next.getTypeName()) { + case DatasetFieldConstant.publicationCitation: + citation = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationIDType: + IDType = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationIDNumber: + IDNo = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationURL: + url = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationRelationType: + relationType = next.getSinglePrimitive(); + break; } } + if(StringUtils.isBlank(relationType)) { + relationType = "isReferencedBy"; + } pubString = appendCommaSeparatedValue(citation, IDType); pubString = appendCommaSeparatedValue(pubString, IDNo); pubString = appendCommaSeparatedValue(pubString, url); if (!pubString.isEmpty()){ - xmlw.writeStartElement(dcFlavor+":"+"isReferencedBy"); + xmlw.writeStartElement(dcFlavor+":" + relationType); xmlw.writeCharacters(pubString); xmlw.writeEndElement(); //relPubl } diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 4b8822e8b66..dd01750942d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -932,6 +932,7 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe String relatedIdentifierType = null; String relatedIdentifier = null; // is used when relatedIdentifierType variable is not URL String relatedURL = null; // is used when 
relatedIdentifierType variable is URL + String relationType = null; // is used when relatedIdentifierType variable is URL for (Iterator iterator = fieldDTOs.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); @@ -944,6 +945,9 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe if (DatasetFieldConstant.publicationURL.equals(next.getTypeName())) { relatedURL = next.getSinglePrimitive(); } + if (DatasetFieldConstant.publicationRelationType.equals(next.getTypeName())) { + relationType = next.getSinglePrimitive(); + } } if (StringUtils.isNotBlank(relatedIdentifierType)) { @@ -956,7 +960,10 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe } relatedIdentifier_map.put("relatedIdentifierType", relatedIdentifierType); - relatedIdentifier_map.put("relationType", "IsCitedBy"); + if(relationType== null) { + relationType = "IsCitedBy"; + } + relatedIdentifier_map.put("relationType", relationType); if (StringUtils.containsIgnoreCase(relatedIdentifierType, "url")) { writeFullElement(xmlw, null, "relatedIdentifier", relatedIdentifier_map, relatedURL, language); @@ -1436,6 +1443,8 @@ public static void writeFundingReferencesElement(XMLStreamWriter xmlw, DatasetVe writeEndTag(xmlw, fundingReference_check); } + + //Duplicates XmlWriterUtil.dto2Primitive private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { // give the single value of the given metadata for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index fb50214c259..ac3c81622fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -22,7 +22,6 @@ import jakarta.json.JsonString; import jakarta.json.JsonValue.ValueType; import jakarta.json.stream.JsonParsingException; -import jakarta.servlet.http.HttpServletRequest; import jakarta.ws.rs.HttpMethod; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; @@ -33,7 +32,6 @@ import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; -import java.net.URLEncoder; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.time.Duration; @@ -53,6 +51,7 @@ import org.primefaces.PrimeFaces; import com.google.gson.Gson; +import edu.harvard.iq.dataverse.api.ApiConstants; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -61,15 +60,26 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.StringUtil; import 
edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.json.JsonReader; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.ws.rs.core.Response; +import org.apache.http.util.EntityUtils; @Stateless @Named("GlobusServiceBean") @@ -81,6 +91,8 @@ public class GlobusServiceBean implements java.io.Serializable { protected SettingsServiceBean settingsSvc; @Inject DataverseSession session; + @Inject + DataverseRequestServiceBean dataverseRequestSvc; @EJB protected AuthenticationServiceBean authSvc; @EJB @@ -92,7 +104,15 @@ public class GlobusServiceBean implements java.io.Serializable { @EJB FileDownloadServiceBean fileDownloadService; @EJB - DataFileServiceBean dataFileService; + DataFileServiceBean dataFileSvc; + @EJB + PermissionServiceBean permissionSvc; + @EJB + IngestServiceBean ingestSvc; + @EJB + SystemConfig systemConfig; + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @@ -130,7 +150,7 @@ private String getRuleId(GlobusEndpoint endpoint, String principal, String permi * @param ruleId - Globus rule id - assumed to be associated with the * dataset's file path (should not be called with a user * specified rule id w/o further checking) - * @param datasetId - the id of the dataset associated with the rule + * @param dataset - the dataset associated with the rule * @param globusLogger - a separate logger instance, may be null */ public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger) { @@ -379,19 +399,33 @@ private void monitorTemporaryPermissions(String ruleId, long datasetId) { * @return * @throws MalformedURLException */ - public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger) throws MalformedURLException { + public GlobusTaskState getTask(String accessToken, String taskId, Logger globusLogger) { + + Logger myLogger = globusLogger != null ? globusLogger : logger; - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); + URL url; + try { + url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); + } catch (MalformedURLException mue) { + myLogger.warning("Malformed URL exception when trying to contact Globus. Globus API url: " + + "https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + + taskId); + return null; + } MakeRequestResponse result = makeRequest(url, "Bearer", accessToken, "GET", null); - GlobusTask task = null; + GlobusTaskState task = null; if (result.status == 200) { - task = parseJson(result.jsonResponse, GlobusTask.class, false); + task = parseJson(result.jsonResponse, GlobusTaskState.class, false); } if (result.status != 200) { - globusLogger.warning("Cannot find information for the task " + taskId + " : Reason : " + // @todo It should probably retry it 2-3 times before giving up; + // similarly, it should probably differentiate between a "no such task" + // response and something intermittent like a server/network error or + // an expired token... i.e. something that's recoverable (?) 
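```java
// A minimal sketch of the retry suggested in the @todo above, reusing the bean's existing
// makeRequest()/parseJson() helpers as they appear in this method; the attempt count, the
// backoff, and the use of HTTP 404 to detect a "no such task" response are illustrative
// assumptions, not part of this patch.
int attempts = 0;
while (task == null && attempts++ < 3) {
    result = makeRequest(url, "Bearer", accessToken, "GET", null);
    if (result.status == 200) {
        task = parseJson(result.jsonResponse, GlobusTaskState.class, false);
    } else if (result.status == 404) {
        break; // "no such task" - nothing to retry
    } else {
        try {
            Thread.sleep(1000L * attempts); // likely transient (network error, expired token)
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            break;
        }
    }
}
```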
+ myLogger.warning("Cannot find information for the task " + taskId + " : Reason : " + result.jsonResponse.toString()); } @@ -633,41 +667,50 @@ private String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List< @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, - AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException { + public void globusUpload(JsonObject jsonData, Dataset dataset, String httpRequestUrl, + AuthenticatedUser authUser) throws IllegalArgumentException, ExecutionException, InterruptedException, MalformedURLException { - Integer countAll = 0; - Integer countSuccess = 0; - Integer countError = 0; - String logTimestamp = logFormatter.format(new Date()); + // Before we do anything else, let's do some basic validation of what + // we've been passed: + + JsonArray filesJsonArray = jsonData.getJsonArray("files"); + + if (filesJsonArray == null || filesJsonArray.size() < 1) { + throw new IllegalArgumentException("No valid json entries supplied for the files being uploaded"); + } + + Date startDate = new Date(); + + String logTimestamp = logFormatter.format(startDate); Logger globusLogger = Logger.getLogger( "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimestamp); - String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusUpload_id_" + dataset.getId() + "_" + logTimestamp + String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusUpload_" + dataset.getId() + "_" + logTimestamp + ".log"; FileHandler fileHandler; - boolean fileHandlerSuceeded; + try { fileHandler = new FileHandler(logFileName); globusLogger.setUseParentHandlers(false); - fileHandlerSuceeded = true; } catch (IOException | SecurityException ex) { Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); - return; + fileHandler = null; } - if (fileHandlerSuceeded) { + if (fileHandler != null) { globusLogger.addHandler(fileHandler); } else { globusLogger = logger; } logger.fine("json: " + JsonUtil.prettyPrint(jsonData)); + + globusLogger.info("Globus upload initiated"); String taskIdentifier = jsonData.getString("taskIdentifier"); GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); - String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + GlobusTaskState taskState = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); + String ruleId = getRuleId(endpoint, taskState.getOwner_id(), "rw"); logger.fine("Found rule: " + ruleId); if (ruleId != null) { Long datasetId = rulesCache.getIfPresent(ruleId); @@ -676,28 +719,109 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S rulesCache.invalidate(ruleId); } } - + // Wait before first check Thread.sleep(5000); - // globus task status check - task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); - String taskStatus = getTaskStatus(task); + + if (FeatureFlags.GLOBUS_USE_EXPERIMENTAL_ASYNC_FRAMEWORK.enabled()) { + + // Save the task information in the database so that the Globus monitoring + // service can continue checking on its progress. 
+ + GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), authUser, ruleId, new Timestamp(startDate.getTime())); + em.persist(taskInProgress); + + // Save the metadata entries that define the files that are being uploaded + // in the database. These entries will be used once/if the upload + // completes successfully to add the files to the dataset. - globusLogger.info("Starting a globusUpload "); + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + ExternalFileUploadInProgress fileUploadRecord = new ExternalFileUploadInProgress(taskIdentifier, fileJsonObject.toString()); + em.persist(fileUploadRecord); + } + + if (fileHandler != null) { + fileHandler.close(); + } + + // return and forget + return; + } + + + // the old implementation that relies on looping continuously, + // sleeping-then-checking the task status repeatedly: + + // globus task status check + // (the following method performs continuous looped checks of the remote + // Globus API, monitoring it for as long as it takes for the task to + // finish one way or another!) + taskState = globusStatusCheck(endpoint, taskIdentifier, globusLogger); + // @todo null check, or make sure it's never null + String taskStatus = GlobusUtil.getTaskStatus(taskState); + + boolean taskSuccess = GlobusUtil.isTaskCompleted(taskState); + + processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, globusLogger, taskSuccess, taskStatus); + + if (fileHandler != null) { + fileHandler.close(); + } + } + /** + * As the name suggests, the method completes and finalizes an upload task, + * whether it completed successfully or failed. (In the latter case, it + * simply sends a failure notification and does some cleanup). + * The method is called in both task monitoring scenarios: the old method, + * that relies on continuous looping, and the new one, implemented on the basis + * of timer-like monitoring from a dedicated monitoring Singleton service. + * @param dataset the dataset + * @param filesJsonArray JsonArray containing files metadata entries as passed to /addGlobusFiles + * @param authUser the user that should be performing the addFiles call + * finalizing adding the files to the Dataset. Note that this + * user will need to be obtained from the saved api token, when this + * method is called via the TaskMonitoringService + * @param ruleId Globus rule/permission id associated with the task + * @param globusLogger the Logger; if null, the main logger of the service bean will be used + * @param taskSuccess whether the task completed successfully + * @param taskStatus human-readable task status label as reported by the Globus API + * the method should not throw any exceptions; all the exceptions thrown + * by the methods within are expected to be intercepted. + */ + private void processCompletedUploadTask(Dataset dataset, + JsonArray filesJsonArray, + AuthenticatedUser authUser, + String ruleId, + Logger globusLogger, + boolean taskSuccess, + String taskStatus) { + + Logger myLogger = globusLogger == null ?
logger : globusLogger; + if (ruleId != null) { // Transfer is complete, so delete rule - deletePermission(ruleId, dataset, globusLogger); - + deletePermission(ruleId, dataset, myLogger); } - + // If success, switch to an EditInProgress lock - do this before removing the // GlobusUpload lock // Keeping a lock through the add datafiles API call avoids a conflicting edit - // and keeps any open dataset page refreshing until the datafile appears - if (!(taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE"))) { - datasetSvc.addDatasetLock(dataset, - new DatasetLock(DatasetLock.Reason.EditInProgress, authUser, "Completing Globus Upload")); + // and keeps any open dataset page refreshing until the datafile appears. + + if (taskSuccess) { + myLogger.info("Finished upload via Globus job."); + + DatasetLock editLock = datasetSvc.addDatasetLock(dataset.getId(), + DatasetLock.Reason.EditInProgress, + (authUser).getId(), + "Completing Globus Upload"); + if (editLock != null) { + dataset.addLock(editLock); + } else { + myLogger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } } DatasetLock gLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); @@ -714,205 +838,260 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S * addFilesAsync method called within the globusUpload method. I.e. it appeared * that the lock removal was not committed/visible outside this method until * globusUpload itself ended. + * (from @landreev:) If I understand the comment above correctly - annotations + * like "@TransactionAttribute(REQUIRES_NEW) do NOT work when you call a method + * directly within the same service bean. Strictly speaking, it's not the + * "within the same bean" part that is the key, rather, these annotations + * only apply when calling a method via an @EJB-defined service. So it + * is generally possible to call another method within FooServiceBean + * with the REQUIRES_NEW transaction taking effect - but then it would need + * to define *itself* as an @EJB - + * @EJB FooServiceBean fooSvc; + * ... + * fooSvc.doSomethingInNewTransaction(...); + * etc. */ datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); } - - if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { - String comment = "Reason : " + taskStatus.split("#")[1] + "
Short Description : " - + taskStatus.split("#")[2]; + + if (!taskSuccess) { + String comment; + if (taskStatus != null) { + comment = "Reason : " + taskStatus.split("#")[1] + "
Short Description : " + + taskStatus.split("#")[2]; + } else { + comment = "No further information available"; + } + + myLogger.info("Globus Upload task failed "); userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), - UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), comment, true); - globusLogger.info("Globus task failed "); + UserNotification.Type.GLOBUSUPLOADREMOTEFAILURE, dataset.getId(), comment, true); } else { try { - // - - List inputList = new ArrayList(); - JsonArray filesJsonArray = jsonData.getJsonArray("files"); - - if (filesJsonArray != null) { - String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" - + dataset.getIdentifierForFileStorage(); - - for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { - - // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from - // externalTool - String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); - String storeId = parts[0]; - // If this is an S3 store, we need to split out the bucket name - String[] bits = parts[1].split(":"); - String bucketName = ""; - if (bits.length > 1) { - bucketName = bits[0]; - } - String fileId = bits[bits.length - 1]; - - // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - // or globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - String fullPath = storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId; - String fileName = fileJsonObject.getString("fileName"); - - inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); - } - - // calculateMissingMetadataFields: checksum, mimetype - JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); - JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - logger.fine("Size: " + newfilesJsonArray.size()); - logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); - JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); - - for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { - - countAll++; - String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String fileName = fileJsonObject.getString("fileName"); - String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); - // If this is an S3 store, we need to split out the bucket name - String[] bits = parts[1].split(":"); - if (bits.length > 1) { - } - String fileId = bits[bits.length - 1]; - - List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) - .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) - .filter(Objects::nonNull).collect(Collectors.toList()); - if (newfileJsonObject != null) { - logger.fine("List Size: " + newfileJsonObject.size()); - // if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { - JsonPatch path = Json.createPatchBuilder() - .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); - fileJsonObject = path.apply(fileJsonObject); - path = Json.createPatchBuilder() - .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); - fileJsonObject = path.apply(fileJsonObject); - jsonDataSecondAPI.add(fileJsonObject); - countSuccess++; - // } else { - // globusLogger.info(fileName - // + " will be skipped from adding to dataset by second API due to missing - // values "); - // countError++; - 
// } - } else { - globusLogger.info(fileName - + " will be skipped from adding to dataset by second API due to missing values "); - countError++; - } - } - - String newjsonData = jsonDataSecondAPI.build().toString(); - - globusLogger.info("Successfully generated new JsonData for Second API call"); - - String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " - + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" - + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; - System.out.println("*******====command ==== " + command); - - // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of - // calling API - - String output = addFilesAsync(command, globusLogger); - if (output.equalsIgnoreCase("ok")) { - // if(!taskSkippedFiles) - if (countError == 0) { - userNotificationService.sendNotification((AuthenticatedUser) authUser, - new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, - dataset.getId(), countSuccess + " files added out of " + countAll, true); - } else { - userNotificationService.sendNotification((AuthenticatedUser) authUser, - new Timestamp(new Date().getTime()), - UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), - countSuccess + " files added out of " + countAll, true); - } - globusLogger.info("Successfully completed api/datasets/:persistentId/addFiles call "); - } else { - globusLogger.log(Level.SEVERE, - "******* Error while executing api/datasets/:persistentId/add call ", command); - } - - } - - globusLogger.info("Files processed: " + countAll.toString()); - globusLogger.info("Files added successfully: " + countSuccess.toString()); - globusLogger.info("Files failures: " + countError.toString()); - globusLogger.info("Finished upload via Globus job."); - + processUploadedFiles(filesJsonArray, dataset, authUser, myLogger); } catch (Exception e) { - logger.info("Exception from globusUpload call "); + logger.info("Exception from processUploadedFiles call "); e.printStackTrace(); - globusLogger.info("Exception from globusUpload call " + e.getMessage()); + myLogger.info("Exception from processUploadedFiles call " + e.getMessage()); datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); } } if (ruleId != null) { - deletePermission(ruleId, dataset, globusLogger); - globusLogger.info("Removed upload permission: " + ruleId); - } - if (fileHandlerSuceeded) { - fileHandler.close(); + deletePermission(ruleId, dataset, myLogger); + myLogger.info("Removed upload permission: " + ruleId); } + //if (fileHandler != null) { + // fileHandler.close(); + //} + } + + + /** + * The code in this method is copy-and-pasted from the previous Borealis + * implementation. + * @todo see if it can be refactored and simplified a bit, the json manipulation + * specifically (?) + * @param filesJsonArray JsonArray containing files metadata entries as passed to /addGlobusFiles + * @param dataset the dataset + * @param authUser the user that should be performing the addFiles call + * finalizing adding the files to the Dataset. Note that this + * user will need to be obtained from the saved api token, when this + * method is called via the TaskMonitoringService + * @param myLogger the Logger; if null, the main logger of the service bean will be used + * @throws IOException, InterruptedException, ExecutionException @todo may need to throw more exceptions (?)
+ */ + private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, AuthenticatedUser authUser, Logger myLogger) throws IOException, InterruptedException, ExecutionException { + myLogger = myLogger != null ? myLogger : logger; + + Integer countAll = 0; + Integer countSuccess = 0; + Integer countError = 0; + Integer countAddFilesSuccess = 0; + String notificationErrorMessage = ""; + + List inputList = new ArrayList(); + + String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" + + dataset.getIdentifierForFileStorage(); + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + + // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from + // externalTool + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + String storeId = parts[0]; + // If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + String bucketName = ""; + if (bits.length > 1) { + bucketName = bits[0]; + } + String fileId = bits[bits.length - 1]; - public String addFilesAsync(String curlCommand, Logger globusLogger) - throws ExecutionException, InterruptedException { - CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { - try { - Thread.sleep(2000); - } catch (InterruptedException e) { - e.printStackTrace(); + // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + // or globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + String fullPath = storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId; + String fileName = fileJsonObject.getString("fileName"); + + inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); + } + + // calculateMissingMetadataFields: checksum, mimetype + JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, myLogger); + JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); + logger.fine("Size: " + newfilesJsonArray.size()); + logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); + JsonArrayBuilder addFilesJsonData = Json.createArrayBuilder(); + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + + countAll++; + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String fileName = fileJsonObject.getString("fileName"); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + // If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + if (bits.length > 1) { } - return (addFiles(curlCommand, globusLogger)); - }, executor).exceptionally(ex -> { - globusLogger.fine("Something went wrong : " + ex.getLocalizedMessage()); - ex.printStackTrace(); - return null; - }); + String fileId = bits[bits.length - 1]; + + List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) + .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) + .filter(Objects::nonNull).collect(Collectors.toList()); + if (newfileJsonObject != null) { + logger.fine("List Size: " + newfileJsonObject.size()); + // if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { + JsonPatch path = Json.createPatchBuilder() + .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); + fileJsonObject = path.apply(fileJsonObject); + path = Json.createPatchBuilder() + .add("/mimeType", 
newfileJsonObject.get(0).getString("mime")).build(); + fileJsonObject = path.apply(fileJsonObject); + addFilesJsonData.add(fileJsonObject); + countSuccess++; + // } else { + // globusLogger.info(fileName + // + " will be skipped from adding to dataset by second API due to missing + // values "); + // countError++; + // } + } else { + myLogger.info(fileName + + " will be skipped from adding to dataset in the final AddReplaceFileHelper.addFiles() call. "); + countError++; + } + } - String result = addFilesFuture.get(); + String newjsonData = addFilesJsonData.build().toString(); - return result; - } + myLogger.info("Successfully generated new JsonData for addFiles call"); - private String addFiles(String curlCommand, Logger globusLogger) { - ProcessBuilder processBuilder = new ProcessBuilder(); - Process process = null; - String line; - String status = ""; + myLogger.info("Files passed to /addGlobusFiles: " + countAll); + myLogger.info("Files processed successfully: " + countSuccess); + myLogger.info("Files failures to process: " + countError); + + if (countSuccess < 1) { + // We don't have any valid entries to call addFiles() for; so, no + // need to proceed. + notificationErrorMessage = "Failed to successfully process any of the file entries, " + + "out of the " + countAll + " total as submitted to Dataverse"; + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADREMOTEFAILURE, + dataset.getId(), notificationErrorMessage, true); + return; + } else if (countSuccess < countAll) { + notificationErrorMessage = "Out of the " + countAll + " file entries submitted to /addGlobusFiles " + + "only " + countSuccess + " could be successfully parsed and processed. "; + } + + // A new AddReplaceFileHelper implementation, replacing the old one that + // was relying on calling /addFiles api via curl: + + // Passing null for the HttpServletRequest to make a new DataverseRequest. + // The parent method is always executed asynchronously, so the real request + // that was associated with the original API call that triggered this upload + // cannot be obtained. + DataverseRequest dataverseRequest = new DataverseRequest(authUser, (HttpServletRequest)null); + + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dataverseRequest, + this.ingestSvc, + this.datasetSvc, + this.dataFileSvc, + this.permissionSvc, + this.commandEngine, + this.systemConfig + ); + + // The old code had 2 sec. of sleep, so ... + Thread.sleep(2000); + + Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser); + + if (addFilesResponse == null) { + logger.info("null response from addFiles call"); + //@todo add this case to the user notification in case of error + return; + } + + JsonObject addFilesJsonObject = JsonUtil.getJsonObject(addFilesResponse.getEntity().toString()); + + // @todo null check? 
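```java
// One way the @todo above could be addressed (illustrative only): bail out, and ideally
// notify the user, if the addFiles response could not be parsed at all.
if (addFilesJsonObject == null) {
    myLogger.severe("Could not parse the response from the addFiles call");
    return;
}
```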
+ String addFilesStatus = addFilesJsonObject.getString("status", null); + myLogger.info("addFilesResponse status: " + addFilesStatus); + + if (ApiConstants.STATUS_OK.equalsIgnoreCase(addFilesStatus)) { + if (addFilesJsonObject.containsKey("data") && addFilesJsonObject.getJsonObject("data").containsKey("Result")) { + + //Integer countAddFilesTotal = addFilesJsonObject.getJsonObject("data").getJsonObject("Result").getInt(ApiConstants.API_ADD_FILES_COUNT_PROCESSED, -1); + countAddFilesSuccess = addFilesJsonObject.getJsonObject("data").getJsonObject("Result").getInt(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFUL, -1); + myLogger.info("Files successfully added by addFiles(): " + countAddFilesSuccess); - try { - globusLogger.info("Call to : " + curlCommand); - processBuilder.command("bash", "-c", curlCommand); - process = processBuilder.start(); - process.waitFor(); - - BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream())); - - StringBuilder sb = new StringBuilder(); - while ((line = br.readLine()) != null) - sb.append(line); - globusLogger.info(" API Output : " + sb.toString()); - JsonObject jsonObject = null; - jsonObject = JsonUtil.getJsonObject(sb.toString()); - - status = jsonObject.getString("status"); - } catch (Exception ex) { - if (ex instanceof JsonParsingException) { - globusLogger.log(Level.SEVERE, "Error parsing dataset json."); } else { + myLogger.warning("Malformed addFiles response json: " + addFilesJsonObject.toString()); + notificationErrorMessage = "Malformed response received when attempting to add the files to the dataset. "; } + + myLogger.info("Completed addFiles call "); + } else if (ApiConstants.STATUS_ERROR.equalsIgnoreCase(addFilesStatus)) { + String addFilesMessage = addFilesJsonObject.getString("message", null); + + myLogger.log(Level.SEVERE, + "******* Error while executing addFiles ", newjsonData); + myLogger.log(Level.SEVERE, "****** Output from addFiles: ", addFilesMessage); + notificationErrorMessage += "Error response received when attempting to add the files to the dataset: " + addFilesMessage + " "; + } else { - globusLogger.log(Level.SEVERE, - "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); + myLogger.log(Level.SEVERE, + "******* Error while executing addFiles ", newjsonData); + notificationErrorMessage += "Unexpected error encountered when attempting to add the files to the dataset."; + } + + // if(!taskSkippedFiles) + if (countAddFilesSuccess == countAll) { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, + dataset.getId(), countSuccess + " files added out of " + countAll, true); + } else if (countAddFilesSuccess > 0) { + // success, but partial: + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), + UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), + countSuccess + " files added out of " + countAll + notificationErrorMessage, true); + } else { + notificationErrorMessage = "".equals(notificationErrorMessage) + ? " No additional information is available."
: notificationErrorMessage; + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), + UserNotification.Type.GLOBUSUPLOADLOCALFAILURE, dataset.getId(), + notificationErrorMessage, true); } - return status; } - + @Asynchronous public void globusDownload(String jsonData, Dataset dataset, User authUser) throws MalformedURLException { @@ -958,7 +1137,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro // If the rules_cache times out, the permission will be deleted. Presumably that // doesn't affect a // globus task status check - GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); + GlobusTaskState task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); String ruleId = getRuleId(endpoint, task.getOwner_id(), "r"); if (ruleId != null) { logger.fine("Found rule: " + ruleId); @@ -974,7 +1153,8 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro logger.warning("ruleId not found for taskId: " + taskIdentifier); } task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); - String taskStatus = getTaskStatus(task); + // @todo null check? + String taskStatus = GlobusUtil.getTaskStatus(task); // Transfer is done (success or failure) so delete the rule if (ruleId != null) { @@ -1008,76 +1188,29 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro Executor executor = Executors.newFixedThreadPool(10); - private GlobusTask globusStatusCheck(GlobusEndpoint endpoint, String taskId, Logger globusLogger) + private GlobusTaskState globusStatusCheck(GlobusEndpoint endpoint, String taskId, Logger globusLogger) throws MalformedURLException { - boolean taskCompletion = false; - String status = ""; - GlobusTask task = null; + boolean taskCompleted = false; + GlobusTaskState task = null; int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 50); do { try { globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(pollingInterval * 1000); + // Call the (centralized) Globus API to check on the task state/status: task = getTask(endpoint.getClientToken(), taskId, globusLogger); - if (task != null) { - status = task.getStatus(); - if (status != null) { - // The task is in progress. - if (status.equalsIgnoreCase("ACTIVE")) { - if (task.getNice_status().equalsIgnoreCase("ok") - || task.getNice_status().equalsIgnoreCase("queued")) { - taskCompletion = false; - } else { - taskCompletion = true; - // status = "FAILED" + "#" + task.getNice_status() + "#" + - // task.getNice_status_short_description(); - } - } else { - // The task is either succeeded, failed or inactive. 
- taskCompletion = true; - // status = status + "#" + task.getNice_status() + "#" + - // task.getNice_status_short_description(); - } - } else { - // status = "FAILED"; - taskCompletion = true; - } - } else { - // status = "FAILED"; - taskCompletion = true; - } + taskCompleted = GlobusUtil.isTaskCompleted(task); } catch (Exception ex) { ex.printStackTrace(); } - } while (!taskCompletion); + } while (!taskCompleted); globusLogger.info("globus transfer task completed successfully"); return task; } - - private String getTaskStatus(GlobusTask task) { - String status = null; - if (task != null) { - status = task.getStatus(); - if (status != null) { - // The task is in progress but is not ok or queued - if (status.equalsIgnoreCase("ACTIVE")) { - status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); - } else { - // The task is either succeeded, failed or inactive. - status = status + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); - } - } else { - status = "FAILED"; - } - } else { - status = "FAILED"; - } - return status; - } - + public JsonObject calculateMissingMetadataFields(List inputList, Logger globusLogger) throws InterruptedException, ExecutionException, IOException { @@ -1133,27 +1266,39 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) String fileName = id.split("IDsplit")[2]; // ToDo: what if the file does not exist in s3 + // (L.A.) - good question. maybe it should call .open and .exists() here? + // otherwise, there doesn't seem to be any diagnostics as to which + // files uploaded successfully and which failed (?) + // ... however, any partially successful upload cases should be + // properly handled later, during the .addFiles() call - only + // the files that actually exists in storage remotely will be + // added to the dataset permanently then. // ToDo: what if checksum calculation failed + // (L.A.) - this appears to have been addressed - by using "Not available in Dataverse" + // in place of a checksum. 
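(Editor's aside, illustrative only and not part of this patch: the question raised in the comment above, i.e. probing whether the file is actually present in remote storage before attempting the checksum, could look roughly like the fragment below. It reuses DataAccess.getDirectStorageIO() from the surrounding code and the .open()/.exists() calls the comment itself suggests; the exact signatures and exception handling are assumptions, not confirmed API.)

    try {
        StorageIO storageProbe = DataAccess.getDirectStorageIO(fullPath);
        storageProbe.open();
        if (!storageProbe.exists()) {
            globusLogger.info("DataFile (fullPath " + fullPath + ") was not found in remote storage");
        }
    } catch (IOException ioex) {
        globusLogger.info("Could not check remote storage for " + fullPath + ": " + ioex.getMessage());
    }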
- do { - try { - StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); - in = dataFileStorageIO.getInputStream(); - checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); - count = 3; - } catch (IOException ioex) { - count = 3; - logger.fine(ioex.getMessage()); - globusLogger.info( - "DataFile (fullPath " + fullPath + ") does not appear to be accessible within Dataverse: "); - } catch (Exception ex) { - count = count + 1; - ex.printStackTrace(); - logger.info(ex.getMessage()); - Thread.sleep(5000); - } + String storageDriverId = DataAccess.getDriverIdAndStorageLocation(fullPath)[0]; - } while (count < 3); + if (StorageIO.isDataverseAccessible(storageDriverId)) { + do { + try { + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + in = dataFileStorageIO.getInputStream(); + checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + count = 3; + } catch (IOException ioex) { + count = 3; + logger.fine(ioex.getMessage()); + globusLogger.info( + "DataFile (fullPath " + fullPath + ") does not appear to be accessible within Dataverse: "); + } catch (Exception ex) { + count = count + 1; + ex.printStackTrace(); + logger.info(ex.getMessage()); + Thread.sleep(5000); + } + } while (count < 3); + } if (checksumVal.length() == 0) { checksumVal = "Not available in Dataverse"; @@ -1261,7 +1406,7 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, Long fileId = Long.parseLong(idAsString); // If we need to create a GuestBookResponse record, we have to // look up the DataFile object for this file: - df = dataFileService.findCheapAndEasy(fileId); + df = dataFileSvc.findCheapAndEasy(fileId); selectedFiles.add(df); if (!doNotSaveGuestbookResponse) { guestbookResponse.setDataFile(df); @@ -1281,5 +1426,58 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, } } } + + public List findAllOngoingTasks() { + return em.createQuery("select object(o) from GlobusTaskInProgress as o order by o.startTime", GlobusTaskInProgress.class).getResultList(); + } + + public void deleteTask(GlobusTaskInProgress task) { + GlobusTaskInProgress mergedTask = em.merge(task); + em.remove(mergedTask); + } + + public List findExternalUploadsByTaskId(String taskId) { + return em.createNamedQuery("ExternalFileUploadInProgress.findByTaskId").setParameter("taskId", taskId).getResultList(); + } + + public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSuccess, String taskStatus, Logger taskLogger) { + String ruleId = globusTask.getRuleId(); + Dataset dataset = globusTask.getDataset(); + AuthenticatedUser authUser = globusTask.getLocalUser(); + if (authUser == null) { + // @todo log error message; do nothing + return; + } + + if (GlobusTaskInProgress.TaskType.UPLOAD.equals(globusTask.getTaskType())) { + List fileUploadsInProgress = findExternalUploadsByTaskId(globusTask.getTaskId()); + if (fileUploadsInProgress == null || fileUploadsInProgress.size() < 1) { + // @todo log error message; do nothing + // (will this ever happen though?) 
+ return; + } + + JsonArrayBuilder filesJsonArrayBuilder = Json.createArrayBuilder(); + + for (ExternalFileUploadInProgress pendingFile : fileUploadsInProgress) { + String jsonInfoString = pendingFile.getFileInfo(); + JsonObject fileObject = JsonUtil.getJsonObject(jsonInfoString); + filesJsonArrayBuilder.add(fileObject); + } + + JsonArray filesJsonArray = filesJsonArrayBuilder.build(); + + processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, taskLogger, taskSuccess, taskStatus); + } else { + // @todo eventually, extend this async. framework to handle Glonus downloads as well + } + + } + + public void deleteExternalUploadRecords(String taskId) { + em.createNamedQuery("ExternalFileUploadInProgress.deleteByTaskId") + .setParameter("taskId", taskId) + .executeUpdate(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java new file mode 100644 index 00000000000..8644bca6143 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java @@ -0,0 +1,202 @@ +package edu.harvard.iq.dataverse.globus; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import jakarta.persistence.Column; +import jakarta.persistence.EnumType; +import jakarta.persistence.Enumerated; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; +import jakarta.persistence.UniqueConstraint; +import java.io.Serializable; +import java.sql.Timestamp; +import java.util.Arrays; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; + +/** + * + * @author landreev + */ +@Entity +@Table(uniqueConstraints = {@UniqueConstraint(columnNames = "taskid")}) +public class GlobusTaskInProgress implements Serializable { + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + /** + * Globus-side identifier of the task in progress, upload or download + */ + @Column(nullable=false, unique = true) + private String taskId; + + /** + * I was considering giving this enum type a more specific name "TransferType" + * - but maybe there will be another use case where we need to keep track of + * Globus tasks that are not data transfers (?) 
+ */ + public enum TaskType { + + UPLOAD("UPLOAD"), + DOWNLOAD("DOWNLOAD"); + + private final String text; + + private TaskType(final String text) { + this.text = text; + } + + public static TaskType fromString(String text) { + if (text != null) { + for (TaskType taskType : TaskType.values()) { + if (text.equals(taskType.text)) { + return taskType; + } + } + } + throw new IllegalArgumentException("TaskType must be one of these values: " + Arrays.asList(TaskType.values()) + "."); + } + + @Override + public String toString() { + return text; + } + } + + @Column(nullable=false) + @Enumerated(EnumType.STRING) + private TaskType taskType; + + /** + * Globus API token that should be used to monitor the status of the task + */ + @Column(nullable=false) + private String globusToken; + + /** + * This is the the user who initiated the Globus task + */ + @ManyToOne + @JoinColumn + private AuthenticatedUser user; + + @Column(nullable=false) + private String ruleId; + + @JoinColumn(nullable = false) + @ManyToOne + private Dataset dataset; + + @Column + private Timestamp startTime; + + public GlobusTaskInProgress() { + } + + GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String globusToken, AuthenticatedUser authUser, String ruleId, Timestamp startTime) { + this.taskId = taskId; + this.taskType = taskType; + this.dataset = dataset; + this.globusToken = globusToken; + this.user = authUser; + this.ruleId = ruleId; + this.startTime = startTime; + } + + + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public String getTaskId() { + return taskId; + } + + public void setTaskId(String taskId) { + this.taskId = taskId; + } + + public TaskType getTaskType() { + return taskType; + } + + public void setTaskType(TaskType taskType) { + this.taskType = taskType; + } + + public String getGlobusToken() { + return globusToken; + } + + public void setGlobusToken(String clientToken) { + this.globusToken = clientToken; + } + + public AuthenticatedUser getLocalUser() { + return user; + } + + public void setLocalUser(AuthenticatedUser authUser) { + this.user = authUser; + } + + public String getRuleId() { + return ruleId; + } + + public void setRuleId(String ruleId) { + this.ruleId = ruleId; + } + public Dataset getDataset() { + return dataset; + } + + public void setDataset(Dataset dataset) { + this.dataset = dataset; + } + + public Timestamp getStartTime() { + return startTime; + } + + public void setStartTime(Timestamp startTime) { + this.startTime = startTime; + } + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? 
id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof GlobusTaskInProgress)) { + return false; + } + GlobusTaskInProgress other = (GlobusTaskInProgress) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.globus.GlobusTaskInProgress[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskState.java similarity index 93% rename from src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java rename to src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskState.java index c2b01779f4a..b5db20d46c1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskState.java @@ -1,6 +1,10 @@ package edu.harvard.iq.dataverse.globus; -public class GlobusTask { +/** + * This class is used to store the state of an ongoing Globus task (transfer) + * as reported by the Globus task API. + */ +public class GlobusTaskState { private String DATA_TYPE; private String type; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java index 92cf8ac7704..652898591ac 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java @@ -30,4 +30,64 @@ public static JsonObject getFilesMap(List dataFiles, Dataset d) { } return filesBuilder.build(); } + + public static boolean isTaskCompleted(GlobusTaskState task) { + if (task != null) { + String status = task.getStatus(); + if (status != null) { + if (status.equalsIgnoreCase("ACTIVE")) { + if (task.getNice_status().equalsIgnoreCase("ok") + || task.getNice_status().equalsIgnoreCase("queued")) { + return false; + } + } + } + } + return true; + } + + public static boolean isTaskSucceeded(GlobusTaskState task) { + String status = null; + if (task != null) { + status = task.getStatus(); + if (status != null) { + status = status.toUpperCase(); + if (status.equals("ACTIVE") || status.startsWith("FAILED") || status.startsWith("INACTIVE")) { + // There are cases where a failed task may still be showing + // as "ACTIVE". But it is definitely safe to assume that it + // has not completed *successfully*. + return false; + } + return true; + } + } + return false; + } + /** + * Produces a human-readable Status label of a completed task + * @param GlobusTaskState task - a looked-up state of a task as reported by Globus API + */ + public static String getTaskStatus(GlobusTaskState task) { + String status = null; + if (task != null) { + status = task.getStatus(); + if (status != null) { + // The task is in progress but is not ok or queued + // (L.A.) I think the assumption here is that this method is called + // exclusively on tasks that have already completed. So that's why + // it is safe to assume that "ACTIVE" means "FAILED". + if (status.equalsIgnoreCase("ACTIVE")) { + status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } else { + // The task is either succeeded, failed or inactive. 
+ status = status + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } + } else { + status = "FAILED"; + } + } else { + status = "FAILED"; + } + return status; + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java new file mode 100644 index 00000000000..fdb2b222804 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -0,0 +1,131 @@ +package edu.harvard.iq.dataverse.globus; + +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.SystemConfig; +import jakarta.annotation.PostConstruct; +import jakarta.annotation.Resource; +import jakarta.ejb.EJB; +import jakarta.ejb.Singleton; +import jakarta.ejb.Startup; +import jakarta.enterprise.concurrent.ManagedScheduledExecutorService; +import java.io.File; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.logging.FileHandler; +import java.util.logging.Logger; + +/** + * + * This Singleton monitors ongoing Globus tasks by checking with the centralized + * Globus API on the status of all the registered ongoing tasks. + * When a successful completion of a task is detected, the service triggers + * the execution of the associated tasks (for example, finalizing adding datafiles + * to the dataset on completion of a remote Globus upload). When a task fails or + * terminates abnormally, a message is logged and the task record is deleted + * from the database. + * + * @author landreev + */ +@Singleton +@Startup +public class TaskMonitoringServiceBean { + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.globus.TaskMonitoringServiceBean"); + + @Resource + ManagedScheduledExecutorService scheduler; + + @EJB + SystemConfig systemConfig; + @EJB + SettingsServiceBean settingsSvc; + @EJB + GlobusServiceBean globusService; + + private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); + + @PostConstruct + public void init() { + if (JvmSettings.GLOBUS_TASK_MONITORING_SERVER.lookupOptional(Boolean.class).orElse(false)) { + logger.info("Starting Globus task monitoring service"); + int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( + settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 600); + this.scheduler.scheduleWithFixedDelay(this::checkOngoingTasks, + 0, pollingInterval, + TimeUnit.SECONDS); + } else { + logger.info("Skipping Globus task monitor initialization"); + } + } + + /** + * This method will be executed on a timer-like schedule, continuously + * monitoring all the ongoing external Globus tasks (transfers). 
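(Editor's aside, illustrative only and not part of this patch: the scheduling in init() above is the standard fixed-delay polling pattern; a minimal standalone equivalent, using a plain Java SE ScheduledExecutorService in place of the container-managed ManagedScheduledExecutorService, would look like this.)

    import java.util.concurrent.Executors;
    import java.util.concurrent.ScheduledExecutorService;
    import java.util.concurrent.TimeUnit;

    public class PollingSketch {
        public static void main(String[] args) {
            ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
            int pollingIntervalSeconds = 600; // same fallback default as GlobusPollingInterval above
            scheduler.scheduleWithFixedDelay(
                    () -> System.out.println("checking ongoing Globus transfer tasks..."),
                    0, pollingIntervalSeconds, TimeUnit.SECONDS);
        }
    }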
+ */ + public void checkOngoingTasks() { + logger.fine("Performing a scheduled external Globus task check"); + List tasks = globusService.findAllOngoingTasks(); + + tasks.forEach(t -> { + FileHandler taskLogHandler = getTaskLogHandler(t); + Logger taskLogger = getTaskLogger(t, taskLogHandler); + + GlobusTaskState retrieved = globusService.getTask(t.getGlobusToken(), t.getTaskId(), taskLogger); + if (GlobusUtil.isTaskCompleted(retrieved)) { + // Do our thing, finalize adding the files to the dataset + globusService.processCompletedTask(t, GlobusUtil.isTaskSucceeded(retrieved), GlobusUtil.getTaskStatus(retrieved), taskLogger); + // Whether it finished successfully, or failed in the process, + // there's no need to keep monitoring this task, so we can + // delete it. + //globusService.deleteExternalUploadRecords(t.getTaskId()); + globusService.deleteTask(t); + } + + if (taskLogHandler != null) { + // @todo it should be prudent to cache these loggers and handlers + // between monitoring runs (should be fairly easy to do) + taskLogHandler.close(); + } + }); + } + + private FileHandler getTaskLogHandler(GlobusTaskInProgress task) { + if (task == null) { + return null; + } + + Date startDate = new Date(task.getStartTime().getTime()); + String logTimeStamp = logFormatter.format(startDate); + + String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusUpload_" + task.getDataset().getId() + "_" + logTimeStamp + + ".log"; + FileHandler fileHandler; + try { + fileHandler = new FileHandler(logFileName); + } catch (IOException | SecurityException ex) { + // @todo log this error somehow? + fileHandler = null; + } + return fileHandler; + } + + private Logger getTaskLogger(GlobusTaskInProgress task, FileHandler logFileHandler) { + if (logFileHandler == null) { + return null; + } + Date startDate = new Date(task.getStartTime().getTime()); + String logTimeStamp = logFormatter.format(startDate); + + Logger taskLogger = Logger.getLogger( + "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimeStamp); + taskLogger.setUseParentHandlers(false); + + taskLogger.addHandler(logFileHandler); + + return taskLogger; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 9bacafd173f..b42fd950528 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -345,12 +345,13 @@ public List saveAndAddFilesToDataset(DatasetVersion version, StorageIO dataAccess = DataAccess.getStorageIO(dataFile); //Populate metadata dataAccess.open(DataAccessOption.READ_ACCESS); - + // (the .open() above makes a remote call to check if + // the file exists and obtains its size) confirmedFileSize = dataAccess.getSize(); // For directly-uploaded files, we will perform the file size // limit and quota checks here. Perform them *again*, in - // some cases: a directly uploaded files have already been + // some cases: files directly uploaded via the UI have already been // checked (for the sake of being able to reject the upload // before the user clicks "save"). 
But in case of direct // uploads via API, these checks haven't been performed yet, diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java index 40044408c63..b01fb5e7eba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java @@ -23,9 +23,11 @@ import jakarta.ejb.Singleton; import jakarta.ejb.Startup; import jakarta.inject.Inject; +import jakarta.json.JsonObject; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.GlobalId; @@ -247,4 +249,5 @@ public PidProvider getDefaultPidGenerator() { return PidUtil.getPidProvider(protocol, authority, shoulder); } } + } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java index 43e34e74c59..02a7dedce47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java @@ -91,31 +91,30 @@ public String getMetadataFromDvObject(String identifier, Map met } else { dataset = (Dataset) dvObject.getOwner(); } - - XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); - metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + doiMetadata.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + doiMetadata.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); if (dvObject.isInstanceofDataset()) { - metadataTemplate.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); + doiMetadata.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); } if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; String fileDescription = df.getDescription(); - metadataTemplate.setDescription(fileDescription == null ? "" : fileDescription); + doiMetadata.setDescription(fileDescription == null ? 
"" : fileDescription); } - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); - metadataTemplate.setTitle(dvObject.getCurrentName()); + doiMetadata.setContacts(dataset.getLatestVersion().getDatasetContacts()); + doiMetadata.setProducers(dataset.getLatestVersion().getDatasetProducers()); + doiMetadata.setTitle(dvObject.getCurrentName()); String producerString = pidProviderService.getProducer(); if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { producerString = UNAVAILABLE; } - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + doiMetadata.setPublisher(producerString); + doiMetadata.setPublisherYear(metadata.get("datacite.publicationyear")); - String xmlMetadata = metadataTemplate.generateXML(dvObject); + String xmlMetadata = new XmlMetadataTemplate(doiMetadata).generateXML(dvObject); logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); return xmlMetadata; } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java new file mode 100644 index 00000000000..ffd24747bc2 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java @@ -0,0 +1,138 @@ +package edu.harvard.iq.dataverse.pidproviders.doi; + +import java.util.ArrayList; +import java.util.List; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import edu.harvard.iq.dataverse.DatasetAuthor; + + +//Parses some specific parts of a DataCite XML metadata file +public class DoiMetadata { + + private String identifier; + private List creators; + private String title; + private String publisher; + private String publisherYear; + private List datafileIdentifiers; + private List authors; + private String description; + private List contacts; + private List producers; + + + public DoiMetadata() { + } + + public void parseDataCiteXML(String xmlMetaData) { + Document doc = Jsoup.parseBodyFragment(xmlMetaData); + Elements identifierElements = doc.select("identifier"); + if (identifierElements.size() > 0) { + identifier = identifierElements.get(0).html(); + } + Elements creatorElements = doc.select("creatorName"); + creators = new ArrayList<>(); + for (Element creatorElement : creatorElements) { + creators.add(creatorElement.html()); + } + Elements titleElements = doc.select("title"); + if (titleElements.size() > 0) { + title = titleElements.get(0).html(); + } + Elements publisherElements = doc.select("publisher"); + if (publisherElements.size() > 0) { + publisher = publisherElements.get(0).html(); + } + Elements publisherYearElements = doc.select("publicationYear"); + if (publisherYearElements.size() > 0) { + publisherYear = publisherYearElements.get(0).html(); + } + } + + public String getIdentifier() { + return identifier; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public List getCreators() { + return creators; + } + + public void setCreators(List creators) { + this.creators = creators; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getPublisher() { + return publisher; + } + + public void setPublisher(String publisher) { + this.publisher = publisher; + } + + public 
String getPublisherYear() { + return publisherYear; + } + + public void setPublisherYear(String publisherYear) { + this.publisherYear = publisherYear; + } + + + public List getProducers() { + return producers; + } + + public void setProducers(List producers) { + this.producers = producers; + } + + public List getContacts() { + return contacts; + } + + public void setContacts(List contacts) { + this.contacts = contacts; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public List getAuthors() { + return authors; + } + + public void setAuthors(List authors) { + this.authors = authors; + } + + + public List getDatafileIdentifiers() { + return datafileIdentifiers; + } + + public void setDatafileIdentifiers(List datafileIdentifiers) { + this.datafileIdentifiers = datafileIdentifiers; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index fb4e294d246..a74a9f34bc9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1,340 +1,1565 @@ package edu.harvard.iq.dataverse.pidproviders.doi; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; -import java.util.logging.Level; +import java.util.Map; +import java.util.Optional; +import java.util.Set; import java.util.logging.Logger; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.StringEscapeUtils; +import org.ocpsoft.common.util.Strings; + +import edu.harvard.iq.dataverse.AlternativePersistentIdentifier; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetAuthor; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; +import edu.harvard.iq.dataverse.DatasetFieldConstant; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; +import edu.harvard.iq.dataverse.DatasetRelPublication; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.ExternalIdentifier; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; +import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.dataset.DatasetType; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; +import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import 
edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; +import edu.harvard.iq.dataverse.util.xml.XmlPrinter; +import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil; +import jakarta.enterprise.inject.spi.CDI; +import jakarta.json.JsonObject; public class XmlMetadataTemplate { - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.DataCiteMetadataTemplate"); - private static String template; + private static final Logger logger = Logger.getLogger(XmlMetadataTemplate.class.getName()); - static { - try (InputStream in = XmlMetadataTemplate.class.getResourceAsStream("datacite_metadata_template.xml")) { - template = new String(in.readAllBytes(), StandardCharsets.UTF_8); - } catch (Exception e) { - logger.log(Level.SEVERE, "datacite metadata template load error"); - logger.log(Level.SEVERE, "String " + e.toString()); - logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); - logger.log(Level.SEVERE, "cause " + e.getCause()); - logger.log(Level.SEVERE, "message " + e.getMessage()); - } - } + public static final String XML_NAMESPACE = "http://datacite.org/schema/kernel-4"; + public static final String XML_SCHEMA_LOCATION = "http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd"; + public static final String XML_XSI = "http://www.w3.org/2001/XMLSchema-instance"; + public static final String XML_SCHEMA_VERSION = "4.5"; - private String xmlMetadata; - private String identifier; - private List datafileIdentifiers; - private List creators; - private String title; - private String publisher; - private String publisherYear; - private List authors; - private String resourceTypeGeneral; - private String description; - private List contacts; - private List producers; - - public List getProducers() { - return producers; - } + private DoiMetadata doiMetadata; + //QDR - used to get ROR name from ExternalVocabularyValue via pidProvider.get + private PidProvider pidProvider = null; - public void setProducers(List producers) { - this.producers = producers; + public XmlMetadataTemplate() { } - public List getContacts() { - return contacts; + public XmlMetadataTemplate(DoiMetadata doiMetadata) { + this.doiMetadata = doiMetadata; } - public void setContacts(List contacts) { - this.contacts = contacts; - } + public String generateXML(DvObject dvObject) { + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { + generateXML(dvObject, outputStream); - public String getDescription() { - return description; + String xml = outputStream.toString(); + logger.fine(xml); + return XmlPrinter.prettyPrintXml(xml); + } catch (XMLStreamException | IOException e) { + logger.severe("Unable to generate DataCite XML for DOI: " + dvObject.getGlobalId().asString() + " : " + e.getMessage()); + e.printStackTrace(); + } + return null; } - public void setDescription(String description) { - this.description = description; - } + private void generateXML(DvObject dvObject, OutputStream outputStream) throws XMLStreamException { + // Could/should use dataset metadata language for metadata from DvObject itself? + String language = null; // machine locale? e.g. for Publisher which is global + String metadataLanguage = null; // when set, otherwise = language? 
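(Editor's aside, illustrative only and not part of this patch: the method below builds the DataCite record with the standard StAX streaming-writer pattern. Reduced to a self-contained example that produces just the root resource element with the namespace and schema attributes defined by the constants above, it looks like this.)

    import java.io.ByteArrayOutputStream;
    import javax.xml.stream.XMLOutputFactory;
    import javax.xml.stream.XMLStreamException;
    import javax.xml.stream.XMLStreamWriter;

    public class ResourceElementSketch {
        public static void main(String[] args) throws XMLStreamException {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(out);
            xmlw.writeStartElement("resource");
            xmlw.writeDefaultNamespace("http://datacite.org/schema/kernel-4");
            xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
            xmlw.writeAttribute("xsi:schemaLocation",
                    "http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd");
            // child elements (identifier, creators, titles, ...) would be written here
            xmlw.writeEndElement();
            xmlw.flush();
            System.out.println(out.toString());
        }
    }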
+ + //QDR - used to get ROR name from ExternalVocabularyValue via pidProvider.get + GlobalId pid = null; + pid = dvObject.getGlobalId(); + if ((pid == null) && (dvObject instanceof DataFile df)) { + pid = df.getOwner().getGlobalId(); + } + pidProvider = PidUtil.getPidProvider(pid.getProviderId()); + XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); + xmlw.writeStartElement("resource"); + boolean deaccessioned=false; + if(dvObject instanceof Dataset d) { + deaccessioned=d.isDeaccessioned(); + } else if (dvObject instanceof DataFile df) { + deaccessioned = df.isDeaccessioned(); + } + xmlw.writeDefaultNamespace(XML_NAMESPACE); + xmlw.writeAttribute("xmlns:xsi", XML_XSI); + xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); - public List getAuthors() { - return authors; + writeIdentifier(xmlw, dvObject); + writeCreators(xmlw, doiMetadata.getAuthors(), deaccessioned); + writeTitles(xmlw, dvObject, language, deaccessioned); + writePublisher(xmlw, dvObject, deaccessioned); + writePublicationYear(xmlw, dvObject, deaccessioned); + if (!deaccessioned) { + writeSubjects(xmlw, dvObject); + writeContributors(xmlw, dvObject); + writeDates(xmlw, dvObject); + writeLanguage(xmlw, dvObject); + } + writeResourceType(xmlw, dvObject); + if (!deaccessioned) { + writeAlternateIdentifiers(xmlw, dvObject); + writeRelatedIdentifiers(xmlw, dvObject); + writeSize(xmlw, dvObject); + writeFormats(xmlw, dvObject); + writeVersion(xmlw, dvObject); + writeAccessRights(xmlw, dvObject); + } + writeDescriptions(xmlw, dvObject, deaccessioned); + if (!deaccessioned) { + writeGeoLocations(xmlw, dvObject); + writeFundingReferences(xmlw, dvObject); + } + xmlw.writeEndElement(); + xmlw.flush(); } - public void setAuthors(List authors) { - this.authors = authors; - } + /** + * 3, Title(s) (with optional type sub-properties) (M) + * + * @param xmlw + * The Stream writer + * @param dvObject + * The dataset/file + * @param language + * the metadata language + * @return + * @throws XMLStreamException + */ + private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language, boolean deaccessioned) throws XMLStreamException { + String title = null; + String subTitle = null; + List altTitles = new ArrayList<>(); - public XmlMetadataTemplate() { - } + if (!deaccessioned) { + title = doiMetadata.getTitle(); - public List getDatafileIdentifiers() { - return datafileIdentifiers; - } + // Only Datasets can have a subtitle or alternative titles + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); + if (subTitleField.isPresent()) { + subTitle = subTitleField.get().getValue(); + } + Optional altTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.alternativeTitle)).findFirst(); + if (altTitleField.isPresent()) { + altTitles = altTitleField.get().getValues(); + } + } + } else { + title = AbstractDOIProvider.UNAVAILABLE; + } + if (StringUtils.isNotBlank(title) || StringUtils.isNotBlank(subTitle) || (altTitles != null && !String.join("", altTitles).isBlank())) { + xmlw.writeStartElement("titles"); + if (StringUtils.isNotBlank(title)) { + XmlWriterUtil.writeFullElement(xmlw, "title", title, language); + } + Map attributes = new HashMap(); - public void setDatafileIdentifiers(List datafileIdentifiers) { - this.datafileIdentifiers = 
datafileIdentifiers; + if (StringUtils.isNotBlank(subTitle)) { + attributes.put("titleType", "Subtitle"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, subTitle); + } + if ((altTitles != null && !String.join("", altTitles).isBlank())) { + attributes.clear(); + attributes.put("titleType", "AlternativeTitle"); + for (String altTitle : altTitles) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, altTitle); + } + } + xmlw.writeEndElement(); + } } - public XmlMetadataTemplate(String xmlMetaData) { - this.xmlMetadata = xmlMetaData; - Document doc = Jsoup.parseBodyFragment(xmlMetaData); - Elements identifierElements = doc.select("identifier"); - if (identifierElements.size() > 0) { - identifier = identifierElements.get(0).html(); + /** + * 1, Identifier (with mandatory type sub-property) (M) Note DataCite expects + * identifierType="DOI" but OpenAire allows several others (see + * https://guidelines.readthedocs.io/en/latest/data/field_identifier.html#d-identifiertype) + * Dataverse is currently only capable of creating DOI, Handle, or URL types + * from the OpenAire list (the last from PermaLinks) ToDo - If we add,e.g., an + * ARK or PURL provider, this code has to change or we'll need to refactor so + * that the identifiertype and id value can be sent via the JSON/ORE + * + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset or file with the PID + * @throws XMLStreamException + */ + private void writeIdentifier(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + GlobalId pid = dvObject.getGlobalId(); + String identifierType = null; + String identifier = null; + switch (pid.getProtocol()) { + case AbstractDOIProvider.DOI_PROTOCOL: + identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); + identifier = pid.asRawIdentifier(); + break; + case HandlePidProvider.HDL_PROTOCOL: + identifierType = "Handle"; + identifier = pid.asRawIdentifier(); + break; + case PermaLinkPidProvider.PERMA_PROTOCOL: + identifierType = "URL"; + identifier = pid.asURL(); + break; } - Elements creatorElements = doc.select("creatorName"); - creators = new ArrayList<>(); - for (Element creatorElement : creatorElements) { - creators.add(creatorElement.html()); + Map attributeMap = new HashMap(); + attributeMap.put("identifierType", identifierType); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "identifier", attributeMap, identifier); + } + + /** + * 2, Creator (with optional given name, family name, name identifier and + * affiliation sub-properties) (M) + * + * @param xmlw + * The stream writer + * @param authorList + * - the list of authors + * @throws XMLStreamException + */ + public void writeCreators(XMLStreamWriter xmlw, List authorList, boolean deaccessioned) throws XMLStreamException { + // creators -> creator -> creatorName with nameType attribute, givenName, + // familyName, nameIdentifier + // write all creators + xmlw.writeStartElement("creators"); // + if(deaccessioned) { + //skip the loop below + authorList = null; } - Elements titleElements = doc.select("title"); - if (titleElements.size() > 0) { - title = titleElements.get(0).html(); + boolean nothingWritten = true; + if (authorList != null && !authorList.isEmpty()) { + for (DatasetAuthor author : authorList) { + String creatorName = author.getName().getDisplayValue(); + String affiliation = null; + if (author.getAffiliation() != null && !author.getAffiliation().getValue().isEmpty()) { + affiliation = author.getAffiliation().getValue(); + } + String 
nameIdentifier = null; + String nameIdentifierScheme = null; + if (StringUtils.isNotBlank(author.getIdValue()) && StringUtils.isNotBlank(author.getIdType())) { + nameIdentifier = author.getIdValue(); + if (nameIdentifier != null) { + // Normalizes to the URL form of the identifier, returns null if the identifier + // is not valid given the type + nameIdentifier = author.getIdentifierAsUrl(); + } + nameIdentifierScheme = author.getIdType(); + } + + if (StringUtils.isNotBlank(creatorName)) { + JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, false, + StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid")); + nothingWritten = false; + writeEntityElements(xmlw, "creator", null, creatorObj, affiliation, nameIdentifier, nameIdentifierScheme); + } + + + } } - Elements publisherElements = doc.select("publisher"); - if (publisherElements.size() > 0) { - publisher = publisherElements.get(0).html(); + if (nothingWritten) { + // Authors unavailable + xmlw.writeStartElement("creator"); + XmlWriterUtil.writeFullElement(xmlw, "creatorName", AbstractPidProvider.UNAVAILABLE); + xmlw.writeEndElement(); } - Elements publisherYearElements = doc.select("publicationYear"); - if (publisherYearElements.size() > 0) { - publisherYear = publisherYearElements.get(0).html(); + xmlw.writeEndElement(); // + } + + private void writePublisher(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { + // publisher should already be non null - :unav if it wasn't available + if(deaccessioned) { + doiMetadata.setPublisher(AbstractPidProvider.UNAVAILABLE); } + XmlWriterUtil.writeFullElement(xmlw, "publisher", doiMetadata.getPublisher()); } - public String generateXML(DvObject dvObject) { + private void writePublicationYear(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { // Can't use "UNKNOWN" here because DataCite will respond with "[facet // 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" - String publisherYearFinal = "9999"; + String pubYear = "9999"; // FIXME: Investigate why this.publisherYear is sometimes null now that pull // request #4606 has been merged. - if (this.publisherYear != null) { + if (! deaccessioned && (doiMetadata.getPublisherYear() != null)) { // Added to prevent a NullPointerException when trying to destroy datasets when // using DataCite rather than EZID. 
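(Editor's aside, illustrative only and not part of this patch: the DataCite constraint quoted in the comment above means publicationYear must be exactly four digits; a defensive check placed after the assignment below could look like this, reusing the "9999" fallback already used in this method.)

    if (pubYear == null || !pubYear.matches("\\d{4}")) {
        pubYear = "9999"; // anything else is rejected by DataCite's '[\d]{4}' pattern
    }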
- publisherYearFinal = this.publisherYear; - } - xmlMetadata = template.replace("${identifier}", getIdentifier().trim()).replace("${title}", this.title) - .replace("${publisher}", this.publisher).replace("${publisherYear}", publisherYearFinal) - .replace("${description}", this.description); - - StringBuilder creatorsElement = new StringBuilder(); - if (authors != null && !authors.isEmpty()) { - for (DatasetAuthor author : authors) { - creatorsElement.append(""); - creatorsElement.append(author.getName().getDisplayValue()); - creatorsElement.append(""); - - if (author.getIdType() != null && author.getIdValue() != null && !author.getIdType().isEmpty() - && !author.getIdValue().isEmpty() && author.getAffiliation() != null - && !author.getAffiliation().getDisplayValue().isEmpty()) { - - if (author.getIdType().equals("ORCID")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - if (author.getIdType().equals("ISNI")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - if (author.getIdType().equals("LCNA")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } + pubYear = doiMetadata.getPublisherYear(); + } + XmlWriterUtil.writeFullElement(xmlw, "publicationYear", String.valueOf(pubYear)); + } + + /** + * 6, Subject (with scheme sub-property) R + * + * @param xmlw + * The Steam writer + * @param dvObject + * The Dataset/DataFile + * @throws XMLStreamException + */ + private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // subjects -> subject with subjectScheme and schemeURI attributes when + // available + boolean subjectsCreated = false; + List subjects = new ArrayList(); + List compoundKeywords = new ArrayList(); + List compoundTopics = new ArrayList(); + // Dataset Subject= Dataverse subject, keyword, and/or topic classification + // fields + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.subject)) { + subjects.addAll(dsf.getValues()); } - if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - creatorsElement - .append("" + author.getAffiliation().getDisplayValue() + ""); + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.keyword)) { + compoundKeywords = dsf.getDatasetFieldCompoundValues(); + } else if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassification)) { + compoundTopics = dsf.getDatasetFieldCompoundValues(); } - creatorsElement.append(""); } - } else { - creatorsElement.append("").append(AbstractPidProvider.UNAVAILABLE) - .append(""); + } else if (dvObject instanceof DataFile df) { + subjects = df.getTagLabels(); } + for (String subject : subjects) { + if (StringUtils.isNotBlank(subject)) { + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElement(xmlw, "subject", StringEscapeUtils.escapeXml10(subject)); + } + } + for (DatasetFieldCompoundValue keywordFieldValue : compoundKeywords) { + String keyword = null; + String scheme = null; + String schemeUri = null; - xmlMetadata = xmlMetadata.replace("${creators}", creatorsElement.toString()); + for (DatasetField subField : keywordFieldValue.getChildDatasetFields()) { + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.keywordValue: + keyword = subField.getValue(); + break; + case 
DatasetFieldConstant.keywordVocab: + scheme = subField.getValue(); + break; + case DatasetFieldConstant.keywordVocabURI: + schemeUri = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(keyword)) { + Map attributesMap = new HashMap(); + if (StringUtils.isNotBlank(scheme)) { + attributesMap.put("subjectScheme", scheme); + } + if (StringUtils.isNotBlank(schemeUri)) { + attributesMap.put("schemeURI", schemeUri); + } + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "subject", attributesMap, StringEscapeUtils.escapeXml10(keyword)); + } + } + for (DatasetFieldCompoundValue topicFieldValue : compoundTopics) { + String topic = null; + String scheme = null; + String schemeUri = null; - StringBuilder contributorsElement = new StringBuilder(); - if (this.getContacts() != null) { - for (String[] contact : this.getContacts()) { - if (!contact[0].isEmpty()) { - contributorsElement.append("" - + contact[0] + ""); - if (!contact[1].isEmpty()) { - contributorsElement.append("" + contact[1] + ""); - } - contributorsElement.append(""); + for (DatasetField subField : topicFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.topicClassValue: + topic = subField.getValue(); + break; + case DatasetFieldConstant.topicClassVocab: + scheme = subField.getValue(); + break; + case DatasetFieldConstant.topicClassVocabURI: + schemeUri = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(topic)) { + Map attributesMap = new HashMap(); + if (StringUtils.isNotBlank(scheme)) { + attributesMap.put("subjectScheme", scheme); } + if (StringUtils.isNotBlank(schemeUri)) { + attributesMap.put("schemeURI", schemeUri); + } + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "subject", attributesMap, StringEscapeUtils.escapeXml10(topic)); } } + if (subjectsCreated) { + xmlw.writeEndElement(); + } + } + + /** + * 7, Contributor (with optional given name, family name, name identifier and + * affiliation sub-properties) + * + * @see #writeContributorElement(javax.xml.stream.XMLStreamWriter, + * java.lang.String, java.lang.String, java.lang.String) + * + * @param xmlw + * The stream writer + * @param dvObject + * The Dataset/DataFile + * @throws XMLStreamException + */ + private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean contributorsCreated = false; + List compoundProducers = new ArrayList(); + List compoundDistributors = new ArrayList(); + List compoundContacts = new ArrayList(); + List compoundContributors = new ArrayList(); + // Dataset Subject= Dataverse subject, keyword, and/or topic classification + // fields + // ToDo Include for files? 
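(Editor's aside, illustrative only and not part of this patch: the subjectsCreated / contributorsCreated flags used above and below follow an "open the parent element only when the first child is actually written" pattern. Assuming XmlWriterUtil.writeOpenTagIfNeeded behaves the way its call sites imply, a minimal version of that helper would be:)

    static boolean writeOpenTagIfNeeded(javax.xml.stream.XMLStreamWriter xmlw, String tag, boolean alreadyOpen)
            throws javax.xml.stream.XMLStreamException {
        if (!alreadyOpen) {
            xmlw.writeStartElement(tag); // e.g. <subjects> or <contributors>
        }
        return true; // the caller keeps this flag and closes the element once all children are written
    }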
+ /* + * if(dvObject instanceof DataFile df) { dvObject = df.getOwner(); } + */ - if (this.getProducers() != null) { - for (String[] producer : this.getProducers()) { - contributorsElement.append("" + producer[0] - + ""); - if (!producer[1].isEmpty()) { - contributorsElement.append("" + producer[1] + ""); + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.producer: + compoundProducers = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.distributor: + compoundDistributors = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.datasetContact: + compoundContacts = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.contributor: + compoundContributors = dsf.getDatasetFieldCompoundValues(); } - contributorsElement.append(""); } } - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - String datasetTypeName = dataset.getDatasetType().getName(); - resourceTypeGeneral = switch (datasetTypeName) { - case DatasetType.DATASET_TYPE_DATASET -> - "Dataset"; - case DatasetType.DATASET_TYPE_SOFTWARE -> - "Software"; - case DatasetType.DATASET_TYPE_WORKFLOW -> - "Workflow"; - default -> - "Dataset"; - }; - xmlMetadata = xmlMetadata.replace("${resourceTypeGeneral}", resourceTypeGeneral); + for (DatasetFieldCompoundValue producerFieldValue : compoundProducers) { + String producer = null; + String affiliation = null; + + for (DatasetField subField : producerFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.producerName: + producer = subField.getValue(); + break; + case DatasetFieldConstant.producerAffiliation: + affiliation = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(producer)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(producer, false, false); + writeEntityElements(xmlw, "contributor", "Producer", entityObject, affiliation, null, null); + } + } - String relIdentifiers = generateRelatedIdentifiers(dvObject); + for (DatasetFieldCompoundValue distributorFieldValue : compoundDistributors) { + String distributor = null; + String affiliation = null; - xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); + for (DatasetField subField : distributorFieldValue.getChildDatasetFields()) { - xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); - return xmlMetadata; - } + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.distributorName: + distributor = subField.getValue(); + break; + case DatasetFieldConstant.distributorAffiliation: + affiliation = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(distributor)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(distributor, false, false); + writeEntityElements(xmlw, "contributor", "Distributor", entityObject, affiliation, null, null); + } - private String generateRelatedIdentifiers(DvObject dvObject) { + } + for (DatasetFieldCompoundValue contactFieldValue : compoundContacts) { + String contact = null; + String affiliation = null; - StringBuilder sb = new StringBuilder(); 
- if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + for (DatasetField subField : contactFieldValue.getChildDatasetFields()) { - datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - if (dataFile.getGlobalId() != null) { - if (sb.toString().isEmpty()) { - sb.append(""); - } - sb.append("" - + dataFile.getGlobalId() + ""); - } + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.datasetContactName: + contact = subField.getValue(); + break; + case DatasetFieldConstant.datasetContactAffiliation: + affiliation = subField.getValue(); + break; } + } + if (StringUtils.isNotBlank(contact)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(contact, false, false); + writeEntityElements(xmlw, "contributor", "ContactPerson", entityObject, affiliation, null, null); + } - if (!sb.toString().isEmpty()) { - sb.append(""); + } + for (DatasetFieldCompoundValue contributorFieldValue : compoundContributors) { + String contributor = null; + String contributorType = null; + + for (DatasetField subField : contributorFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.contributorName: + contributor = subField.getValue(); + break; + case DatasetFieldConstant.contributorType: + contributorType = subField.getValue(); + if (contributorType != null) { + contributorType = contributorType.replace(" ", ""); + } + break; } } - } else if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - sb.append(""); - sb.append("" - + df.getOwner().getGlobalId() + ""); - sb.append(""); + // QDR - doesn't have Funder in the contributor type list. 
+ // Using a string isn't i18n + if (StringUtils.isNotBlank(contributor) && !StringUtils.equalsIgnoreCase("Funder", contributorType)) { + contributorType = getCanonicalContributorType(contributorType); + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(contributor, false, false); + writeEntityElements(xmlw, "contributor", contributorType, entityObject, null, null, null); + } + + } + + if (contributorsCreated) { + xmlw.writeEndElement(); } - return sb.toString(); } - public void generateFileIdentifiers(DvObject dvObject) { + //List from https://schema.datacite.org/meta/kernel-4/include/datacite-contributorType-v4.xsd + private Set contributorTypes = new HashSet<>(Arrays.asList("ContactPerson", "DataCollector", "DataCurator", "DataManager", "Distributor", "Editor", + "HostingInstitution", "Other", "Producer", "ProjectLeader", "ProjectManager", "ProjectMember", "RegistrationAgency", "RegistrationAuthority", + "RelatedPerson", "ResearchGroup", "RightsHolder", "Researcher", "Sponsor", "Supervisor", "WorkPackageLeader")); - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; + private String getCanonicalContributorType(String contributorType) { + if(StringUtils.isBlank(contributorType) || !contributorTypes.contains(contributorType)) { + return "Other"; + } + return contributorType; + } - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + private void writeEntityElements(XMLStreamWriter xmlw, String elementName, String type, JsonObject entityObject, String affiliation, String nameIdentifier, String nameIdentifierScheme) throws XMLStreamException { + xmlw.writeStartElement(elementName); + Map attributeMap = new HashMap(); + if (StringUtils.isNotBlank(type)) { + xmlw.writeAttribute("contributorType", type); + } + // person name=, + if (entityObject.getBoolean("isPerson")) { + attributeMap.put("nameType", "Personal"); + } else { + attributeMap.put("nameType", "Organizational"); + } + XmlWriterUtil.writeFullElementWithAttributes(xmlw, elementName + "Name", attributeMap, + StringEscapeUtils.escapeXml10(entityObject.getString("fullName"))); + if (entityObject.containsKey("givenName")) { + XmlWriterUtil.writeFullElement(xmlw, "givenName", StringEscapeUtils.escapeXml10(entityObject.getString("givenName"))); + } + if (entityObject.containsKey("familyName")) { + XmlWriterUtil.writeFullElement(xmlw, "familyName", StringEscapeUtils.escapeXml10(entityObject.getString("familyName"))); + } - datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - datafileIdentifiers.add(dataFile.getIdentifier()); - int x = xmlMetadata.indexOf("") - 1; - xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", dataFile.getIdentifier()); - xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" - + template.substring(x, template.length() - 1); + if (nameIdentifier != null) { + attributeMap.clear(); + URL url; + try { + url = new URL(nameIdentifier); + String protocol = url.getProtocol(); + String authority = url.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + attributeMap.put("schemeURI", site); + attributeMap.put("nameIdentifierScheme", nameIdentifierScheme); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "nameIdentifier", attributeMap, nameIdentifier); + } catch (MalformedURLException e) { + logger.warning("DatasetAuthor.getIdentifierAsUrl returned a 
Malformed URL: " + nameIdentifier); + } + } + if (StringUtils.isNotBlank(affiliation)) { + attributeMap.clear(); + boolean isROR=false; + String orgName = affiliation; + ExternalIdentifier externalIdentifier = ExternalIdentifier.ROR; + if (externalIdentifier.isValidIdentifier(orgName)) { + isROR = true; + JsonObject jo = getExternalVocabularyValue(orgName); + if (jo != null) { + orgName = jo.getString("termName"); } + } + + if (isROR) { - } else { - xmlMetadata = xmlMetadata.replace( - "${relatedIdentifier}", - ""); + attributeMap.put("schemeURI", "https://ror.org"); + attributeMap.put("affiliationIdentifierScheme", "ROR"); + attributeMap.put("affiliationIdentifier", orgName); } + + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "affiliation", attributeMap, StringEscapeUtils.escapeXml10(orgName)); } + xmlw.writeEndElement(); } - public static String getTemplate() { - return template; + private JsonObject getExternalVocabularyValue(String id) { + return CDI.current().select(DatasetFieldServiceBean.class).get().getExternalVocabularyValue(id); } - public static void setTemplate(String template) { - XmlMetadataTemplate.template = template; + /** + * 8, Date (with type sub-property) (R) + * + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset/datafile + * @throws XMLStreamException + */ + private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean datesWritten = false; + String dateOfDistribution = null; + String dateOfProduction = null; + String dateOfDeposit = null; + Date releaseDate = null; + String publicationDate = null; + boolean isAnUpdate=false; + List datesOfCollection = new ArrayList(); + List timePeriods = new ArrayList(); + + if (dvObject instanceof DataFile df) { + // Find the first released version the file is in to give a published date + List fmds = df.getFileMetadatas(); + DatasetVersion initialVersion = null; + for (FileMetadata fmd : fmds) { + DatasetVersion dv = fmd.getDatasetVersion(); + if (dv.isReleased()) { + initialVersion = dv; + publicationDate = Util.getDateFormat().format(dv.getReleaseTime()); + break; + } + } + // And the last update is the most recent + for (int i = fmds.size() - 1; i >= 0; i--) { + DatasetVersion dv = fmds.get(i).getDatasetVersion(); + if (dv.isReleased() && !dv.equals(initialVersion)) { + releaseDate = dv.getReleaseTime(); + isAnUpdate=true; + break; + } + } + } else if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + Long versionNumber = dv.getVersionNumber(); + if (versionNumber != null && !(versionNumber.equals(1) && dv.getMinorVersionNumber().equals(0))) { + isAnUpdate = true; + } + releaseDate = dv.getReleaseTime(); + publicationDate = d.getPublicationDateFormattedYYYYMMDD(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.distributionDate: + dateOfDistribution = dsf.getValue(); + break; + case DatasetFieldConstant.productionDate: + dateOfProduction = dsf.getValue(); + break; + case DatasetFieldConstant.dateOfDeposit: + dateOfDeposit = dsf.getValue(); + break; + case DatasetFieldConstant.dateOfCollection: + datesOfCollection = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.timePeriodCovered: + timePeriods = dsf.getDatasetFieldCompoundValues(); + break; + } + } + } + Map attributes = new HashMap(); + if (StringUtils.isNotBlank(dateOfDistribution)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + 
attributes.put("dateType", "Issued"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfDistribution); + } + // dates -> date with dateType attribute + + if (StringUtils.isNotBlank(dateOfProduction)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Created"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfProduction); + } + if (StringUtils.isNotBlank(dateOfDeposit)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Submitted"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfDeposit); + } + + if (publicationDate != null) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + + attributes.put("dateType", "Available"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, publicationDate); + } + if (isAnUpdate) { + String date = Util.getDateFormat().format(releaseDate); + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + + attributes.put("dateType", "Updated"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, date); + } + if (datesOfCollection != null) { + for (DatasetFieldCompoundValue collectionDateFieldValue : datesOfCollection) { + String startDate = null; + String endDate = null; + + for (DatasetField subField : collectionDateFieldValue.getChildDatasetFields()) { + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.dateOfCollectionStart: + startDate = subField.getValue(); + break; + case DatasetFieldConstant.dateOfCollectionEnd: + endDate = subField.getValue(); + break; + } + } + // Minimal clean-up - useful? Parse/format would remove unused chars, and an + // exception would clear the date so we don't send nonsense + startDate = cleanUpDate(startDate); + endDate = cleanUpDate(endDate); + if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Collected"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); + } + } + } + if (timePeriods != null) { + for (DatasetFieldCompoundValue timePeriodFieldValue : timePeriods) { + String startDate = null; + String endDate = null; + + for (DatasetField subField : timePeriodFieldValue.getChildDatasetFields()) { + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.timePeriodCoveredStart: + startDate = subField.getValue(); + break; + case DatasetFieldConstant.timePeriodCoveredEnd: + endDate = subField.getValue(); + break; + } + } + // Minimal clean-up - useful? 
Parse/format would remove unused chars, and an + // exception would clear the date so we don't send nonsense + startDate = cleanUpDate(startDate); + endDate = cleanUpDate(endDate); + if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Other"); + attributes.put("dateInformation", "Time period covered by the data"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); + } + } + } + if (datesWritten) { + xmlw.writeEndElement(); + } } - public String getIdentifier() { - return identifier; + private String cleanUpDate(String date) { + String newDate = null; + if (!StringUtils.isBlank(date)) { + try { + SimpleDateFormat sdf = Util.getDateFormat(); + Date start = sdf.parse(date); + newDate = sdf.format(start); + } catch (ParseException e) { + logger.warning("Could not parse date: " + date); + } + } + return newDate; } - public void setIdentifier(String identifier) { - this.identifier = identifier; + // 9, Language (MA), language + private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // Currently not supported. Spec indicates one 'primary' language. Could send + // the first entry in DatasetFieldConstant.language or send iff there is only + // one entry, and/or default to the machine's default lang, or the dataverse metadatalang? + return; } - public List getCreators() { - return creators; + // 10, ResourceType (with mandatory general type + // description sub- property) (M) + private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + List kindOfDataValues = new ArrayList(); + Map attributes = new HashMap(); + String resourceType = "Dataset"; + if (dvObject instanceof Dataset dataset) { + String datasetTypeName = dataset.getDatasetType().getName(); + resourceType = switch (datasetTypeName) { + case DatasetType.DATASET_TYPE_DATASET -> "Dataset"; + case DatasetType.DATASET_TYPE_SOFTWARE -> "Software"; + case DatasetType.DATASET_TYPE_WORKFLOW -> "Workflow"; + default -> "Dataset"; + }; + } + attributes.put("resourceTypeGeneral", resourceType); + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.kindOfData: + List vals = dsf.getValues(); + for(String val: vals) { + if(StringUtils.isNotBlank(val)) { + kindOfDataValues.add(val); + } + } + break; + } + } + } + if (!kindOfDataValues.isEmpty()) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, String.join(";", kindOfDataValues)); + + } else { + // Write an attribute only element if there are no kindOfData values. 
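+            // e.g. <resourceType resourceTypeGeneral="Dataset"/>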
+ xmlw.writeStartElement("resourceType"); + xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); + xmlw.writeEndElement(); + } + } - public void setCreators(List creators) { - this.creators = creators; + /** + * 11 AlternateIdentifier (with type sub-property) (O) + * + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset/datafile + * @throws XMLStreamException + */ + private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + List otherIdentifiers = new ArrayList(); + Set altPids = dvObject.getAlternativePersistentIndentifiers(); + + boolean alternatesWritten = false; + + Map attributes = new HashMap(); + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (DatasetFieldConstant.otherId.equals(dsf.getDatasetFieldType().getName())) { + otherIdentifiers = dsf.getDatasetFieldCompoundValues(); + break; + } + } + } + + if (altPids != null && !altPids.isEmpty()) { + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternateIdentifiers", alternatesWritten); + for (AlternativePersistentIdentifier altPid : altPids) { + String identifierType = null; + String identifier = null; + switch (altPid.getProtocol()) { + case AbstractDOIProvider.DOI_PROTOCOL: + identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + case HandlePidProvider.HDL_PROTOCOL: + identifierType = "Handle"; + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + default: + // The AlternativePersistentIdentifier class isn't really ready for anything but + // doi or handle pids, but will add this as a default. 
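+                    // ":unav" marks the identifier type as unavailable/unknown for pids that are neither DOIs nor Handles.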
+ identifierType = ":unav"; + identifier = altPid.getAuthority() + altPid.getIdentifier(); + break; + } + attributes.put("alternateIdentifierType", identifierType); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + + } + } + + for (DatasetFieldCompoundValue otherIdentifier : otherIdentifiers) { + String identifierType = null; + String identifier = null; + for (DatasetField subField : otherIdentifier.getChildDatasetFields()) { + identifierType = ":unav"; + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.otherIdAgency: + identifierType = subField.getValue(); + break; + case DatasetFieldConstant.otherIdValue: + identifier = subField.getValue(); + break; + } + } + attributes.put("alternateIdentifierType", identifierType); + if (!StringUtils.isBlank(identifier)) { + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternateIdentifiers", alternatesWritten); + + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + } + } + if (alternatesWritten) { + xmlw.writeEndElement(); + } } - public String getTitle() { - return title; + /** + * 12, RelatedIdentifier (with type and relation type sub-properties) (R) + * + * @param xmlw + * The Steam writer + * @param dvObject + * the dataset/datafile + * @throws XMLStreamException + */ + private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + + boolean relatedIdentifiersWritten = false; + + Map attributes = new HashMap(); + + if (dvObject instanceof Dataset dataset) { + List relatedPublications = dataset.getLatestVersionForCopy().getRelatedPublications(); + if (!relatedPublications.isEmpty()) { + for (DatasetRelPublication relatedPub : relatedPublications) { + attributes.clear(); + + String pubIdType = relatedPub.getIdType(); + String identifier = relatedPub.getIdNumber(); + String url = relatedPub.getUrl(); + String relationType = relatedPub.getRelationType(); + if(StringUtils.isBlank(relationType)) { + relationType = "IsSupplementTo"; + } + /* + * Note - with identifier and url fields, it's not clear that there's a single + * way those two fields are used for all identifier types. The code here is + * ~best effort to interpret those fields. + */ + logger.fine("Found relpub: " + pubIdType + " " + identifier + " " + url); + + pubIdType = getCanonicalPublicationType(pubIdType); + logger.fine("Canonical type: " + pubIdType); + // Prefer identifier if set, otherwise check url + String relatedIdentifier = identifier; + if (StringUtils.isBlank(relatedIdentifier)) { + relatedIdentifier = url; + } + logger.fine("Related identifier: " + relatedIdentifier); + // For types where we understand the protocol, get the canonical form + if (StringUtils.isNotBlank(relatedIdentifier)) { + switch (pubIdType != null ? 
pubIdType : "none") {
+                        case "DOI":
+                            if (!(relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http"))) {
+                                relatedIdentifier = "doi:" + relatedIdentifier;
+                            }
+                            logger.fine("Intermediate Related identifier: " + relatedIdentifier);
+                            try {
+                                GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier);
+                                relatedIdentifier = pid.asRawIdentifier();
+                            } catch (IllegalArgumentException e) {
+                                logger.warning("Invalid DOI: " + e.getLocalizedMessage());
+                                relatedIdentifier = null;
+                            }
+                            logger.fine("Final Related identifier: " + relatedIdentifier);
+                            break;
+                        case "Handle":
+                            if (!(relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http"))) {
+                                relatedIdentifier = "hdl:" + relatedIdentifier;
+                            }
+                            try {
+                                GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier);
+                                relatedIdentifier = pid.asRawIdentifier();
+                            } catch (IllegalArgumentException e) {
+                                relatedIdentifier = null;
+                            }
+                            break;
+                        case "URL":
+                            // If a URL is given, split the string to get a schemeUri
+                            try {
+                                URL relatedUrl = new URI(relatedIdentifier).toURL();
+                                String protocol = relatedUrl.getProtocol();
+                                String authority = relatedUrl.getAuthority();
+                                String site = String.format("%s://%s", protocol, authority);
+                                relatedIdentifier = relatedIdentifier.substring(site.length());
+                                attributes.put("schemeURI", site);
+                            } catch (URISyntaxException | MalformedURLException | IllegalArgumentException e) {
+                                // Just an identifier but without a pubIdType we won't include it
+                                logger.warning("Invalid Identifier of type URL: " + relatedIdentifier);
+                                relatedIdentifier = null;
+                            }
+                            break;
+                        case "none":
+                            //Try to identify PIDs and URLs and send them as related identifiers
+                            if (relatedIdentifier != null) {
+                                // See if it is a GlobalID we know
+                                try {
+                                    GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier);
+                                    relatedIdentifier = pid.asRawIdentifier();
+                                    pubIdType = getCanonicalPublicationType(pid.getProtocol());
+                                } catch (IllegalArgumentException e) {
+                                }
+                                // For non-URL types, if a URL is given, split the string to get a schemeUri
+                                try {
+                                    URL relatedUrl = new URI(relatedIdentifier).toURL();
+                                    String protocol = relatedUrl.getProtocol();
+                                    String authority = relatedUrl.getAuthority();
+                                    String site = String.format("%s://%s", protocol, authority);
+                                    relatedIdentifier = relatedIdentifier.substring(site.length());
+                                    attributes.put("schemeURI", site);
+                                    pubIdType = "URL";
+                                } catch (URISyntaxException | MalformedURLException | IllegalArgumentException e) {
+                                    // Just an identifier but without a pubIdType we won't include it
+                                    logger.warning("Related Identifier found without type: " + relatedIdentifier);
+                                    //Won't be sent since pubIdType is null - could also set relatedIdentifier to null
+                                }
+                            }
+                            break;
+                        default:
+                            //Some other valid type - we just send the identifier w/o optional attributes
+                            //To Do - validation for other types?
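+                            // The identifier is passed through as-is; only the type and relation attributes are added below.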
+                            break;
+                        }
+                    }
+                    if (StringUtils.isNotBlank(relatedIdentifier) && StringUtils.isNotBlank(pubIdType)) {
+                        // Still have a valid entry
+                        attributes.put("relatedIdentifierType", pubIdType);
+                        attributes.put("relationType", relationType);
+                        relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten);
+                        XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, relatedIdentifier);
+                    }
+                }
+            }
+            List<FileMetadata> fmds = dataset.getLatestVersionForCopy().getFileMetadatas();
+            if (fmds != null && !fmds.isEmpty()) {
+                attributes.clear();
+                attributes.put("relationType", "HasPart");
+                for (FileMetadata fmd : fmds) {
+                    DataFile dataFile = fmd.getDataFile();
+                    GlobalId pid = dataFile.getGlobalId();
+                    if (pid != null) {
+                        String pubIdType = getCanonicalPublicationType(pid.getProtocol());
+                        if (pubIdType != null) {
+                            attributes.put("relatedIdentifierType", pubIdType);
+                            relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten);
+                            XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier());
+                        }
+                    }
+                }
+            }
+        } else if (dvObject instanceof DataFile df) {
+            GlobalId pid = df.getOwner().getGlobalId();
+            if (pid != null) {
+                String pubIdType = getCanonicalPublicationType(pid.getProtocol());
+                if (pubIdType != null) {
+
+                    attributes.clear();
+                    attributes.put("relationType", "IsPartOf");
+                    attributes.put("relatedIdentifierType", pubIdType);
+                    relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten);
+                    XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier());
+                }
+            }
+        }
+        if (relatedIdentifiersWritten) {
+            xmlw.writeEndElement();
+        }
     }
-    public void setTitle(String title) {
-        this.title = title;
+    static HashMap<String, String> relatedIdentifierTypeMap = new HashMap<String, String>();
+
+    private static String getCanonicalPublicationType(String pubIdType) {
+        if (relatedIdentifierTypeMap.isEmpty()) {
+            relatedIdentifierTypeMap.put("ARK".toLowerCase(), "ARK");
+            relatedIdentifierTypeMap.put("arXiv", "arXiv");
+            relatedIdentifierTypeMap.put("bibcode".toLowerCase(), "bibcode");
+            relatedIdentifierTypeMap.put("DOI".toLowerCase(), "DOI");
+            relatedIdentifierTypeMap.put("EAN13".toLowerCase(), "EAN13");
+            relatedIdentifierTypeMap.put("EISSN".toLowerCase(), "EISSN");
+            relatedIdentifierTypeMap.put("Handle".toLowerCase(), "Handle");
+            relatedIdentifierTypeMap.put("IGSN".toLowerCase(), "IGSN");
+            relatedIdentifierTypeMap.put("ISBN".toLowerCase(), "ISBN");
+            relatedIdentifierTypeMap.put("ISSN".toLowerCase(), "ISSN");
+            relatedIdentifierTypeMap.put("ISTC".toLowerCase(), "ISTC");
+            relatedIdentifierTypeMap.put("LISSN".toLowerCase(), "LISSN");
+            relatedIdentifierTypeMap.put("LSID".toLowerCase(), "LSID");
+            relatedIdentifierTypeMap.put("PISSN".toLowerCase(), "PISSN");
+            relatedIdentifierTypeMap.put("PMID".toLowerCase(), "PMID");
+            relatedIdentifierTypeMap.put("PURL".toLowerCase(), "PURL");
+            relatedIdentifierTypeMap.put("UPC".toLowerCase(), "UPC");
+            relatedIdentifierTypeMap.put("URL".toLowerCase(), "URL");
+            relatedIdentifierTypeMap.put("URN".toLowerCase(), "URN");
+            relatedIdentifierTypeMap.put("WOS".toLowerCase(), "WOS");
+            // Add entry for Handle,Perma protocols so this can be used with GlobalId/getProtocol()
+            relatedIdentifierTypeMap.put("hdl".toLowerCase(), "Handle");
+            relatedIdentifierTypeMap.put("perma".toLowerCase(), "URL");
+
+        }
+        return 
relatedIdentifierTypeMap.get(pubIdType); } - public String getPublisher() { - return publisher; + private void writeSize(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // sizes -> size + boolean sizesWritten = false; + List dataFiles = new ArrayList(); + + if (dvObject instanceof Dataset dataset) { + dataFiles = dataset.getFiles(); + } else if (dvObject instanceof DataFile df) { + dataFiles.add(df); + } + if (dataFiles != null && !dataFiles.isEmpty()) { + for (DataFile dataFile : dataFiles) { + Long size = dataFile.getFilesize(); + if (size != -1) { + sizesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "sizes", sizesWritten); + XmlWriterUtil.writeFullElement(xmlw, "size", size.toString()); + } + } + } + if (sizesWritten) { + xmlw.writeEndElement(); + } + } - public void setPublisher(String publisher) { - this.publisher = publisher; + private void writeFormats(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + + boolean formatsWritten = false; + List dataFiles = new ArrayList(); + + if (dvObject instanceof Dataset dataset) { + dataFiles = dataset.getFiles(); + } else if (dvObject instanceof DataFile df) { + dataFiles.add(df); + } + if (dataFiles != null && !dataFiles.isEmpty()) { + for (DataFile dataFile : dataFiles) { + String format = dataFile.getContentType(); + if (StringUtils.isNotBlank(format)) { + formatsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "formats", formatsWritten); + XmlWriterUtil.writeFullElement(xmlw, "format", format); + } + /* + * Should original formats be sent? What about original sizes above? + * if(dataFile.isTabularData()) { String originalFormat = + * dataFile.getOriginalFileFormat(); if(StringUtils.isNotBlank(originalFormat)) + * { XmlWriterUtil.writeFullElement(xmlw, "format", format); } } + */ + } + } + if (formatsWritten) { + xmlw.writeEndElement(); + } + } - public String getPublisherYear() { - return publisherYear; + private void writeVersion(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + Dataset d = null; + if (dvObject instanceof Dataset) { + d = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + d = ((DataFile) dvObject).getOwner(); + } + if (d != null) { + DatasetVersion dv = d.getLatestVersionForCopy(); + String version = dv.getFriendlyVersionNumber(); + if (StringUtils.isNotBlank(version)) { + XmlWriterUtil.writeFullElement(xmlw, "version", version); + } + } + } - public void setPublisherYear(String publisherYear) { - this.publisherYear = publisherYear; + private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // rightsList -> rights with rightsURI attribute + xmlw.writeStartElement("rightsList"); // + + // set terms from the info:eu-repo-Access-Terms vocabulary + xmlw.writeStartElement("rights"); // + DatasetVersion dv = null; + boolean closed = false; + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + closed = dv.isHasRestrictedFile(); + } else if (dvObject instanceof DataFile df) { + dv = df.getOwner().getLatestVersionForCopy(); + + closed = df.isRestricted(); + } + TermsOfUseAndAccess terms = dv.getTermsOfUseAndAccess(); + boolean requestsAllowed = terms.isFileAccessRequest(); + License license = terms.getLicense(); + + if (requestsAllowed && closed) { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/restrictedAccess"); + } else if (!requestsAllowed && closed) { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/closedAccess"); + } else { + 
xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/openAccess");
+        }
+        xmlw.writeEndElement(); // </rights>
+        xmlw.writeStartElement("rights"); // <rights>
+
+        if (license != null) {
+            xmlw.writeAttribute("rightsURI", license.getUri().toString());
+            xmlw.writeCharacters(license.getName());
+        } else {
+            xmlw.writeAttribute("rightsURI", DatasetUtil.getLicenseURI(dv));
+            xmlw.writeCharacters(BundleUtil.getStringFromBundle("license.custom.description"));
+        }
+        xmlw.writeEndElement(); // </rights>
+        xmlw.writeEndElement(); // </rightsList>
     }
-    public String getResourceTypeGeneral() {
-        return resourceTypeGeneral;
+    private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException {
+        // descriptions -> description with descriptionType attribute
+        boolean descriptionsWritten = false;
+        List<String> descriptions = null;
+        DatasetVersion dv = null;
+        if(deaccessioned) {
+            descriptions = new ArrayList<String>();
+            descriptions.add(AbstractDOIProvider.UNAVAILABLE);
+        } else {
+            if (dvObject instanceof Dataset d) {
+                dv = d.getLatestVersionForCopy();
+                descriptions = dv.getDescriptions();
+            } else if (dvObject instanceof DataFile df) {
+                String description = df.getDescription();
+                if (description != null) {
+                    descriptions = new ArrayList<String>();
+                    descriptions.add(description);
+                }
+            }
+        }
+        Map<String, String> attributes = new HashMap<String, String>();
+        attributes.put("descriptionType", "Abstract");
+        if (descriptions != null) {
+            for (String description : descriptions) {
+                descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten);
+                XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, StringEscapeUtils.escapeXml10(description));
+            }
+        }
+
+        if (dv != null) {
+            List<DatasetField> dsfs = dv.getDatasetFields();
+
+            for (DatasetField dsf : dsfs) {
+
+                switch (dsf.getDatasetFieldType().getName()) {
+                case DatasetFieldConstant.software:
+                    attributes.clear();
+                    attributes.put("descriptionType", "TechnicalInfo");
+                    List<DatasetFieldCompoundValue> dsfcvs = dsf.getDatasetFieldCompoundValues();
+                    for (DatasetFieldCompoundValue dsfcv : dsfcvs) {
+
+                        String softwareName = null;
+                        String softwareVersion = null;
+                        List<DatasetField> childDsfs = dsfcv.getChildDatasetFields();
+                        for (DatasetField childDsf : childDsfs) {
+                            if (DatasetFieldConstant.softwareName.equals(childDsf.getDatasetFieldType().getName())) {
+                                softwareName = childDsf.getValue();
+                            } else if (DatasetFieldConstant.softwareVersion.equals(childDsf.getDatasetFieldType().getName())) {
+                                softwareVersion = childDsf.getValue();
+                            }
+                        }
+                        if (StringUtils.isNotBlank(softwareName)) {
+                            if (StringUtils.isNotBlank(softwareVersion)) {
+                                softwareName = softwareName + ", " + softwareVersion;
+                            }
+                            descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten);
+                            XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, softwareName);
+                        }
+                    }
+                    break;
+                case DatasetFieldConstant.originOfSources:
+                case DatasetFieldConstant.characteristicOfSources:
+                case DatasetFieldConstant.accessToSources:
+                    attributes.clear();
+                    attributes.put("descriptionType", "Methods");
+                    String method = dsf.getValue();
+                    if (StringUtils.isNotBlank(method)) {
+                        descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten);
+                        XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, method);
+
+                    }
+                    break;
+                case DatasetFieldConstant.series:
+                    attributes.clear();
+                    attributes.put("descriptionType", "SeriesInformation");
+                    dsfcvs = dsf.getDatasetFieldCompoundValues();
+                    for 
(DatasetFieldCompoundValue dsfcv : dsfcvs) { + List childDsfs = dsfcv.getChildDatasetFields(); + for (DatasetField childDsf : childDsfs) { + + if (DatasetFieldConstant.seriesName.equals(childDsf.getDatasetFieldType().getName())) { + String seriesInformation = childDsf.getValue(); + if (StringUtils.isNotBlank(seriesInformation)) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, seriesInformation); + } + break; + } + } + } + break; + case DatasetFieldConstant.notesText: + attributes.clear(); + attributes.put("descriptionType", "Other"); + String notesText = dsf.getValue(); + if (StringUtils.isNotBlank(notesText)) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, notesText); + } + break; + + } + } + + } + + if (descriptionsWritten) { + xmlw.writeEndElement(); // + } } - public void setResourceTypeGeneral(String resourceTypeGeneral) { - this.resourceTypeGeneral = resourceTypeGeneral; + private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + if (dvObject instanceof Dataset d) { + boolean geoLocationsWritten = false; + DatasetVersion dv = d.getLatestVersionForCopy(); + + List places = dv.getGeographicCoverage(); + if (places != null && !places.isEmpty()) { + // geoLocationPlace + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); + for (String[] place : places) { + xmlw.writeStartElement("geoLocation"); // + + ArrayList placeList = new ArrayList(); + for (String placePart : place) { + if (!StringUtils.isBlank(placePart)) { + placeList.add(placePart); + } + } + XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", Strings.join(placeList, ", ")); + xmlw.writeEndElement(); // + } + + } + boolean boundingBoxFound = false; + boolean productionPlaceFound = false; + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.geographicBoundingBox: + boundingBoxFound = true; + for (DatasetFieldCompoundValue dsfcv : dsf.getDatasetFieldCompoundValues()) { + List childDsfs = dsfcv.getChildDatasetFields(); + String nLatitude = null; + String sLatitude = null; + String eLongitude = null; + String wLongitude = null; + for (DatasetField childDsf : childDsfs) { + switch (childDsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.northLatitude: + nLatitude = childDsf.getValue(); + break; + case DatasetFieldConstant.southLatitude: + sLatitude = childDsf.getValue(); + break; + case DatasetFieldConstant.eastLongitude: + eLongitude = childDsf.getValue(); + break; + case DatasetFieldConstant.westLongitude: + wLongitude = childDsf.getValue(); + + } + } + if (StringUtils.isNoneBlank(wLongitude, eLongitude, nLatitude, sLatitude)) { + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); + xmlw.writeStartElement("geoLocation"); // + if (wLongitude.equals(eLongitude) && nLatitude.equals(sLatitude)) { + // A point + xmlw.writeStartElement("geoLocationPoint"); + XmlWriterUtil.writeFullElement(xmlw, "pointLongitude", eLongitude); + XmlWriterUtil.writeFullElement(xmlw, "pointLatitude", sLatitude); + xmlw.writeEndElement(); + } else { + // A box + xmlw.writeStartElement("geoLocationBox"); + XmlWriterUtil.writeFullElement(xmlw, 
"westBoundLongitude", wLongitude); + XmlWriterUtil.writeFullElement(xmlw, "eastBoundLongitude", eLongitude); + XmlWriterUtil.writeFullElement(xmlw, "southBoundLatitude", sLatitude); + XmlWriterUtil.writeFullElement(xmlw, "northBoundLatitude", nLatitude); + xmlw.writeEndElement(); + + } + xmlw.writeEndElement(); // + } + } + case DatasetFieldConstant.productionPlace: + productionPlaceFound = true; + // geoLocationPlace + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); + List prodPlaces = dsf.getValues(); + for (String prodPlace : prodPlaces) { + xmlw.writeStartElement("geoLocation"); // + XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", prodPlace); + xmlw.writeEndElement(); // + } + break; + } + if (boundingBoxFound && productionPlaceFound) { + break; + } + } + if (geoLocationsWritten) { + xmlw.writeEndElement(); // + } + } + } + private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // fundingReferences -> fundingReference -> funderName, awardNumber + boolean fundingReferenceWritten = false; + DatasetVersion dv = null; + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + } else if (dvObject instanceof DataFile df) { + dv = df.getOwner().getLatestVersionForCopy(); + } + if (dv != null) { + List retList = new ArrayList<>(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributor)) { + boolean addFunder = false; + for (DatasetFieldCompoundValue contributorValue : dsf.getDatasetFieldCompoundValues()) { + String contributorName = null; + String contributorType = null; + for (DatasetField subField : contributorValue.getChildDatasetFields()) { + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributorName)) { + contributorName = subField.getDisplayValue(); + } + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributorType)) { + contributorType = subField.getRawValue(); + } + } + // SEK 02/12/2019 move outside loop to prevent contrib type to carry over to + // next contributor + // TODO: Consider how this will work in French, Chinese, etc. + if ("Funder".equals(contributorType)) { + if (!StringUtils.isBlank(contributorName)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(contributorName)); + xmlw.writeEndElement(); // + } + } + } + } + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumber)) { + for (DatasetFieldCompoundValue grantObject : dsf.getDatasetFieldCompoundValues()) { + String funder = null; + String awardNumber = null; + for (DatasetField subField : grantObject.getChildDatasetFields()) { + // It would be nice to do something with grantNumberValue (the actual number) + // but schema.org doesn't support it. 
+ if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumberAgency)) { + String grantAgency = subField.getDisplayValue(); + funder = grantAgency; + } else if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumberValue)) { + String grantNumberValue = subField.getDisplayValue(); + awardNumber = grantNumberValue; + } + } + if (!StringUtils.isBlank(funder)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + boolean isROR=false; + String funderIdentifier = null; + ExternalIdentifier externalIdentifier = ExternalIdentifier.ROR; + if (externalIdentifier.isValidIdentifier(funder)) { + isROR = true; + JsonObject jo = getExternalVocabularyValue(funder); + if (jo != null) { + funderIdentifier = funder; + funder = jo.getString("termName"); + } + } + + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(funder)); + if (isROR) { + Map attributeMap = new HashMap<>(); + attributeMap.put("schemeURI", "https://ror.org"); + attributeMap.put("funderIdentifierType", "ROR"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "funderIdentifier", attributeMap, StringEscapeUtils.escapeXml10(funderIdentifier)); + } + if (StringUtils.isNotBlank(awardNumber)) { + XmlWriterUtil.writeFullElement(xmlw, "awardNumber", StringEscapeUtils.escapeXml10(awardNumber)); + } + xmlw.writeEndElement(); // + } + + } + } + } + + if (fundingReferenceWritten) { + xmlw.writeEndElement(); // + } + + } + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java index cda70cbc506..a4d788de4df 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java @@ -21,6 +21,7 @@ import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.DoiMetadata; import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; import org.xmlunit.builder.DiffBuilder; @@ -125,28 +126,28 @@ public static String getMetadataFromDvObject(String identifier, Map from HTML, it leaves '&' (at least so we need to xml escape as well String description = StringEscapeUtils.escapeXml10(dataset.getLatestVersion().getDescriptionPlainText()); if (description.isEmpty() || description.equals(DatasetField.NA_VALUE)) { description = AbstractPidProvider.UNAVAILABLE; } - metadataTemplate.setDescription(description); + doiMetadata.setDescription(description); } if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; //Note: File metadata is not escaped like dataset metadata is, so adding an xml escape here. //This could/should be removed if the datafile methods add escaping String fileDescription = StringEscapeUtils.escapeXml10(df.getDescription()); - metadataTemplate.setDescription(fileDescription == null ? AbstractPidProvider.UNAVAILABLE : fileDescription); + doiMetadata.setDescription(fileDescription == null ? 
AbstractPidProvider.UNAVAILABLE : fileDescription); } - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); + doiMetadata.setContacts(dataset.getLatestVersion().getDatasetContacts()); + doiMetadata.setProducers(dataset.getLatestVersion().getDatasetProducers()); String title = dvObject.getCurrentName(); if(dvObject.isInstanceofDataFile()) { //Note file title is not currently escaped the way the dataset title is, so adding it here. @@ -157,40 +158,41 @@ public static String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { - XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + DoiMetadata doiMetadata = new DoiMetadata(); + + doiMetadata.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + doiMetadata.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); - metadataTemplate.setDescription(AbstractPidProvider.UNAVAILABLE); + doiMetadata.setDescription(AbstractPidProvider.UNAVAILABLE); String title =metadata.get("datacite.title"); System.out.print("Map metadata title: "+ metadata.get("datacite.title")); - metadataTemplate.setAuthors(null); + doiMetadata.setAuthors(null); - metadataTemplate.setTitle(title); + doiMetadata.setTitle(title); String producerString = AbstractPidProvider.UNAVAILABLE; - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + doiMetadata.setPublisher(producerString); + doiMetadata.setPublisherYear(metadata.get("datacite.publicationyear")); - String xmlMetadata = metadataTemplate.generateXML(dvObject); + String xmlMetadata = new XmlMetadataTemplate(doiMetadata).generateXML(dvObject); logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); return xmlMetadata; } @@ -244,11 +246,12 @@ Map getMetadata(String identifier) throws IOException { Map metadata = new HashMap<>(); try { String xmlMetadata = client.getMetadata(identifier.substring(identifier.indexOf(":") + 1)); - XmlMetadataTemplate template = new XmlMetadataTemplate(xmlMetadata); - metadata.put("datacite.creator", String.join("; ", template.getCreators())); - metadata.put("datacite.title", template.getTitle()); - metadata.put("datacite.publisher", template.getPublisher()); - metadata.put("datacite.publicationyear", template.getPublisherYear()); + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.parseDataCiteXML(xmlMetadata); + metadata.put("datacite.creator", String.join("; ", doiMetadata.getCreators())); + metadata.put("datacite.title", doiMetadata.getTitle()); + metadata.put("datacite.publisher", doiMetadata.getPublisher()); + metadata.put("datacite.publicationyear", doiMetadata.getPublisherYear()); } catch (RuntimeException e) { logger.log(Level.INFO, identifier, e); } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java index cd765933796..5630844fb32 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java @@ -17,6 +17,8 @@ import edu.harvard.iq.dataverse.FileMetadata; import 
edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import jakarta.json.JsonObject; + import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpStatus; @@ -124,6 +126,7 @@ public String modifyIdentifierTargetURL(DvObject dvObject) throws Exception { String identifier = getIdentifier(dvObject); try { Map metadata = getIdentifierMetadata(dvObject); + metadata.put("_target", getTargetUrl(dvObject)); doiDataCiteRegisterService.modifyIdentifier(identifier, metadata, dvObject); } catch (Exception e) { logger.log(Level.WARNING, "modifyMetadata failed", e); diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java index d185b0249b9..465b10ee407 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java @@ -21,6 +21,7 @@ import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.HttpEntity; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.BasicCredentialsProvider; import org.apache.http.impl.client.CloseableHttpClient; @@ -75,7 +76,12 @@ public String getUrl(String doi) { HttpGet httpGet = new HttpGet(this.url + "/doi/" + doi); try { HttpResponse response = httpClient.execute(httpGet,context); - String data = EntityUtils.toString(response.getEntity(), encoding); + HttpEntity entity = response.getEntity(); + String data = null; + + if(entity != null) { + data = EntityUtils.toString(entity, encoding); + } if (response.getStatusLine().getStatusCode() != 200) { throw new RuntimeException("Response code: " + response.getStatusLine().getStatusCode() + ", " + data); } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index fe8f1030f82..a8cf9ed519b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1128,7 +1128,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set childDatasetFields = dsf.getParentDatasetFieldCompoundValue().getChildDatasetFields(); for (DatasetField df : childDatasetFields) { - if(cvocManagedFieldMap.get(dsfType.getId()).contains(df.getDatasetFieldType().getName())) { + if(cvocManagedFieldMap.containsKey(dsfType.getId()) && cvocManagedFieldMap.get(dsfType.getId()).contains(df.getDatasetFieldType().getName())) { String solrManagedFieldSearchable = df.getDatasetFieldType().getSolrField().getNameSearchable(); // Try to get string values from externalvocabularyvalue but for a managed fields of the CVOCConf Set stringsForManagedField = datasetFieldService.getIndexableStringsByTermUri(val, cvocMap.get(dsfType.getId()), df.getDatasetFieldType().getName()); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 28676caeac5..ee93c49ad34 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -593,7 +593,7 @@ public SolrQueryResponse search( 
solrSearchResult.setDataverseAffiliation(dataverseAffiliation); solrSearchResult.setDataverseParentAlias(dataverseParentAlias); solrSearchResult.setDataverseParentName(dataverseParentName); - solrSearchResult.setImageUrl(thumbnailServiceWrapper.getDataverseCardImageAsBase64Url(solrSearchResult)); + solrSearchResult.setImageUrl(thumbnailServiceWrapper.getDataverseCardImageAsUrl(solrSearchResult)); /** * @todo Expose this API URL after "dvs" is changed to * "dataverses". Also, is an API token required for published @@ -652,7 +652,7 @@ public SolrQueryResponse search( } solrSearchResult.setHtmlUrl(baseUrl + "/dataset.xhtml?persistentId=" + parentGlobalId); solrSearchResult.setDownloadUrl(baseUrl + "/api/access/datafile/" + entityid); - solrSearchResult.setImageUrl(thumbnailServiceWrapper.getFileCardImageAsBase64Url(solrSearchResult)); + solrSearchResult.setImageUrl(thumbnailServiceWrapper.getFileCardImageAsUrl(solrSearchResult)); /** * @todo We are not yet setting the API URL for files because * not all files have metadata. Only subsettable files (those diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index 2bfda69247a..33e828e619d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -101,6 +101,10 @@ public enum FeatureFlags { * @since Dataverse 6.4 */ DISABLE_DATASET_THUMBNAIL_AUTOSELECT("disable-dataset-thumbnail-autoselect"), + /** + * Feature flag for the new Globus upload framework. + */ + GLOBUS_USE_EXPERIMENTAL_ASYNC_FRAMEWORK("globus-use-experimental-async-framework"), ; final String flag; diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 0acc5d3267f..d7eea970b8a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -51,6 +51,7 @@ public enum JvmSettings { DOCROOT_DIRECTORY(SCOPE_FILES, "docroot"), GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-cache-maxage"), + GLOBUS_TASK_MONITORING_SERVER(SCOPE_FILES, "globus-monitoring-server"), //STORAGE DRIVER SETTINGS SCOPE_DRIVER(SCOPE_FILES), diff --git a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java index 6eb3a8df0bc..a783b211b36 100644 --- a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java @@ -120,13 +120,13 @@ public void handleTimeout(jakarta.ejb.Timer timer) { } try { - logger.log(Level.INFO,"Handling timeout on " + InetAddress.getLocalHost().getCanonicalHostName()); + logger.log(Level.FINE,"Handling timeout on " + InetAddress.getLocalHost().getCanonicalHostName()); } catch (UnknownHostException ex) { Logger.getLogger(DataverseTimerServiceBean.class.getName()).log(Level.SEVERE, null, ex); } if (timer.getInfo() instanceof MotherTimerInfo) { - logger.info("Behold! I am the Master Timer, king of all timers! I'm here to create all the lesser timers!"); + logger.fine("Behold! I am the Master Timer, king of all timers! 
I'm here to create all the lesser timers!"); removeHarvestTimers(); for (HarvestingClient client : harvestingClientService.getAllHarvestingClients()) { createHarvestTimer(client); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java index 36c249de834..f81ce093815 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java @@ -99,6 +99,23 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti } catch (Exception e) { return BundleUtil.getStringFromBundle("notification.email.globus.uploadCompletedWithErrors.subject", rootDvNameAsList); } + case GLOBUSUPLOADREMOTEFAILURE: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.globus.uploadFailedRemotely.subject", dsNameAsList); + + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.uploadFailedRemotely.subject", rootDvNameAsList); + } + case GLOBUSUPLOADLOCALFAILURE: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.globus.uploadFailedLocally.subject", dsNameAsList); + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.uploadFailedLocally.subject", rootDvNameAsList); + } case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: try { DatasetVersion version = (DatasetVersion)objectOfNotification; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index f68957ad060..80e32184731 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -123,7 +123,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati if (!name.replaceFirst(",", "").contains(",")) { // contributorName=, String[] fullName = name.split(", "); - givenName = fullName[1]; + givenName = fullName.length > 1 ? 
fullName[1] : null; familyName = fullName[0]; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 5cc28e4b225..60967b13131 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -545,7 +545,7 @@ public boolean isTimerServer() { } return false; } - + public String getFooterCopyrightAndYear() { return BundleUtil.getStringFromBundle("footer.copyright", Arrays.asList(Year.now().getValue() + "")); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java index 586ca50b6fd..cec64ab95b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java @@ -24,7 +24,12 @@ public class XmlValidator { private static final Logger logger = Logger.getLogger(XmlValidator.class.getCanonicalName()); public static boolean validateXmlSchema(String fileToValidate, URL schemaToValidateAgainst) throws MalformedURLException, SAXException, IOException { + Source xmlFile = new StreamSource(new File(fileToValidate)); + return validateXmlSchema(xmlFile, schemaToValidateAgainst); + } + + public static boolean validateXmlSchema(Source xmlFile, URL schemaToValidateAgainst) throws MalformedURLException, SAXException, IOException { SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); Schema schema = schemaFactory.newSchema(schemaToValidateAgainst); Validator validator = schema.newValidator(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java new file mode 100644 index 00000000000..8ec426ead1f --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java @@ -0,0 +1,167 @@ +package edu.harvard.iq.dataverse.util.xml; + +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; + +import org.apache.commons.lang3.StringUtils; + +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.DvObjectContainer; +import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; +import edu.harvard.iq.dataverse.api.dto.FieldDTO; +import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; + +public class XmlWriterUtil { + + public static void writeFullElementList(XMLStreamWriter xmlw, String name, List values) throws XMLStreamException { + // For the simplest Elements we can + if (values != null && !values.isEmpty()) { + for (String value : values) { + xmlw.writeStartElement(name); + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + } + + public static void writeI18NElementList(XMLStreamWriter xmlw, String name, List values, + String fieldTypeName, String fieldTypeClass, String metadataBlockName, String lang) + throws XMLStreamException { + + if (values != null && !values.isEmpty()) { + Locale defaultLocale = Locale.getDefault(); + for (String value : values) { + if (fieldTypeClass.equals("controlledVocabulary")) { + String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, defaultLocale, false); + if (localeVal != null) { + + value = localeVal; + writeFullElement(xmlw, name, value, defaultLocale.getLanguage()); + } else { 
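+                        // No localized value found - fall back to the raw value without a lang attribute.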
+ writeFullElement(xmlw, name, value); + } + } else { + writeFullElement(xmlw, name, value); + } + } + if (lang != null && !defaultLocale.getLanguage().equals(lang)) { + // Get values in dataset metadata language + // Loop before testing fieldTypeClass to be ready for external CVV + for (String value : values) { + if (fieldTypeClass.equals("controlledVocabulary")) { + String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, new Locale(lang), false); + if (localeVal != null) { + writeFullElement(xmlw, name, localeVal, lang); + } + } + } + } + } + } + + public static void writeI18NElement(XMLStreamWriter xmlw, String name, DatasetVersionDTO version, + String fieldTypeName, String lang) throws XMLStreamException { + // Get the default value + String val = dto2Primitive(version, fieldTypeName); + Locale defaultLocale = Locale.getDefault(); + // Get the language-specific value for the default language + // A null value is returned if this is not a CVV field + String localeVal = dto2Primitive(version, fieldTypeName, defaultLocale); + String requestedLocaleVal = null; + if (lang != null && localeVal != null && !defaultLocale.getLanguage().equals(lang)) { + // Also get the value in the requested locale/lang if that's not the default + // lang. + requestedLocaleVal = dto2Primitive(version, fieldTypeName, new Locale(lang)); + } + // FWIW locale-specific vals will only be non-null for CVV values (at present) + if (localeVal == null && requestedLocaleVal == null) { + // Not CVV/no translations so print without lang tag + writeFullElement(xmlw, name, val); + } else { + // Print in either/both languages if we have values + if (localeVal != null) { + // Print the value for the default locale with it's own lang tag + writeFullElement(xmlw, name, localeVal, defaultLocale.getLanguage()); + } + // Also print in the request lang (i.e. 
the metadata language for the dataset) + // if a value exists, print it with a lang tag + if (requestedLocaleVal != null) { + writeFullElement(xmlw, name, requestedLocaleVal, lang); + } + } + } + + public static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { + for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { + MetadataBlockDTO value = entry.getValue(); + for (FieldDTO fieldDTO : value.getFields()) { + if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { + return fieldDTO.getSinglePrimitive(); + } + } + } + return null; + } + + public static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName, Locale locale) { + for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { + MetadataBlockDTO value = entry.getValue(); + for (FieldDTO fieldDTO : value.getFields()) { + if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { + String rawVal = fieldDTO.getSinglePrimitive(); + if (fieldDTO.isControlledVocabularyField()) { + return ControlledVocabularyValue.getLocaleStrValue(rawVal, datasetFieldTypeName, value.getName(), + locale, false); + } + } + } + } + return null; + } + + public static void writeFullElement(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { + writeFullElement(xmlw, name, value, null); + } + + public static void writeFullElement(XMLStreamWriter xmlw, String name, String value, String lang) throws XMLStreamException { + // For the simplest Elements we can + if (!StringUtils.isEmpty(value)) { + xmlw.writeStartElement(name); + if (DvObjectContainer.isMetadataLanguageSet(lang)) { + writeAttribute(xmlw, "xml:lang", lang); + } + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + + public static void writeAttribute(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { + if (!StringUtils.isEmpty(value)) { + xmlw.writeAttribute(name, value); + } + } + + + public static void writeFullElementWithAttributes(XMLStreamWriter xmlw, String name, Map attributeMap, String value) throws XMLStreamException { + if (!StringUtils.isEmpty(value)) { + xmlw.writeStartElement(name); + for (String key : attributeMap.keySet()) { + writeAttribute(xmlw, key, attributeMap.get(key)); + } + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + + public static boolean writeOpenTagIfNeeded(XMLStreamWriter xmlw, String tag, boolean element_check) throws XMLStreamException { + // check if the current tag isn't opened + if (!element_check) { + xmlw.writeStartElement(tag); // + } + return true; + } +} diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 6c771d8337b..5f3e4c33e0b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -263,11 +263,16 @@ notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId= notification.mail.globus.upload.completed=Globus transfer to Dataset {2} was successful. File(s) have been uploaded and verified.

{3}
notification.mail.globus.download.completed=Globus transfer of file(s) from the dataset {2} was successful. {3}
notification.mail.globus.upload.completedWithErrors=Globus transfer to Dataset {2} is complete with errors. {3}
+notification.mail.globus.upload.failedRemotely=Remote data transfer between Globus endpoints for Dataset {2} failed, as reported via Globus API. {3}
+notification.mail.globus.upload.failedLocally=Dataverse received a confirmation of a successful Globus data transfer for Dataset {2}, but failed to add the files to the dataset locally. {3}
notification.mail.globus.download.completedWithErrors=Globus transfer from the dataset {2} is complete with errors. {3}
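The mail variants above are plain bundle strings. Below is a minimal, illustrative sketch of how one of the new keys could be resolved into a message body; it is not part of this changeset. The argument order ({0} installation URL, {1} persistent identifier, {2} dataset title, {3} appended detail text) is an assumption inferred from the surrounding notification.mail.* messages, and all values are placeholders.

```java
import java.util.Arrays;

import edu.harvard.iq.dataverse.util.BundleUtil;

public class GlobusNotificationTextSketch {

    public static void main(String[] args) {
        // Assumed argument order, inferred from the messages above:
        // {0} installation URL, {1} persistent identifier, {2} dataset title, {3} appended detail text.
        String body = BundleUtil.getStringFromBundle(
                "notification.mail.globus.upload.failedRemotely",
                Arrays.asList("https://demo.dataverse.org",       // placeholder installation URL
                        "doi:10.5072/FK2/EXAMPLE",                // placeholder PID
                        "Example Dataset",                        // placeholder title
                        "Globus task abc123 reported a fault.")); // placeholder detail text
        System.out.println(body);
    }
}
```

The corresponding in-app notification keys follow the same pattern: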
notification.import.filesystem=Dataset {1} has been successfully uploaded and verified.
notification.globus.upload.completed=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified.
notification.globus.download.completed=Globus transfer from the dataset {1} was successful.
notification.globus.upload.completedWithErrors=Globus transfer to Dataset {1} is complete with errors.
+notification.globus.upload.failedRemotely=Remote data transfer between Globus collections for Dataset {2} failed, reported via Globus API. {3}
+notification.globus.upload.failedLocally=Dataverse received a confirmation of a successful Globus data transfer for Dataset {2}, but failed to add the files to the dataset locally. {3}
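Returning to the Java additions earlier in this diff: the new XmlWriterUtil helpers and the Source-based XmlValidator.validateXmlSchema overload are easiest to see together. The sketch below is illustrative only and not part of the changeset; the root element, field names, example DOI, and schema URL are made-up placeholders.

```java
import java.io.StringReader;
import java.io.StringWriter;
import java.net.URL;
import java.util.Map;

import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;

import edu.harvard.iq.dataverse.util.xml.XmlValidator;
import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil;

public class XmlWriterUtilSketch {

    public static void main(String[] args) throws Exception {
        StringWriter buffer = new StringWriter();
        XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(buffer);

        xmlw.writeStartDocument();
        xmlw.writeStartElement("codeBook"); // placeholder root element
        // writeFullElement skips empty values and only adds xml:lang when a metadata language is set
        XmlWriterUtil.writeFullElement(xmlw, "titl", "Example Dataset");
        // attributes go through the same empty-value-safe helper
        XmlWriterUtil.writeFullElementWithAttributes(xmlw, "IDNo",
                Map.of("agency", "DOI"), "doi:10.5072/FK2/EXAMPLE"); // placeholder PID
        xmlw.writeEndElement();
        xmlw.writeEndDocument();
        xmlw.flush();

        // The new overload accepts any javax.xml.transform.Source, so in-memory XML
        // can be validated without writing a temporary file first.
        Source xml = new StreamSource(new StringReader(buffer.toString()));
        URL schema = new URL("https://example.org/codebook.xsd"); // placeholder schema location
        System.out.println("valid? " + XmlValidator.validateXmlSchema(xml, schema));
    }
}
```

Centralizing the element writing in XmlWriterUtil keeps the empty-value checks and xml:lang handling in one place for any exporter that adopts it.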
+ notification.globus.download.completedWithErrors=Globus transfer from the dataset {1} is complete with errors. notification.import.checksum={1}, dataset had file checksums added via a batch job. removeNotification=Remove Notification @@ -833,8 +838,8 @@ notification.email.datasetWasMentioned.subject={0}: A Dataset Relationship has b notification.email.globus.uploadCompleted.subject={0}: Files uploaded successfully via Globus and verified notification.email.globus.downloadCompleted.subject={0}: Files downloaded successfully via Globus notification.email.globus.uploadCompletedWithErrors.subject={0}: Uploaded files via Globus with errors -notification.email.globus.downloadCompletedWithErrors.subject={0}: Downloaded files via Globus with errors - +notification.email.globus.uploadFailedRemotely.subject={0}: Failed to upload files via Globus +notification.email.globus.uploadFailedLocally.subject={0}: Failed to add files uploaded via Globus to dataset # dataverse.xhtml dataverse.name=Dataverse Name dataverse.name.title=The project, department, university, professor, or journal this dataverse will contain data for. @@ -1779,6 +1784,7 @@ file.fromWebloaderAfterCreate.tip=An option to upload a folder of files will be file.fromWebloader=Upload a Folder file.api.httpDisabled=File upload via HTTP is not available for this installation of Dataverse. +file.api.globusUploadDisabled=File upload via Globus is not available for this installation of Dataverse. file.api.alreadyHasPackageFile=File upload via HTTP disabled since this dataset already contains a package file. file.replace.original=Original File file.editFiles=Edit Files @@ -2698,7 +2704,7 @@ files.api.fileNotFound=File could not be found. datasets.api.updatePIDMetadata.failure.dataset.must.be.released=Modify Registration Metadata must be run on a published dataset. datasets.api.updatePIDMetadata.auth.mustBeSuperUser=Forbidden. You must be a superuser. datasets.api.updatePIDMetadata.success.for.single.dataset=Dataset {0} PID Metadata updated successfully. -datasets.api.updatePIDMetadata.success.for.update.all=All Dataset PID Metadata update completed successfully. +datasets.api.updatePIDMetadata.success.for.update.all=All Dataset PID Metadata update completed. See log for any issues. datasets.api.moveDataset.error.targetDataverseNotFound=Target dataverse not found. datasets.api.moveDataset.error.suggestForce=Use the query parameter forceMove=true to complete the move. datasets.api.moveDataset.success=Dataset moved successfully. diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index 1e4b251b084..5899523da67 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -31,6 +31,7 @@ datasetfieldtype.topicClassValue.title=Term datasetfieldtype.topicClassVocab.title=Controlled Vocabulary Name datasetfieldtype.topicClassVocabURI.title=Controlled Vocabulary URL datasetfieldtype.publication.title=Related Publication +datasetfieldtype.publicationRelationType.title=Relation Type datasetfieldtype.publicationCitation.title=Citation datasetfieldtype.publicationIDType.title=Identifier Type datasetfieldtype.publicationIDNumber.title=Identifier @@ -110,6 +111,7 @@ datasetfieldtype.topicClassValue.description=A topic or subject term datasetfieldtype.topicClassVocab.description=The controlled vocabulary used for the keyword term (e.g. 
LCSH, MeSH) datasetfieldtype.topicClassVocabURI.description=The URL where one can access information about the term's controlled vocabulary datasetfieldtype.publication.description=The article or report that uses the data in the Dataset. The full list of related publications will be displayed on the metadata tab +datasetfieldtype.publicationRelationType.description=The nature of the relationship between this Dataset and the related publication datasetfieldtype.publicationCitation.description=The full bibliographic citation for the related publication datasetfieldtype.publicationIDType.description=The type of identifier that uniquely identifies a related publication datasetfieldtype.publicationIDNumber.description=The identifier for a related publication @@ -189,6 +191,7 @@ datasetfieldtype.topicClassValue.watermark= datasetfieldtype.topicClassVocab.watermark= datasetfieldtype.topicClassVocabURI.watermark=https:// datasetfieldtype.publication.watermark= +datasetfieldtype.publicationRelationType.watermark= datasetfieldtype.publicationCitation.watermark= datasetfieldtype.publicationIDType.watermark= datasetfieldtype.publicationIDNumber.watermark= @@ -271,6 +274,12 @@ controlledvocabulary.publicationIDType.upc=upc controlledvocabulary.publicationIDType.url=url controlledvocabulary.publicationIDType.urn=urn controlledvocabulary.publicationIDType.dash-nrs=DASH-NRS +controlledvocabulary.publicationRelationType.iscitedby=Is Cited By +controlledvocabulary.publicationRelationType.cites=Cites +controlledvocabulary.publicationRelationType.issupplementto=Is Supplement To +controlledvocabulary.publicationRelationType.issupplementedby=Is Supplemented By +controlledvocabulary.publicationRelationType.isreferencedby=Is Referenced By +controlledvocabulary.publicationRelationType.references=References controlledvocabulary.contributorType.data_collector=Data Collector controlledvocabulary.contributorType.data_curator=Data Curator controlledvocabulary.contributorType.data_manager=Data Manager @@ -314,7 +323,7 @@ controlledvocabulary.language.abidji=Abidji controlledvocabulary.language.abinomn=Abinomn controlledvocabulary.language.abipon=Abipon controlledvocabulary.language.abishira=Abishira -controlledvocabulary.language.abkhaz,_abkhazian=Abkhaz, Abkhazian +controlledvocabulary.language.abkhaz=Abkhaz, Abkhazian controlledvocabulary.language.abom=Abom controlledvocabulary.language.abon=Abon controlledvocabulary.language.abron=Abron @@ -451,7 +460,7 @@ controlledvocabulary.language.akurio=Akurio controlledvocabulary.language.akwa=Akwa controlledvocabulary.language.akyaung_ari_naga=Akyaung Ari Naga controlledvocabulary.language.al-sayyid_bedouin_sign_language=Al-Sayyid Bedouin Sign Language -controlledvocabulary.language.alaba-k’abeena=Alaba-K’abeena +controlledvocabulary.language.alaba-k'abeena=Alaba-K’abeena controlledvocabulary.language.alabama=Alabama controlledvocabulary.language.alabat_island_agta=Alabat Island Agta controlledvocabulary.language.alacatlatzala_mixtec=Alacatlatzala Mixtec @@ -1460,7 +1469,7 @@ controlledvocabulary.language.carrier=Carrier controlledvocabulary.language.cashibo-cacataibo=Cashibo-Cacataibo controlledvocabulary.language.cashinahua=Cashinahua controlledvocabulary.language.casiguran_dumagat_agta=Casiguran Dumagat Agta -controlledvocabulary.language.castilian,_spanish=Castilian, Spanish +controlledvocabulary.language.spanish,_castilian=Castilian, Spanish controlledvocabulary.language.casuarina_coast_asmat=Casuarina Coast Asmat 
controlledvocabulary.language.catalan_sign_language=Catalan Sign Language controlledvocabulary.language.catalan,_valencian=Catalan, Valencian @@ -1639,7 +1648,7 @@ controlledvocabulary.language.chulym=Chulym controlledvocabulary.language.chumburung=Chumburung controlledvocabulary.language.chung=Chung controlledvocabulary.language.churahi=Churahi -controlledvocabulary.language.church_slavonic,_church_slavic,_old_church_slavonic,_old_bulgarian=Church Slavonic, Church Slavic, Old Church Slavonic, Old Bulgarian +controlledvocabulary.language.old_church_slavonic=Church Slavonic, Church Slavic, Old Church Slavonic, Old Bulgarian controlledvocabulary.language.chut=Chut controlledvocabulary.language.chuukese=Chuukese controlledvocabulary.language.chuvantsy=Chuvantsy @@ -2484,7 +2493,7 @@ controlledvocabulary.language.guana_(paraguay)=Guana (Paraguay) controlledvocabulary.language.guanano=Guanano controlledvocabulary.language.guanche=Guanche controlledvocabulary.language.guanyinqiao=Guanyinqiao -controlledvocabulary.language.guarani,_guarani=Guarani, Guaraní +controlledvocabulary.language.guarani=Guarani, Guaraní controlledvocabulary.language.guarayu=Guarayu controlledvocabulary.language.guarequena=Guarequena controlledvocabulary.language.guatemalan_sign_language=Guatemalan Sign Language @@ -2565,7 +2574,7 @@ controlledvocabulary.language.gwandara=Gwandara controlledvocabulary.language.gweda=Gweda controlledvocabulary.language.gweno=Gweno controlledvocabulary.language.gwere=Gwere -controlledvocabulary.language.gwichʼin=Gwichʼin +controlledvocabulary.language.gwich'in=Gwichʼin controlledvocabulary.language.gyalsumdo=Gyalsumdo controlledvocabulary.language.gyele=Gyele controlledvocabulary.language.gyem=Gyem @@ -2634,7 +2643,7 @@ controlledvocabulary.language.hawaiian=Hawaiian controlledvocabulary.language.haya=Haya controlledvocabulary.language.hazaragi=Hazaragi controlledvocabulary.language.hdi=Hdi -controlledvocabulary.language.hebrew_(modern),_hebrew=Hebrew (modern), Hebrew +controlledvocabulary.language.hebrew_(modern)=Hebrew (modern), Hebrew controlledvocabulary.language.hehe=Hehe controlledvocabulary.language.heiban=Heiban controlledvocabulary.language.heiltsuk=Heiltsuk @@ -2852,7 +2861,7 @@ controlledvocabulary.language.inonhan=Inonhan controlledvocabulary.language.inor=Inor controlledvocabulary.language.inpui_naga=Inpui Naga controlledvocabulary.language.interglossa=Interglossa -controlledvocabulary.language.interlingua,_interlingua_(international_auxiliary_language_association)=Interlingua, Interlingua (International Auxiliary Language Association) +controlledvocabulary.language.interlingua=Interlingua, Interlingua (International Auxiliary Language Association) controlledvocabulary.language.interlingue=Interlingue controlledvocabulary.language.international_sign=International Sign controlledvocabulary.language.interslavic=Interslavic @@ -3042,7 +3051,7 @@ controlledvocabulary.language.juruna=Jurúna controlledvocabulary.language.jutish=Jutish controlledvocabulary.language.juwal=Juwal controlledvocabulary.language.juxtlahuaca_mixtec=Juxtlahuaca Mixtec -controlledvocabulary.language.juǀʼhoan=Juǀʼhoan +controlledvocabulary.language.juǀ'hoan=Juǀʼhoan controlledvocabulary.language.jwira-pepesa=Jwira-Pepesa controlledvocabulary.language.jerriais=Jèrriais controlledvocabulary.language.juma=Júma @@ -3462,7 +3471,7 @@ controlledvocabulary.language.kipsigis=Kipsigis controlledvocabulary.language.kiput=Kiput controlledvocabulary.language.kir-balar=Kir-Balar controlledvocabulary.language.kire=Kire 
-controlledvocabulary.language.kirghiz,_kyrgyz=Kirghiz, Kyrgyz +controlledvocabulary.language.kirghiz=Kirghiz, Kyrgyz controlledvocabulary.language.kirike=Kirike controlledvocabulary.language.kirikiri=Kirikiri controlledvocabulary.language.kirmanjki_(individual_language)=Kirmanjki (individual language) @@ -4079,7 +4088,7 @@ controlledvocabulary.language.logo=Logo controlledvocabulary.language.logol=Logol controlledvocabulary.language.logooli=Logooli controlledvocabulary.language.logorik=Logorik -controlledvocabulary.language.logudorese_sardinian,_croatian=Logudorese Sardinian, Croatian +controlledvocabulary.language.croatian=Logudorese Sardinian, Croatian controlledvocabulary.language.lohorung=Lohorung controlledvocabulary.language.loja_highland_quichua=Loja Highland Quichua controlledvocabulary.language.lojban=Lojban @@ -4311,7 +4320,7 @@ controlledvocabulary.language.malawi_lomwe=Malawi Lomwe controlledvocabulary.language.malawi_sena=Malawi Sena controlledvocabulary.language.malawian_sign_language=Malawian Sign Language controlledvocabulary.language.malay_(individual_language)=Malay (individual language) -controlledvocabulary.language.malay,_malay_(macrolanguage)=Malay, Malay (macrolanguage) +controlledvocabulary.language.malay=Malay, Malay (macrolanguage) controlledvocabulary.language.malayalam=Malayalam controlledvocabulary.language.malayic_dayak=Malayic Dayak controlledvocabulary.language.malaynon=Malaynon @@ -4442,7 +4451,7 @@ controlledvocabulary.language.marama=Marama controlledvocabulary.language.maranao=Maranao controlledvocabulary.language.maranunggu=Maranunggu controlledvocabulary.language.mararit=Mararit -controlledvocabulary.language.marathi,_marathi_(marathi)=Marathi, Marathi (Marāṭhī) +controlledvocabulary.language.marathi_(marathi)=Marathi, Marathi (Marāṭhī) controlledvocabulary.language.marau=Marau controlledvocabulary.language.marba=Marba controlledvocabulary.language.mardin_sign_language=Mardin Sign Language @@ -4764,7 +4773,7 @@ controlledvocabulary.language.mochica=Mochica controlledvocabulary.language.mocho=Mocho controlledvocabulary.language.mocovi=Mocoví controlledvocabulary.language.modang=Modang -controlledvocabulary.language.modern_greek_(1453-),_greek_(modern)=Modern Greek (1453-), Greek (modern) +controlledvocabulary.language.greek_(modern)=Modern Greek (1453-), Greek (modern) controlledvocabulary.language.modole=Modole controlledvocabulary.language.moere=Moere controlledvocabulary.language.mofu-gudur=Mofu-Gudur @@ -4971,7 +4980,7 @@ controlledvocabulary.language.mocheno=Mócheno controlledvocabulary.language.mun_chin=Mün Chin controlledvocabulary.language.mundu=Mündü controlledvocabulary.language.maharastri_prakrit=Māhārāṣṭri Prākrit -controlledvocabulary.language.maori,_maori=Māori, Maori +controlledvocabulary.language.maori=Māori, Maori controlledvocabulary.language.n'ko=N'Ko controlledvocabulary.language.na=Na controlledvocabulary.language.na-kara=Na-kara @@ -5126,7 +5135,7 @@ controlledvocabulary.language.neo=Neo controlledvocabulary.language.neo-hittite=Neo-Hittite controlledvocabulary.language.nepalese_sign_language=Nepalese Sign Language controlledvocabulary.language.nepali_(individual_language)=Nepali (individual language) -controlledvocabulary.language.nepali_(macrolanguage),_nepali=Nepali (macrolanguage), Nepali +controlledvocabulary.language.nepali=Nepali (macrolanguage), Nepali controlledvocabulary.language.nete=Nete controlledvocabulary.language.new_caledonian_javanese=New Caledonian Javanese 
controlledvocabulary.language.new_zealand_sign_language=New Zealand Sign Language @@ -5359,7 +5368,7 @@ controlledvocabulary.language.northern_luri=Northern Luri controlledvocabulary.language.northern_mashan_hmong=Northern Mashan Hmong controlledvocabulary.language.northern_muji=Northern Muji controlledvocabulary.language.northern_nago=Northern Nago -controlledvocabulary.language.northern_ndebele,_north_ndebele=Northern Ndebele, North Ndebele +controlledvocabulary.language.northern_ndebele=Northern Ndebele, North Ndebele controlledvocabulary.language.northern_ngbandi=Northern Ngbandi controlledvocabulary.language.northern_nisu=Northern Nisu controlledvocabulary.language.northern_nuni=Northern Nuni @@ -5516,7 +5525,7 @@ controlledvocabulary.language.obokuitai=Obokuitai controlledvocabulary.language.obolo=Obolo controlledvocabulary.language.obulom=Obulom controlledvocabulary.language.ocaina=Ocaina -controlledvocabulary.language.occitan_(post_1500),_occitan=Occitan (post 1500), Occitan +controlledvocabulary.language.occitan=Occitan (post 1500), Occitan controlledvocabulary.language.ocotepec_mixtec=Ocotepec Mixtec controlledvocabulary.language.ocotlan_zapotec=Ocotlán Zapotec controlledvocabulary.language.od=Od @@ -5626,7 +5635,7 @@ controlledvocabulary.language.orang_seletar=Orang Seletar controlledvocabulary.language.oraon_sadri=Oraon Sadri controlledvocabulary.language.orejon=Orejón controlledvocabulary.language.oring=Oring -controlledvocabulary.language.oriya,_oriya_(macrolanguage)=Oriya, Oriya (macrolanguage) +controlledvocabulary.language.oriya=Oriya, Oriya (macrolanguage) controlledvocabulary.language.orizaba_nahuatl=Orizaba Nahuatl controlledvocabulary.language.orma=Orma controlledvocabulary.language.ormu=Ormu @@ -5828,7 +5837,7 @@ controlledvocabulary.language.peranakan_indonesian=Peranakan Indonesian controlledvocabulary.language.pere=Pere controlledvocabulary.language.peripheral_mongolian=Peripheral Mongolian controlledvocabulary.language.pero=Pero -controlledvocabulary.language.persian,_persian_(farsi)=Persian, Persian (Farsi) +controlledvocabulary.language.persian_(farsi)=Persian, Persian (Farsi) controlledvocabulary.language.peruvian_sign_language=Peruvian Sign Language controlledvocabulary.language.pesse=Pesse controlledvocabulary.language.petapa_zapotec=Petapa Zapotec @@ -6014,7 +6023,7 @@ controlledvocabulary.language.pari=Päri controlledvocabulary.language.pemono=Pémono controlledvocabulary.language.peve=Pévé controlledvocabulary.language.pokoot=Pökoot -controlledvocabulary.language.pali,_pali=Pāli, Pali +controlledvocabulary.language.pali=Pāli, Pali controlledvocabulary.language.q'anjob'al=Q'anjob'al controlledvocabulary.language.qabiao=Qabiao controlledvocabulary.language.qaqet=Qaqet @@ -6157,7 +6166,7 @@ controlledvocabulary.language.rukai=Rukai controlledvocabulary.language.ruma=Ruma controlledvocabulary.language.rumai_palaung=Rumai Palaung controlledvocabulary.language.rumu=Rumu -controlledvocabulary.language.rundi,_kirundi=Rundi, Kirundi +controlledvocabulary.language.kirundi=Rundi, Kirundi controlledvocabulary.language.runga=Runga controlledvocabulary.language.rungtu_chin=Rungtu Chin controlledvocabulary.language.rungus=Rungus @@ -6296,7 +6305,7 @@ controlledvocabulary.language.sanie=Sanie controlledvocabulary.language.saniyo-hiyewe=Saniyo-Hiyewe controlledvocabulary.language.sankaran_maninka=Sankaran Maninka controlledvocabulary.language.sansi=Sansi -controlledvocabulary.language.sanskrit_(samskrta),_sanskrit=Sanskrit (Saṁskṛta), Sanskrit 
+controlledvocabulary.language.sanskrit_(samskrta)=Sanskrit (Saṁskṛta), Sanskrit controlledvocabulary.language.santa_ana_de_tusi_pasco_quechua=Santa Ana de Tusi Pasco Quechua controlledvocabulary.language.santa_catarina_albarradas_zapotec=Santa Catarina Albarradas Zapotec controlledvocabulary.language.santa_ines_ahuatempan_popoloca=Santa Inés Ahuatempan Popoloca @@ -6513,7 +6522,7 @@ controlledvocabulary.language.siberian_tatar=Siberian Tatar controlledvocabulary.language.sibu_melanau=Sibu Melanau controlledvocabulary.language.sicanian=Sicanian controlledvocabulary.language.sicel=Sicel -controlledvocabulary.language.sichuan_yi,_nuosu=Sichuan Yi, Nuosu +controlledvocabulary.language.nuosu=Sichuan Yi, Nuosu controlledvocabulary.language.sicilian=Sicilian controlledvocabulary.language.siculo_arabic=Siculo Arabic controlledvocabulary.language.sidamo=Sidamo @@ -6592,7 +6601,7 @@ controlledvocabulary.language.slave_(athapascan)=Slave (Athapascan) controlledvocabulary.language.slavomolisano=Slavomolisano controlledvocabulary.language.slovak=Slovak controlledvocabulary.language.slovakian_sign_language=Slovakian Sign Language -controlledvocabulary.language.slovenian,_slovene=Slovenian, Slovene +controlledvocabulary.language.slovenian=Slovenian, Slovene controlledvocabulary.language.small_flowery_miao=Small Flowery Miao controlledvocabulary.language.smarky_kanum=Smärky Kanum controlledvocabulary.language.snohomish=Snohomish @@ -6651,7 +6660,7 @@ controlledvocabulary.language.south_giziga=South Giziga controlledvocabulary.language.south_lembata=South Lembata controlledvocabulary.language.south_marquesan=South Marquesan controlledvocabulary.language.south_muyu=South Muyu -controlledvocabulary.language.south_ndebele,_southern_ndebele=South Ndebele, Southern Ndebele +controlledvocabulary.language.southern_ndebele=South Ndebele, Southern Ndebele controlledvocabulary.language.south_nuaulu=South Nuaulu controlledvocabulary.language.south_picene=South Picene controlledvocabulary.language.south_slavey=South Slavey @@ -6834,7 +6843,7 @@ controlledvocabulary.language.suya=Suyá controlledvocabulary.language.svan=Svan controlledvocabulary.language.swabian=Swabian controlledvocabulary.language.swahili_(individual_language)=Swahili (individual language) -controlledvocabulary.language.swahili,_swahili_(macrolanguage)=Swahili, Swahili (macrolanguage) +controlledvocabulary.language.swahili=Swahili, Swahili (macrolanguage) controlledvocabulary.language.swampy_cree=Swampy Cree controlledvocabulary.language.swati=Swati controlledvocabulary.language.swedish=Swedish @@ -7697,7 +7706,7 @@ controlledvocabulary.language.wapan=Wapan controlledvocabulary.language.wapishana=Wapishana controlledvocabulary.language.wappo=Wappo controlledvocabulary.language.war-jaintia=War-Jaintia -controlledvocabulary.language.wara=Wara +controlledvocabulary.language.wara=Wara Wára controlledvocabulary.language.warao=Warao controlledvocabulary.language.waray_(australia)=Waray (Australia) controlledvocabulary.language.waray_(philippines)=Waray (Philippines) @@ -8211,3 +8220,4 @@ controlledvocabulary.language.ǁxegwi=ǁXegwi controlledvocabulary.language.ǂhua=ǂHua controlledvocabulary.language.ǂungkue=ǂUngkue controlledvocabulary.language.ǃxoo=ǃXóõ +controlledvocabulary.language.not_applicable=Not Applicable diff --git a/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml b/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml deleted file mode 100644 index 
150a098834e..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - ${identifier} - ${creators} - - ${title} - - ${publisher} - ${publisherYear} - - ${relatedIdentifiers} - - ${description} - - {$contributors} - diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index 255e63fbfc2..03173faf989 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -12,7 +12,7 @@ or !empty termsOfUseAndAccess.originalArchive or !empty termsOfUseAndAccess.availabilityStatus or !empty termsOfUseAndAccess.contactForAccess or !empty termsOfUseAndAccess.sizeOfCollection or !empty termsOfUseAndAccess.studyCompletion - or termsOfUseAndAccess.fileAccessRequest}"/> + }"/>
@@ -660,8 +661,10 @@ data-toggle="tooltip" data-placement="auto right" data-original-title="#{DatasetPage.datasetVersionUI.datasetRelPublications.get(0).description}">