From 0fc800d591c5744adfe3528ad990e76463111b1a Mon Sep 17 00:00:00 2001
From: JKamlah
Date: Thu, 8 Feb 2024 11:52:29 +0100
Subject: [PATCH] Update gitmodules and workflow and add docs folder.

---
 .github/workflows/deploy.yml |  82 +++++++++
 .github/workflows/gtrepo.yml | 343 -----------------------------------
 .gitmodules                  |   6 -
 docs/index.md                |   5 +
 4 files changed, 87 insertions(+), 349 deletions(-)
 create mode 100644 .github/workflows/deploy.yml
 delete mode 100644 .github/workflows/gtrepo.yml
 create mode 100644 docs/index.md

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
new file mode 100644
index 0000000..24789b6
--- /dev/null
+++ b/.github/workflows/deploy.yml
@@ -0,0 +1,82 @@
+name: catalogue-repo
+on:
+  push:
+    # Filter patterns are globs, not regular expressions: a `{0,1}` quantifier
+    # is not supported, so each accepted tag prefix gets its own pattern.
+    tags:
+      - 'v[0-9]+.[0-9]+.[0-9]+'
+      - 'V[0-9]+.[0-9]+.[0-9]+'
+      - '[0-9]+.[0-9]+.[0-9]+'
+  workflow_dispatch:
+
+jobs:
+  gh-pages:
+    name: pages and readme
+    runs-on: ubuntu-latest
+
+    permissions:
+      checks: write
+      contents: write
+
+    steps:
+      - name: Git checkout
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      # Clone the catalogue scripts and the gt-guidelines repository into the
+      # workspace, then install jq, Python 3 and Poetry from apt.
+      - name: Install dependencies
+        run: |
+          git clone https://github.com/jkamlah/ocr-model-repo-scripts.git
+          git clone https://github.com/tboenig/gt-guidelines.git
+          sudo apt-get update
+          sudo apt-get install -y jq python3 python3-poetry
+
+      # Create the Poetry environment. Nothing is activated here: every `run`
+      # step starts a fresh shell, so later steps go through `poetry run`.
+      - name: Setup Python and dependencies
+        run: |
+          cd ocr-model-repo-scripts/
+          poetry install
+
+      # Run the catalogue.py subcommands against the repository root (`../`),
+      # then move index.md and copy the CSS/JS/_config.yml assets into docs/.
+      - name: Process metadata and update documentation
+        env:
+          # Passed through the environment so the values are never expanded
+          # into the script text itself.
+          REPO_NAME: ${{ github.event.repository.name }}
+          PAGES_URL: https://${{ github.repository_owner }}.github.io/${{ github.event.repository.name }}
+        run: |
+          cd ocr-model-repo-scripts/
+          poetry run python3 scripts/catalogue.py yaml2json ../
+          poetry run python3 scripts/catalogue.py metadata ../
+          poetry run python3 scripts/catalogue.py index ../
+          poetry run python3 scripts/catalogue.py readme ../ --title "$REPO_NAME" --gh-url "$PAGES_URL"
+          mv index.md ../docs/
+          cp scripts/table_hide.css scripts/levelparser.css scripts/lang.js scripts/_config.yml ../docs/
+
+      # Stage the generated paths first and compare the index with HEAD:
+      # a plain `git diff` would miss newly created (untracked) files.
+      - name: Check for changes
+        id: changes_check
+        run: |
+          git add README.md docs/ data/
+          if git diff --cached --quiet; then
+            echo "No changes detected."
+            echo "CHANGES_DETECTED=false" >> "$GITHUB_ENV"
+          else
+            echo "Changes detected."
+            echo "CHANGES_DETECTED=true" >> "$GITHUB_ENV"
+          fi
+
+      # Commit what the previous step staged. `HEAD:main` is a plain
+      # (non-forced) push, so it is rejected unless main can fast-forward.
+      - name: Commit and Push changes
+        if: env.CHANGES_DETECTED == 'true'
+        run: |
+          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git config --local user.name "github-actions[bot]"
+          git commit -m "[Automatic] Update README and METADATA"
+          git push origin HEAD:main
diff --git a/.github/workflows/gtrepo.yml b/.github/workflows/gtrepo.yml deleted file mode 100644 index 6ea7645..0000000 --- a/.github/workflows/gtrepo.yml +++ /dev/null @@ -1,343 +0,0 @@ -name: gtrepo -on: - push: - tags: - - 'v[0-9]+.[0-9]+.[0-9]+' - - - - workflow_dispatch: - - - -jobs: - job1: - name: uniTest - runs-on: ubuntu-latest - permissions: - checks: write - contents: write - # Map a step output to a job output - outputs: - output1: ${{ steps.step4.outputs.test }} - output2: ${{ steps.step4.outputs.test2 }} - - steps: - - name: Git checkout - id: step1 - uses: actions/checkout@v3 - - # Installation Styles and Saxon - - - name: install analyse xsl-styles - id: step2 - run: | - git clone https://github.com/tboenig/gt-repo-scripts.git - mv gt-repo-scripts/scripts scripts/ - rm -r gt-repo-scripts - - - name: Download and install saxon - id: step3 - run: | - wget https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE12-3/SaxonHE12-3J.zip - unzip SaxonHE12-3J.zip - - - # Installation and
Directories - - - name: make gh-pages_out - run: mkdir ghout - - - - name: Get SDK Version from config - id: lookupSdkVersion - uses: mikefarah/yq@master - with: - cmd: yq -o=json METADATA.yml > METADATA.json - - - name: PathTest - run: | - java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_unitTest.xsl \ - output=unitTest1 \ - -s:scripts/gt-overview_unitTest.xsl -o:ghout/pathtest.md - shell: bash - - # Test GT-Page Folder Repo Structure - - - name: Empty - id: step4 - run: | - [ -s ghout/pathtest.md ] || echo "test=empty" >> $GITHUB_OUTPUT - [ ! -s ghout/pathtest.md ] || echo "test2=full" >> $GITHUB_OUTPUT - - # Error Logview - - - name: uniTestError - id: step5 - if: ${{steps.step4.outputs.test2 == 'full'}} - run: | - less ghout/pathtest.md - - - job2: - name: analyse_and_makebagit - needs: job1 - if: ${{needs.job1.outputs.output1 == 'empty'}} - runs-on: ubuntu-latest - permissions: - checks: write - contents: write - - - steps: - - - name: Git checkout - uses: actions/checkout@v3 - - # Installation Styles - - - name: install analyse xsl-styles - run: | - git clone https://github.com/tboenig/gt-repo-scripts.git - mv gt-repo-scripts/scripts scripts/ - rm -r gt-repo-scripts - - # Installation GT-Labelling Documentation - - - - name: install labeling - run: | - git clone https://github.com/tboenig/gt-guidelines.git - - - # Installation and Directories - - - name: install jq - run: sudo apt-get install jq - - - - name: Download and install saxon - run: | - wget https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE12-3/SaxonHE12-3J.zip - unzip SaxonHE12-3J.zip - - - name: make metadata_out - run: mkdir metadata_out - - - name: make ocrdzip_out - run: mkdir ocrdzip_out - - - name: make gh-pages_out - run: mkdir ghout - - - name: make readme_out - run: sh scripts/readmefolder.sh - - - - name: readme.xml file - run: sh scripts/xreadme.sh - - - - # Transformation and analyzing - - - name: Get SDK Version from config - id: lookupSdkVersion - uses: 
mikefarah/yq@master - with: - cmd: yq -o=json METADATA.yml > METADATA.json - - - name: transform METADATA and make GT-Overview - run: | - java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \ - output=METADATA repoBase=$GITHUB_REF_NAME repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=$GITHUB_REF_NAME \ - -s:scripts/gt-overview_metadata.xsl -o:ghout/metadata.md - shell: bash - - - name: make Compressed table view - run: | - java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \ - output=TABLE repoBase=$GITHUB_REF_NAME repoName=$GITHUB_REPOSITORY \ - -s:scripts/gt-overview_metadata.xsl -o:ghout/table.md - shell: bash - - - name: detailed table view - run: | - java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \ - output=OVERVIEW repoBase=$GITHUB_REF_Name repoName=$GITHUB_REPOSITORY \ - -s:scripts/gt-overview_metadata.xsl -o:ghout/overview.md - shell: bash - - - name: leveling the volume and documents - run: | - java -jar saxon-he-12.3.jar -xsl:scripts/gt-level_parser.xsl \ - repoName=$GITHUB_REPOSITORY \ - -s:scripts/gt-level_parser.xsl -o:ghout/overview-level.md - shell: bash - - - name: generate mets.sh - run: | - java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \ - output=METS repoBase=$GITHUB_REF_Name repoName=$GITHUB_REPOSITORY \ - -s:scripts/gt-overview_metadata.xsl -o:scripts/mets.sh - shell: bash - - - name: generate Metadata JSON file - run: | - java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \ - output=METAJSON repoBase=$GITHUB_REF_Name repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=$GITHUB_REF_NAME \ - -s:scripts/gt-overview_metadata.xsl -o:metadata_out/metadata_l.json - shell: bash - - - - name: format json file and copy to gh branch - run: | - jq '.' 
metadata_out/metadata_l.json > metadata_out/metadata.json - cp metadata_out/metadata.json ghout/ - rm metadata_out/metadata_l.json - - - - name: generate README - run: | - java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \ - output=README repoBase=$GITHUB_REF_Name repoName=$GITHUB_REPOSITORY \ - -s:scripts/gt-overview_metadata.xsl -o:README.md - shell: bash - - - name: generate METS Volume File - run: | - java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \ - output=METSvolume repoBase=$GITHUB_REF_Name repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=$GITHUB_REF_NAME \ - -s:scripts/gt-overview_metadata.xsl -o:metadata_out/mets.xml - shell: bash - - - name: generate release download List - run: | - java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \ - output=download repoBase=$GITHUB_REF_Name repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=$GITHUB_REF_NAME \ - -s:scripts/gt-overview_metadata.xsl -o:ghout/download.txt - shell: bash - - - name: delete fileGrp DEFAULT - run: | - java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \ - output=METSdefault repoBase=$GITHUB_REF_Name repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=$GITHUB_REF_NAME \ - -s:scripts/gt-overview_metadata.xsl - shell: bash - - - name: generate CITATION.cff - run: | - java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \ - output=CITATION repoBase=$GITHUB_REF_Name repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=$GITHUB_REF_NAME \ - -s:scripts/gt-overview_metadata.xsl -o:rawCITATION.cff - shell: bash - - - name: formating CITATION.cff - id: lookupSdkVersion2 - uses: mikefarah/yq@master - with: - cmd: | - yq -I4 rawCITATION.cff > CITATION.cff - rm rawCITATION.cff - - - - name: Index-link - run: | - cd ghout - ln -s metadata.md index.md - - - # Mets handling, Install OCR-D and Bagit - - - name: del invalidMets - run: sh -ex 
scripts/data_mets.sh - shell: bash - - - - name: install ocrd, make validMets and bagit - run: | - sudo apt-get install -y python3 imagemagick libgeos-dev - python3 -m venv venv - source venv/bin/activate - pip install -U pip 'setuptools>=61' - pip install ocrd - ocrd --version - - - - name: make validMets - run: | - source venv/bin/activate - sh -ex scripts/mets.sh - - - - name: make bagit - run: | - source venv/bin/activate - sh scripts/data_structure.sh - - - - name: copy css styles, js javascript and yml files to ghout - run: | - cp scripts/table_hide.css ghout/ - cp scripts/levelparser.css ghout/ - cp scripts/lang.js ghout/ - cp scripts/_config.yml ghout/ - - - - name: archive the metadata files from metadata_out folder - uses: thedoctor0/zip-release@master - with: - filename: metadata-v${{ github.run_number }}.zip - path: 'metadata_out' - - - name: copy metadata.zip to ocrdzip_out - run: | - cp metadata-v${{ github.run_number }}.zip ocrdzip_out/ - - - - name: Upload Release - uses: ncipollo/release-action@v1 - with: - artifacts: 'ocrdzip_out/*.zip' - artifactContentType: application/zip - name: Release ${{ github.run_number }}_${{ github.ref_name }} - body: | -
-
Version:
-
${{ github.ref_name }}
-
Info:
-
- To make use of Ground Truth, please download the provided zip files.
- The 'ocrd.zip' files are ocr-d-bagit files.
- The 'metadata-v${{ github.run_number }}.zip' file contains metadata for the Ground Truth corpus in both METS and JSON format.
- The 'mets.xml' file enumerates all the documents and BagIt files contained within.
- The bagits correspond to the OCR-D Bagit Spec.
- The source-code-zip and source-code-tar.gz files only provide metadata, citations, license and readme information.
- If you want to use the source files, please clone the repository. -
-
- - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Commit README - run: | - git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" - git config --local user.name "github-actions[bot]" - git add README.md - git commit -m "[Automatic] Update readme files" || echo "Nothing to update" - git push origin HEAD:main - - - name: Commit CITATION.cff - run: | - git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" - git config --local user.name "github-actions[bot]" - git add CITATION.cff - git commit -m "[Automatic] Update CITATION.cff files" || echo "Nothing to update" - git push origin HEAD:main - - - - name: Deploy GT-Overview to GitHub Pages 🚀 - uses: JamesIves/github-pages-deploy-action@v4.4.1 - with: - branch: gh-pages # The branch the action should deploy to. - folder: ghout # The folder the action should deploy. diff --git a/.gitmodules b/.gitmodules index ec4f561..b585633 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,3 @@ -[submodule "data/historical-reports-2col-ocr-model"] - path = data/historical-reports-2col-ocr-model - url = git@github.com:JKamlah/historical-reports-2col-ocr-model.git -[submodule "data/german-newspapers-ocr-model"] - path = data/german-newspapers-ocr-model - url = git@github.com:JKamlah/german-newspapers-ocr-model.git [submodule "data/german-newspapers"] path = data/german-newspapers url = git@github.com:JKamlah/german-newspapers-ocr-model.git diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..3266107 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,5 @@ +# Page Update Notice + +This page will be automatically updated when the next tag is pushed to the repository. + +Stay tuned for updates! \ No newline at end of file