Merge branch 'main' into pippa

plantbreeding · Jul 9, 2024 · 5596757 · 5596757
2 parents cadf1f8 + 4dc279b
commit 5596757
Show file tree

Hide file tree

Showing 74 changed files with 1,465 additions and 619 deletions.
diff --git a/.appveyor.yml b/.appveyor.yml
@@ -0,0 +1,84 @@
+# See https://www.appveyor.com/docs/getting-started-with-appveyor-for-linux/
+# Don't build branches with a PR, since their build will be created with the PR itself. 
+# Otherwise there would be two builds -- one for the PR and one for the branch.
+# If you're having issues with getting your PR to build, make sure there are no merge conflicts.
+skip_branch_with_pr: true
+
+# Enable 'Do not build on "Push" events' in the AppVeyor project settings
+# to only build commits from pull requests
+branches:
+  only:
+    - main
+    - master
+
+# Only run AppVeyor on commits that modify at least one of the following files
+# Delete these lines to run AppVeyor on all main/master branch commits
+only_commits:
+  files:
+    - .appveyor.yml
+    - build/
+    - ci/install.sh
+    - content/
+
+image: ubuntu2204
+services:
+  - docker
+
+# Set SPELLCHECK to true to enable Pandoc spellchecking
+environment:
+  SPELLCHECK: true
+
+install:
+  # Record the triggering commit before installing so the job message is
+  # available even if the build fails; push builds fall back to the repo commit
+  - TRIGGERING_COMMIT=${APPVEYOR_PULL_REQUEST_HEAD_COMMIT:-$APPVEYOR_REPO_COMMIT}
+  - JOB_MESSAGE=" for commit $TRIGGERING_COMMIT "
+  - source ci/install.sh
+
+test_script:
+  - bash build/build.sh
+  - MANUSCRIPT_FILENAME=manuscript-$APPVEYOR_BUILD_VERSION-${TRIGGERING_COMMIT:0:7}
+  - cp output/manuscript.html $MANUSCRIPT_FILENAME.html
+  - cp output/manuscript.pdf $MANUSCRIPT_FILENAME.pdf
+  - appveyor PushArtifact $MANUSCRIPT_FILENAME.html
+  - appveyor PushArtifact $MANUSCRIPT_FILENAME.pdf
+  - |
+      if [ "${SPELLCHECK:-}" = "true" ]; then
+        SPELLING_ERRORS_FILENAME=spelling-errors-$APPVEYOR_BUILD_VERSION-${TRIGGERING_COMMIT:0:7}.txt
+        cp output/spelling-errors.txt $SPELLING_ERRORS_FILENAME
+        appveyor PushArtifact $SPELLING_ERRORS_FILENAME
+        SPELLING_ERROR_LOCATIONS_FILENAME=spelling-error-locations-$APPVEYOR_BUILD_VERSION-${TRIGGERING_COMMIT:0:7}.txt
+        cp output/spelling-error-locations.txt $SPELLING_ERROR_LOCATIONS_FILENAME
+        appveyor PushArtifact $SPELLING_ERROR_LOCATIONS_FILENAME
+      fi
+
+build: off
+
+cache:
+  - ci/cache
+
+on_success:
+  - echo "Artifacts available from $APPVEYOR_URL/project/$APPVEYOR_ACCOUNT_NAME/$APPVEYOR_PROJECT_SLUG/builds/$APPVEYOR_BUILD_ID/artifacts"
+  - echo "Updated PDF available from $APPVEYOR_URL/api/buildjobs/$APPVEYOR_JOB_ID/artifacts/$MANUSCRIPT_FILENAME.pdf"
+  - appveyor AddMessage "$JOB_MESSAGE is now complete."
+  - |
+      if [ "${SPELLCHECK:-}" = "true" ]; then
+        SPELLING_ERROR_COUNT=($(wc -l $SPELLING_ERROR_LOCATIONS_FILENAME))
+        appveyor AddMessage " <details><summary>Found $SPELLING_ERROR_COUNT potential spelling error(s). Preview:</summary>$(head -n 100 $SPELLING_ERROR_LOCATIONS_FILENAME)"
+        appveyor AddMessage "... </details>"
+      fi
+
+on_failure:
+  - appveyor AddMessage "$JOB_MESSAGE failed."
+
+# The following lines can be safely deleted, which will disable AppVeyorBot
+# notifications in GitHub pull requests
+# Notifications use Mustache templates http://mustache.github.io/mustache.5.html
+# See https://www.appveyor.com/docs/notifications/#customizing-message-template
+# for available variables
+notifications:
+  - provider: GitHubPullRequest
+    template: "AppVeyor [build {{buildVersion}}]({{buildUrl}})
+      {{#jobs}}{{#messages}}{{{message}}}{{/messages}}{{/jobs}}
+      {{#passed}}The rendered manuscript from this build is temporarily available for download at:\n\n
+      {{#jobs}}{{#artifacts}}- [`{{fileName}}`]({{permalink}})\n{{/artifacts}}{{/jobs}}{{/passed}}"
diff --git a/.github/workflows/ai-revision.yaml b/.github/workflows/ai-revision.yaml
@@ -16,17 +16,25 @@ on:
         description: 'Language model'
         required: true
         type: string
-        default: 'text-davinci-003'
+        default: 'gpt-3.5-turbo'
+      custom_prompt:
+        description: 'Custom prompt'
+        required: false
+        type: string
+        default: ''
       branch_name:
         description: 'Output branch'
         required: true
         type: string
-        default: 'ai-revision-davinci'
+        default: 'ai-revision-gpt35'
 
 jobs:
   ai-revise:
     name: AI Revise
     runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
     defaults:
       run:
         shell: bash --login {0}
@@ -51,6 +59,7 @@ jobs:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           AI_EDITOR_LANGUAGE_MODEL: ${{ inputs.model }}
           AI_EDITOR_FILENAMES_TO_REVISE: ${{ inputs.file_names }}
+          AI_EDITOR_CUSTOM_PROMPT: ${{ inputs.custom_prompt }}
           # More variables can be specified to control the behavior of the model:
           #  https://github.com/manubot/manubot-ai-editor/blob/main/libs/manubot_ai_editor/env_vars.py
         run: manubot ai-revision --content-directory content/

diff --git a/.github/workflows/manubot.yaml b/.github/workflows/manubot.yaml
@@ -84,7 +84,9 @@ jobs:
           activate-environment: manubot
           environment-file: build/environment.yml
           auto-activate-base: false
+          miniforge-variant: Mambaforge
           miniforge-version: 'latest'
+          use-mamba: true
       - name: Install Spellcheck
         if: env.SPELLCHECK == 'true'
         run: bash ci/install-spellcheck.sh

diff --git a/USAGE.md b/USAGE.md
@@ -322,6 +322,8 @@ metadata:
   csl: https://github.com/citation-style-language/styles/raw/906cd6d43d0c136190ecfbb12f6af0ca794e3c5b/peerj.csl
 ```
 
+Instructions for generating additional manuscript output formats such as DOCX can be found in [`build/README.md`](build/README.md).
+
 ## Spellchecking
 
 When the `SPELLCHECK` environment variable is `true`, the pandoc [spellcheck filter](https://github.com/pandoc/lua-filters/tree/master/spellcheck) is run.
@@ -332,7 +334,7 @@ Spellchecking is currently only supported for English language manuscripts.
 ## AI-assisted authoring
 
 The workflow [`ai-revision`](.github/workflows/ai-revision.yaml) is available to assist authors in writing their manuscripts.
-It uses large language models to revise the manuscript text, fixing spelling and grammar errors, and improving the sentence structure and the writing style with section-specific prompts.
+It uses large language models to revise the manuscript text, fix spelling and grammar errors, and improve the sentence structure and the writing style with section-specific prompts.
 It is manually triggered by the user (it never runs automatically), and it generates a pull request with suggested revisions.
 Then the user can review these changes and merge the pull request if they are acceptable.
 More information about this tool is available in [this manuscript](https://greenelab.github.io/manubot-gpt-manuscript/).
@@ -342,7 +344,21 @@ For 1), go to the settings page and, within "Secrets and variables," select "Act
 Next, create a repository secret with the name `OPENAI_API_KEY` and the value of the API token (you can also do this using "Organization secrets" if available).
 For 2), go to "Actions", "General", "Workflow permissions", and activate the checkbox "Allow GitHub Actions to create and approve pull requests."
 
-By default, the tool uses the model `text-davinci-003`.
+Additionally, you need to decide which type of prompts you want to use.
+"Prompts" are the instructions given to the language model to revise your manuscript.
+Basically, you can select 1) the default set of section-specific prompts already provided by the tool, or 2) a custom prompt that you provide.
+For 1), you can check [this manuscript](https://greenelab.github.io/manubot-gpt-manuscript/) for a more detailed description of the section-specific prompts.
+These prompts are already provided, but the tool needs to know the section of each of your Markdown files (for instance, whether it is the abstract, the introduction, etc.).
+For this, the tool will try to infer the sections from the file names automatically, and if this fails, the tool might not revise all of your files.
+In this case, you need to indicate the section of each file using the "section mapping" environment variable that is described [here](https://github.com/manubot/manubot-ai-editor/blob/main/libs/manubot_ai_editor/env_vars.py) (read the header of the file for more instructions).
+For 2), you can provide your own custom prompt, which will be used for all the files regardless of their section.
+For example, instead of the more complex section-specific prompts in 1), you might just want to perform simpler revision tasks.
+An example of a custom prompt is "proofread the following paragraph".
+You can provide your custom prompt when you manually trigger the workflow by using the "Custom prompt" field.
+This could be more appropriate if you are testing different prompts.
+To set a fixed prompt for all runs, read the documentation [here](https://github.com/manubot/manubot-ai-editor/blob/main/libs/manubot_ai_editor/env_vars.py) to set the "custom prompt" environment variable.
+
+By default, the workflow uses the model `gpt-3.5-turbo`, but you are encouraged to check the [OpenAI documentation](https://platform.openai.com/docs/models) to see which models are available, which one is the most suitable for your manuscript, and [whether our tool supports it](https://github.com/manubot/manubot-ai-editor).
 Make sure to check the [pricing](https://openai.com/api/pricing/) of the OpenAI API.
 With $0.02 per 1000 tokens using the most powerful AI models, the cost for a revision of a standard manuscript (around 35 paragraphs) should be around $0.50.
 The workflow allows specifying the branch and file names (in the `content/` directory) to revise, the language model to use, and the output branch name.

diff --git a/build/README.md b/build/README.md
@@ -7,8 +7,19 @@ However, setting the `BUILD_PDF` environment variable to `false` will suppress P
 For example, run local builds using the command `BUILD_PDF=false bash build/build.sh`.
 
 To build a DOCX file of the manuscript, set the `BUILD_DOCX` environment variable to `true`.
-For example, use the command `BUILD_DOCX=true bash build/build.sh`.
-To export DOCX for all CI builds, set an environment variable (see docs for [GitHub Actions](https://help.github.com/en/actions/automating-your-workflow-with-github-actions/using-environment-variables)).
+For example, use the command `BUILD_DOCX=true bash build/build.sh` locally.
+To export DOCX for all CI builds, set an environment variable in the CI configuration file.
+For GitHub Actions, set the variable in `.github/workflows/manubot.yaml` (see [docs](https://help.github.com/en/actions/automating-your-workflow-with-github-actions/using-environment-variables)):
+
+```yaml
+name: Manubot
+env:
+  BUILD_DOCX: true
+```
+
+To generate a single DOCX output of the latest manuscript with GitHub Actions, click the "Actions" tab at the top of the repository.
+Select the "Manubot" workflow, then the "Run workflow" button and check "generate DOCX output" before clicking the green "Run workflow" button.
+
 Currently, equation numbers via `pandoc-eqnos` are not supported for DOCX output.
 
 Format conversion is done using [Pandoc](https://pandoc.org/MANUAL.html).
@@ -20,12 +31,17 @@ To change the options, either edit the YAML files directly or add additional `--
 
 Note: currently, **Windows is not supported**.
 
-Install the [conda](https://conda.io) environment specified in [`environment.yml`](environment.yml) by running the following commands
+The Manubot environment is managed with [conda](https://conda.io).
+If you do not have `conda` installed, we recommend using the Miniforge3 (includes `conda`) or Mambaforge (includes `conda` and `mamba`) installers from [miniforge](https://github.com/conda-forge/miniforge).
+Install the environment from [`environment.yml`](environment.yml) by running one of the following commands
 (from the repository's root directory):
 
 ```sh
-# Install the environment
+# Install the environment using conda
 conda env create --file build/environment.yml
+
+# Install the environment using mamba (faster)
+mamba env create --file build/environment.yml
 ```
 
 If the `manubot` environment is already installed, but needs to be updated to reflect changes to `environment.yml`, use one of the following options:
@@ -38,6 +54,9 @@ conda env update --file build/environment.yml
 # Slower than option 1, but guarantees a fresh environment.
 conda env remove --name manubot
 conda env create --file build/environment.yml
+
+# option 3: reinstall the manubot environment faster using mamba.
+mamba env create --force --file build/environment.yml
 ```
 
 Activate with `conda activate manubot` (assumes `conda` version of [at least](https://github.com/conda/conda/blob/9d759d8edeb86569c25f6eb82053f09581013a2a/CHANGELOG.md#440-2017-12-20) 4.4).