Merge remote-tracking branch 'template/main'

ncgr · Sep 6, 2024 · 9d6f6d5 · 9d6f6d5
2 parents c104ef2 + 5cb828d
commit 9d6f6d5
Show file tree

Hide file tree

Showing 24 changed files with 51,070 additions and 0 deletions.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,52 @@
+## Summary
+
+<!-- Provide a brief description of the changes you are making. Explain the context and the problem that this PR is solving. -->
+
+## Related Issues
+
+<!-- List any related issues, tickets, or Jira tasks that are addressed by this PR. 
+Use keywords like "Fixes", "Closes", or "Resolves" to automatically link and close issues, e.g., "Fixes #123". -->
+
+## Changes
+
+<!-- Describe the changes in detail. If there are multiple commits, explain each one if necessary.
+Consider breaking this section down into smaller parts, such as "Features Added", "Bugs Fixed", "Technical Debt", etc. -->
+
+- **Feature 1**: Added the ability to do X.
+- **Bug Fix**: Corrected the issue where Y would fail under condition Z.
+- **Refactoring**: Improved the structure of component A without changing its functionality.
+
+## Testing
+
+<!-- Explain how you tested the changes and what steps you took to verify the correctness. 
+Include instructions for others to test if applicable, such as command-line scripts, UI steps, etc. -->
+
+- [ ] Unit tests
+- [ ] Integration tests
+- [ ] Manual testing
+
+### How to test
+
+1. Step 1: [Instruction]
+2. Step 2: [Instruction]
+3. Step 3: [Instruction]
+
+## Screenshots (if applicable)
+
+<!-- Add any relevant screenshots or GIFs to illustrate the changes. This is particularly useful for UI/UX changes. -->
+
+## Checklist
+
+<!-- Ensure that you have completed the following tasks before submitting the PR. -->
+
+- [ ] My code follows the code style of this project.
+- [ ] I have performed a self-review of my code.
+- [ ] I have commented my code, particularly in hard-to-understand areas.
+- [ ] I have made corresponding changes to the documentation.
+- [ ] My changes generate no new warnings or errors.
+- [ ] I have added tests that prove my fix is effective or that my feature works.
+- [ ] New and existing unit tests pass locally with my changes.
+
+## Notes for Reviewers
+
+<!-- Add any additional notes for the reviewers. This could include areas of the code that you would like reviewers to focus on, known issues, or challenges you faced while implementing the changes. -->
diff --git a/.github/workflows/check-links.yaml b/.github/workflows/check-links.yaml
@@ -0,0 +1,31 @@
+# Link-check workflow: callable from other workflows (workflow_call) and also
+# triggered directly on every push and pull request.
+name: 'Check Links'
+on:
+  workflow_call:
+    # NOTE(review): neither input below is referenced anywhere in this job; the
+    # "Link Check" step only sets LINK_IGNORE_LIST and PAT in its env. Confirm
+    # how check_links.py is expected to receive them.
+    inputs:
+      directory:
+        required: false
+        type: string
+      repo_link_ignore_list:
+        required: true
+        type: string
+    secrets:
+      PAT:
+        required: false
+  push:
+  pull_request:
+
+jobs:
+  link_check:
+    name: 'Link Check'
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      # Runs the checker with no arguments, so it scans from the current directory
+      # unless an environment override is present.
+      - name: Link Check
+        run: |
+          python3 .github/workflows/check_links.py
+        env:
+          # Comma-separated list; the script splits this value on ","
+          LINK_IGNORE_LIST: https://www.sciencedirect.com,https://portlandpress.com
+          # NOTE(review): the script reads INPUT_PAT, not PAT - confirm which name is intended
+          PAT: ${{ secrets.PAT }}
diff --git a/.github/workflows/check_links.py b/.github/workflows/check_links.py
@@ -0,0 +1,168 @@
+import http.client
+import urllib.request, urllib.error
+import os
+import sys
+import re
+
+
+
+# set some default variables
+remove_characters = ['**', '\\n']
+
+# text that tends to be at the end of the url that we need truncate everything past them
+end_characters = [')',",","'",'`',"\"",'</a>','</div>',"\\",">","]"]
+
+big_regex = re.compile('|'.join(map(re.escape, remove_characters)))
+
+# if there are any URLs to ignore add here
+link_ignore_list = []
+link_ignore_list_env = os.getenv("LINK_IGNORE_LIST")
+if link_ignore_list_env and len(link_ignore_list_env) > 0:
+    link_ignore_list = link_ignore_list_env.split(',')
+
+# Add any repo specific ignores
+link_ignore_list_env_2 = os.getenv("inputs.repo_link_ignore_list")
+if link_ignore_list_env_2 and len(link_ignore_list_env_2) > 0:
+    link_ignore_list.extend(link_ignore_list_env.split(','))
+
+print_valid = os.getenv("print_valid_links") is not None
+
+# If we are given a directory then use it, otherwise assume path is current directory
+path = "."
+if len(sys.argv) >1  and os.path.exists(sys.argv[1]):
+    path = sys.argv[1]
+
+# directory environment overrides the system arguments and default.
+directory_env = os.getenv("inputs.directory")
+if directory_env and len(directory_env) > 0:
+    path = directory_env
+
+pat_env = os.getenv("INPUT_PAT")
+if directory_env and len(directory_env) > 0:
+    path = directory_env
+
+# list which stores all links to check
+links_to_check = []
+link_file_map = {}
+# Get the response code of the url to see if it exists
+def getResponseCode(url):
+    content = None
+    try:
+        req = urllib.request.Request(url,
+                                     headers={'User-Agent': 'Mozilla/5.0'})
+        conn = urllib.request.urlopen(req)
+        # Only get HTML if we have a potential anchor link
+        if "#" in url and "pdf" not in url:
+            content = conn.read().decode("utf-8")
+    except urllib.error.HTTPError as e:
+        return [e.code, content]
+    except urllib.error.URLError as e:
+        return [404, content]
+    except http.client.InvalidURL:
+        return [200, content]
+    return [conn.getcode(), content]
+
+def clean_link(link):
+    if link.endswith("."):
+        link = link[:link.rfind(".")]
+    if link.endswith("'"):
+        link = link[:link.rfind("'")]
+    if link.endswith("\""):
+        link = link[:link.rfind("\"")]
+    link_stripped = big_regex.sub("", link.strip())
+    for end_c in end_characters:
+        end_index = link_stripped.find(end_c)
+        if end_index != -1:
+            link_stripped = link_stripped[:end_index]
+    return link_stripped
+
+def add_link(loc,link):
+    # this is a command being ran so difficult to validate in this script, skip it
+    if '$(uname' in link:
+        return False
+
+    # get just from the http portion if there was more in from of the string we grabbed
+    link = link[link.find("http"):]
+
+    # if there is a period at the end, truncate to that period. Other periods may be valid
+    # strip various characters that may be in the string
+    link_stripped = clean_link(link)
+    while link_stripped != link:
+        link = link_stripped
+        link_stripped = clean_link(link)
+
+    # add link to be checked
+    links_to_check.append(link_stripped)
+
+    # store where the link is so we can fix it
+    link_file_map[link_stripped] = loc
+def check_link(link):
+    # try and get the url, if its 404 or 500 then its invalid, let us know and trigger the error flag
+    code = getResponseCode(link)
+    loc =link_file_map[link]
+    if code[0] in [404, 403, 500]:
+
+        # If the link failed, but we are ignoring it then just mention that
+        for ignored_link in link_ignore_list:
+            if ignored_link in link:
+                print(
+                    loc + ", " + link + ", Ignored")
+                return False
+
+        # print(file+" Code:"+str(code[0])+" Line "+str(line_num)+"("+str(char)+"):"+item_stripped)
+        print(
+            loc + ", " + link + ", Failed")
+        return True
+
+    # check for missing anchors
+    elif "#" in link and \
+        code[1] is not None \
+        and 'href=\"' + link[link.find("#"):] + '\"' not in \
+        code[1]:
+        print(
+            loc + ", " + link + ", Failed - Anchor")
+    # print(file + " Missing Anchor Line " + str(
+    #     line_num) + "(" + str(
+    #     char) + "):" + item_stripped)
+    elif print_valid:
+        print(
+            loc + ", " + link + ", Valid")
+    return True
+
+
+if __name__ == "__main__":
+    err = 0
+    print("Directory is "+path)
+    # Loop through all files in path
+    for root, dirs, files in os.walk(path):
+        for file in files:
+            #  only read file that match template ( txt, md or python notebook)
+            if file.endswith(".md") or file.endswith(".txt") or file.endswith(
+                ".ipynb"):
+
+                # get content and separate into lines and then separate by spaces
+                raw_content = open(os.path.join(root, file), "r").read()
+                content = raw_content.split("\n")
+                content = [x.split(" ") for x in content]
+                loc = os.path.join(root, file)
+                # have an incrementer for line number later export
+                for line in content:
+                    for item in line:
+
+                        if "https://" in item or "http://" in item:
+                            if "](" in item:
+                                add_link(loc,item[item.find("]"):])
+                                # if we get any error  then add it
+                                if item[item.find("("):] == item[item.find("]"):]:
+                                    continue
+                                add_link(loc,item[item.find("("):])
+                            else:
+                                add_link(loc,item)
+
+    for link in set(links_to_check):
+        # if we get any error  then add to err variable
+        err = check_link(link) + err
+    # if the error is > 1 then set it to 1 to error as 1
+    if err > 1:
+        err = 1
+    exit(err)
diff --git a/.github/workflows/notebook-lint.yaml b/.github/workflows/notebook-lint.yaml
@@ -1,5 +1,10 @@
 name: 'Lint Notebook'
 on:
+  workflow_call:
+    inputs:
+      directory:
+        required: false
+        type: string
   push:
 permissions:
   contents: write

diff --git a/flashcards/Bio_definitions1.json b/flashcards/Bio_definitions1.json
@@ -0,0 +1,10 @@
+[
+    {
+        "front": "homolog",
+        "back": "A gene related to a second gene by descent from a common ancestral DNA sequence."
+    },
+    {
+        "front": "ortholog",
+        "back": "Gene sequences in two species that are derived from the same ancestral gene present in those species' last common ancestor."
+    }
+]