Skip to content

Commit

Permalink
Merge remote-tracking branch 'template/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
alancleary committed Sep 6, 2024
2 parents c104ef2 + 5cb828d commit 9d6f6d5
Show file tree
Hide file tree
Showing 24 changed files with 51,070 additions and 0 deletions.
52 changes: 52 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
## Summary

<!-- Provide a brief description of the changes you are making. Explain the context and the problem that this PR is solving. -->

## Related Issues

<!-- List any related issues, tickets, or Jira tasks that are addressed by this PR.
Use keywords like "Fixes", "Closes", or "Resolves" to automatically link and close issues, e.g., "Fixes #123". -->

## Changes

<!-- Describe the changes in detail. If there are multiple commits, explain each one if necessary.
Consider breaking this section down into smaller parts, such as "Features Added", "Bugs Fixed", "Technical Debt", etc. -->

- **Feature 1**: Added the ability to do X.
- **Bug Fix**: Corrected the issue where Y would fail under condition Z.
- **Refactoring**: Improved the structure of component A without changing its functionality.

## Testing

<!-- Explain how you tested the changes and what steps you took to verify the correctness.
Include instructions for others to test if applicable, such as command-line scripts, UI steps, etc. -->

- [ ] Unit tests
- [ ] Integration tests
- [ ] Manual testing

### How to test

1. Step 1: [Instruction]
2. Step 2: [Instruction]
3. Step 3: [Instruction]

## Screenshots (if applicable)

<!-- Add any relevant screenshots or GIFs to illustrate the changes. This is particularly useful for UI/UX changes. -->

## Checklist

<!-- Ensure that you have completed the following tasks before submitting the PR. -->

- [ ] My code follows the code style of this project.
- [ ] I have performed a self-review of my code.
- [ ] I have commented my code, particularly in hard-to-understand areas.
- [ ] I have made corresponding changes to the documentation.
- [ ] My changes generate no new warnings or errors.
- [ ] I have added tests that prove my fix is effective or that my feature works.
- [ ] New and existing unit tests pass locally with my changes.

## Notes for Reviewers

<!-- Add any additional notes for the reviewers. This could include areas of the code that you would like reviewers to focus on, known issues, or challenges you faced while implementing the changes. -->
31 changes: 31 additions & 0 deletions .github/workflows/check-links.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Link-check workflow: checks out the repository and runs
# .github/workflows/check_links.py over it.
name: 'Check Links'
# Triggers: every push, every pull request, and calls from other workflows.
on:
  workflow_call:
    inputs:
      # NOTE(review): `directory` and `repo_link_ignore_list` are declared
      # here but no step below references them. check_links.py looks them up
      # with os.getenv("inputs.directory") and
      # os.getenv("inputs.repo_link_ignore_list"), which nothing in this
      # workflow sets - confirm how they are meant to reach the script.
      directory:
        required: false
        type: string
      repo_link_ignore_list:
        required: true
        type: string
    secrets:
      PAT:
        required: false
  push:
  pull_request:

jobs:
  link_check:
    name: 'Link Check'
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Link Check
        run: |
          python3 .github/workflows/check_links.py
        env:
          # Comma-separated URL fragments; the script reports failing links
          # that contain one of them as "Ignored" instead of "Failed".
          LINK_IGNORE_LIST: https://www.sciencedirect.com,https://portlandpress.com
          # NOTE(review): exported as PAT, but check_links.py reads INPUT_PAT.
          PAT: ${{ secrets.PAT }}
168 changes: 168 additions & 0 deletions .github/workflows/check_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import http.client
import urllib.request, urllib.error
import os
import sys
import re



# ---------------------------------------------------------------------------
# Module configuration: cleaning rules, ignore list, scan directory and the
# shared containers that add_link()/check_link() fill and read.
# ---------------------------------------------------------------------------

# Substrings deleted from a candidate link wherever they occur: markdown bold
# markers and a literal backslash followed by "n" (two characters; presumably
# escaped newlines in raw notebook JSON - verify against real inputs).
remove_characters = ['**', '\\n']

# Text that tends to follow a URL; a link is truncated at the first
# occurrence of each of these.
end_characters = [')', ",", "'", '`', "\"", '</a>', '</div>', "\\", ">", "]"]

big_regex = re.compile('|'.join(map(re.escape, remove_characters)))


def _split_csv(value):
    """Split a comma-separated string into stripped, non-empty entries.

    Empty entries are dropped on purpose: an empty string is a substring of
    every link, so it would cause every failing link to be ignored.
    """
    if not value:
        return []
    return [entry.strip() for entry in value.split(',') if entry.strip()]


# URL fragments whose failures are reported as "Ignored" rather than "Failed".
link_ignore_list_env = os.getenv("LINK_IGNORE_LIST")
link_ignore_list = _split_csv(link_ignore_list_env)

# Add any repo specific ignores. The original extended the list from
# link_ignore_list_env here, which crashed when only this variable was set
# and duplicated the global list otherwise.
link_ignore_list_env_2 = os.getenv("inputs.repo_link_ignore_list")
link_ignore_list.extend(_split_csv(link_ignore_list_env_2))

# Valid links are only printed when this variable is present (any value).
print_valid = os.getenv("print_valid_links") is not None

# If we are given a directory then use it, otherwise assume the current one.
path = "."
if len(sys.argv) > 1 and os.path.exists(sys.argv[1]):
    path = sys.argv[1]

# The directory environment variable overrides both the argument and default.
directory_env = os.getenv("inputs.directory")
if directory_env:
    path = directory_env

# Personal access token. INPUT_PAT is the historical name; PAT is what the
# check-links workflow exports. NOTE(review): nothing visible in this script
# uses the token yet.
pat_env = os.getenv("INPUT_PAT") or os.getenv("PAT")

# Every cleaned link queued for checking (may contain duplicates).
links_to_check = []
# Maps each cleaned link to the file it was (last) found in.
link_file_map = {}
# Get the response code of the url to see if it exists
def getResponseCode(url, timeout=30):
    """Request *url* and report how the server answered.

    Args:
        url: The address to fetch.
        timeout: Seconds to wait on the connection before giving up, so one
            unresponsive host cannot stall the whole run.

    Returns:
        A two-item list ``[status, content]``. ``status`` is the HTTP status
        code; it is 404 when the request could not be completed at all and
        200 when http.client rejects the URL as invalid (kept from the
        original behaviour). ``content`` is the decoded response body when
        the url contains "#" and does not contain "pdf", otherwise ``None``.
    """
    content = None
    try:
        req = urllib.request.Request(url,
                                     headers={'User-Agent': 'Mozilla/5.0'})
        # The context manager closes the connection even if reading fails.
        with urllib.request.urlopen(req, timeout=timeout) as conn:
            status = conn.getcode()
            # Only get HTML if we have a potential anchor link
            if "#" in url and "pdf" not in url:
                # Pages are not guaranteed to be UTF-8; substitute bad bytes
                # rather than abort the run on a decode error.
                content = conn.read().decode("utf-8", errors="replace")
    except urllib.error.HTTPError as e:
        return [e.code, content]
    except http.client.InvalidURL:
        return [200, content]
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers URLError, timeouts and connection resets; ValueError
        # is raised for strings that are not URLs at all; HTTPException
        # covers protocol-level failures such as a dropped connection.
        return [404, content]
    return [status, content]

def clean_link(link):
    """Run one cleaning pass over a candidate link and return the result.

    At most one trailing period, then one trailing single quote, then one
    trailing double quote is dropped. The link is then stripped of
    surrounding whitespace, every ``remove_characters`` substring is deleted,
    and the text is cut at the first occurrence of each ``end_characters``
    entry in turn.
    """
    for trailing in (".", "'", "\""):
        if link.endswith(trailing):
            link = link[:-1]

    cleaned = big_regex.sub("", link.strip())
    for marker in end_characters:
        # partition() returns the whole string as the head when the marker
        # is absent, so this only shortens the link when the marker exists.
        cleaned = cleaned.partition(marker)[0]
    return cleaned

def add_link(loc, link):
    """Clean a raw text fragment and queue the URL it contains for checking.

    Args:
        loc: Path of the file the fragment was found in.
        link: Raw fragment that is expected to contain a URL.

    Returns:
        True when a link was queued, False when the fragment was skipped.
    """
    # this is a command being run, so difficult to validate in this script; skip it
    if '$(uname' in link:
        return False

    # keep only the part from "http" onwards in case there was more in front
    # of it; without this guard find() returns -1 and the slice below would
    # keep just the last character of the fragment
    start = link.find("http")
    if start == -1:
        return False
    link = link[start:]

    # one cleaning pass can expose new trailing characters (e.g. a period
    # left after truncation), so repeat until the link stops changing
    link_stripped = clean_link(link)
    while link_stripped != link:
        link = link_stripped
        link_stripped = clean_link(link)

    # "http" may also match non-URL text such as a file name; only real
    # http(s) URLs can be requested
    if not link_stripped.startswith(("http://", "https://")):
        return False

    # add link to be checked
    links_to_check.append(link_stripped)

    # store where the link is so we can fix it
    link_file_map[link_stripped] = loc
    return True
def check_link(link):
    """Request *link*, print its status line and report whether it is broken.

    Output lines have the form ``<file>, <link>, <status>`` where status is
    one of "Ignored", "Failed", "Failed - Anchor" or "Valid" (the last only
    when ``print_valid`` is set).

    Args:
        link: A cleaned link previously registered through add_link().

    Returns:
        True when the link counts as an error (failed request or missing
        anchor), False when it is valid or ignored. The caller sums these
        values, so every path returns a bool.
    """
    status, body = getResponseCode(link)
    loc = link_file_map[link]

    # 404, 403 and 500 are the status codes treated as a broken link
    if status in [404, 403, 500]:
        # If the link failed, but we are ignoring it then just mention that.
        # Empty entries are skipped because "" is a substring of every link.
        for ignored_link in link_ignore_list:
            if ignored_link and ignored_link in link:
                print(loc + ", " + link + ", Ignored")
                return False

        print(loc + ", " + link + ", Failed")
        return True

    # check for missing anchors: the page was downloaded but contains no
    # href pointing at the fragment part of the link
    if "#" in link and body is not None \
            and 'href="' + link[link.find("#"):] + '"' not in body:
        print(loc + ", " + link + ", Failed - Anchor")
        return True

    if print_valid:
        print(loc + ", " + link + ", Valid")
    # A valid link must not count as an error; returning True here made
    # every run fail.
    return False


def main():
    """Scan ``path`` for links, check each unique one and return an exit code.

    Every ``.md``, ``.txt`` and ``.ipynb`` file under ``path`` is split into
    whitespace-separated items; items containing an http(s) URL are handed to
    add_link(). Each distinct queued link is then checked once.

    Returns:
        1 if at least one link was reported as an error, otherwise 0.
    """
    print("Directory is "+path)
    # Loop through all files in path
    for root, dirs, files in os.walk(path):
        for file in files:
            # only read files that match the template (txt, md or python notebook)
            if not file.endswith((".md", ".txt", ".ipynb")):
                continue

            loc = os.path.join(root, file)
            # Close the handle deterministically and do not let a stray
            # non-UTF-8 byte abort the whole scan.
            with open(loc, "r", encoding="utf-8", errors="replace") as handle:
                raw_content = handle.read()

            # separate into lines and then separate by spaces
            for line in raw_content.split("\n"):
                for item in line.split(" "):
                    if "https://" not in item and "http://" not in item:
                        continue
                    if "](" in item:
                        # Markdown link: try the text after the first "]"
                        # and after the first "(". The original compared the
                        # two slices to skip duplicates, but they start at
                        # different characters and can never be equal, so
                        # that branch was dead and is dropped here.
                        add_link(loc, item[item.find("]"):])
                        add_link(loc, item[item.find("("):])
                    else:
                        add_link(loc, item)

    # Check every distinct link (sorted for reproducible output); keep going
    # after a failure so all broken links are reported in one run.
    failed = False
    for link in sorted(set(links_to_check)):
        if check_link(link):
            failed = True
    return 1 if failed else 0


if __name__ == "__main__":
    sys.exit(main())
5 changes: 5 additions & 0 deletions .github/workflows/notebook-lint.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
name: 'Lint Notebook'
on:
workflow_call:
inputs:
directory:
required: false
type: string
push:
permissions:
contents: write
Expand Down
10 changes: 10 additions & 0 deletions flashcards/Bio_definitions1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[
{
"front": "homolog",
"back": "A gene related to a second gene by descent from a common ancestral DNA sequence."
},
{
"front": "ortholog",
"back": "Gene sequences derived from the same ancestral gene present in two species' last common ancestor."
}
]
Loading

0 comments on commit 9d6f6d5

Please sign in to comment.