Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overview on datasources #1

Merged
merged 22 commits into from
Mar 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions .github/workflows/qaqc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,12 @@ jobs:
run: |
python .github/workflows/ensure_clean_notebooks.py

# NOTE: this isn't a comprehensive spellcheck, just common typos
- name: Spellcheck
if: always()
uses: codespell-project/actions-codespell@master
with:
check_filenames: true
check_hidden: true
skip: '.git,qaqc.yml'
ignore_words_list: slippy,hist
- name: Install pyspellchecker
run: |
pip install pyspellchecker

- name: Custom Spellcheck with Ignore List
run: python .github/workflows/spellcheck.py

# borrowed from https://github.com/ProjectPythia/pythia-foundations/blob/main/.github/workflows/link-checker.yaml
- name: Disable Notebook Execution Before Linkcheck
Expand Down
61 changes: 61 additions & 0 deletions .github/workflows/spellcheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from spellchecker import SpellChecker
import nbformat
import os
import sys
import string # Import the string module to get a list of punctuation characters

def remove_punctuation(text):
    """Return *text* with every character in ``string.punctuation`` deleted.

    Characters are removed outright rather than replaced by a space, so the
    pieces on either side of a punctuation mark are joined together
    (``"high-elevation"`` becomes ``"highelevation"``).  Only the characters
    listed in ``string.punctuation`` are affected; everything else is left
    untouched.
    """
    # Mapping a character to None in a translation table deletes it.
    deletions = str.maketrans(dict.fromkeys(string.punctuation))
    return text.translate(deletions)

def spell_check_notebook(filepath, ignore_words):
    """Spell-check the markdown cells of a single notebook.

    Args:
        filepath: Path of the ``.ipynb`` file to read (parsed as nbformat v4).
        ignore_words: Words loaded into the checker's word list so that they
            are not reported.

    Returns:
        ``{filepath: set_of_unrecognised_words}`` when at least one word is
        not recognised by the checker, otherwise an empty dict.
    """
    checker = SpellChecker()
    checker.word_frequency.load_words(ignore_words)

    with open(filepath, 'r', encoding='utf-8') as handle:
        notebook = nbformat.read(handle, as_version=4)

    unrecognised = set()
    for cell in notebook.cells:
        # Only prose is checked; code and raw cells are skipped.
        if cell.cell_type != 'markdown':
            continue
        # Strip punctuation first, then split on whitespace to get the tokens.
        tokens = remove_punctuation(cell.source).split()
        unrecognised |= checker.unknown(tokens)

    return {filepath: unrecognised} if unrecognised else {}

def spell_check_directory(directory, ignore_words):
    """Spell-check every ``.ipynb`` file found under *directory*.

    The tree is walked recursively with ``os.walk``.  For each notebook that
    has unrecognised words, one line is printed listing them.  Directories,
    files and the words on each line are processed in sorted order so that
    the report is identical from run to run (joining a ``set`` of strings
    directly yields an arbitrary order).

    Args:
        directory: Root directory to search.
        ignore_words: Words passed through to ``spell_check_notebook`` that
            must not be reported.

    Returns:
        ``None`` when no notebook has unrecognised words.

    Raises:
        SystemExit: With status 1 after printing the report, when at least
            one notebook has unrecognised words.
    """
    all_misspelled_words = {}
    for subdir, dirs, files in os.walk(directory):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.ipynb'):
                continue
            filepath = os.path.join(subdir, file)
            # An empty result dict is a no-op for update().
            all_misspelled_words.update(spell_check_notebook(filepath, ignore_words))

    if not all_misspelled_words:
        return

    for filepath, words in all_misspelled_words.items():
        print(f"Misspelled words in {filepath}: {', '.join(sorted(words))}")
    sys.exit(1)  # Exit with a non-zero status code to indicate failure

if __name__ == "__main__":
    # Project-specific vocabulary that must not be reported as misspelled.
    # Many entries are fused tokens (e.g. "highelevation", "youll", or whole
    # URLs run together) because punctuation is deleted from the markdown
    # before it is split into words.
    # NOTE(review): entries that contain spaces or punctuation (such as
    # "climate repository" or the "https://..." one) look like they can never
    # match a token produced that way, and several entries are duplicated;
    # they are kept unchanged here -- confirm before pruning.
    ignore_list = [
        "geoweaver", "workflow", "datasets", "snotel", "snowpack", "amsrderived", "highelevation",
        "snowpacktelemetrynetwork", "snowcastwormhole", "500m", "amsr", "decisionmaking",
        "qualitycontrolled", "pagehttpswwwearthdatanasagovsensorsamsre", "netcdf", "daac", "4km",
        "satelliteii", "usthe", "strategizing", "xband", "modis", "pagehttpswwwearthdatanasagovsensorsmodis",
        "nam", "amsrrelated", "adeosii", "mesoscale", "amsradeosii", "apis", "asos", "missionwater",
        "metadata", "shortterm", "groundbased", "hdf", "british", "october", "level2a", "gcomw1", "csv",
        "dataloggers", "columbia", "ascii", "tsv", "amsadeosii", "longterm", "onboard", "wrf", "km", "hdf5",
        "nsidc", "realtime", "satellitebased", "amsre", "level1a", "websitehttpswwwclimatologylaborggridmethtml",
        "124th", "cryospheric", "american", "timestamped", "geolocation", "nrcs", "satellitederived", "awdn",
        "1000m", "snowmelt", "nasas", "amsr2", "dataset", "gridmet", "hightech", "youll", "cryosphere", "µm",
        "250m", "highspatial", "hydroclimatic", "fsca", "workflow", "swe", "eg", "snowtel", "gportalhttpsgportaljaxajpgpr", 'scan', "gdp", "scansnowtel", "geo", "aoi", "climateengineorghttpclimateengineorg", "daily", "monitoring", "youre", "checkbox", "nasa", "toolhttpsclimatenorthwestknowledgenetmacagdpphp", "v6", "element", "thredds", "climate", "xy", "dropdown", "websitehttpwwwwccnrcsusdagovnwccinventory", "websitehttpsearthexplorerusgsgov", "havent", "wgethttpswwwclimatologylaborgwgetgridmethtml", "lpdaac", "mod09ga", "earthdata", "nasahttpsursearthdatanasagov", "shapefile", "statecounty", "gcomw", "dont", "snowtel", "opendap", "popup", "data", "wget", "categoryfuturewarning", "preprocess", "colormapped", "doesnt", "trainstartdate", "csvs", "png", "functionalities", "tuples", "warningsfilterwarningsignore", "urllib", "yearsit", "userdefined", "datetime", "dataframe", "preparecumulativehistorycsvs", "dataframes", "userspecified", "datan", "1st", "url", "netcdf4", "mai", "matplotlib", "dem", "futurewarnings", "nc", "forcetrue", "timesensitive", "trainenddate", "cumulative", "downloader",
        "Daily", "scan", "climate repository", "https://www.northwestknowledgenet.metdata/data", "data", "Element", "SNOTELmonitoring", "cumulative", "Climate", "repository", "repositoryhttpswwwnorthwestknowledgenetmetdatadata", "servicehttpssuzakueorcjaxajpgcomwresearchresdisthtml", "snowtel, data", "Climate Monitoring",
    ]

    # Without this call the script only builds the list above and exits 0,
    # so the CI step would pass without checking anything.
    # The directory to scan may be given as the first command-line argument.
    # NOTE(review): the default of the current working directory is an
    # assumption -- confirm the intended notebook root for the workflow.
    target_directory = sys.argv[1] if len(sys.argv) > 1 else "."
    spell_check_directory(target_directory, ignore_list)
2 changes: 2 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion book/_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

title: "Snow Water Equivalent Workflow"
author: Center for Spatial Information Science and Systems (CSISS), George Mason University
logo: img/logo/png
logo: img/logo.png
only_build_toc_files: true

# Add GitHub buttons to your book
Expand Down
Loading
Loading