From 9fa813d1289a992ab361b8d246a9c131d7b3450f Mon Sep 17 00:00:00 2001 From: Jordan-Day-ONS <57715292+Jday7879@users.noreply.github.com> Date: Thu, 28 Nov 2024 16:25:59 +0000 Subject: [PATCH] 668 setup repo structure (#2) * Setup structure using govcookiecutter * remove duplicate package in dev requirements * Update setup.cfg author * Update repo structure: - Change PR Template - Create workflows - Point requirements to MBS repo - create placeholder unit test for workflows * Update setup.cfg * Update setup.cfg * Update setup.cfg updating to be same structure as mbs --- .env | 34 + .envrc | 23 + .flake8 | 9 + .github/pull_request_template.md | 62 ++ .github/workflows/main.yaml | 48 + .gitignore | 885 ++++++++++++++++++ .pre-commit-config.yaml | 76 ++ .secrets.baseline | 94 ++ CODE_OF_CONDUCT.md | 6 + CONTRIBUTING.md | 6 + LICENSE | 2 +- Makefile | 88 ++ README.md | 104 +- __init__.py | 0 conftest.py | 0 cons_results/__init__.py | 0 cons_results/config.json | 8 + cons_results/estimation/__init__.py | 0 cons_results/imputation/__init__.py | 0 cons_results/main.py | 8 + cons_results/outlier_detection/__init__.py | 0 cons_results/outputs/__init__.py | 0 cons_results/staging/__init__.py | 0 cons_results/utilities/__init__.py | 0 docs/README.md | 32 + docs/_static/.gitkeep | 0 docs/aqa/README.md | 10 + docs/aqa/aqa_plan.md | 12 + docs/aqa/assumptions_caveats.md | 30 + docs/aqa/data_log.md | 30 + docs/conf.py | 214 +++++ docs/contributor_guide/CODE_OF_CONDUCT.md | 98 ++ docs/contributor_guide/CONTRIBUTING.md | 121 +++ docs/contributor_guide/README.md | 13 + docs/contributor_guide/pre_commit_hooks.md | 173 ++++ docs/contributor_guide/updating_gitignore.md | 10 + .../writing_accessible_documentation.md | 52 + .../writing_sphinx_documentation.md | 128 +++ docs/index.md | 12 + docs/user_guide/README.md | 8 + .../loading_environment_variables.md | 90 ++ docs/user_guide/using_pytest.md | 40 + make.bat | 72 ++ pyproject.toml | 30 + setup.cfg | 34 + setup.py | 4 + 
tests/estimation/test_estimation.py | 1 + tests/imputation/test_imputation.py | 1 + .../test_outlier_detection.py | 1 + tests/outputs/test_outputs.py | 1 + tests/staging/test_staging.py | 1 + tests/test_placeholder.py | 2 + tests/utilities/test_utilities.py | 1 + 53 files changed, 2672 insertions(+), 2 deletions(-) create mode 100644 .env create mode 100644 .envrc create mode 100644 .flake8 create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/main.yaml create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 .secrets.baseline create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 Makefile create mode 100644 __init__.py create mode 100644 conftest.py create mode 100644 cons_results/__init__.py create mode 100644 cons_results/config.json create mode 100644 cons_results/estimation/__init__.py create mode 100644 cons_results/imputation/__init__.py create mode 100644 cons_results/main.py create mode 100644 cons_results/outlier_detection/__init__.py create mode 100644 cons_results/outputs/__init__.py create mode 100644 cons_results/staging/__init__.py create mode 100644 cons_results/utilities/__init__.py create mode 100644 docs/README.md create mode 100644 docs/_static/.gitkeep create mode 100644 docs/aqa/README.md create mode 100644 docs/aqa/aqa_plan.md create mode 100644 docs/aqa/assumptions_caveats.md create mode 100644 docs/aqa/data_log.md create mode 100644 docs/conf.py create mode 100644 docs/contributor_guide/CODE_OF_CONDUCT.md create mode 100644 docs/contributor_guide/CONTRIBUTING.md create mode 100644 docs/contributor_guide/README.md create mode 100644 docs/contributor_guide/pre_commit_hooks.md create mode 100644 docs/contributor_guide/updating_gitignore.md create mode 100644 docs/contributor_guide/writing_accessible_documentation.md create mode 100644 docs/contributor_guide/writing_sphinx_documentation.md create mode 100644 docs/index.md create mode 
100644 docs/user_guide/README.md create mode 100644 docs/user_guide/loading_environment_variables.md create mode 100644 docs/user_guide/using_pytest.md create mode 100644 make.bat create mode 100644 pyproject.toml create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 tests/estimation/test_estimation.py create mode 100644 tests/imputation/test_imputation.py create mode 100644 tests/outlier_detection/test_outlier_detection.py create mode 100644 tests/outputs/test_outputs.py create mode 100644 tests/staging/test_staging.py create mode 100644 tests/test_placeholder.py create mode 100644 tests/utilities/test_utilities.py diff --git a/.env b/.env new file mode 100644 index 0000000..1cc2747 --- /dev/null +++ b/.env @@ -0,0 +1,34 @@ +# Environment variables go here, and can be read in by Python using the `python-dotenv` +# package, and `os.getenv`: +# +# ------------------------------------------------------------------------------------ +# from dotenv import load_dotenv +# import os +# +# # Load the environment variables from the `.env` file, overriding any system +# # environment variables +# load_dotenv(override=True) +# +# # Load secrets from the `.secrets` file, overriding any system environment variables +# load_dotenv(".secrets", override=True) +# +# # Example variable +# EXAMPLE_VARIABLE = os.getenv("EXAMPLE_VARIABLE") +# ------------------------------------------------------------------------------------ +# +# For folder/file path environment variables, use relative paths. +# +# DO NOT STORE SECRETS HERE - this file is version-controlled! You should store secrets +# in the untracked `.secrets` file. 
+ +# Add environment variables for the `docs` directory +DIR_DOCS=./docs + +# Add environment variable for package directory +DIR_SRC=./cons_results + +# Add environment variables for the `src` directories +DIR_SRC_CONS_RESULTS=./cons_results + +# Add environment variables for the `tests` directory +DIR_TESTS=./tests diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..10ddb6e --- /dev/null +++ b/.envrc @@ -0,0 +1,23 @@ +# Orchestration file to load environment variables from the `.env` and `.secrets` files. +# +# Only used by systems with `direnv` (https://direnv.net/) installed. Environment +# variables can be read in by Python using `os.getenv` _without_ using `python-dotenv`: +# +# ------------------------------------------------------------------------------------ +# import os +# +# # Example variable +# EXAMPLE_VARIABLE = os.getenv("EXAMPLE_VARIABLE") +# ------------------------------------------------------------------------------------ +# +# DO NOT STORE SECRETS HERE - this file is version-controlled! You should store secrets +# in the untracked `.secrets` file. This is loaded here using the `dotenv_if_exists` +# command. 
+ +# Add the working directory to `PYTHONPATH`; allows Jupyter notebooks in the +# `notebooks` folder to import `src` +export PYTHONPATH="$PYTHONPATH:$(pwd)" + +# Load the `.env` file, and `.secrets` (if it exists) +dotenv .env +dotenv_if_exists .secrets diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..e9bfa57 --- /dev/null +++ b/.flake8 @@ -0,0 +1,9 @@ +[flake8] +# Rule definitions: http://flake8.pycqa.org/en/latest/user/error-codes.html +# D203: 1 blank line required before class docstring +# W503: line break before binary operator +exclude = venv*,__pycache__,node_modules,bower_components,migrations +ignore = D203,W503 +max-complexity = 9 +max-line-length = 88 +extend-ignore = E203 diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..d52129a --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,62 @@ +# Pull Request Title + + + +# Summary + +Add your summary here - keep it brief, to the point, and in plain English. + +# Type of Change + + + +- [ ] Bug fix +- [ ] New feature +- [ ] Breaking change +- [ ] Documentation update +- [ ] Other (please describe): + + +# Checklists + + + +This pull request meets the following requirements: + +## Creator Checklist + +- [ ] Installable with all dependencies recorded +- [ ] Runs without error +- [ ] Follows PEP8 and project-specific conventions +- [ ] Appropriate use of comments, for example, no descriptive comments +- [ ] Functions documented using Numpy style docstrings +- [ ] Assumptions and decisions log considered and updated if appropriate +- [ ] Unit tests have been updated to cover essential functionality for a reasonable range of inputs and conditions +- [ ] Other forms of testing such as end-to-end and user-interface testing have been considered and updated as required + +If you feel some of these conditions do not apply for this pull request, please +add a comment to explain why. 
+ +## Reviewer Checklist + +- [ ] Test suite passes (locally as a minimum) +- [ ] Peer reviewed with review recorded + +# Additional Information + +Please provide any additional information or context that would help the reviewer understand the changes in this pull request. + +# Related Issues + +Link any related issues or pull requests here. diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 0000000..0ba2033 --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,48 @@ +name: cml_runtimes + +on: + # Triggers the workflow on pull requests to main branch + pull_request: + branches: [ main ] + +jobs: + commit-hooks: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-python@v3 + with: + python-version: 3.10.13 + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[dev] + + - name: Check commit hooks + run: | + pre-commit run --all-files + + testing-cml: + runs-on: ubuntu-latest + strategy: + matrix: + cml_version: ["3.8", "3.9", "3.10","3.11"] + steps: + - name: checkout ml-runtimes #https://github.com/cloudera/ml-runtimes + uses: actions/checkout@master + with: + repository: cloudera/ml-runtimes + - name: build runtime cml_${{matrix.cml_version}} + run: docker build -t cml:${{matrix.cml_version}} -f 'pbj-workbench-python${{matrix.cml_version}}-standard.Dockerfile' . 
+ - name: checkout to repository + uses: actions/checkout@v3 + - name: create container + run: docker run -id --name container_${{matrix.cml_version}} -v"$(pwd)"://home/cdsw cml:${{matrix.cml_version}} + - name: build in dev mode + run: docker exec container_${{matrix.cml_version}} pip install ."[dev]" + - name: check env + run: docker exec container_${{matrix.cml_version}} pip list + - name: test + run: docker exec container_${{matrix.cml_version}} pytest diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3cfa36c --- /dev/null +++ b/.gitignore @@ -0,0 +1,885 @@ +# Created by https://www.toptal.com/developers/gitignore/api/vim,venv,pydev,linux,macos,flask,dotenv,django,direnv,python,windows,virtualenv,pycharm+all,visualstudio,jupyternotebooks,visualstudiocode +# Edit at https://www.toptal.com/developers/gitignore?templates=vim,venv,pydev,linux,macos,flask,dotenv,django,direnv,python,windows,virtualenv,pycharm+all,visualstudio,jupyternotebooks,visualstudiocode + +### direnv ### +.direnv +#.envrc + +### Django ### +*.log +*.pot +*.pyc +__pycache__/ +local_settings.py +db.sqlite3 +db.sqlite3-journal +media + +# If your build process includes running collectstatic, then you probably don't need or want to include staticfiles/ +# in your Git repository. Update and uncomment the following line accordingly. +# /staticfiles/ + +### Django.Python Stack ### +# Byte-compiled / optimized / DLL files +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +pytestdebug.log + +# Translations +*.mo + +# Django stuff: + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +doc/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pythonenv* + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# profiling data +.prof + +### dotenv ### + +### Flask ### +instance/* +!instance/.gitignore + +### Flask.Python Stack ### +# Byte-compiled / optimized / DLL files + +# C extensions + +# Distribution / packaging + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+ +# Installer logs + +# Unit test / coverage reports + +# Translations + +# Django stuff: + +# Flask stuff: + +# Scrapy stuff: + +# Sphinx documentation + +# PyBuilder + +# Jupyter Notebook + +# IPython + +# pyenv + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow + +# Celery stuff + +# SageMath parsed files + +# Environments + +# Spyder project settings + +# Rope project settings + +# mkdocs documentation + +# mypy + +# Pyre type checker + +# pytype static type analyzer + +# profiling data + +### JupyterNotebooks ### +# gitignore template for Jupyter Notebooks +# website: http://jupyter.org/ + +*/.ipynb_checkpoints/* + +# IPython + +# Remove previous ipynb_checkpoints +# git rm -r .ipynb_checkpoints/ + +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### PyCharm+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: 
https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### PyCharm+all Patch ### +# Ignores the whole .idea folder and all .iml files +# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 + +.idea/ + +# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 + +*.iml +modules.xml +.idea/misc.xml +*.ipr + +# Sonarlint plugin +.idea/sonarlint + +### pydev ### +.pydevproject + +### Python ### +# Byte-compiled / optimized / DLL files + +# C extensions + +# Distribution / packaging + +# PyInstaller +# Usually 
these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. + +# Installer logs + +# Unit test / coverage reports + +# Translations + +# Django stuff: + +# Flask stuff: + +# Scrapy stuff: + +# Sphinx documentation + +# PyBuilder + +# Jupyter Notebook + +# IPython + +# pyenv + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow + +# Celery stuff + +# SageMath parsed files + +# Environments + +# Spyder project settings + +# Rope project settings + +# mkdocs documentation + +# mypy + +# Pyre type checker + +# pytype static type analyzer + +# profiling data + +### venv ### +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +pip-selfcheck.json + +### Vim ### +# Swap +[._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Session +Session.vim +Sessionx.vim + +# Temporary +.netrwhist +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + +### VirtualEnv ### +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ + +### VisualStudioCode ### +.vscode/* +#!.vscode/tasks.json +#!.vscode/launch.json +*.code-workspace + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files 
+*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +### VisualStudio ### +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# 
AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool +coverage*[.json, .xml, .info] + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# End of https://www.toptal.com/developers/gitignore/api/vim,venv,pydev,linux,macos,flask,dotenv,django,direnv,python,windows,virtualenv,pycharm+all,visualstudio,jupyternotebooks,visualstudiocode + +### construction-survey-results ### + +# Ignore the contents of the `data` folder, except for the `README.md` file +data/* +!data/README.md + +# Ignore the contents of the `data` sub-folders, except for their `.gitkeep` files; adapted from +# https://stackoverflow.com/a/20652768 +!data/**/ +data/external/* +data/raw/* +data/interim/* +data/processed/* +!data/external/.gitkeep +!data/raw/.gitkeep +!data/interim/.gitkeep +!data/processed/.gitkeep + +# Ignore the `docs/reference/api` folder +docs/reference/api/* + +# Ignore the `.secrets` file +.secrets + +# Ignore R artifacts 
+*.Renviron +*.Rhistory + +# Ignore Sphinx documentation link checking folder +docs/_linkcheck/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a407a19 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,76 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: https://github.com/kynan/nbstripout + rev: 0.4.0 + hooks: + - id: nbstripout + name: nbstripout - Strip outputs from notebooks (auto-fixes) + args: + - --extra-keys + - "metadata.colab metadata.kernelspec cell.metadata.colab cell.metadata.executionInfo cell.metadata.id cell.metadata.outputId" + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: check-added-large-files + name: Check for files larger than 5 MB + args: [ "--maxkb=5120" ] + - id: end-of-file-fixer + name: Check for a blank line at the end of scripts (auto-fixes) + exclude: '\.Rd' + - id: trailing-whitespace + name: Check for trailing whitespaces (auto-fixes) + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + name: isort - Sort python imports (auto-fixes) + args: [ "--profile", "black", "--filter-files" ] + - id: isort + name: isort - Sort cython imports (auto-fixes) + types: [cython] + args: [ "--profile", "black", "--filter-files" ] + - id: isort + name: isort - Sort pyi imports (auto-fixes) + types: [pyi] + args: [ "--profile", "black", "--filter-files" ] + - repo: https://github.com/psf/black + rev: 22.8.0 # Replace by any tag/version: https://github.com/psf/black/tags + hooks: + - id: black + name: black - consistent Python code formatting (auto-fixes) + language_version: python # Should be a command that runs python3.6+ + - repo: https://github.com/PyCQA/flake8 + rev: 5.0.4 + hooks: + - id: flake8 + name: flake8 - Python linting + - repo: https://github.com/nbQA-dev/nbQA + rev: 0.12.0 + hooks: + - id: nbqa-isort + name: nbqa-isort - Sort Python imports 
(notebooks; auto-fixes) + args: [ --nbqa-mutate ] + additional_dependencies: [ isort==5.8.0 ] + - id: nbqa-black + name: nbqa-black - consistent Python code formatting (notebooks; auto-fixes) + args: [ --nbqa-mutate ] + additional_dependencies: [ black==21.5b2 ] + # TODO: Disabled for now until it's clear how to add noqa to specific cells of a Jupyter notebook + #- id: nbqa-flake8 + # name: nbqa-flake8 - Python linting (notebooks) + # additional_dependencies: [ flake8==3.9.2 ] + - repo: https://github.com/Yelp/detect-secrets + rev: v1.0.3 + hooks: + - id: detect-secrets + name: detect-secrets - Detect secrets in staged code + args: [ "--baseline", ".secrets.baseline" ] + exclude: .*/tests/.*|^\.cruft\.json$ + - repo: https://github.com/PyCQA/bandit + rev: '1.7.5' + hooks: + - id: bandit + name: bandit - Checks for vulnerabilities + args: ["-c", "pyproject.toml"] + additional_dependencies: ["bandit[toml]"] diff --git a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 0000000..58012f6 --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,94 @@ +{ + "version": "1.0.3", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + "name": "CloudantDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": 
"detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + } + ], + "results": {}, + "generated_at": "2021-06-14T10:43:14Z" +} diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..1fa643e --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,6 @@ +# Code of conduct for `construction_survey_results` + +[Our code of conduct can be found at +`docs/contributor_guide/CODE_OF_CONDUCT.md`][code-of-conduct]. + +[code-of-conduct]: ./docs/contributor_guide/CODE_OF_CONDUCT.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..46ca528 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,6 @@ +# Contributing + +[Our contributing guidelines can be found at +`docs/contributor_guide/CONTRIBUTING.md`][contributing]. 
+ +[contributing]: ./docs/contributor_guide/CONTRIBUTING.md diff --git a/LICENSE b/LICENSE index ad2c5a0..299c710 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 ONS Digital +Copyright (c) 2024 Crown copyright (Office for National Statistics) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..24203d8 --- /dev/null +++ b/Makefile @@ -0,0 +1,88 @@ +.PHONY: + coverage + coverage_html + coverage_xml + docs + docs_check_external_links + help + install + install_dev + prepare_docs_folder + +.DEFAULT_GOAL := help + +## Install the Python package for contributors, and install pre-commit hooks +install_dev: + python -m pip install -U pip setuptools + python -m pip install -e .[dev] + pre-commit install + +## Install the Python package for users +install: + python -m pip install -U pip setuptools + python -m pip install -e . + +## Create a `docs/_build` folder, if it does not exist. Otherwise delete any sub-folders and their contents within it +prepare_docs_folder: + if [ ! 
-d "./docs/_build" ]; then mkdir ./docs/_build; fi + find ./docs/_build -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} \; + +## Compile the Sphinx documentation in HTML format in the docs/_build folder from a clean build +docs: prepare_docs_folder install_dev + sphinx-build -b html ./docs ./docs/_build + +## Check external links in the Sphinx documentation using linkcheck in the docs/_build folder from a clean build +docs_check_external_links: prepare_docs_folder install_dev + sphinx-build -b linkcheck ./docs ./docs/_build + +## Run code coverage +coverage: install_dev + coverage run -m pytest + +## Run code coverage, and produce a HTML output +coverage_html: coverage + coverage html + +## Run code coverage, and produce an XML output +coverage_xml: coverage + coverage xml + +## Get help on all make commands; referenced from https://github.com/drivendata/cookiecutter-data-science +help: + @echo "$$(tput bold)Available rules:$$(tput sgr0)" + @echo + @sed -n -e "/^## / { \ + h; \ + s/.*//; \ + :doc" \ + -e "H; \ + n; \ + s/^## //; \ + t doc" \ + -e "s/:.*//; \ + G; \ + s/\\n## /---/; \ + s/\\n/ /g; \ + p; \ + }" ${MAKEFILE_LIST} \ + | LC_ALL='C' sort --ignore-case \ + | awk -F '---' \ + -v ncol=$$(tput cols) \ + -v indent=25 \ + -v col_on="$$(tput setaf 6)" \ + -v col_off="$$(tput sgr0)" \ + '{ \ + printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ + n = split($$2, words, " "); \ + line_length = ncol - indent; \ + for (i = 1; i <= n; i++) { \ + line_length -= length(words[i]) + 1; \ + if (line_length <= 0) { \ + line_length = ncol - indent - length(words[i]) - 1; \ + printf "\n%*s ", -indent, " "; \ + } \ + printf "%s ", words[i]; \ + } \ + printf "\n"; \ + }' \ + | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars') diff --git a/README.md b/README.md index 42b9ebc..dc77978 100644 --- a/README.md +++ b/README.md @@ -1 +1,103 @@ -# construction-survey-results \ No newline at end of file +# `construction survey results` + +Project for 
construction survey + +```{warning} +Where this documentation refers to the root folder we mean where this README.md is +located. +``` + +## Getting started + +To start using this project, [first make sure your system meets its +requirements](#requirements). + +It's suggested that you install this package and its requirements within a virtual environment. + +## Installing the package (Python Only) + +Whilst in the root folder, in the command prompt, you can install the package and its dependencies +using: + +```shell +python -m pip install -U pip setuptools +pip install -e . +``` +or use the `make` command: +```shell +make install +``` + +This installs an editable version of the package. Meaning, when you update the +package code, you do not have to reinstall it for the changes to take effect. +(This saves a lot of time when you test your code.) + +Remember to update the setup and requirement files in line with any changes to your +package. The initial files contain the bare minimum to get you started. + +## Running the pipeline (Python only) + +The entry point for the pipeline is stored within the package and called `main.py`. +To run the pipeline, run the following code in the terminal (whilst in the root directory of the +project). + +```shell +python cons_results/main.py +``` + +Alternatively, most Python IDEs allow you to run the code directly from the IDE using a `run` button. + +## Required secrets and credentials + +To run this project, [you need a `.secrets` file with secrets/credentials as +environment variables][docs-loading-environment-variables-secrets]. The +secrets/credentials should have the following environment variable name(s): + +| Secret/credential | Environment variable name | Description | +|-------------------|---------------------------|--------------------------------------------| +| Secret 1 | `SECRET_VARIABLE_1` | Plain English description of Secret 1.
| +| Credential 1 | `CREDENTIAL_VARIABLE_1` | Plain English description of Credential 1. | + +Once you've added these, [load these environment variables using +`.env`][docs-loading-environment-variables]. + +## Licence + +Unless stated otherwise, the codebase is released under the MIT License. This covers +both the codebase and any sample code in the documentation. The documentation is © +Crown copyright and available under the terms of the Open Government Licence v3.0. + +## Contributing + +[If you want to help us build and improve `construction_survey_results`, view our +contributing guidelines][contributing]. + +### Requirements + +[```Contributors have some additional requirements!```][contributing] + +- Python 3.6.1+ installed +- a `.secrets` file with the [required secrets and + credentials](#required-secrets-and-credentials) +- [load environment variables][docs-loading-environment-variables] from `.env` + +To install the contributing requirements, open your terminal and enter: +```shell +python -m pip install -U pip setuptools +pip install -e .[dev] +pre-commit install +``` +or use the `make` command: +```shell +make install_dev +``` + +## Acknowledgements + +[This project structure is based on the `govcookiecutter` template +project][govcookiecutter].
+ +[contributing]: https://github.com/best-practice-and-impact/govcookiecutter/blob/main/%7B%7B%20cookiecutter.repo_name%20%7D%7D/docs/contributor_guide/CONTRIBUTING.md +[govcookiecutter]: https://github.com/best-practice-and-impact/govcookiecutter +[docs-loading-environment-variables]: https://github.com/best-practice-and-impact/govcookiecutter/blob/main/%7B%7B%20cookiecutter.repo_name%20%7D%7D/docs/user_guide/loading_environment_variables.md +[docs-loading-environment-variables-secrets]: https://github.com/best-practice-and-impact/govcookiecutter/blob/main/%7B%7B%20cookiecutter.repo_name%20%7D%7D/docs/user_guide/loading_environment_variables.md#storing-secrets-and-credentials diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..e69de29 diff --git a/cons_results/__init__.py b/cons_results/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cons_results/config.json b/cons_results/config.json new file mode 100644 index 0000000..fca7859 --- /dev/null +++ b/cons_results/config.json @@ -0,0 +1,8 @@ +{ + "survey": { + "results_filepath": "/d:/repos/construction-survey-results/cons_results/survey_results.csv" + }, + "logging": { + "file": "/var/log/construction_survey.log" + } +} diff --git a/cons_results/estimation/__init__.py b/cons_results/estimation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cons_results/imputation/__init__.py b/cons_results/imputation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cons_results/main.py b/cons_results/main.py new file mode 100644 index 0000000..29d1d0a --- /dev/null +++ b/cons_results/main.py @@ -0,0 +1,8 @@ +def run_pipeline(): + """This is the main function that runs the pipeline""" + print("Running pipeline") + print("Pipeline finished") + + +if __name__ == "__main__": + run_pipeline() diff --git a/cons_results/outlier_detection/__init__.py 
b/cons_results/outlier_detection/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cons_results/outputs/__init__.py b/cons_results/outputs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cons_results/staging/__init__.py b/cons_results/staging/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cons_results/utilities/__init__.py b/cons_results/utilities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..7011656 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,32 @@ +# `docs` folder overview + +All documentation for the project should be included in this folder in either +reStructuredText or Markdown files, with acceptable formatting for Sphinx. [Guidance on +how to write Sphinx documentation is supplied in the contributor +guide][writing-sphinx-documentation]. + +To build the documentation, run the `docs` command [from `Makefile` using the `make` +utility at the top-level of this repository][docs-makefile]. + +```shell +make docs +``` + +or, alternatively, run: + +```shell +sphinx-build -b html ./docs ./docs/_build +``` + +The HTML-version of this documentation can then be viewed at `docs/_build/index.html`, +relative to the top-level of this repository. + +## Analytical quality assurance (AQA) + +All analytical quality assurance (AQA) documents can be found in the `docs/aqa` folder. +These files document how this project meets organisational [guidance on producing +quality analysis for HM Government projects][aqua-book].
+ +[aqua-book]: https://www.gov.uk/government/publications/the-aqua-book-guidance-on-producing-quality-analysis-for-government +[docs-makefile]: https://github.com/best-practice-and-impact/govcookiecutter/blob/main/%7B%7B%20cookiecutter.repo_name%20%7D%7D/Makefile +[writing-sphinx-documentation]: https://github.com/best-practice-and-impact/govcookiecutter/blob/main/%7B%7B%20cookiecutter.repo_name%20%7D%7D/docs/contributor_guide/writing_sphinx_documentation.md diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/aqa/README.md b/docs/aqa/README.md new file mode 100644 index 0000000..6e081ee --- /dev/null +++ b/docs/aqa/README.md @@ -0,0 +1,10 @@ +# Analytical quality assurance + +These pages summarise analytical quality assurance (AQA) required for this project: + +```{toctree} +:maxdepth: 2 +./aqa_plan.md +./data_log.md +./assumptions_caveats.md +``` diff --git a/docs/aqa/aqa_plan.md b/docs/aqa/aqa_plan.md new file mode 100644 index 0000000..3fa183d --- /dev/null +++ b/docs/aqa/aqa_plan.md @@ -0,0 +1,12 @@ +# Analytical quality assurance plan + +This analytical quality assurance (AQA) plan outlines [our implementation of the +Aqua Book][aqua-book] for this project. [Further resources related to the Aqua Book +are also available on GOV.UK][aqua-book-resources]. + +This is a living document, and should be updated and/or modified as necessary. For +example if new tasks not listed here become relevant to project success, please add +them to this plan. 
+ +[aqua-book]: https://www.gov.uk/government/publications/the-aqua-book-guidance-on-producing-quality-analysis-for-government +[aqua-book-resources]: https://www.gov.uk/government/collections/aqua-book-resources diff --git a/docs/aqa/assumptions_caveats.md b/docs/aqa/assumptions_caveats.md new file mode 100644 index 0000000..30a5d03 --- /dev/null +++ b/docs/aqa/assumptions_caveats.md @@ -0,0 +1,30 @@ +# Assumptions and caveats log + +This log contains a list of assumptions and caveats used in this analysis. + +## Definitions + +Assumptions are RAG-rated according to the following definitions for quality and +impact[^1]: + +[^1]: With thanks to the Home Office Analytical Quality Assurance team for these definitions. + +| RAG | Assumption quality | Assumption impact | +|-------|---------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------| +| Green | Reliable assumption, well understood and/or documented; anything up to a validated & recent set of actual data. | Marginal assumptions; their changes have no or limited impact on the outputs. | +| Amber | Some evidence to support the assumption; may vary from a source with poor methodology to a good source that is a few years old. | Assumptions with a relevant, even if not critical, impact on the outputs. | +| Red | Little evidence to support the assumption; may vary from an opinion to a limited data source with poor methodology. | Core assumptions of the analysis; the output would be drastically affected by their change. | + +## Assumption 1: Insert plain English title here + +* Quality: Insert RAG rating here +* Impact: Insert RAG rating here + +Add plain English description here. + +## Assumption 2: Insert plain English title here + +* Quality: Insert RAG rating here +* Impact: Insert RAG rating here + +Add plain English description here. 
diff --git a/docs/aqa/data_log.md b/docs/aqa/data_log.md new file mode 100644 index 0000000..0a020f8 --- /dev/null +++ b/docs/aqa/data_log.md @@ -0,0 +1,30 @@ +# Data log + +This log contains a list of data sources used in this analysis. + +## Definitions + +Assumptions are RAG-rated according to the following definitions for quality and +suitability[^1]: + +[^1]: With thanks to the Home Office Analytical Quality Assurance team for these definitions. + +| RAG | Data quality | Data suitability | +|-------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Green | Data is well understood and there are no major issues with quality. Minor issues are understood and documented. | Data is best available for the required purpose and has been validated (for example against published statistics). | +| Amber | Data is well understood. There are quality issues (for example missing values, step changes, large number of outliers) that can be explained, documented or shown to have negligible impact. | Not the ideal data set for the analysis, but the best available at the time. Results will reflect the fact that it is not the ideal data set and it will subject to sensitivity analysis where appropriate. | +| Red | Data is not well understood. There are major quality issues that cannot be fully explained and/or have a significant impact on analysis outputs. | There are concerns about the suitability of the data set for this application, which could negatively affect the quality and accuracy of the analysis. Its derivation / sample size is not known. 
| + +## Source 1: Insert plain English title here + +* Quality: Insert RAG rating here +* Suitability: Insert RAG rating here + +Add plain English description here. + +### Source 2: Insert plain English title here + +* Quality: Insert RAG rating here +* Suitability: Insert RAG rating here + +Add plain English description here. diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..7f25d65 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,214 @@ +# construction-survey-results documentation build configuration file +# +# This file is execfile()d with the current +# directory set to its containing dir. +# +# Note that not all possible configuration values are +# present in this autogenerated file. +# +# All configuration values have a default; values that are +# commented out serve to show the default. +# +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. + +import os +import sys + +sys.path.insert(0, os.path.abspath("..")) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +needs_sphinx = "4.0" + +# Add any Sphinx extension module names here, as strings. +# They can be extensions coming +# with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosectionlabel", + "sphinx.ext.autosummary", + "sphinx.ext.napoleon", + "myst_parser", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} + +# The master toctree document. +master_doc = "index" + +# General information about the project. 
+project = "construction-survey-results" +author = "Your GitHub/GitLab organisation name, for example ukgovdatascience" + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in +# various other places throughout the built documents. +# The short X.Y.Z version. +version = "0.0.1" +# The full version, including alpha/beta/rc tags. +release = "0.0.1" + +# List of patterns, relative to source directory, +# that match files and directories to +# ignore when looking for source files. +# These patterns also affect html_static_path and +# html_extra_path +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "README.md"] + +# -- Options for HTML output ------------------------------ + +# The theme to use for HTML and HTML Help pages. +# See the documentation for a list of +# builtin themes. +html_theme = "alabaster" + +# Theme options are theme-specific and customize +# the look and feel of a theme further. +# For a list of options available for each theme, see the documentation. +# html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +# html_theme_path = [] + +# The name for this set of Sphinx documents. +# " v documentation" by default. +# html_title = "None" + +# A shorter title for the navigation bar. +# Default is the same as html_title. +# html_short_title = None + +# The name of an image file (relative to this directory) +# to place at the top of the sidebar. +# html_logo = None + +# The name of an image file (relative to this directory) +# to use as a favicon of the docs. This file should be a +# Windows icon file (.ico) being 16x16 or 32x32 pixels large. +# html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) +# here, relative to this directory. +# They are copied after the builtin static files, so a file named +# "default.css" will overwrite the builtin "default.css". 
+html_static_path = ["_static"] + +# Add any extra paths that contain custom files +# (such as robots.txt or .htaccess) here, +# relative to this directory. +# These files are copied directly to the root of the documentation. +# html_extra_path = [] + +# If not None, a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +# The empty string is equivalent to '%b %d, %Y'. +# html_last_updated_fmt = None + +# Custom sidebar templates, maps document names to template names. +# html_sidebars = {} + +# Additional templates that should be rendered to pages, +# maps page names to template names. +# html_additional_pages = {} + +# If false, no module index is generated. +# html_domain_indices = True + +# If false, no index is generated. +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, +# and all pages will contain a tag referring to it. +# The value of this option must be the base URL from +# which the finished HTML is served. +# html_use_opensearch = "" + +# This is the file name suffix for HTML files (for example ".xhtml"). +# html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' +# html_search_language = "en" + +# A dictionary with options for the search language support, +# empty by default. 'ja' uses this config value. +# 'zh' user can custom change `jieba` dictionary path. 
+# html_search_options = {"type": "default"} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +# html_search_scorer = "scorer.js" + +# Output file base name for HTML help builder. +htmlhelp_basename = "construction_survey_resultsdoc" + +# -- Options for autosection output ---------------- + +# Prefix document path to section labels, +# otherwise autogenerated labels would look +# like 'heading' rather than 'path/to/file:heading' +autosectionlabel_prefix_document = True + +# -- Options for autosummary output ----------------------- + +# Set the autosummary to generate stub files +autosummary_generate = True + +# -- Options for Napoleon extension ----------------------- + +# Napoleon settings to enable parsing of Google- and NumPy-style docstrings. +# napoleon_google_docstring = True +# napoleon_numpy_docstring = True +# napoleon_include_init_with_doc = False +# napoleon_include_private_with_doc = False +# napoleon_include_special_with_doc = True +# napoleon_use_admonition_for_examples = False +# napoleon_use_admonition_for_notes = False +# napoleon_use_admonition_for_references = False +# napoleon_use_ivar = False +# napoleon_use_param = True +# napoleon_use_rtype = True + +# -- Options for MyST ------------------------------------ + +# Enforce heading anchors for h1 to h6 headings +myst_heading_anchors = 6 + +# Enable MyST extensions +myst_enable_extensions = [ + "amsmath", + "colon_fence", + "deflist", + "dollarmath", + "html_admonition", + "html_image", + # "linkify", + "replacements", + "smartquotes", + "substitution", +] diff --git a/docs/contributor_guide/CODE_OF_CONDUCT.md b/docs/contributor_guide/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..fe9b53a --- /dev/null +++ b/docs/contributor_guide/CODE_OF_CONDUCT.md @@ -0,0 +1,98 @@ +# Code of conduct for `construction_survey_results` + +Contributors to this repository hosted by `Your GitHub/GitLab 
organisation name, for example ukgovdatascience` are expected to follow the +Contributor Covenant Code of Conduct, and those working within Her Majesty's Government +are also expected to follow the Civil Service Code. + +## Civil Service Code + +Contributors working within Her Majesty's Government must review the [Civil Service +Code][civil-service-code], and are expected to follow it in their contributions. + +## Contributor Covenant Code of Conduct + +### Definitions + +Where this Code of Conduct says: + +- "Project", we mean this `construction_survey_results` GitHub repository; +- "Maintainer", we mean the `Your GitHub/GitLab organisation name, for example ukgovdatascience` organisation owners; and +- "Leadership", we mean both `Your GitHub/GitLab organisation name, for example ukgovdatascience` organisation owners, line managers, and other + leadership within the Office for National Statistics. + +### Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and +maintainers pledge to make participation in our project, and our community a +harassment-free experience for everyone, regardless of age, body size, disability, +ethnicity, sex characteristics, gender identity and expression, level of experience, +education, socio-economic status, nationality, personal appearance, race, religion, or +sexual identity and orientation. 
+ +### Our Standards + +Examples of behaviour that contributes to creating a positive environment include: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behaviour by participants include: + +- The use of sexualised language or imagery and unwelcome sexual attention or advances +- Trolling, insulting/derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or electronic address, + without explicit permission +- Other conduct which could reasonably be considered inappropriate in a professional + setting + +### Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behaviour and are expected to take appropriate and fair corrective action in response +to any instances of unacceptable behaviour. + +Project maintainers have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are not +aligned to this Code of Conduct, or to ban temporarily or permanently any contributor +for other behaviours that they deem inappropriate, threatening, offensive, or harmful. + +### Scope + +This Code of Conduct applies within all project spaces, and it also applies when an +individual is representing the project or its community in public spaces. Examples of +representing a project or community include using an official project e-mail address, +posting using an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. 
+ +### Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behaviour may be reported by +contacting the project team at +[organisation@email.address][email-address]. All complaints will be +reviewed and investigated and will result in a response that is deemed necessary and +appropriate to the circumstances. The project team is obligated to maintain +confidentiality with regard to the reporter of an incident. Further details of +specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good faith may +face temporary or permanent repercussions as determined by other members of the +project's leadership. + +### Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][contributor-covenant], +version 1.4, available at +[https://www.contributor-covenant.org/version/1/4/code-of-conduct/][contributor-covenant-code-of-conduct], +and the `alphagov` Code of Conduct available at +[https://github.com/alphagov/.github/blob/main/CODE_OF_CONDUCT.md][alphagov-code-of-conduct]. + +[alphagov-code-of-conduct]: https://github.com/alphagov/.github/blob/main/CODE_OF_CONDUCT.md +[civil-service-code]: https://www.gov.uk/government/publications/civil-service-code/the-civil-service-code +[contributor-covenant]: https://www.contributor-covenant.org +[contributor-covenant-code-of-conduct]: https://www.contributor-covenant.org/version/1/4/code-of-conduct/ +[email-address]: mailto:organisation@email.address diff --git a/docs/contributor_guide/CONTRIBUTING.md b/docs/contributor_guide/CONTRIBUTING.md new file mode 100644 index 0000000..61ac1b9 --- /dev/null +++ b/docs/contributor_guide/CONTRIBUTING.md @@ -0,0 +1,121 @@ +# Contributing + +We love contributions! We've compiled this documentation to help you understand our +contributing guidelines. [If you still have questions, please contact us][email] and +we'd be happy to help! 
+ +## Code of Conduct + +[Please read `CODE_OF_CONDUCT.md` before contributing][code-of-conduct]. + +## Getting started + +To start contributing, open your terminal and install the package and +[pre-commit hooks][pre-commit] using: + +```shell +pip install -e .[dev] +pre-commit install +``` + +or use the `make` command: +```shell +make install_dev +``` + +The pre-commit hooks are a security feature to ensure, for example, no secrets[^1], +large data files, and Jupyter notebook outputs are accidentally committed into the +repository. [For more information on pre-commit hooks see our +documentation][docs-pre-commit-hooks]. + +[^1]: [Only secrets of specific patterns are detected by the pre-commit + hooks][docs-pre-commit-hooks-secrets-definition]. + +## Code conventions + +[We mainly follow the GDS Way in our code conventions][gds-way]. + +### Git and GitHub + +We use Git to version control the source code. [Please read the Quality assurance of code for analysis and research for details on Git best practice][duck-book-version-control]. This includes how to write good commit messages, how to branch correctly and solving merge conflicts. + +[If you want to modify the `.gitignore` files, see the template +documentation][docs-updating-gitignore] for further details. + +Our source code is stored on GitHub. Pull requests into `main` require at least one +approved review. + +### Python + +For Python code, [we follow the GDS Way Python style guide][gds-way-python] with a line +length of 88; the flake8 pre-commit hook should help with this! + +### Markdown + +Local links can be written as normal, but external links should be referenced at the +bottom of the Markdown file for clarity. For example: + +Use a [local link to reference the `README.md`](../../README.md) file, but [an external +link for GOV.UK][gov-uk]. + +We also try to wrap Markdown to a line length of 88 characters, but this is not +strictly enforced in all cases, for example with long hyperlinks. 
+ +## Testing + +[Tests are written using the `pytest` framework][pytest], with its configuration in the +`pyproject.toml` file. Note, only tests in the `tests` folder are run. To run the +tests, enter the following command in your terminal: + +```shell +pytest +``` + +### Code coverage + +[Code coverage of Python scripts is measured using the `coverage` Python +package][coverage]; its configuration can be found in `pyproject.toml`. Note coverage +only extends to Python scripts in the `src` folder. + +To run code coverage, and view it as an HTML report, enter the following command in +your terminal: + +```shell +coverage run -m pytest +coverage html +``` + +or use the `make` command: + +```shell +make coverage_html +``` + +The HTML report can be accessed at `htmlcov/index.html`. + +## Documentation + +[We write our documentation in MyST Markdown for use in Sphinx][myst]. This is mainly +stored in the `docs` folder, unless it's more appropriate to store it elsewhere, like +this file. + +[Please read our guidance on how to write accessible +documentation][docs-write-accessible-documentation], as well as our [guidance on +writing Sphinx documentation][docs-write-sphinx-documentation]. This allows you to +build the documentation into an accessible, searchable website. 
+ +[code-of-conduct]: https://github.com/best-practice-and-impact/govcookiecutter/blob/main/%7B%7B%20cookiecutter.repo_name%20%7D%7D/docs/contributor_guide/CODE_OF_CONDUCT.md +[coverage]: https://coverage.readthedocs.io/ +[docs-pre-commit-hooks]: https://github.com/best-practice-and-impact/govcookiecutter/blob/main/%7B%7B%20cookiecutter.repo_name%20%7D%7D/docs/contributor_guide/pre_commit_hooks.md +[docs-pre-commit-hooks-secrets-definition]: https://github.com/best-practice-and-impact/govcookiecutter/blob/main/%7B%7B%20cookiecutter.repo_name%20%7D%7D/docs/contributor_guide/pre_commit_hooks.md#definition-of-a-secret-according-to-detect-secrets +[docs-updating-gitignore]: https://github.com/best-practice-and-impact/govcookiecutter/blob/main/%7B%7B%20cookiecutter.repo_name%20%7D%7D/docs/contributor_guide/updating_gitignore.md +[docs-write-accessible-documentation]: https://github.com/best-practice-and-impact/govcookiecutter/blob/main/%7B%7B%20cookiecutter.repo_name%20%7D%7D/docs/contributor_guide/writing_accessible_documentation.md +[docs-write-sphinx-documentation]: https://github.com/best-practice-and-impact/govcookiecutter/blob/main/%7B%7B%20cookiecutter.repo_name%20%7D%7D/docs/contributor_guide/writing_sphinx_documentation.md +[gds-way]: https://gds-way.cloudapps.digital/ +[duck-book-version-control]: https://best-practice-and-impact.github.io/qa-of-code-guidance/version_control.html +[gds-way-python]: https://gds-way.cloudapps.digital/manuals/programming-languages/python/python.html#python-style-guide +[myst]: https://myst-parser.readthedocs.io/ +[pre-commit]: https://pre-commit.com +[pytest]: https://docs.pytest.org/ +[gov-uk]: https://www.gov.uk/ +[email]: mailto:organisation@email.address diff --git a/docs/contributor_guide/README.md b/docs/contributor_guide/README.md new file mode 100644 index 0000000..ee76a6b --- /dev/null +++ b/docs/contributor_guide/README.md @@ -0,0 +1,13 @@ +# Contributing guide + +This is the contributor guide for the 
`construction_survey_results` project. + +```{toctree} +:maxdepth: 2 +./CODE_OF_CONDUCT.md +./CONTRIBUTING.md +./pre_commit_hooks.md +./updating_gitignore.md +./writing_accessible_documentation.md +./writing_sphinx_documentation.md +``` diff --git a/docs/contributor_guide/pre_commit_hooks.md b/docs/contributor_guide/pre_commit_hooks.md new file mode 100644 index 0000000..d4a8805 --- /dev/null +++ b/docs/contributor_guide/pre_commit_hooks.md @@ -0,0 +1,173 @@ +# Pre-commit hooks + +[This repository uses the Python package `pre-commit` to manage pre-commit +hooks][pre-commit]. Pre-commit hooks are actions which are run automatically, typically +on each commit, to perform some common set of tasks. For example, a pre-commit hook +might be used to run any code linting automatically before code is committed, ensuring +common code quality. + +## Purpose + +For this repository, we are using `pre-commit` for a number of purposes: + +- checking for secrets being committed accidentally — there is a strict [definition of + a "secret"](#definition-of-a-secret-according-to-detect-secrets); and +- checking for any large files (over 5 MB) being committed. +- cleaning Jupyter notebooks, which means removing all outputs, execution counts, + Python kernels, and, for Google Colaboratory (Colab), stripping out user information. + +We have configured `pre-commit` to run automatically on every commit. By running on +each commit, we ensure that `pre-commit` will be able to detect all contraventions and +keep our repository in a healthy state. + +```{note} Pre-commit hooks and Google Colab +No pre-commit hooks will be run on Google Colab notebooks pushed directly to GitHub. +For security reasons, it is recommended that you manually download your notebook, and +commit up locally to ensure pre-commit hooks are run on your changes. +``` + +## Installation + +In order for `pre-commit` to run, action is needed to configure it on your system. 
+ +- install the `pre-commit` package into your Python environment from + `requirements.txt`; and +- run `pre-commit install` in your terminal to set up `pre-commit` to run when code is + committed. + +## Using the `detect-secrets` pre-commit hook + +```{note} Secret detection limitations +The `detect-secrets` package does its best to prevent accidental committing of secrets, +but it may miss things. Instead, focus on good software development practices! See the +[definition of a secret for further +information](#definition-of-a-secret-according-to-detect-secrets). +``` + +[We use `detect-secrets` to check that no secrets are accidentally +committed][detect-secrets]. This hook requires you to generate a baseline file if one +is not already present within the root directory. To create the baseline file, run the +following at the root of the repository: + +```shell +detect-secrets scan > .secrets.baseline +``` + +Next, audit the baseline that has been generated by running: + +```shell +detect-secrets audit .secrets.baseline +``` + +When you run this command, you'll enter an interactive console. This will present you +with a list of high-entropy strings and/or anything which could be a secret. It will +then ask you to verify whether this is the case. This allows the hook to remember false +positives in the future, and alert you to new secrets. + +### Definition of a "secret" according to `detect-secrets` + +The `detect-secrets` documentation, as of January 2021, says it works: + +> ...by running periodic diff outputs against heuristically crafted \[regular +> expression\] statements, to identify whether any new secret has been committed. + +This means it uses regular expression patterns to scan your code changes for anything +that looks like a secret according to the patterns. By definition, there are only a +limited number of patterns, so the `detect-secrets` package cannot detect every +conceivable type of secret.
+ +To understand what types of secrets will be detected, read the `detect-secrets` +documentation on caveats, and the list of supported plugins. Also, you should use +secret variable names with words that will trip the KeywordDetector plugin; see the +[`DENYLIST` variable for the full list of words][detect-secrets-keyword-detector]. + +### If `pre-commit` detects secrets during commit + +If `pre-commit` detects any secrets when you try to create a commit, it will detail +what it found and where to go to check the secret. + +If the detected secret is a false positive, there are two options to resolve this, and +prevent your commit from being blocked: + +- [inline allowlisting of false positives + (recommended)](#inline-allowlisting-recommended); or +- [updating the `.secrets.baseline` to include the false + positives](#updating-secretsbaseline). + +In either case, if an actual secret is detected (or a combination of actual secrets and +false positives), first remove the actual secret. Then follow either of these +processes. + +#### Inline allowlisting (recommended) + +To exclude a false positive, add a `pragma` comment such as: + +```python +secret = "Password123" # pragma: allowlist secret +``` + +or + +```python +# pragma: allowlist nextline secret +secret = "Password123" +``` + +If the detected secret is actually a secret (or other sensitive information), remove +the secret and re-commit; there is no need to add any `pragma` comments. + +If your commit contains a mixture of false positives and actual secrets, remove the +actual secrets first before adding `pragma` comments to the false positives. + +#### Updating `.secrets.baseline` + +To exclude a false positive, you can also [update the `.secrets.baseline` by repeating +the same two commands as in the initial +setup](#using-the-detect-secrets-pre-commit-hook). + +During auditing, if the detected secret is actually a secret (or other sensitive +information), remove the secret and re-commit.
There is no need to update the +`.secrets.baseline` file in this case. + +If your commit contains a mixture of false positives and actual secrets, remove the +actual secrets first before updating and auditing the `.secrets.baseline` file. + +## Keeping specific Jupyter notebook outputs + +It may be necessary or useful to keep certain output cells of a Jupyter notebook, for +example charts or graphs visualising some set of data. To do this, [according to the +documentation for the `nbstripout` package][nbstripout], either: + +1. add a `keep_output` tag to the desired cell; or +2. add `"keep_output": true` to the desired cell's metadata. + +You can access cell tags or metadata in Jupyter by enabling the "Tags" or +"Edit Metadata" toolbar (View > Cell Toolbar > Tags; View > Cell Toolbar > +Edit Metadata). + +For the tags approach, enter `keep_output` in the text field for each desired cell, and +press the "Add tag" button. For the metadata approach, press the "Edit Metadata" button +on each desired cell, and edit the metadata to look like this: + +```json +{ + "keep_output": true +} +``` + +This will tell the hook not to strip the resulting output of the desired cell(s), +allowing the output(s) to be committed. + +```{note} Tags and metadata on Google Colab +Currently (March 2020) there is no way to add tags and/or metadata to Google Colab +notebooks. + +It's strongly suggested that you download the Colab as a .ipynb file, and edit tags +and/or metadata using Jupyter before committing the code if you want to keep some +outputs. 
+``` + +[detect-secrets]: https://github.com/Yelp/detect-secrets +[detect-secrets-plugins]: https://github.com/Yelp/detect-secrets#currently-supported-plugins +[nbstripout]: https://github.com/kynan/nbstripout +[pre-commit]: https://pre-commit.com/ diff --git a/docs/contributor_guide/updating_gitignore.md b/docs/contributor_guide/updating_gitignore.md new file mode 100644 index 0000000..52cd119 --- /dev/null +++ b/docs/contributor_guide/updating_gitignore.md @@ -0,0 +1,10 @@ +# Updating the `.gitignore` file + +[The `.gitignore` used in this repository was created with generic exclusions from +gitignore.io][gitignore-io], with project-specific exclusions listed afterwards. + +If you want to add exclusions for new programming languages and/or IDEs, use the first +line to recreate the generic exclusions from gitignore.io. Add all other +project-specific exclusions afterwards. + +[gitignore-io]: https://www.toptal.com/developers/gitignore diff --git a/docs/contributor_guide/writing_accessible_documentation.md b/docs/contributor_guide/writing_accessible_documentation.md new file mode 100644 index 0000000..38ed5cf --- /dev/null +++ b/docs/contributor_guide/writing_accessible_documentation.md @@ -0,0 +1,52 @@ +# Writing accessible documentation + +[You can build this project's documentation into a website using +Sphinx][docs-write-sphinx-documentation]. If you work in the public sector, and build a +website, by law the website must be accessible. + +The full name of the accessibility regulations is the Public Sector Bodies (Websites +and Mobile Applications) (No. 2) Accessibility Regulations 2018. + +It came into force on 23 September 2018, and all public sector bodies have to meet +these requirements unless exempt. [GOV.UK has further details to help you understand +the impact of the 2018 requirements][govuk-accessibility] + +We use the following checklist to determine how accessible our documentation is, when +rendered as a website using Sphinx. 
+ +- [check the website against the WAVE Web Accessibility Evaluation Tool][wave] +- check that link text is descriptive +- check the hierarchy of page headings, which should go in order from `h2` to `h4` with + no gaps +- remove italics, and bold text +- only use block capitals inside curly braces for placeholders in code examples +- check for accessible language + - use [`alex.js` to identify insensitive, and inconsiderate writing][alex-js] + - replace instances of `click` with `select` or `choose` + - remove latin phrases (`e.g.`, `i.e.`, `ad hoc`, `via`) + - [use GOV.UK inclusive language][govuk-language] + - [replace negative contractions][negative-contractions] + - aim not to have long sentences (maximum 25 words per sentence) + - aim not to have long paragraphs (maximum 5 lines per paragraph) + - check for unique titles in documentation + - check diagrams and images for alternative text as well as surrounding contextual + text + - remove diagrams/images that do not add anything to a user's understanding + - remove screenshots if possible + - [use accessible SVGs][govuk-design-system-images] + - [check for inaccessible formats][govuk-accessible-formats] + +This checklist was created by the Government Digital Service (GDS) technical writing +team with help from the GDS accessibility team. We then [draft a suitable accessibility +statement for the project; an example is available on +GOV.UK][govuk-sample-accessibility]. 
+ +[alex-js]: https://alexjs.com/ +[docs-write-sphinx-documentation]: https://github.com/best-practice-and-impact/govcookiecutter/blob/main/%7B%7B%20cookiecutter.repo_name%20%7D%7D/docs/contributor_guide/writing_sphinx_documentation.md +[govuk-accessible-formats]: https://www.gov.uk/guidance/how-to-publish-on-gov-uk/accessible-pdfs +[govuk-accessibility]: https://www.gov.uk/guidance/accessibility-requirements-for-public-sector-websites-and-apps +[govuk-design-system-images]: https://design-system.service.gov.uk/styles/images/ +[govuk-language]: https://www.gov.uk/government/publications/inclusive-communication/inclusive-language-words-to-use-and-avoid-when-writing-about-disability +[govuk-sample-accessibility]: https://www.gov.uk/government/publications/sample-accessibility-statement +[negative-contractions]: https://www.englishclub.com/vocabulary/contractions-negative.htm +[wave]: https://wave.webaim.org/ diff --git a/docs/contributor_guide/writing_sphinx_documentation.md b/docs/contributor_guide/writing_sphinx_documentation.md new file mode 100644 index 0000000..564f50d --- /dev/null +++ b/docs/contributor_guide/writing_sphinx_documentation.md @@ -0,0 +1,128 @@ +# Writing Sphinx documentation + +[This project is set up to produce documentation using Sphinx][sphinx]; this page +should give you a quick overview on how to write documentation for it. If you'd like to +know how to write good documentation take a look at [Write the Docs guide on writing +documentation][writethedocs]. [For Agile projects, consider documenting +late][agilemodeling] as well. + +## Why should I bother? And why Sphinx? + +Keeping as much of the documentation in a centralised location is a good thing. It +means contributors, users, and anyone else can quickly find as much information as they +need to understand and/or run what you've done. + +Sphinx is a Python-based package to compile documentation into different formats, +including HTML. 
This means you can write your documentation and, with a single terminal +command, build it into a searchable website. + +It's widely used, such as for the documentation of the [`pandas`][pandas], and +[PyTorch][pytorch] Python packages as well as many [others][sphinx-examples]. It is +highly customisable with different extensions, and themes. Included with this project +is: + +- support for both [reStructuredText (ReST)][sphinx-rest], and [ReST-enabled + Markdown][myst]; +- automatic building of documentation from Python docstrings; and +- support for [ReStructuredText][docstring-rst], [NumPy][docstring-numpy], or + [Google][docstring-google] docstring formats. + +### Creating a searchable website + +To create a website with your documentation, run the following command in your terminal +at the top-level of this project: + +```shell +make docs +``` + +This should create an HTML version of your documentation accessible from +`docs/_build/index.html`. + +## Writing in reStructuredText + +[Sphinx provides good documentation on writing in ReST][sphinx-rest] — we would highly +recommend reading that for guidance. We will cover automatically creating docstrings in +the next subsection. + +### Automatically creating docstring documentation (ReST) + +Let us say that `construction_survey_results/__init__.py` has functions called `hello` and `world` imported +into it, and both have docstrings. To automatically generate docstring documentation, +create a ReST file, and add the following line to reference the `construction_survey_results` module: + +```rest +.. currentmodule:: construction_survey_results +``` + +Then, elsewhere in the body, [call the `autosummary` directive to generate the +docstrings as ReST stub files][sphinx-autosummary]. + +[This will create something similar to the `pandas` API +reference][pandas-api-reference]. + +## Writing in ReST-enabled Markdown + +[We use the `myst-parser` package (MyST) to write Markdown that can also include ReST +elements][myst]. 
The package documentation is detailed, so we would recommend reviewing +it. We will cover some of the more widely used elements in the following subsections. + +### Embedding ReST directives + +[Most ReST directives can be embedded into MyST Markdown][myst-rst-directives]. + +### Automatically creating docstring documentation (MyST Markdown) + +Let us say that `construction_survey_results/__init__.py` has functions called `hello` and `world` imported +into it, and both have docstrings. To automatically generate docstring documentation, +create a Markdown file, and add the following line to reference the `construction_survey_results` module: + +````md +```{eval-rst} +.. currentmodule:: construction_survey_results +``` +```` + +Then, elsewhere in the body, [call the `autosummary` directive to generate the +docstrings as ReST stub files][sphinx-autosummary]. + +### Including Markdown files outside the `docs` folder + +[MyST lets you include Markdown files outside the `docs` folder][myst-include]. + +If a Markdown file (`../example.md`) only contains links that do not reference anything +else in this project (including images), create a Markdown file within the `docs` +folder with the following lines: + +````md +```{include} ../example.md +``` +```` + +However, if it includes relative links referencing other files in this project +(including images), we need to tell MyST what those links actually refer to.
For example, +if the relative link is `../hello/world.md`, we need to create a Markdown file within +the `docs` folder with the following lines: + +````md +```{include} ../example.md +:relative-docs: ../hello +:relative-images: +``` +```` + +[agilemodeling]: http://agilemodeling.com/essays/documentLate.htm +[docstring-google]: https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings +[docstring-numpy]: https://numpydoc.readthedocs.io/en/latest/format.html +[docstring-rst]: https://www.python.org/dev/peps/pep-0287/ +[myst]: https://myst-parser.readthedocs.io/ +[myst-include]: https://myst-parser.readthedocs.io/en/latest/sphinx/use.html#include-a-file-from-outside-the-docs-folder-like-readme-md +[myst-rst-directives]: https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html +[pandas]: https://pandas.pydata.org/docs/ +[pandas-api-reference]: https://pandas.pydata.org/docs/reference/index.html +[pytorch]: https://pytorch.org/docs/stable/index.html +[sphinx]: https://www.sphinx-doc.org/ +[sphinx-autosummary]: https://www.sphinx-doc.org/en/master/usage/extensions/autosummary.html +[sphinx-examples]: https://www.sphinx-doc.org/en/master/examples.html +[sphinx-rest]: https://www.sphinx-doc.org/en/master/usage/restructuredtext/index.html +[writethedocs]: https://www.writethedocs.org/guide/writing/beginners-guide-to-docs/ diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..424cf46 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,12 @@ +```{include} ../README.md +:relative-docs: ./docs +``` + +```{toctree} +:hidden: +:maxdepth: 2 +self +./user_guide/README.md +./contributor_guide/README.md +./aqa/README.md +``` diff --git a/docs/user_guide/README.md b/docs/user_guide/README.md new file mode 100644 index 0000000..9d600ad --- /dev/null +++ b/docs/user_guide/README.md @@ -0,0 +1,8 @@ +# User guide + +This is the user guide for the `construction_survey_results` project. 
+ +```{toctree} +:maxdepth: 2 +./loading_environment_variables.md +``` diff --git a/docs/user_guide/loading_environment_variables.md b/docs/user_guide/loading_environment_variables.md new file mode 100644 index 0000000..97867e1 --- /dev/null +++ b/docs/user_guide/loading_environment_variables.md @@ -0,0 +1,90 @@ +# Loading environment variables + +[We use `python-dotenv` to load environment variables][python-dotenv], as these are only loaded when +inside the project folder. This can prevent accidental conflicts with identically named +variables. Alternatively you can use [`direnv` to load environment variables][direnv] if +you meet [certain conditions](#installing-direnv). + +## Using `python-dotenv` + +To load the environment variables, first make sure you have +python-dotenv installed, and [make sure you have a `.secrets` file to store +secrets and credentials](#storing-secrets-and-credentials). Then, to load the +environment variables into a Python script, see the instructions in the `.env` file. + +## Using `direnv` + +To load the environment variables, first [follow the `direnv` installation +instructions](#installing-direnv), and [make sure you have a `.secrets` file to store +secrets and credentials](#storing-secrets-and-credentials). Then: + +1. Open your terminal; +2. Navigate to the project folder; and + - You should see the following message: + ```shell + direnv: error .envrc is blocked. Run `direnv allow` to approve its content. + ``` +3. Allow `direnv`. + ```shell + direnv allow + ``` + +You only need to do this once, and again each time `.envrc` or `.secrets` is modified. + +### Installing `direnv` + +These instructions assume you are running on macOS with administrator privileges using +a bash terminal. For other ways of installing `direnv`, and its shell hooks, consult +the `direnv` documentation. + +1. Open your terminal; +2. [Install `direnv` using Homebrew][homebrew]; + ```shell + brew install direnv + ``` +3.
Add the shell hooks to your `.bash_profile`; + ```shell + echo 'eval "$(direnv hook bash)"' >> ~/.bash_profile + ``` +4. Check that the shell hooks have been added correctly; and + ```shell + cat ~/.bash_profile + ``` + - This should display `eval "$(direnv hook bash)"` +5. Restart your terminal. + +## Storing secrets and credentials + +Secrets and credentials must be stored in the `.secrets` file. This file is not +version-controlled, so no secrets should be committed to GitHub. + +In your terminal navigate to the root folder, and create a `.secrets` file. + +```shell +touch .secrets +``` + +Open this new `.secrets` file using your preferred text editor, and add any secrets as +environment variables. For example, to add a JSON credentials file for Google +BigQuery, save the following changes to `.secrets`. + +```shell +GOOGLE_APPLICATION_CREDENTIALS="path/to/credentials.json" +``` + +Once complete, load the `.secrets` file using: + +```python +from dotenv import load_dotenv +import os + +#Load secrets from the `.secrets` file, overriding any system environment variables +load_dotenv(".secrets", override=True) +#Example variable +EXAMPLE_VARIABLE = os.getenv("EXAMPLE_VARIABLE") +``` + +[python-dotenv]: https://saurabh-kumar.com/python-dotenv/ +[direnv]: https://direnv.net/ +[homebrew]: https://brew.sh/ +[env]: https://github.com/best-practice-and-impact/govcookiecutter/%7B%7B%20cookiecutter.repo_name%20%7D%7D/.env diff --git a/docs/user_guide/using_pytest.md b/docs/user_guide/using_pytest.md new file mode 100644 index 0000000..e7b2fed --- /dev/null +++ b/docs/user_guide/using_pytest.md @@ -0,0 +1,40 @@ +# Using pytest + +We use [`pytest`][pytest] to create and run all of our python based testing. Pytest is the most commonly used python module for testing python code.
Testing your code is vital for following coding best practices and has numerous benefits such as: + +* They help to debug your code +* They make you write more efficient code first time +* They make you think about what precisely your code is doing as you write it +* They provide a sort of documentation for your code +* They help to keep your deployment smooth. + +## Structure + +There should be a `tests` folder in the root directory of your repository containing all the tests that relate to your package. It sits outside of your package because users that want to just use your package will not necessarily need the tests. The tests are there for contributors to use, and if they are contributing they will clone the whole repository, not just the package. + +There is an example pytest folder structure and file in your package that demonstrates this structure. + +## Writing pytests + +For pytest to find your tests, all test files and tests must either start with `test_` or finish with `_test.py`. + +The `test_example_module.py` example test file provides an example of these restrictions. + +## Running pytest +### In the terminal + +There are a few ways to run pytests in your terminal. The easiest is by running +```shell +pytest +``` +in your root directory. This will find any existing pytests within your directory and run them. + +If you only want to run pytests in a specific pytest file you can run +```shell +pytest tests/test_example_module.py +``` + +You can try both of these in the root directory of your new repository.
+ + +[pytest]: https://pypi.org/project/pytest/ diff --git a/make.bat b/make.bat new file mode 100644 index 0000000..6f79204 --- /dev/null +++ b/make.bat @@ -0,0 +1,72 @@ +@echo off + + +IF /I "%1"=="" GOTO .DEFAULT_GOAL +IF /I "%1"=="install" GOTO install +IF /I "%1"=="install_dev" GOTO install_dev +IF /I "%1"=="prepare_docs_folder" GOTO prepare_docs_folder +IF /I "%1"=="docs" GOTO docs +IF /I "%1"=="docs_check_external_links" GOTO docs_check_external_links +IF /I "%1"=="coverage" GOTO coverage +IF /I "%1"=="coverage_html" GOTO coverage_html +IF /I "%1"=="coverage_xml" GOTO coverage_xml +IF /I "%1"=="help" GOTO help +GOTO error + +:.DEFAULT_GOAL + GOTO help + +:install_dev + python -m pip install -U pip setuptools + python -m pip install -e .[dev] + pre-commit install + GOTO :EOF + +:install + python -m pip install -U pip setuptools + python -m pip install -e . + GOTO :EOF + +:prepare_docs_folder + IF exist "./docs/_build" ( rmdir /s /q "./docs/_build/" ) + mkdir ".\docs\_build" + GOTO :EOF + +:docs + CALL make.bat prepare_docs_folder + CALL make.bat install_dev + sphinx-build -b html ./docs ./docs/_build + GOTO :EOF + +:docs_check_external_links + CALL make.bat prepare_docs_folder + CALL make.bat install_dev + sphinx-build -b linkcheck ./docs ./docs/_build + GOTO :EOF + +:coverage + CALL make.bat install_dev + coverage run -m pytest + GOTO :EOF + +:coverage_html + CALL make.bat coverage + coverage html + GOTO :EOF + +:coverage_xml + CALL make.bat coverage + coverage xml + GOTO :EOF + +:help + ECHO make: Use one of the following commands: install, install_dev, docs, docs_check_external_links, coverage, coverage_html, coverage_xml. + GOTO :EOF + +:error + IF "%1"=="" ( + ECHO make: *** No targets specified and no makefile found. Stop. + ) ELSE ( + ECHO make: *** No rule to make target '%1%'. Stop. 
+ ) + GOTO :EOF diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..64b919e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,30 @@ +# `coverage` configurations +[tool.coverage.run] +source = [ + "./" +] + +[tool.coverage.report] +exclude_lines = [ + "if __name__ == .__main__.:" +] + +# `isort` configurations +[tool.isort] +profile = "black" + +# `pytest` configurations +[tool.pytest.ini_options] +addopts = [ + "-vv", + "--doctest-modules" +] +doctest_optionflags = "NORMALIZE_WHITESPACE" +testpaths = [ + "./tests" +] + +# `bandit` configurations +[tool.bandit] +exclude_dirs = ["tests", "docs"] +skips = [] diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..d7be407 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,34 @@ +[metadata] +name = construction-survey-results +description = Project for construction survey +version = 0.0.0 +author = Office for National Statistics +platforms = win32 +classifiers = + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + +[options] +packages = + cons_results +install_requires = + pyyaml + pandas + numpy + monthly-business-survey-results @ git+https://github.com/ONSdigital/monthly-business-survey-results.git@v0.1.1 +python_requires = >=3.6 +zip_safe = no + + +[options.extras_require] +dev = + coverage + detect-secrets == 1.0.3 + myst-parser + pre-commit + pytest + python-dotenv + Sphinx + toml diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..7f1a176 --- /dev/null +++ b/setup.py @@ -0,0 +1,4 @@ +from setuptools import setup + +if __name__ == "__main__": + setup() diff --git a/tests/estimation/test_estimation.py b/tests/estimation/test_estimation.py new file mode 100644 index 0000000..e9e2df5 --- /dev/null +++ b/tests/estimation/test_estimation.py @@ -0,0 +1 @@ +# PLACEHOLDER TO SETUP STRUCTURE diff --git a/tests/imputation/test_imputation.py
b/tests/imputation/test_imputation.py new file mode 100644 index 0000000..e9e2df5 --- /dev/null +++ b/tests/imputation/test_imputation.py @@ -0,0 +1 @@ +# PLACEHOLDER TO SETUP STRUCTURE diff --git a/tests/outlier_detection/test_outlier_detection.py b/tests/outlier_detection/test_outlier_detection.py new file mode 100644 index 0000000..e9e2df5 --- /dev/null +++ b/tests/outlier_detection/test_outlier_detection.py @@ -0,0 +1 @@ +# PLACEHOLDER TO SETUP STRUCTURE diff --git a/tests/outputs/test_outputs.py b/tests/outputs/test_outputs.py new file mode 100644 index 0000000..e9e2df5 --- /dev/null +++ b/tests/outputs/test_outputs.py @@ -0,0 +1 @@ +# PLACEHOLDER TO SETUP STRUCTURE diff --git a/tests/staging/test_staging.py b/tests/staging/test_staging.py new file mode 100644 index 0000000..e9e2df5 --- /dev/null +++ b/tests/staging/test_staging.py @@ -0,0 +1 @@ +# PLACEHOLDER TO SETUP STRUCTURE diff --git a/tests/test_placeholder.py b/tests/test_placeholder.py new file mode 100644 index 0000000..ace0d42 --- /dev/null +++ b/tests/test_placeholder.py @@ -0,0 +1,2 @@ +def test_placeholder(): + assert 1 == 1 diff --git a/tests/utilities/test_utilities.py b/tests/utilities/test_utilities.py new file mode 100644 index 0000000..e9e2df5 --- /dev/null +++ b/tests/utilities/test_utilities.py @@ -0,0 +1 @@ +# PLACEHOLDER TO SETUP STRUCTURE