From 2161bb0b2ded327b1b33e8376032ae7c945f2665 Mon Sep 17 00:00:00 2001
From: Aaron Weeden <aaronwee@buffalo.edu>
Date: Wed, 22 Nov 2023 11:22:53 -0500
Subject: [PATCH] Update tests and testing instructions.

---
 .github/PULL_REQUEST_TEMPLATE.md              |  2 +-
 CHANGELOG.md                                  |  3 +
 docs/developing.md                            | 24 ++++++
 .../test_datawarehouse_integration.py         | 13 +++-
 ...obs-dimensions.csv => jobs-dimensions.csv} |  4 +
 ... => jobs-fieldofscience-filter-values.csv} |  3 +
 ...-dev-jobs-metrics.csv => jobs-metrics.csv} | 18 +++++
 ...e-learning-notebook-example-every-1000.csv | 78 ++++++++++---------
 .../data/{xdmod-dev-realms.csv => realms.csv} |  1 -
 .../test_datawarehouse_regression.py          | 26 ++++---
 tests/unit/test_datawarehouse_unit.py         |  6 +-
 11 files changed, 125 insertions(+), 53 deletions(-)
 rename tests/regression/data/{xdmod-dev-jobs-dimensions.csv => jobs-dimensions.csv} (79%)
 rename tests/regression/data/{xdmod-dev-jobs-fieldofscience-filter-values.csv => jobs-fieldofscience-filter-values.csv} (96%)
 rename tests/regression/data/{xdmod-dev-jobs-metrics.csv => jobs-metrics.csv} (84%)
 rename tests/regression/data/{xdmod-dev-realms.csv => realms.csv} (75%)

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 8301194c..168b7dd2 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -27,4 +27,4 @@
 - [ ] `CHANGELOG.md` has been updated
 - [ ] The milestone is set correctly on the pull request
 - [ ] The appropriate labels have been added to the pull request
-- [ ] The following command produces no errors (replace your own token from https://xdmod.access-ci.org): `XDMOD_API_TOKEN=YOUR_TOKEN_HERE pytest -vvs -o log_cli=true tests/`
+- [ ] Running the automated tests (see `docs/developing.md`) produces no errors.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4407d1e4..8f7f9065 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,7 @@
 # xdmod-data Changelog
 
+## Main development branch
+- Update tests and testing instructions.
+
 ## v1.0.0 (2023-07-21)
 - Initial release.
diff --git a/docs/developing.md b/docs/developing.md
index bc92e1f6..3c738f6f 100644
--- a/docs/developing.md
+++ b/docs/developing.md
@@ -1,3 +1,27 @@
+# Testing the code
+1. Start up a virtual environment, e.g.:
+    ```
+    $ env_dir=~/xdmod-data-test-env
+    $ python3 -m venv ${env_dir}
+    $ source ${env_dir}/bin/activate
+    ```
+1. Install your local development copy of `xdmod-data` in editable mode:
+    ```
+    (env) $ python3 -m pip install --force-reinstall -e /path/to/your/xdmod-data
+    ```
+1. Install `pytest`:
+    ```
+    (env) $ python3 -m pip install --upgrade pytest
+    ```
+1. Create an empty file in your home directory at `~/.xdmod-data-token` and change the permissions to 600.
+1. With an [ACCESS XDMoD](https://xdmod.access-ci.org) account with "User" as the Top Role, create an API token if you do not already have one (sign in and click My Profile -> API Token).
+1. Add your token to the file `~/.xdmod-data-token`.
+1. Change directories to your local development copy of `xdmod-data`.
+1. Run the following command:
+    ```
+    (env) $ pytest -vvs -o log_cli=true tests/
+    ```
+
 # Developing a new version
 1. Make sure the version number is updated in `xdmod_data/__version__.py`.
 1. Create a Pull Request for the new version.
diff --git a/tests/integration/test_datawarehouse_integration.py b/tests/integration/test_datawarehouse_integration.py
index 9081751d..df1e2321 100644
--- a/tests/integration/test_datawarehouse_integration.py
+++ b/tests/integration/test_datawarehouse_integration.py
@@ -1,8 +1,12 @@
+import os
+from os.path import expanduser
+import pandas
+from pathlib import Path
 import pytest
 from xdmod_data.warehouse import DataWarehouse
-import pandas
 
-VALID_XDMOD_URL = 'https://xdmod-dev.ccr.xdmod.org:9001'
+VALID_XDMOD_URL = 'https://xdmod.access-ci.org'
+TOKEN_PATH = '~/.xdmod-data-token'
 INVALID_STR = 'asdlkfjsdlkfisdjkfjd'
 METHOD_PARAMS = {
     'get_data': (
@@ -99,6 +103,11 @@
             key_error_test_params += [(method, {'filters': value}, match)]
 
 
+with open(Path(expanduser(TOKEN_PATH)), 'r') as token_file:
+    token = token_file.read().replace('\n', '').strip()
+os.environ['XDMOD_API_TOKEN'] = token
+
+
 @pytest.fixture(scope='module')
 def dw_methods():
     with DataWarehouse(VALID_XDMOD_URL) as dw:
diff --git a/tests/regression/data/xdmod-dev-jobs-dimensions.csv b/tests/regression/data/jobs-dimensions.csv
similarity index 79%
rename from tests/regression/data/xdmod-dev-jobs-dimensions.csv
rename to tests/regression/data/jobs-dimensions.csv
index c0f249c4..798d3541 100644
--- a/tests/regression/data/xdmod-dev-jobs-dimensions.csv
+++ b/tests/regression/data/jobs-dimensions.csv
@@ -11,6 +11,8 @@ nsfdirectorate,NSF Directorate,The NSF directorate of the field of science indic
 nodecount,Node Count,A categorization of jobs into discrete groups based on node count.
 pi,PI,The principal investigator of a project.
 pi_institution,PI Institution,Organizations that have PIs with allocations.
+pi_institution_country,PI Institution Country,The country of the institution of the PI of the project associated with compute jobs.
+pi_institution_state,PI Institution State,The location of the institution of the PI of the project associated with the compute jobs.
 parentscience,Parent Science,The parent of the field of science indiciated on the allocation request pertaining to the running jobs.
 queue,Queue,Queue pertains to the low level job queues on each resource.
 resource,Resource,A resource is a remote computer that can run jobs.
@@ -19,4 +21,6 @@ provider,Service Provider,A service provider is an institution that hosts resour
 username,System Username,The specific system username of the users who ran jobs.
 person,User,"A person who is on a PIs allocation, hence able run jobs on resources."
 institution,User Institution,Organizations that have users with allocations.
+institution_country,User Institution Country,The name of the country of the institution of the person who ran the compute job.
+institution_state,User Institution State,The location of the institution of the person who ran the compute job.
 nsfstatus,User NSF Status,Categorization of the users who ran jobs.
diff --git a/tests/regression/data/xdmod-dev-jobs-fieldofscience-filter-values.csv b/tests/regression/data/jobs-fieldofscience-filter-values.csv
similarity index 96%
rename from tests/regression/data/xdmod-dev-jobs-fieldofscience-filter-values.csv
rename to tests/regression/data/jobs-fieldofscience-filter-values.csv
index 8130ee79..86ccb36e 100644
--- a/tests/regression/data/xdmod-dev-jobs-fieldofscience-filter-values.csv
+++ b/tests/regression/data/jobs-fieldofscience-filter-values.csv
@@ -1,4 +1,5 @@
 id,label
+189,"Agriculture, Forestry, and Fisheries"
 192,Agriculural Biotechnology
 236,Analytical and Materials Chemistry
 219,Applied Computer Science
@@ -43,6 +44,7 @@ id,label
 240,Magnetospheric and Upper Atmospheric Physics
 177,Materials Engineering
 175,Mechanical Engineering
+201,Media and communications
 187,Medical Biotechnology
 178,Medical Engineering
 182,Nanotechnology
@@ -76,4 +78,5 @@ id,label
 216,Statistics and Probability
 251,Systematics and Population Biology
 208,Training
+191,Veterinary Science
 222,Visualization and Human-Computer Systems
diff --git a/tests/regression/data/xdmod-dev-jobs-metrics.csv b/tests/regression/data/jobs-metrics.csv
similarity index 84%
rename from tests/regression/data/xdmod-dev-jobs-metrics.csv
rename to tests/regression/data/jobs-metrics.csv
index 32fb0bae..f5a9f14e 100644
--- a/tests/regression/data/xdmod-dev-jobs-metrics.csv
+++ b/tests/regression/data/jobs-metrics.csv
@@ -1,12 +1,30 @@
 id,label,description
+avg_ace,ACCESS Credit Equivalents Charged: Per Job (SU),"The average amount of ACCESS Credit Equivalents charged per compute job.<br/>
+
+The ACCESS Credit Equivalent is a measure of how much compute time was used on each resource.
+One ACCESS Credit Equivalent is defined as one CPU Hour on SDSC Expanse (an AMD EPYC 7742 based compute resource).
+The ACCESS Credit Equivalent allows comparison between usage of node-allocated, core-allocated and GPU-allocated 
+resources. It also allows a comparison between resources with different compute power per core.
+The <a href=""https://allocations.access-ci.org/exchange_calculator"" target=""_blank"" rel=""noopener noreferrer"">ACCESS allocations exchange calculator</a>
+lists conversion rates between an ACCESS Credit Equivalent and a service unit on a resource."
+total_ace,ACCESS Credit Equivalents Charged: Total (SU),"The total amount of ACCESS Credit Equivalents charged.<br/>
+
+The ACCESS Credit Equivalent is a measure of how much compute time was used on each resource.
+One ACCESS Credit Equivalent is defined as one CPU Hour on SDSC Expanse (an AMD EPYC 7742 based compute resource).
+The ACCESS Credit Equivalent allows comparison between usage of node-allocated, core-allocated and GPU-allocated 
+resources. It also allows a comparison between resources with different compute power per core.
+The <a href=""https://allocations.access-ci.org/exchange_calculator"" target=""_blank"" rel=""noopener noreferrer"">ACCESS allocations exchange calculator</a>
+lists conversion rates between an ACCESS Credit Equivalent and a service unit on a resource."
 utilization,ACCESS Utilization (%),"The percentage of the ACCESS obligation of a resource that has been utilized by ACCESS jobs.<br/><i> ACCESS Utilization:</i> The ratio of the total CPU hours consumed by ACCESS jobs over a given time period divided by the total CPU hours that the system is contractually required to provide to ACCESS during that period. It does not include non-ACCESS jobs.<br/>It is worth noting that this value is a rough estimate in certain cases where the resource providers don't provide accurate records of their system specifications, over time."
 rate_of_usage,Allocation Usage Rate (XD SU/Hour),The rate of ACCESS allocation usage in XD SUs per hour.
+rate_of_usage_ace,Allocation Usage Rate ACEs (SU/Hour),The rate of ACCESS allocation usage in ACCESS Credit Equivalents per hour.
 avg_cpu_hours,CPU Hours: Per Job,"The average CPU hours (number of CPU cores x wall time hours) per ACCESS job.<br/>For each job, the CPU usage is aggregated. For example, if a job used 1000 CPUs for one minute, it would be aggregated as 1000 CPU minutes or 16.67 CPU hours."
 total_cpu_hours,CPU Hours: Total,"The total CPU hours (number of CPU cores x wall time hours) used by ACCESS jobs.<br/>For each job, the CPU usage is aggregated. For example, if a job used 1000 CPUs for one minute, it would be aggregated as 1000 CPU minutes or 16.67 CPU hours."
 max_processors,Job Size: Max (Core Count),The maximum size ACCESS job in number of cores.<br/><i>Job Size: </i>The total number of processor cores used by a (parallel) job.
 min_processors,Job Size: Min (Core Count),The minimum size ACCESS job in number of cores.<br/><i>Job Size: </i>The total number of processor cores used by a (parallel) job.
 normalized_avg_processors,Job Size: Normalized (% of Total Cores),The percentage average size ACCESS job over total machine cores.<br><i>Normalized Job Size: </i>The percentage total number of processor cores used by a (parallel) job over the total number of cores on the machine.
 avg_processors,Job Size: Per Job (Core Count),The average job size per ACCESS job.<br><i>Job Size: </i>The number of processor cores used by a (parallel) job.
+avg_job_size_weighted_by_ace,Job Size: Weighted By ACEs (Core Count),The average job size weighted by charge in ACCESS Credit Equivalents (ACEs). Defined as <br><i>Average Job Size Weighted By ACEs: </i> sum(i = 0 to n){job i core count*job i charge in ACEs}/sum(i =  0 to n){job i charge in ACEs}
 avg_job_size_weighted_by_cpu_hours,Job Size: Weighted By CPU Hours (Core Count),The average ACCESS job size weighted by CPU Hours. Defined as <br><i>Average Job Size Weighted By CPU Hours: </i> sum(i = 0 to n){ job i core count * job i cpu hours}/sum(i =  0 to n){job i cpu hours}
 avg_job_size_weighted_by_xd_su,Job Size: Weighted By XD SUs (Core Count),The average ACCESS job size weighted by charge in XD SUs. Defined as <br><i>Average Job Size Weighted By XD SUs: </i> sum(i = 0 to n){job i core count*job i charge in xd sus}/sum(i =  0 to n){job i charge in xd sus}
 avg_nu,NUs Charged: Per Job,"The average amount of NUs charged per ACCESS job.<br/>
diff --git a/tests/regression/data/machine-learning-notebook-example-every-1000.csv b/tests/regression/data/machine-learning-notebook-example-every-1000.csv
index 61b701ce..7f44b29e 100644
--- a/tests/regression/data/machine-learning-notebook-example-every-1000.csv
+++ b/tests/regression/data/machine-learning-notebook-example-every-1000.csv
@@ -1,36 +1,44 @@
 ,Nodes,Requested Wall Time,Wait Time,Wall Time,CPU User,"Mount point ""home"" data written","Mount point ""scratch"" data written",Total memory used
-0,1,18000,2223,61,45.33680231265288,,,
-1000,1,18000,3725,287,32.42974787000051,,,49033524736
-2000,1,172800,49,2571,79.97259330857212,,,30924976176.761906
-3000,4,3600,42922,2532,96.81470878098752,45.018830174107066,176860091.8520887,252683009.7777778
-4000,1,43200,1489,4273,98.65262019923018,,,53486231639.14893
-5000,16,172800,380054,19,83.2019551884715,47.13914246437847,193884.53690558145,213282869
-6000,10,144000,112975,3986,94.96697493527975,619.5369574236191,497097.00057766255,761111556.7857141
-7000,1,21600,142222,6067,6.010297266432706,,,107759541905.19403
-8000,1,3600,1,240,0.2383380917742573,,,7726677333.333334
-9000,1,3600,1,195,0.3623863357343296,,,7948964864
-10000,1,780,2664,345,0.9211134064869447,0,20498764.587838568,3695674410
-11000,1,3600,6,966,38.232643491545396,,,40409398340.26666
-12000,1,3600,13,875,38.44329943700583,,,41062250723.55555
-13000,1,21600,154321,5853,6.006929680701932,,,117623635458.65286
-14000,1,3600,135,1445,69.2786738348236,,,146436012299.13046
-15000,1,28800,132001,250,59.259627771523135,,,39645850477.71429
-16000,1,7200,12,711,46.9863178239425,,,184524522837.3333
-17000,1,7200,10,837,47.48688897176907,,,175949883864.6154
-18000,1,172800,54302,3717,94.54949009872816,,,24592065888.524582
-19000,1,7200,150,1122,48.242235742621304,,,207471162595.55554
-20000,1,172800,53263,21002,61.33315672532931,,,45945555454.53298
-21000,1,172800,50221,27861,70.3833748019495,,,65631446785.93313
-22000,1,3600,1090,11,2.1611174966916664,0,1280262.9868873185,210875512
-23000,1,172800,49372,38029,81.86028948409559,,,28993637866.161156
-24000,1,172800,47292,46989,75.1580595471091,,,63021351985.75967
-25000,1,10800,168258,519,67.63795467248386,,,93487184281.60002
-26000,1,43200,91318,2377,1.9121205307623317,,,24738851002.181816
-27000,4,1800,114,434,78.82404727566929,1044.8869758885673,1181195931.4220536,311622745.5
-28000,1,480,530,40,2.640800690755364,0,676348399.5515662,299178751
-29000,1,28800,183935,42190,11.050664559030599,,,45653222140.898186
-30000,1,43200,141566,1090,1.8172013314352236,,,17360479729.371433
-31000,1,21600,257981,5779,5.9772409694676165,,,170223749152.16763
-32000,1,86400,5160,229,94.98545157786833,,,43320737792
-33000,4,14400,3,1788,67.71721566083241,2281.993725064994,1398054207.6041634,887018972
-34000,1,28800,250083,2206,99.43961023903431,,,234280343040.00003
+0,1,172800,11,506,,,,
+1000,1,86400,1,66,,,,
+2000,1,86400,18,752,,,,
+3000,1,86400,8,5434,,,,
+4000,1,86400,6,1572,,,,
+5000,1,172800,7,2592,,,,
+6000,1,14400,7,2800,,,,
+7000,1,3600,2894,1357,,,,
+8000,1,21600,116,7277,,,,
+9000,1,21600,2173,6764,,,,
+10000,1,21600,3574,7095,,,,
+11000,1,9000,4,3564,88.01518903173182,992.4354304606816,267087841.6178405,811231171.4285715
+12000,1,21600,158,5565,,,,
+13000,1,21600,59,6965,,,,
+14000,1,21600,9,7760,,,,
+15000,1,3600,22122,1335,,,,
+16000,1,28800,130,9421,12.262731018331898,19749.432156075072,0,787292327.46875
+17000,1,28800,6,1990,,,,
+18000,1,172800,13,73,,,,
+19000,1,172800,7,129,,,,
+20000,1,25200,4,25211,82.16279473845965,0,5113844.667942916,240912572.72941175
+21000,1,21600,18,6099,,,,
+22000,1,21600,27,7131,,,,
+23000,1,1800,61,1079,35.02319701051263,5818814744.200479,0,91742777.25
+24000,1,3600,5,2306,0.11814596015380158,,,33983854837.760006
+25000,1,960,1,59,2.025062333453586,0,0,118141168
+26000,1,172800,1,20494,87.54061396105656,548.1123048956289,0,224020798.15942028
+27000,4,7200,2,7214,99.2396948622311,441.0514348202534,34392345950.27519,1104895888.4
+28000,1,21600,13,55,1.2148444482641405,,,
+29000,1,21600,171,40,,,,
+30000,1,960,0,42,1.5504320217730077,0,0,112133180
+31000,1,1800,11,183,25.94758412119134,,,129784697856
+32000,1,21600,372,114,1.5571541609296509,,,92681043968
+33000,2,1800,134,139,55.875186246345784,533.6963000565588,754284385.8435647,137041136
+34000,1,7200,74,9,0.94096807333301,4681.666820975073,0,145688575
+35000,1,172800,22,83953,98.95217460976379,,,91787361316.73543
+36000,1,6000,8,152,0.4601673251104144,,,85277047466.66667
+37000,1,900,124,137,96.18834348033303,,,35703571797.333336
+38000,1,21600,12,56,24.892228849477622,,,
+39000,1,21600,12,134,26.487756894710913,,,113801609216
+40000,1,21600,12,229,45.74138522053433,,,42761861802.66667
+41000,1,21600,20,307,0.9428384414161763,,,27184930360.88889
+42000,1,21600,130,386,1.68608777466353,,,49510031732.36363
diff --git a/tests/regression/data/xdmod-dev-realms.csv b/tests/regression/data/realms.csv
similarity index 75%
rename from tests/regression/data/xdmod-dev-realms.csv
rename to tests/regression/data/realms.csv
index 011d3f26..f0fb63e9 100644
--- a/tests/regression/data/xdmod-dev-realms.csv
+++ b/tests/regression/data/realms.csv
@@ -5,5 +5,4 @@ Cloud,Cloud
 Gateways,Gateways
 Jobs,Jobs
 Requests,Requests
-ResourceAllocations,Resource Allocations
 SUPREMM,SUPREMM
diff --git a/tests/regression/test_datawarehouse_regression.py b/tests/regression/test_datawarehouse_regression.py
index 8500eaec..571ee60f 100644
--- a/tests/regression/test_datawarehouse_regression.py
+++ b/tests/regression/test_datawarehouse_regression.py
@@ -1,13 +1,21 @@
-import pytest
-from xdmod_data.warehouse import DataWarehouse
-import pandas
 import numpy
 import os
+from os.path import expanduser
+import pandas
+from pathlib import Path
+import pytest
+from xdmod_data.warehouse import DataWarehouse
 
-XDMOD_URL = 'https://xdmod-dev.ccr.xdmod.org:9001'
+XDMOD_URL = 'https://xdmod.access-ci.org'
+TOKEN_PATH = '~/.xdmod-data-token'
 DATA_DIR = os.path.dirname(__file__) + '/data'
 
 
+with open(Path(expanduser(TOKEN_PATH)), 'r') as token_file:
+    token = token_file.read().replace('\n', '').strip()
+os.environ['XDMOD_API_TOKEN'] = token
+
+
 @pytest.fixture(scope='module')
 def valid_dw():
     with DataWarehouse(XDMOD_URL) as dw:
@@ -33,7 +41,7 @@ def __assert_dfs_equal(
 
 def test_get_raw_data(valid_dw):
     data = valid_dw.get_raw_data(
-        duration=('2021-05-01', '2021-05-02'),
+        duration=('2023-05-01', '2023-05-02'),
         realm='SUPREMM',
         fields=(
             'CPU User',
@@ -67,27 +75,27 @@ def __assert_descriptor_dfs_equal(data_file, actual):
 
 def test_describe_realms(valid_dw):
     __assert_descriptor_dfs_equal(
-        'xdmod-dev-realms.csv',
+        'realms.csv',
         valid_dw.describe_realms(),
     )
 
 
 def test_describe_metrics(valid_dw):
     __assert_descriptor_dfs_equal(
-        'xdmod-dev-jobs-metrics.csv',
+        'jobs-metrics.csv',
         valid_dw.describe_metrics('Jobs'),
     )
 
 
 def test_describe_dimensions(valid_dw):
     __assert_descriptor_dfs_equal(
-        'xdmod-dev-jobs-dimensions.csv',
+        'jobs-dimensions.csv',
         valid_dw.describe_dimensions('Jobs'),
     )
 
 
 def test_get_filter_values(valid_dw):
     __assert_descriptor_dfs_equal(
-        'xdmod-dev-jobs-fieldofscience-filter-values.csv',
+        'jobs-fieldofscience-filter-values.csv',
         valid_dw.get_filter_values('Jobs', 'Field of Science'),
     )
diff --git a/tests/unit/test_datawarehouse_unit.py b/tests/unit/test_datawarehouse_unit.py
index 2d69ab6b..7af2d989 100644
--- a/tests/unit/test_datawarehouse_unit.py
+++ b/tests/unit/test_datawarehouse_unit.py
@@ -43,11 +43,7 @@ def test___enter___RuntimeError_xdmod_host_malformed():
 
 def test___enter___RuntimeError_xdmod_host_unresolved():
     invalid_host = 'https://' + INVALID_STR + '.xdmod.org'
-    with pytest.raises(
-        requests.exceptions.ConnectionError,
-        match='Failed to resolve \'' + INVALID_STR + '.xdmod.org\''
-        + ' \\(\\[Errno -2\\] Name or service not known\\)'
-    ):
+    with pytest.raises(Exception):
         with DataWarehouse(invalid_host):
             pass