Skip to content

Commit

Permalink
module function documentation updates, not yet fully implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
bgarwood committed Jul 28, 2023
1 parent cf767dc commit 23e1166
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 50 deletions.
6 changes: 4 additions & 2 deletions casaconfig/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
Interface specification for all user facing external functions in the casaconfig package.
"""
# __init__.py
from .private.get_data_dir import get_data_dir
from .private.measures_update import measures_update
from .private.pull_data import pull_data
from .private.data_available import data_available
from .private.data_update import data_update
from .private.measures_available import measures_available
from .private.measures_update import measures_update
from .private.set_casacore_path import set_casacore_path
from .private.get_config import get_config
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020 AUI, Inc. Washington DC, USA
# Copyright 2023 AUI, Inc. Washington DC, USA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -16,23 +16,24 @@
"""


def get_data_dir():
def data_available():
"""
Return the path to the included data folder inside the casaconfig package
List available casarundata versions on CASA server
This is a convenience function that can be used to find the included data
directory inside the casaconfig package. That location need not be the same
as the datapath or measurespath.
This returns a list of the casarundata versions available on the CASA
download server. The version parameter of data_update must be one
of the values in that list if set (otherwise the most recent version
in this list is used).
Parameters
None
Returns
string - absolute path to included data directory
list - version names returned as list of strings
"""
import pkg_resources

datapath = pkg_resources.resource_filename('casaconfig', '__data__/')
print("Not yet implemented, nothing to return")
return []

return datapath

69 changes: 69 additions & 0 deletions casaconfig/private/data_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright 2023 AUI, Inc. Washington DC, USA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
this module will be included in the api
"""

def data_update(path, auto_update_rules=False, version=None, force=False, logger=None):
"""
Retrieve the casarundata from the CASA server and install it in path
See the External Data section in casadocs for more information about the data retrieved.
This retrieves the full set of CASA runtime data, including the measures data available
when the requested version of casarundata was produced.
A text file (readme.txt in top-level directory at path) records the version string
and the date when that version was installed in path.
If the version requested matches the one in that text file then this function does
nothing unless force is True.
If a specific version is not requested (the default) and the date in that text file
is today, then this function does nothing unless force is True even if there is a more
recent version available from the CASA server.
A file lock is used to prevent more than one data_update and measures_update from updating
any files in path at the same time. When locked, the lock file (data_update.lock
in path) will contain information about the process that has the lock. When a data_update
gets the lock it will check the readme.txt file in path to make sure that an update is still
necessary (if force is True then an update always happens).
Some of the tables installed by data_update are only read when casatools starts. Use of
data_update should typically be followed by a restart of CASA so that any changes are seen by
the tools and tasks that use this data.
**Note:** data_update requires that the expected readme.txt file already exists at the top-level
directory at path. If the file does not exist or can not be interpreted as expected then
data_update will return without updating any data.
**Note:** if auto_update_rules is True the user must own path (in addition to having read and
write permissions there). The version must then also be None and the force option must be False.
**Note:** the most recent casarundata may not include the most recent measures data. A data_update
is typically followed by a measures update.
Parameters
- path (str) - Folder path to update. Must contain a valid readme.txt.
- auto_update_rules (bool=False) - If True then the user must be the owner of path, version must be None, and force must be False.
- version (str=None) - Version of casarundata to retrieve (usually in the form of casarundata-x.y.z.tar.gz, see data_available()). Default None retrieves the latest.
- force (bool=False) - If True, always re-download the casarundata. Default False will not download casarundata if already updated today unless the version parameter is specified and different from what was last downloaded.
- logger (casatools.logsink=None) - Instance of the casalogger to use for writing messages. Default None writes messages to the terminal.
Returns
None
"""

print("Not yet implemented, nothing has been updated or checked.")
return
33 changes: 22 additions & 11 deletions casaconfig/private/measures_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
this module will be included in the api
"""

def measures_update(path=None, version=None, force=False, logger=None):
def measures_update(path, auto_update_rules=False, version=None, force=False, logger=None):
"""
Retrieve IERS data used for measures calculations from ASTRON FTP server
Original data source is here: https://www.iers.org/IERS/EN/DataProducts/data.html
CASA maintains a separate Observatory table which is used my measures_update instead
CASA maintains a separate Observatory table which is used by measures_update instead
of the version that accompanies the ASTRON measures tables.
A text file (readme.txt in the geodetic directory in path) records the version string
Expand All @@ -34,35 +34,45 @@ def measures_update(path=None, version=None, force=False, logger=None):
is today, then this function does nothing unless force is True even if there is a more
recent version available from the ASTRON FTP server.
Automatic updating (when the measures_update config value is True) uses this
Automatic updating (when the measures_auto_update config value is True) uses this
function as the casatools module is starting so that the updated measures are in
place before any tool needs to use them.
Using measures_update after casatools has started should always be followed by exiting
and restarting casa (or the casatools module if modular casa components are being used).
A file lock is used to prevent more that one measures_update from updating
the measures files at the same time. When locked, the lock file (measures_update.lock
A file lock is used to prevent more than one measures_update and data_update from updating
the measures files in path at the same time. When locked, the lock file (data_update.lock
in path) will contain information about the process that has the lock. When a measures_update
gets the lock it will check the readme.txt file in the geodetic directory in path
to make sure that an update is still necessary (if force is True and update always happens).
to make sure that an update is still necessary (if force is True an update always happens).
Care should be used when using measures_update outside of the normal automatic
update that other casa sessions are not using the same measures at the same time,
especially if they may also be starting at that time. If a specific version is
requested or force is True there is a risk that the measures may be updated while
one of th ose other sessions are trying to load the same measures data, leading to
one of those other sessions is trying to load the same measures data, leading to
unpredictable results. The lock file will prevent simultaneous updates from
happening but if each simultaneous update eventually updates the same measures
location (because force is True or the updates are requesting different versions)
then the measures that any of those simultaneous casatools modules sees is
unpredictable. Avoid multiple, simultaneous updates outside of the automatic
update process.
**Note:** measures_update requires that the expected readme.txt file already exists in
the geodetic directory at path. If that file does not exist or can not be interpreted as
expected then measures_update will return without updating any data.
**Note:** if auto_update_rules is True the user must own path (in addition to having
read and write permissions there). The version must then also be None and the force option
must be False.
Parameters
- path (str=None) - Folder path to place updated measures data. Default None places it in package installation directory (the value returned by get_data_dir),
- version (str=None) - Version of measures data to retrieve (usually in the form of yyyymmdd-160001.ztar, see measures_available()). Default None retrieves the latest
- force (bool=False) - If True, always re-download the measures data. Default False will not download measures data if already updated today unless version parameter is specified and different from what was last downloaded.
- path (str) - Folder path to place updated measures data. Must contain a valid geodetic/readme.txt
- auto_update_rules (bool=False) - If True then the user must be the owner of path, version must be None, and force must be False.
- version (str=None) - Version of measures data to retrieve (usually in the form of yyyymmdd-160001.ztar, see measures_available()). Default None retrieves the latest.
- force (bool=False) - If True, always re-download the measures data. Default False will not download measures data if already updated today unless the version parameter is specified and different from what was last downloaded.
- logger (casatools.logsink=None) - Instance of the casalogger to use for writing messages. Default None writes messages to the terminal
Returns
Expand All @@ -82,7 +92,8 @@ def measures_update(path=None, version=None, force=False, logger=None):
import certifi
import fcntl

if path is None: path = pkg_resources.resource_filename('casaconfig', '__data__/')
print("Not fully implemented, some features do not yet behave as documented")

path = os.path.expanduser(path)
if not os.path.exists(path): os.mkdir(path)
current = None
Expand Down
46 changes: 20 additions & 26 deletions casaconfig/private/pull_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,24 @@
"""


def pull_data(path=None, branch=None, force=False, logger=None):
def pull_data(path, version=None, force=False, logger=None):
"""
Pull down the package data contents from github to the specified directory
Pull down the package data contents from the CASA host to the specified directory
The package installation directory (returned by get_data_dir()) is used when
path is not specified. Note that that location need not be the same as
datapath.
The path must be specified and, unless force is True, must be empty or not exist.
The measures tables included in the package data at github will typically
not be the most recent versions. If measurespath is the same as path then
measures_update() should be used after pull_data to get the most recent
measures tables. If measurespath is a different directory then the measures
table found there are always used even if measures tables are also found
at other locations in datapath.
The measures tables included in the package data will typically
not be the most recent versions. To get the most recent measures data, measures_update
should be used after pull_data.
Some of the tables installed by pull_data are only read when casatools
starts. Use of pull_data (or measures_update) should typically be followed
by a restart so that any changes are seen by the tools and tasks that
use this data.
Some of the tables installed by pull_data are only read when casatools starts. Use of
pull_data should typically be followed by a restart so that any changes are seen by the
tools and tasks that use this data.
Parameters
- path (str=None) - Folder path to place casadata contents. Default None places it in package installation directory
- branch (str=None) - casadata repo branch to retrieve data from. Use 'master' for latest casadata trunk. Default None attempts
to get data from repo branch matching this installation version.
- force (bool=False) - If True, always re-download the data even if already present in path. Default False will not download data if already populated
- path (str) - Folder path to place casadata contents. It must be empty or not exist unless force is True.
- version (str=None) - casadata version to retrieve data from. Default None gets the most recent version.
- force (bool=False) - If True, always re-download the data even if already present in path. If True, path is **Not** first emptied so the result may contain files in addition to the ones just downloaded. Default False will not download data if already populated.
- logger (casatools.logsink=None) - Instance of the casalogger to use for writing messages. Default None writes messages to the terminal
Returns
Expand All @@ -54,14 +47,15 @@ def pull_data(path=None, branch=None, force=False, logger=None):
import numpy as np
import sys

if path is None: path = pkg_resources.resource_filename('casaconfig', '__data__/')
print("Not fully implemented, may not behave as documented.")

path = os.path.expanduser(path)
if not os.path.exists(path): os.mkdir(path)
if branch is None:
if version is None:
try:
branch = 'v'+importlib_metadata.version('casaconfig')
version = 'v'+importlib_metadata.version('casaconfig')
except:
branch = 'v0.0.0'
version = 'v0.0.0'

# check contents of destination folder
expected = ['catalogs', 'demo', 'geodetic', 'alma', 'nrao', 'ephemerides', 'telescope_layout', 'dish_models', 'gui']
Expand All @@ -74,10 +68,10 @@ def pull_data(path=None, branch=None, force=False, logger=None):
if logger is not None: logger.post('casaconfig downloading data contents to %s ...' % path, 'INFO')

try:
repo = git.Repo.clone_from('https://github.com/casangi/casaconfig.git', path+'/tmp', branch=branch)
repo = git.Repo.clone_from('https://github.com/casangi/casaconfig.git', path+'/tmp', branch=version)
except:
if logger is not None: logger.post('casaconfig cant find data branch %s, defaulting to master' % branch, 'WARN')
else: print("WARNING: can't find data branch %s, defaulting to master" % branch, file = sys.stderr )
if logger is not None: logger.post('casaconfig cant find data branch %s, defaulting to master' % version, 'WARN')
else: print("WARNING: can't find data branch %s, defaulting to master" % version, file = sys.stderr )

repo = git.Repo.clone_from('https://github.com/casangi/casaconfig.git', path + '/tmp', branch='master')

Expand Down

0 comments on commit 23e1166

Please sign in to comment.