#!/usr/bin/env python3
# coding: utf-8
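"""Merge per-gridcell counterfactual timeseries into one global netCDF file.

Collects the single-cell timeseries files found under the timeseries output
directory, writes the selected variables into a global lat/lon netCDF file,
and optionally rechunks the result and derives CDO summary statistics.
"""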
from pathlib import Path
from datetime import datetime
import subprocess

import netCDF4 as nc
import numpy as np

import attrici.postprocess as pp
import settings as s
# options for postprocess
rechunk = False
# cdo_processing operates on the rechunked file, so it also needs rechunk = True
cdo_processing = False
TIME0 = datetime.now()
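# per-cell timeseries are read from output_dir/timeseries/<variable>;
# the merged counterfactual file goes to output_dir/cfact/<variable>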
ts_dir = s.output_dir / "timeseries" / s.variable
cfact_dir = s.output_dir / "cfact" / s.variable
data_gen = ts_dir.glob("**/*" + s.storage_format)
cfact_dir.mkdir(parents=True, exist_ok=True)
cfact_file = cfact_dir / s.cfact_file
### check which data is available
data_list = []
lat_indices = []
lon_indices = []
for path in data_gen:
    data_list.append(str(path))
    # parse the cell coordinates from the file name, which is expected
    # to contain "...lat<value>_...lon<value><suffix>"
    lat_float = float(str(path).split("lat")[-1].split("_")[0])
    lon_float = float(str(path).split("lon")[-1].split(s.storage_format)[0])
    # map the coordinates to indices on the global 0.5° grid
    lat_indices.append(int(180 - 2 * lat_float - 0.5))
    lon_indices.append(int(2 * lon_float - 0.5 + 360))
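# worked example, assuming the grid starts at 89.75°N: lat 40.25 gives
# int(180 - 2 * 40.25 - 0.5) = 99, which equals (89.75 - 40.25) / 0.5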
# adjust indices if the dataset is a subset (lat/lon shapes smaller than 360/720);
# use integer division so the indices stay valid for netCDF indexing
# TODO: make this more robust
lat_indices = np.array(lat_indices) // s.lateral_sub
lon_indices = np.array(lon_indices) // s.lateral_sub
### access data from source file
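# the merged output file reuses the time/lat/lon coordinates of the source dataset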
obs = nc.Dataset(Path(s.input_dir) / s.dataset / s.source_file.lower(), "r")
time = obs.variables["time"][:]
lat = obs.variables["lat"][:]
lon = obs.variables["lon"][:]
# map output variable names to columns of the per-cell timeseries;
# extend this mapping if more variables should be reported
variables_to_report = {s.variable: "cfact", s.variable + "_orig": "y"}
# get the headers and form an empty netCDF file with all metadata
headers = pp.read_from_disk(data_list[0]).keys()
print(data_list[0])
headers = headers.drop(["t", "ds", "gmt", "gmt_scaled"])
out = nc.Dataset(cfact_file, "w", format="NETCDF4")
pp.form_global_nc(out, time, lat, lon, headers, obs.variables["time"].units)
# write each cell's timeseries into the global file at its lat/lon index
for (i, j, dfpath) in zip(lat_indices, lon_indices, data_list):
    df = pp.read_from_disk(dfpath)
    for head in headers:
        ts = df[head]
        out.variables[head][:, i, j] = np.array(ts)
    print("wrote data from", dfpath, "to", i, j)
out.close()
print("Successfully wrote", cfact_file, "file. Took")
print("It took {0:.1f} minutes.".format((datetime.now() - TIME0).total_seconds() / 60))
if rechunk:
    cfact_rechunked = pp.rechunk_netcdf(cfact_file)
if cdo_processing:
    # the cdo operators run on the rechunked file, so rechunk must be enabled
    if not rechunk:
        raise ValueError("cdo_processing requires rechunk = True")
    cdo_ops = {
        "monmean": "monmean -selvar,cfact,y",
        "yearmean": "yearmean -selvar,cfact,y",
        # "monmean_valid": "monmean -setrtomiss,-1e20,1.1574e-06 -selvar,cfact,y",
        # "yearmean_valid": "yearmean -setrtomiss,-1e20,1.1574e-06 -selvar,cfact,y",
        "trend": "trend -selvar,cfact,y",
        # "trend_valid": "trend -setrtomiss,-1e20,1.1574e-06 -selvar,cfact,y",
    }
    for cdo_op in cdo_ops:
        # str.rstrip(".nc4") would strip any trailing ".", "n", "c" or "4"
        # characters, so cut off the suffix explicitly instead
        basename = str(cfact_file)[: -len(".nc4")] + "_" + cdo_op
        outfile = basename + ".nc4"
        if "trend" in cdo_op:
            # cdo trend writes two output files (intercept and slope)
            outfile = basename + "_1.nc4 " + basename + "_2.nc4"
        try:
            cmd = "cdo " + cdo_ops[cdo_op] + " " + cfact_rechunked + " " + outfile
            print(cmd)
            subprocess.check_call(cmd, shell=True)
        except subprocess.CalledProcessError:
            # retry with the cdo environment module loaded (e.g. on a cluster)
            cmd = "module load cdo && " + cmd
            print(cmd)
            subprocess.check_call(cmd, shell=True)
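# run this after all per-cell timeseries have been produced; a minimal
# invocation sketch, assuming settings.py is configured: python merge_cfact.py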