Fix actions (#15)
* feat: fetcher ability to list files from a path (for samples not published on DAS)

* Print into file

* run black

* run black

* change python version in yaml: >=3.8, <3.11

* set default mins and maxs to avoid crash

* path ends with /

* example files for new fetcher

* fix: actions

* fix: plotting workflow

Co-authored-by: Andrey Pozdnyakov <[email protected]>
Ming-Yan and andreypz authored Jan 11, 2023
1 parent 8764034 commit 6765222
Showing 14 changed files with 181 additions and 72 deletions.
17 changes: 11 additions & 6 deletions .github/workflows/plotting_test.yml
@@ -1,4 +1,4 @@
-name: plotting script test
+name: Test plotter
 on:
   push:
     branches: [ master ]
@@ -13,13 +13,17 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.10"]

     defaults:
       run:
         shell: "bash -l {0}"

     steps:
     - uses: actions/checkout@v2
+    - uses: cvmfs-contrib/github-action-cvmfs@v2
+      with:
+        cvmfs_repositories: 'grid.cern.ch'
+
     - name: Set conda environment
       uses: conda-incubator/setup-miniconda@v2
@@ -38,15 +42,16 @@ jobs:
         conda info
         conda env list
         conda list
+    - name: Install Repo
+      run: |
+        pip install -e .
     - name: data/MC plotter
       run: |
         python plotting/plotdataMC.py --cfg testfile/btv_datamc.yml
     - name: comparison plotter
       run: |
-        python plotting/comparison.py --cfg testfile/btv_comapre.yml
+        python plotting/comparison.py --cfg testfile/btv_comapre.yml
2 changes: 1 addition & 1 deletion README.md
@@ -72,7 +72,7 @@ voms-proxy-init --voms cms --vomses ~/.grid-security/vomses
 Use the `./filefetcher/fetch.py` script:
 
 ```
-python filefetcher/fetch.py --input input_DAS_list.txt --output ${output_name.json}
+python filefetcher/fetch.py --input filefetcher/input_DAS_list.txt --output output_name.json
 ```
 where the `input_DAS_list.txt` is a simple file with a list of dataset names extracted from DAS (you need to create it yourself for the samples you want to run over), and the output JSON file is created in the `./metadata` directory.
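For reference, `input_DAS_list.txt` holds one DAS dataset name per line (lines starting with `#` are skipped by the fetcher); the sample file added in this commit looks like:

```
/ZH_HToCC_ZToLL_M-125_TuneCP5_13TeV-powheg-pythia8/RunIISummer20UL17NanoAODv9-106X_mc2017_realistic_v9-v1/NANOAODSIM
/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL17NanoAODv9-106X_mc2017_realistic_v9-v1/NANOAODSIM
/MuonEG/anovak-Run2017F-31Mar2018-v1_PFNanoAOD-132927756bb7172bff7e08e5e9b7221f/USER
```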
2 changes: 1 addition & 1 deletion env.yml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - python=3.10
+  - python>=3.8, <3.11
   - voms
   - ca-policy-lcg
   - ca-certificates
186 changes: 145 additions & 41 deletions filefetcher/fetch.py
@@ -1,4 +1,5 @@
-import os
+import sys
+from os import popen, listdir, makedirs, path, system
 import json
 import argparse
 
@@ -8,11 +9,16 @@
 parser.add_argument(
     "-i",
     "--input",
-    default=r"singlemuon",
+    default=None,
+    type=str,
+    required=True,
     help="List of samples in DAS (default: %(default)s)",
 )
 parser.add_argument(
-    "-o", "--output", default=r"test_my_samples", help="Site (default: %(default)s)"
+    "-o",
+    "--output",
+    default=r"test_my_samples.json",
+    help="Site (default: %(default)s)",
 )
 parser.add_argument(
     "--xrd",
@@ -21,46 +27,144 @@
     help="xrootd prefix string (default: %(default)s)",
 )
 
+parser.add_argument(
+    "--from_path",
+    action="store_true",
+    help="For samples that are not published on DAS. If this option is set then the format of the --input file must be adjusted. It should be: \n dataset_name path_to_files.",
+    default=False,
+)
+
 args = parser.parse_args()
-fset = []
-
-with open(args.input) as fp:
-    lines = fp.readlines()
-    for line in lines:
-        fset.append(line)
-
-fdict = {}
-
-for dataset in fset:
-    if dataset.startswith("#") or dataset.strip() == "":
-        # print("we skip this line:", line)
-        continue
-    dsname = dataset.strip().split("/")[1]  # Dataset first name
-    Tier = dataset.strip().split("/")[
-        3
-    ]  # NANOAODSIM for regular samples, USER for private
-    instance = "prod/global"
-    if Tier == "USER":
-        instance = "prod/phys03"
-    print("Creating list of files for dataset", dsname, Tier, instance)
-    flist = (
-        os.popen(
-            (
-                "/cvmfs/cms.cern.ch/common/dasgoclient -query='instance={} file dataset={}'"
-            ).format(instance, fset[fset.index(dataset)].rstrip())
-        )
-        .read()
-        .split("\n")
-    )
-
-    if dsname not in fdict:
-        fdict[dsname] = [args.xrd + f for f in flist if len(f) > 1]
-    else:  # needed to collect all data samples into one common key "Data" (using append() would introduce a new element for the key)
-        fdict[dsname].extend([args.xrd + f for f in flist if len(f) > 1])
-
-# pprint.pprint(fdict, depth=1)
-
-output_file = "./metadata/%s" % (args.output)
-with open(output_file, "w") as fp:
-    json.dump(fdict, fp, indent=4)
-print("The file is saved at: ", output_file)
+
+
+def getFilesFromDas(args):
+    fset = []
+    with open(args.input) as fp:
+        lines = fp.readlines()
+        for line in lines:
+            fset.append(line)
+
+    fdict = {}
+
+    for dataset in fset:
+        if dataset.startswith("#") or dataset.strip() == "":
+            # print("we skip this line:", line)
+            continue
+        dsname = dataset.strip().split("/")[1]  # Dataset first name
+        Tier = dataset.strip().split("/")[
+            3
+        ]  # NANOAODSIM for regular samples, USER for private
+        instance = "prod/global"
+        if Tier == "USER":
+            instance = "prod/phys03"
+        print("Creating list of files for dataset", dsname, Tier, instance)
+        flist = (
+            popen(
+                (
+                    "/cvmfs/cms.cern.ch/common/dasgoclient -query='instance={} file dataset={}'"
+                ).format(instance, fset[fset.index(dataset)].rstrip())
+            )
+            .read()
+            .split("\n")
+        )
+
+        if dsname not in fdict:
+            fdict[dsname] = [args.xrd + f for f in flist if len(f) > 1]
+        else:  # needed to collect all data samples into one common key "Data" (using append() would introduce a new element for the key)
+            fdict[dsname].extend([args.xrd + f for f in flist if len(f) > 1])
+
+    # pprint.pprint(fdict, depth=1)
+    return fdict
+
+
+def getFilesFromPath(args, lim=None):
+
+    fdict = {}
+    fset = []
+    with open(args.input) as fp:
+        lines = fp.readlines()
+        for line in lines:
+            if line.startswith("#") or line.strip() == "":
+                continue
+            if line.startswith("/"):
+                print(
+                    "You are trying to read files from path, but providing a dataset in DAS:\n",
+                    line,
+                )
+                print("That's not gonna work, so we exit here")
+                sys.exit(1)
+            ds = line.strip().split()
+            print("ds=", ds)
+            dataset = ds[0]
+            fdict[ds[0]] = getRootFilesFromPath(ds[1])
+
+    return fdict
+
+
+def getRootFilesFromPath(d, lim=None):
+
+    import subprocess
+
+    if "xrootd" in d:
+        sp = d.split("/")
+        siteIP = "/".join(sp[0:4])
+        pathToFiles = "/".join(sp[3:]) + "/"
+        allfiles = str(
+            subprocess.check_output(["xrdfs", siteIP, "ls", pathToFiles]), "utf-8"
+        ).split("\n")
+        # rootfiles = [siteIP+'/'+f for i,f in enumerate(allfiles) if f.endswith(".root") and (lim==None or i<lim)]
+    else:
+        siteIP = ""
+        pathToFiles = d
+        allfiles = [
+            path.join(d, f) for i, f in enumerate(listdir(d)) if f.endswith(".root")
+        ]
+
+    # print(siteIP, pathToFiles)
+    rootfiles = []
+    for file_or_dir in allfiles:
+        # print(file_or_dir)
+        if file_or_dir == "" or file_or_dir == pathToFiles:
+            continue
+        file_or_dir = siteIP + file_or_dir
+        if file_or_dir.endswith(".root"):
+            if lim == None or len(rootfiles) < lim:
+                rootfiles.append(file_or_dir)
+
+        elif not "log" in file_or_dir and not file_or_dir[-1] == "/":
+            file_or_dir = file_or_dir + "/"
+            print("file or dir:", file_or_dir)
+            if lim == None:
+                rootfiles.extend(getRootFilesFromPath(file_or_dir))
+            elif len(rootfiles) < lim:
+                rootfiles.extend(
+                    getRootFilesFromPath(file_or_dir, lim - len(rootfiles))
+                )
+
+    # print("Input path:", d)
+    # print("List of root files to be processed:\n",rootfiles)
+
+    return rootfiles
+
+
+def main(args):
+
+    if args.from_path:
+        print("do it from path: ")
+
+        fdict = getFilesFromPath(args)
+
+    else:
+
+        fdict = getFilesFromDas(args)
+
+    # print(fdict)
+    output_file = "./metadata/%s" % (args.output)
+    with open(output_file, "w") as fp:
+        json.dump(fdict, fp, indent=4)
+    print("The file is saved at: ", output_file)
+
+
+if __name__ == "__main__":
+    print("This is the __main__ part")
+    main(args)
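As a worked example of the URL handling in the new `getRootFilesFromPath`: the `"xrootd" in d` branch matches here only because the redirector hostname itself contains the substring `xrootd`, as the RWTH path from `input_PATH_list.txt` does. A sketch of what the split produces, traced by hand with that path:

```python
# Split logic of getRootFilesFromPath, applied to the DYJets_UNLOPS
# entry from filefetcher/input_PATH_list.txt:
d = "root://grid-cms-xrootd.physik.rwth-aachen.de//store/user/andrey/NanoGEN/dyeej_UNLOPS/"
sp = d.split("/")
siteIP = "/".join(sp[0:4])            # "root://grid-cms-xrootd.physik.rwth-aachen.de/"
pathToFiles = "/".join(sp[3:]) + "/"  # "/store/user/andrey/NanoGEN/dyeej_UNLOPS//"
# (the trailing "//" is why the input path ending with "/" needed handling)
# "xrdfs <siteIP> ls <pathToFiles>" then lists files and subdirectories,
# which the function filters for .root and recurses into.
```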
File renamed without changes.
3 changes: 3 additions & 0 deletions filefetcher/input_DAS_list.txt
@@ -0,0 +1,3 @@
+/ZH_HToCC_ZToLL_M-125_TuneCP5_13TeV-powheg-pythia8/RunIISummer20UL17NanoAODv9-106X_mc2017_realistic_v9-v1/NANOAODSIM
+/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIISummer20UL17NanoAODv9-106X_mc2017_realistic_v9-v1/NANOAODSIM
+/MuonEG/anovak-Run2017F-31Mar2018-v1_PFNanoAOD-132927756bb7172bff7e08e5e9b7221f/USER
2 changes: 2 additions & 0 deletions filefetcher/input_PATH_list.txt
@@ -0,0 +1,2 @@
+DYJets_UNLOPS root://grid-cms-xrootd.physik.rwth-aachen.de//store/user/andrey/NanoGEN/dyeej_UNLOPS/
+BReg_Test /net/scratch_cms3a/hcc/breg_nano17/ZH_Hbb/
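Given this list, the new path-based mode would be invoked roughly as follows (`my_path_samples.json` is a hypothetical output name; `--from_path` switches the input parsing to the `dataset_name path_to_files` format described in the argparse help above):

```
python filefetcher/fetch.py --input filefetcher/input_PATH_list.txt --output my_path_samples.json --from_path
```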
6 changes: 0 additions & 6 deletions filefetcher/list.txt

This file was deleted.

4 changes: 2 additions & 2 deletions plotting/comparison.py
@@ -133,7 +133,7 @@
     ax.set_xlabel(None)
     ax.set_ylabel("Events")
     rax.set_ylabel("Other/Ref")
-    ax.ticklabel_format(style="sci", scilimits=(-3, 3))
+    ax.ticklabel_format(style="sci", axis="y", scilimits=(-3, 3))
     ax.get_yaxis().get_offset_text().set_position((-0.065, 1.05))
     ax.legend()
     rax.set_ylim(0.0, 2.0)
@@ -161,4 +161,4 @@
     fig.savefig(f"plot/{config['output']}_{time}/compare_{var}{logext}.png")
 
 
-print(f"The output is saved at: plot/{config['output']}_{time}")
+print(f"The output is saved at: plot/{config['output']}_{time}/")
16 changes: 3 additions & 13 deletions runner.py
@@ -12,16 +12,6 @@
 
 from BTVNanoCommissioning.workflows import workflows
 
-# This would crash if the ExampleWorkflow does not exist
-from ExampleWorkflow.workflows import workflows
-from ZplusJets.workflows import workflows
-
-# from ExampleWorkflow.workflows import workflows
-# from VHcc.workflows import workflows
-# from Hpluscharm.workflows import workflows
-
-# Should come up with a smarter way to import all worflows from subdirectories of ./src/
-
 
 def validate(file):
     try:
@@ -255,13 +245,13 @@ def get_main_parser():
                 print(f"Removing: {fi}")
                 os.system(f"rm {fi}")
         if input("Write list of bad files? (y/n)") == "y":
-            corrupted_name = (args.samplejson).split('.json')[0]
-            with open(f'{corrupted_name}_corrupted.txt', 'w') as bad_txt:
+            corrupted_name = (args.samplejson).split(".json")[0]
+            with open(f"{corrupted_name}_corrupted.txt", "w") as bad_txt:
                 print("Writing:")
                 for fi in all_invalid:
                     print(f"Writing: {fi}")
                     bad_txt.write(fi)
-                    bad_txt.write('\n')
+                    bad_txt.write("\n")
         sys.exit(0)
 
     # load workflow
5 changes: 4 additions & 1 deletion runner_wconfig.py
@@ -415,5 +415,8 @@ def retry_handler(exception, task_record):
 
     save(output, config.outfile)
 
-    print(output)
+    # print(output) better to print this in a file:
+    with open(f"{config.outfile}.print.txt", "w") as f:
+        print(output, file=f)
 
     print(f"Saving output to {config.outfile}")
7 changes: 7 additions & 0 deletions src/BTVNanoCommissioning/helpers/xsection.py
@@ -1233,6 +1233,13 @@
         "energy": "13",
         "comment": "Private sample",
     },
+    {
+        "process_name": "DYJets_UNLOPS",
+        "cross_section": "2025",
+        "DAS": "root://grid-cms-xrootd.physik.rwth-aachen.de//store/user/andrey/NanoGEN/dyeej_UNLOPS/",
+        "energy": "13",
+        "comment": "Private sample",
+    },
     {
         "process_name": "Template",
         "cross_section": "11",
1 change: 1 addition & 0 deletions src/BTVNanoCommissioning/utils/plot_utils.py
@@ -522,6 +522,7 @@ def plotratio(
 
 def autoranger(hist):
     val, axis = hist.values(), hist.axes[-1].edges
+    mins, maxs = 0, len(val)
     for i in range(len(val)):
         if val[i] != 0:
             mins = i
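The diff is truncated here. For context, a hypothetical completion of `autoranger` under the assumption that it scans for the first and last non-empty bins (the defaults added above keep it from crashing when every bin is empty):

```python
def autoranger(hist):
    val, axis = hist.values(), hist.axes[-1].edges
    # Defaults cover the all-empty histogram, so the scans below may find nothing.
    mins, maxs = 0, len(val)
    for i in range(len(val)):  # first non-empty bin
        if val[i] != 0:
            mins = i
            break
    for i in reversed(range(len(val))):  # last non-empty bin
        if val[i] != 0:
            maxs = i + 1
            break
    return axis[mins], axis[maxs]
```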
2 changes: 1 addition & 1 deletion testfile/btv_compare.yml
@@ -79,4 +79,4 @@ rescale_yields:
   runC : 100.0
 inbox_text: "" #Optional, text write in anchor text box
 norm : True #Optional, normalize to reference yields
-log : True #Optional, log y axis
+log : True #Optional, log y axis
