diff --git a/.github/workflows/plotting_test.yml b/.github/workflows/plotting_test.yml index 29df99f1..6d6d6a6c 100644 --- a/.github/workflows/plotting_test.yml +++ b/.github/workflows/plotting_test.yml @@ -1,4 +1,4 @@ -name: plotting script test +name: Test plotter on: push: branches: [ master ] @@ -13,13 +13,17 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.10"] defaults: run: shell: "bash -l {0}" steps: + - uses: actions/checkout@v2 + - uses: cvmfs-contrib/github-action-cvmfs@v2 + with: + cvmfs_repositories: 'grid.cern.ch' - name: Set conda environment uses: conda-incubator/setup-miniconda@v2 @@ -38,15 +42,16 @@ jobs: conda info conda env list conda list - - + - name: Install Repo run: | pip install -e . - + - name: data/MC plotter run: | python plotting/plotdataMC.py --cfg testfile/btv_datamc.yml + - name: comparison plotter run: | - python plotting/comparison.py --cfg testfile/btv_comapre.yml \ No newline at end of file + python plotting/comparison.py --cfg testfile/btv_comapre.yml + diff --git a/README.md b/README.md index 7a47fc6f..bd56b10b 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ voms-proxy-init --voms cms --vomses ~/.grid-security/vomses Use the `./filefetcher/fetch.py` script: ``` -python filefetcher/fetch.py --input input_DAS_list.txt --output ${output_name.json} +python filefetcher/fetch.py --input filefetcher/input_DAS_list.txt --output output_name.json ``` where the `input_DAS_list.txt` is a simple file with a list of dataset names extract from DAS (you need to create it yourself for the samples you want to run over), and output json file in creted in `./metadata` directory. 
diff --git a/env.yml b/env.yml index 95be5e9b..89d22069 100644 --- a/env.yml +++ b/env.yml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: - - python=3.10 + - python>=3.8, <3.11 - voms - ca-policy-lcg - ca-certificates diff --git a/filefetcher/fetch.py b/filefetcher/fetch.py index ca88f9f5..b757f8a7 100644 @@ -1,4 +1,5 @@ -import os +import sys +from os import popen, listdir, makedirs, path, system import json import argparse @@ -8,11 +9,16 @@ parser.add_argument( "-i", "--input", - default=r"singlemuon", + default=None, + type=str, + required=True, help="List of samples in DAS (default: %(default)s)", ) parser.add_argument( - "-o", "--output", default=r"test_my_samples", help="Site (default: %(default)s)" + "-o", + "--output", + default=r"test_my_samples.json", + help="Site (default: %(default)s)", ) parser.add_argument( "--xrd", @@ -21,46 +27,144 @@ help="xrootd prefix string (default: %(default)s)", ) +parser.add_argument( + "--from_path", + action="store_true", + help="For samples that are not published on DAS. If this option is set then the format of the --input file must be adjusted. 
It should be: \n dataset_name path_to_files.", + default=False, +) + args = parser.parse_args() -fset = [] - -with open(args.input) as fp: - lines = fp.readlines() - for line in lines: - fset.append(line) - -fdict = {} - -for dataset in fset: - if dataset.startswith("#") or dataset.strip() == "": - # print("we skip this line:", line) - continue - dsname = dataset.strip().split("/")[1] # Dataset first name - Tier = dataset.strip().split("/")[ - 3 - ] # NANOAODSIM for regular samples, USER for private - instance = "prod/global" - if Tier == "USER": - instance = "prod/phys03" - print("Creating list of files for dataset", dsname, Tier, instance) - flist = ( - os.popen( - ( - "/cvmfs/cms.cern.ch/common/dasgoclient -query='instance={} file dataset={}'" - ).format(instance, fset[fset.index(dataset)].rstrip()) + + +def getFilesFromDas(args): + fset = [] + with open(args.input) as fp: + lines = fp.readlines() + for line in lines: + fset.append(line) + + fdict = {} + + for dataset in fset: + if dataset.startswith("#") or dataset.strip() == "": + # print("we skip this line:", line) + continue + dsname = dataset.strip().split("/")[1] # Dataset first name + Tier = dataset.strip().split("/")[ + 3 + ] # NANOAODSIM for regular samples, USER for private + instance = "prod/global" + if Tier == "USER": + instance = "prod/phys03" + print("Creating list of files for dataset", dsname, Tier, instance) + flist = ( + popen( + ( + "/cvmfs/cms.cern.ch/common/dasgoclient -query='instance={} file dataset={}'" + ).format(instance, fset[fset.index(dataset)].rstrip()) + ) + .read() + .split("\n") ) - .read() - .split("\n") - ) - if dsname not in fdict: - fdict[dsname] = [args.xrd + f for f in flist if len(f) > 1] - else: # needed to collect all data samples into one common key "Data" (using append() would introduce a new element for the key) - fdict[dsname].extend([args.xrd + f for f in flist if len(f) > 1]) + if dsname not in fdict: + fdict[dsname] = [args.xrd + f for f in flist if len(f) > 1] + 
else: # needed to collect all data samples into one common key "Data" (using append() would introduce a new element for the key) + fdict[dsname].extend([args.xrd + f for f in flist if len(f) > 1]) + + # pprint.pprint(fdict, depth=1) + return fdict + + +def getFilesFromPath(args, lim=None): + + fdict = {} + fset = [] + with open(args.input) as fp: + lines = fp.readlines() + for line in lines: + if line.startswith("#") or line.strip() == "": + continue + if line.startswith("/"): + print( + "You are trying to read files from path, but providing a dataset in DAS:\n", + line, + ) + print("That's not gonna work, so we exit here") + sys.exit(1) + ds = line.strip().split() + print("ds=", ds) + dataset = ds[0] + fdict[ds[0]] = getRootFilesFromPath(ds[1]) + + return fdict + + +def getRootFilesFromPath(d, lim=None): + + import subprocess + + if "xrootd" in d: + sp = d.split("/") + siteIP = "/".join(sp[0:4]) + pathToFiles = "/".join(sp[3:]) + "/" + allfiles = str( + subprocess.check_output(["xrdfs", siteIP, "ls", pathToFiles]), "utf-8" + ).split("\n") + # rootfiles = [siteIP+'/'+f for i,f in enumerate(allfiles) if f.endswith(".root") and (lim==None or i