This repository has been archived by the owner on Sep 12, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 51
/
duplicateEvents.py
executable file
·79 lines (66 loc) · 2.46 KB
/
duplicateEvents.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python
"""
Validates whether a given workflow has duplicate events in its output
datasets, i.e. whether a lumi is present in more than one file.
"""
import sys
import optparse
import dbs3Client
import reqMgrClient
def duplicateLumisWorkflow(url, workflow, verbose=False):
    """
    Report whether any output dataset of a workflow has duplicate lumis.

    The output datasets are obtained from
    reqMgrClient.outputdatasetsWorkflow(url, workflow) and each one is
    checked with dbs3Client.duplicateRunLumi(dataset, verbose,
    skipInvalid=True).

    Parameters:
        url: passed through unchanged to reqMgrClient.outputdatasetsWorkflow.
        workflow: workflow whose output datasets are checked.
        verbose: forwarded to dbs3Client.duplicateRunLumi. When false, the
            function stops at the first dataset reported as duplicated;
            when true, every dataset is checked.

    Returns:
        True if at least one dataset was reported as having duplicates,
        False otherwise.
    """
    datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
    duplicate = False
    # Single-argument print(...) is valid in both Python 2 and Python 3 and
    # emits the same text as the former print statements.
    print('workflow: %s' % (workflow,))
    for dataset in datasets:
        print('dataset : %s' % (dataset,))
        if not dbs3Client.duplicateRunLumi(dataset, verbose, skipInvalid=True):
            continue
        duplicate = True
        if not verbose:
            # Fast check: one duplicated dataset is enough to answer.
            print('Has duplicated lumis')
            return True
    if not duplicate:
        print("No duplicate found")
    return duplicate
def duplicateLumisDataset(url, dataset, verbose=False):
    """
    Report whether a single dataset has duplicate lumis.

    The check is delegated to dbs3Client.duplicateRunLumi(dataset, verbose,
    skipInvalid=True).

    Parameters:
        url: not used by this function; kept so the signature parallels
            duplicateLumisWorkflow and existing callers keep working.
        dataset: dataset to check.
        verbose: forwarded to dbs3Client.duplicateRunLumi. When false, a
            one-line 'Has duplicated lumis' summary is printed here.

    Returns:
        True if the dataset was reported as having duplicates, False
        otherwise. The result is always a bool, in verbose mode too.
    """
    # Single-argument print(...) is valid in both Python 2 and Python 3 and
    # emits the same text as the former print statement.
    print('dataset : %s' % (dataset,))
    if dbs3Client.duplicateRunLumi(dataset, verbose, skipInvalid=True):
        if not verbose:
            print('Has duplicated lumis')
        # Return True regardless of verbosity so callers get a consistent
        # answer instead of None in verbose mode.
        return True
    print("No duplicate found")
    return False
def main():
    """
    Command-line entry point.

    Input selection, in order of precedence:
      1. -f/--file: a text file with one workflow per line (blank lines
         are skipped);
      2. positional arguments: workflows;
      3. -d/--dataset: a single dataset, checked with
         duplicateLumisDataset, after which the process exits with
         status 0.
    If none of these is given, a usage error is reported.

    Each selected workflow is checked with duplicateLumisWorkflow and a
    one-line verdict is printed for it.
    """
    usage = "usage: %prog [options] [WORKFLOW]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-f', '--file', help='Text file with a list of workflows.', dest='file')
    parser.add_option('-d', '--dataset', help='Analyse a given dataset instead of a workflow.', dest='dataset')
    parser.add_option('-v', '--verbose', help='Generates a printout of duplicated lumis', dest='verbose',
                      action='store_true', default=False)
    options, args = parser.parse_args()
    url = 'cmsweb.cern.ch'

    if options.file:
        # Read the list inside a context manager so the file handle is
        # closed as soon as the list is built.
        with open(options.file) as listing:
            workflows = [line.strip() for line in listing if line.strip()]
    elif args:
        workflows = args
    elif options.dataset:
        duplicateLumisDataset(url, options.dataset, options.verbose)
        sys.exit(0)
    else:
        # parser.error() prints the usage message and terminates the
        # process with exit status 2, so no explicit exit is needed after it.
        parser.error("Provide workflows or datasets to analyse")

    for workflow in workflows:
        # Single-argument print(...) is valid in both Python 2 and Python 3
        # and emits the same text as the former print statements.
        if duplicateLumisWorkflow(url, workflow, options.verbose):
            print('%s has duplicated lumis' % (workflow,))
        else:
            print("No duplicate found")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()