Fix the DB fetch of the splitting algorithm for a 'crab remade' task (#4760)

* Read splitting algorithm from DB, resolve #4757
* Resolve #4757 for crab get* commands
* Resolve TODOs
* Fix Pylint warnings
dmwm · May 1, 2018 · 7b41c2d · 7b41c2d
1 parent 57c32a7
commit 7b41c2d
Show file tree

Hide file tree

Showing 4 changed files with 16 additions and 23 deletions.
diff --git a/src/python/CRABClient/Commands/SubCommand.py b/src/python/CRABClient/Commands/SubCommand.py
@@ -255,6 +255,9 @@ def __init__(self, logger, cmdargs = None, disable_interspersed_args = False):
         cmdargs = cmdargs or []
         (self.options, self.args) = self.parser.parse_args(cmdargs)
 
+        self.transferringIds = None
+        self.dest = None
+
         ## Validate the command options.
         self.validateOptions()
 

diff --git a/src/python/CRABClient/Commands/getcommand.py b/src/python/CRABClient/Commands/getcommand.py
@@ -25,10 +25,6 @@ class getcommand(SubCommand):
 
 
     def __call__(self, **argv):
-        # TODO: remove this 'if' once transition to status2 is complete
-        if argv.get('subresource') in ['data2', 'logs2']:
-            self.processAndStoreJobIds()
-
         ## Retrieve the transferLogs parameter from the task database.
         taskdbparam, configparam = '', ''
         if argv.get('subresource') in ['logs', 'logs2']:
@@ -45,10 +41,13 @@ def __call__(self, **argv):
         uri = self.getUrl(self.instance, resource = 'task')
         dictresult, status, _ =  server.get(uri, data = inputlist)
         self.logger.debug('Server result: %s' % dictresult)
+        splitting = None
         if status == 200:
             if 'desc' in dictresult and 'columns' in dictresult['desc']:
                 position = dictresult['desc']['columns'].index(taskdbparam)
                 transferFlag = dictresult['result'][position] #= 'T' or 'F'
+                position = dictresult['desc']['columns'].index('tm_split_algo')
+                splitting = dictresult['result'][position]
             else:
                 self.logger.debug("Unable to locate %s in server result." % (taskdbparam))
         ## If transferFlag = False, there is nothing to retrieve.
@@ -72,6 +71,12 @@ def __call__(self, **argv):
                 msg += " nor was any explicitly specified in the CRAB configuration."
                 self.logger.warning(msg)
 
+        #check the format of jobids
+        if getattr(self.options, 'jobids', None):
+            self.options.jobids = validateJobids(self.options.jobids, splitting != 'Automatic')
+
+        self.processAndStoreJobIds()
+
         #Retrieving output files location from the server
         self.logger.debug('Retrieving locations for task %s' % self.cachedinfo['RequestName'])
         inputlist =  [('workflow', self.cachedinfo['RequestName'])]
@@ -97,9 +102,7 @@ def __call__(self, **argv):
         totalfiles = len(dictresult['result'])
         fileInfoList = dictresult['result']
 
-        # TODO: remove this 'if' once transition to status2 is complete
-        if argv.get('subresource') in ['data2', 'logs2']:
-            self.insertPfns(fileInfoList)
+        self.insertPfns(fileInfoList)
 
         if len(fileInfoList) > 0:
             if self.options.dump or self.options.xroot:
@@ -260,7 +263,6 @@ def setOptions(self):
     def validateOptions(self):
         #Figuring out the destination directory
         SubCommand.validateOptions(self)
-        self.dest = None
         if self.options.outputpath is not None:
             if re.match("^[a-z]+://", self.options.outputpath):
                 self.dest = self.options.outputpath
@@ -273,11 +275,6 @@ def validateOptions(self):
         if getattr(self.options, 'quantity', None) == 'all':
             self.options.quantity = -1
 
-        #check the format of jobids
-        if getattr(self.options, 'jobids', None):
-            useLists = getattr(self.cachedinfo['OriginalConfig'].Data, 'splitting', 'Automatic') != 'Automatic'
-            self.options.jobids = validateJobids(self.options.jobids, useLists)
-
 
         if hasattr(self.options, 'command') and self.options.command != None:
             AvailableCommands = ['LCG', 'GFAL']

diff --git a/src/python/CRABClient/Commands/getlog.py b/src/python/CRABClient/Commands/getlog.py
@@ -6,21 +6,14 @@
 from CRABClient.ClientUtilities import colors
 from CRABClient.UserUtilities import getFileFromURL
 from CRABClient.Commands.getcommand import getcommand
-from CRABClient.ClientExceptions import RESTCommunicationException, ClientException, MissingOptionException
+from CRABClient.ClientExceptions import RESTCommunicationException, MissingOptionException
 
 from ServerUtilities import getProxiedWebDir
 from httplib import HTTPException
 
 
 class getlog(getcommand):
     """
-    Important: code here is identical to the old getlog implementation (aside from setting the subresource to
-    'logs2' when calling getcommand and the names of the command/class themselves). This was done because trying to
-    avoid copy-paste code isn't worth the effort in this case. When the status2 is working correctly, old code will
-    be easily removed and replaced with the 'getlog2' version. Also, the command 'getlog' itself is deprecated and
-    we don't expect to make any changes to it until it's removed.
-
-    Class description:
     Retrieve the log files of a number of jobs specified by the -q/--quantity option.
     -q logfiles per exit code are returned if transferLogs = False; otherwise all the log files
     collected by the LogCollect job are returned. The task is identified by the -d/--dir option.

diff --git a/src/python/CRABClient/Commands/status.py b/src/python/CRABClient/Commands/status.py
@@ -59,6 +59,7 @@ def __call__(self):
         maxMemory = int(getColumn(crabDBInfo, 'tm_maxmemory'))
         maxJobRuntime = int(getColumn(crabDBInfo, 'tm_maxjobruntime'))
         numCores = int(getColumn(crabDBInfo, 'tm_numcores'))
+        splitting = getColumn(crabDBInfo, 'tm_split_algo')
 
         #Print information from the database
         self.printTaskInfo(crabDBInfo, user)
@@ -146,8 +147,7 @@ def __call__(self):
             if self.jobids:
                 ## Check the format of the jobids option.
                 if self.options.jobids:
-                    useLists = getattr(self.cachedinfo['OriginalConfig'].Data, 'splitting', 'Automatic') != 'Automatic'
-                    jobidstuple = validateJobids(self.options.jobids, useLists)
+                    jobidstuple = validateJobids(self.options.jobids, splitting != 'Automatic')
                     self.jobids = [str(jobid) for (_, jobid) in jobidstuple]
                 self.checkUserJobids(statusCacheInfo, self.jobids)
             sortdict = self.printDetails(statusCacheInfo, self.jobids, not self.options.long, maxMemory, maxJobRuntime, numCores)