Skip to content

Commit

Permalink
skip .git directories when creating sandbox. Fix #5202 (#5207)
Browse files Browse the repository at this point in the history
* skip .git directories when creating sandbox. Fix #5202

* remove obsolete testS3upload()
  • Loading branch information
belforte authored Sep 22, 2023
1 parent 0e79417 commit 362a115
Showing 1 changed file with 19 additions and 48 deletions.
67 changes: 19 additions & 48 deletions src/python/CRABClient/JobType/UserTarball.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,57 +6,24 @@

from __future__ import division # make division work like in python3

import json
import os
import glob
import math
import time
import socket
import tarfile
import tempfile
import shutil
import hashlib
import uuid

from ServerUtilities import NEW_USER_SANDBOX_EXCLUSIONS, BOOTSTRAP_CFGFILE_DUMP
from ServerUtilities import FILE_SIZE_LIMIT
from ServerUtilities import uploadToS3

from CRABClient.ClientMapping import configParametersInfo
from CRABClient.JobType.ScramEnvironment import ScramEnvironment
from CRABClient.ClientUtilities import colors, BOOTSTRAP_CFGFILE, BOOTSTRAP_CFGFILE_PKL
from CRABClient.ClientExceptions import EnvironmentException, InputFileNotFoundException, CachefileNotFoundException, SandboxTooBigException
from CRABClient.ClientExceptions import EnvironmentException, InputFileNotFoundException, SandboxTooBigException
from CRABClient.ClientUtilities import execute_command

from ServerUtilities import NEW_USER_SANDBOX_EXCLUSIONS, BOOTSTRAP_CFGFILE_DUMP
from ServerUtilities import FILE_SIZE_LIMIT
from ServerUtilities import uploadToS3, tempSetLogLevel

def testS3upload(s3tester, archiveName, hashkey, logger):
    """
    Try to upload the sandbox tarball to S3 and report the outcome.

    Any exception raised by the upload is caught and turned into a 'FAIL'
    status, it is never propagated to the caller.

    :param s3tester: object handed to uploadToS3 as its `crabserver` argument
    :param archiveName: path of the tarball file to upload
    :param hashkey: used to build the object name "<hashkey>.tgz"
    :param logger: logger used for debug messages and passed on to uploadToS3
    :return: dict with keys 'status' ('OK' or 'FAIL'), 'reason' (only when the
             upload failed), 'timestamp', 'clienthost', 'clientip', 'KBytes'
             and 'seconds'
    """
    cachename = "%s.tgz" % hashkey

    # Read the clock once and derive both the date/time part and the millisecond
    # part from that same instant, so the two can never disagree.
    # Done before the try block so that timestamp and t1 are always bound
    # when the report is filled below.
    t1 = time.time()
    timestamp = time.strftime('%y%m%d_%H%M%S', time.gmtime(t1))
    msecs = int((t1 - int(t1)) * 1000)
    timestamp += '.%03d' % msecs

    reason = None
    try:
        with tempSetLogLevel(logger=logger, level=1000):  # disable all logging for this call
            uploadToS3(crabserver=s3tester, objecttype='sandbox', filepath=archiveName,
                       tarballname=cachename, logger=logger)
        status = 'OK'
        logger.debug('Successfully uploaded tarball to S3 as well')
    except Exception as e:
        logger.debug('Tarball upload to S3 failed:\n%s', str(e))
        status = 'FAIL'
        reason = str(e)
    t2 = time.time()

    s3report = {'status':status}
    if status == 'FAIL':
        s3report['reason'] = reason
    thisSite = socket.gethostname()
    thisIP = socket.gethostbyname(thisSite)
    tarballKB = os.stat(archiveName).st_size // 1024
    s3report['timestamp'] = timestamp
    s3report['clienthost'] = thisSite
    s3report['clientip'] = thisIP
    s3report['KBytes'] = tarballKB
    s3report['seconds'] = int(t2-t1)
    return s3report

def calculateChecksum(tarfile_, exclude=None):
"""
Expand Down Expand Up @@ -120,6 +87,16 @@ def calculateChecksum(tarfile_, exclude=None):
return checksum


def excludeGit(tarinfo):
    """
    Filter for TarFile.add() which keeps .git out of the archives
    https://github.com/dmwm/CRABClient/issues/5202

    :param tarinfo: the tarfile.TarInfo object of the member about to be added
    :return: None if the member is named .git or lies below a .git directory
             (tarfile then leaves it out), otherwise tarinfo itself, unchanged
    """
    # Compare whole path components: a plain substring test would also throw
    # away unrelated entries such as .gitignore, .gitmodules or my.github/x.py
    if '.git' in tarinfo.name.split('/'):
        return None
    return tarinfo


class UserTarball(object):
"""
_UserTarball_
Expand Down Expand Up @@ -166,7 +143,7 @@ def addFiles(self, userFiles=None, cfgOutputName=None):
if os.path.exists(fullPath):
self.logger.debug("Adding directory %s to tarball" % fullPath)
self.checkdirectory(fullPath)
self.tarfile.add(fullPath, directory, recursive=True)
self.tarfile.add(fullPath, directory, recursive=True, filter=excludeGit)

# Recursively search for and add to tar some directories in $CMSSW_BASE/src/
# Note that recursiveDirs are **only** looked-for under the $CMSSW_BASE/src/ folder!
Expand All @@ -184,9 +161,9 @@ def addFiles(self, userFiles=None, cfgOutputName=None):
for root, _, _ in os.walk(srcPath):
if os.path.basename(root) in recursiveDirs:
directory = root.replace(srcPath, 'src')
self.logger.debug("Adding data directory %s to tarball" % root)
self.logger.debug("Adding directory %s to tarball" % root)
self.checkdirectory(root)
self.tarfile.add(root, directory, recursive=True)
self.tarfile.add(root, directory, recursive=True, filter=excludeGit)

# Tar up extra files the user needs
userFiles = userFiles or []
Expand All @@ -197,8 +174,7 @@ def addFiles(self, userFiles=None, cfgOutputName=None):
for filename in fileNames:
self.logger.debug("Adding file %s to tarball" % filename)
self.checkdirectory(filename)
self.tarfile.add(filename, os.path.basename(filename), recursive=True)

self.tarfile.add(filename, os.path.basename(filename), recursive=True, filter=excludeGit)

scriptExe = getattr(self.config.JobType, 'scriptExe', None)
if scriptExe:
Expand All @@ -211,7 +187,6 @@ def addFiles(self, userFiles=None, cfgOutputName=None):
self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_PKL), arcname=BOOTSTRAP_CFGFILE_PKL)
self.tarfile.add(os.path.join(basedir, BOOTSTRAP_CFGFILE_DUMP), arcname=BOOTSTRAP_CFGFILE_DUMP)


def addVenvDirectory(self, tarFile='sandbox.tgz'):
# adds CMSSW_BASE/venv directory to the (closed, compressed) sandbox
# venv directory is special because symbolic links have to be kept as such (no dereference)
Expand Down Expand Up @@ -263,7 +238,6 @@ def writeContent(self):
"""Save the content of the tarball"""
self.content = [(int(x.size), x.name) for x in self.tarfile.getmembers()]


def close(self):
"""
Calculate the checksum and close
Expand Down Expand Up @@ -342,22 +316,19 @@ def checkdirectory(self, dir_):
(colors.RED, colors.NORMAL, dir_, msg)
raise EnvironmentException(err)


def __getattr__(self, *args):
"""
Pass any unknown functions or attribute requests on to the TarFile object
"""
self.logger.debug("Passing getattr %s on to TarFile" % args)
return self.tarfile.__getattribute__(*args)


def __enter__(self):
"""
Allow use as context manager
"""
return self


def __exit__(self, excType, excValue, excTrace):
"""
Allow use as context manager
Expand Down

0 comments on commit 362a115

Please sign in to comment.