-
Notifications
You must be signed in to change notification settings - Fork 0
/
tmXRootUtils.py
102 lines (93 loc) · 6.52 KB
/
tmXRootUtils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import sys
# Refuse to run on interpreters older than Python 3.6. The version is compared
# as a whole tuple rather than checking major and minor separately: a
# standalone "minor < 6" test would wrongly reject any future major release
# whose minor number is below 6 (for example 4.0).
if sys.version_info < (3, 6):
    sys.exit("Must be using python 3.6 onwards. Current version info: {v}".format(v=sys.version_info))
import os, subprocess
from typing import List, Tuple
import tmProgressBar
def Parse_xrdfs_ls_OutputLine(xrdfs_ls_output_line: str) -> Tuple[bool, str]:
    """Split one line of long-format xrdfs listing output into (is_directory, path).

    The line must consist of exactly five whitespace-separated fields. The
    entry is treated as a directory when the first field starts with 'd', and
    the fifth field is returned as the entry's path.

    Args:
        xrdfs_ls_output_line: a single line of listing output.

    Returns:
        A tuple (is_directory, full_path).

    Raises:
        SystemExit: if the line does not contain exactly five fields.
    """
    # str.split() with no arguments already ignores leading/trailing whitespace.
    fields = xrdfs_ls_output_line.split()
    if len(fields) != 5:
        sys.exit("ERROR: Unable to parse xrdfs line: {l}".format(l=xrdfs_ls_output_line))
    mode_flags, _, _, _, entry_path = fields
    return (mode_flags.startswith("d"), entry_path)
def Query_xrdfs_adler32(xrd_prefix: str, file_full_path: str) -> str:
    """Return the adler32 checksum that the remote server reports for a file.

    Runs ``xrdfs <xrd_prefix> query checksum <file_full_path>`` and expects the
    output to be exactly two whitespace-separated tokens: the literal
    ``adler32`` followed by the checksum value.

    Args:
        xrd_prefix: server specification passed to xrdfs as its first argument.
        file_full_path: path of the file on the server, without the prefix.

    Returns:
        The checksum token exactly as printed by xrdfs.

    Raises:
        subprocess.CalledProcessError: if xrdfs exits with a non-zero status.
        OSError: if the xrdfs executable cannot be started.
        SystemExit: if the output is not in the expected two-token format.
    """
    # The command is passed as an argument list (no shell), so a path that
    # contains spaces or shell metacharacters reaches xrdfs verbatim and can
    # never be interpreted as shell syntax. Paths come from remote listings.
    query_output = subprocess.check_output(
        ["xrdfs", xrd_prefix, "query", "checksum", file_full_path],
        universal_newlines=True,
    )
    query_output_split = query_output.split()
    if len(query_output_split) != 2:
        sys.exit("ERROR: Unable to parse xrdfs checksum output: {o}".format(o=query_output))
    checksum_type, checksum_value = query_output_split
    if checksum_type != "adler32":
        sys.exit("ERROR: xrdfs checksum returns it in an unexpected format: {f}".format(f=checksum_type))
    return checksum_value
def GetLocal_adler32(local_file_path: str) -> str:
    """Return the adler32 checksum of a local file as computed by ``xrdadler32``.

    The tool's output is expected to be the checksum followed by the same path
    that was passed in; anything else is treated as a fatal error.

    Args:
        local_file_path: path of the local file to checksum.

    Returns:
        The checksum token exactly as printed by xrdadler32.

    Raises:
        subprocess.CalledProcessError: if xrdadler32 exits with a non-zero status.
        OSError: if the xrdadler32 executable cannot be started.
        SystemExit: if the output is not "<checksum> <local_file_path>".
    """
    # Argument list instead of a shell string: the path is handed to the tool
    # verbatim, regardless of spaces or shell metacharacters in it.
    adler32_output = subprocess.check_output(
        ["xrdadler32", local_file_path],
        universal_newlines=True,
    )
    # Split only once so that a path containing whitespace stays in one piece
    # and can still be compared against the requested path below.
    adler32_output_split = adler32_output.strip().split(None, 1)
    if len(adler32_output_split) != 2:
        sys.exit("ERROR: adler32 output not in expected format: {o}".format(o=adler32_output))
    checksum_value, reported_path = adler32_output_split
    if reported_path != local_file_path:
        sys.exit("ERROR: adler32 output not in expected format: {o}".format(o=adler32_output))
    return checksum_value
def GetListOfFilesInDirectory(xrd_prefix: str, directory_path_without_xrd_prefix: str, print_verbose: bool) -> List[Tuple[str, str]]:
    """List the files below a remote directory together with their adler32 checksums.

    Runs ``xrdfs <xrd_prefix> ls -l -R <directory>``, skips blank lines and
    directory entries, and queries the server for the checksum of every
    remaining entry. A progress bar is driven by the number of listing lines.

    Args:
        xrd_prefix: server specification passed to xrdfs as its first argument.
        directory_path_without_xrd_prefix: remote directory to list.
        print_verbose: if True, print a status message before listing.

    Returns:
        A list of (relative_path, adler32_checksum) tuples, where relative_path
        is the listed path with the directory prefix and any leading slashes
        removed.

    Raises:
        subprocess.CalledProcessError: if an xrdfs invocation fails.
        OSError: if the xrdfs executable cannot be started.
        SystemExit: if a listing line cannot be parsed, a checksum reply is
            malformed, or a listed path does not start with the directory.
    """
    if print_verbose: print("Getting list of files and checksums from remote server...")
    # Argument list (no shell) so the directory name is never shell-interpreted.
    xrdfs_ls_output = subprocess.check_output(
        ["xrdfs", xrd_prefix, "ls", "-l", "-R", directory_path_without_xrd_prefix],
        universal_newlines=True,
    )
    # Count the lines that are actually iterated. Counting newline characters
    # can yield 0 for output without a trailing newline, which would make the
    # progress fraction below divide by zero.
    output_lines = xrdfs_ls_output.splitlines()
    n_lines = len(output_lines)
    list_of_files = []
    progressBar = tmProgressBar.tmProgressBar(counterMaxValue=n_lines)
    refresh_freq = max(1, n_lines//100)
    progressBar.initializeTimer()
    for line_index, line in enumerate(output_lines, start=1):
        if line:
            is_directory, full_path = Parse_xrdfs_ls_OutputLine(line)
            if not is_directory:
                # Validate the path before spending a remote query on it.
                if not full_path.startswith(directory_path_without_xrd_prefix):
                    sys.exit("ERROR: xrdfs ls output path {p} does not start with expected directory: {d}".format(p=full_path, d=directory_path_without_xrd_prefix))
                adler32_checksum_value = Query_xrdfs_adler32(xrd_prefix, full_path)
                # Path relative to the parent directory, without leading
                # slashes; lstrip is safe even if nothing is left.
                partial_path = full_path[len(directory_path_without_xrd_prefix):].lstrip("/")
                list_of_files.append((partial_path, adler32_checksum_value))
        # Refresh on the first line, the last line, and roughly every 1% in
        # between. This runs for skipped lines too, so the final refresh is
        # not lost when the listing ends with a directory or blank line.
        if ((line_index == 1) or
            (line_index % refresh_freq == 0) or
            (line_index == n_lines)):
            progressBar.updateBar(fractionCompleted=line_index/n_lines, counterCurrentValue=line_index)
    progressBar.terminate()
    return list_of_files
def CloneDirectoryLocally(xrd_prefix_remote: str, remote_path_without_xrd_prefix: str, path_local: str, print_verbose: bool) -> None:
    """Mirror a remote directory into a local directory, verifying adler32 checksums.

    Creates ``path_local`` if needed, obtains the list of remote files and
    their checksums, and copies with ``xrdcp`` every file that is missing
    locally or whose local checksum differs. Each copied file is checksummed
    again after the transfer.

    Args:
        xrd_prefix_remote: server specification of the remote side.
        remote_path_without_xrd_prefix: remote directory to clone.
        path_local: local destination directory.
        print_verbose: if True, print per-file status messages.

    Raises:
        subprocess.CalledProcessError: if mkdir, xrdcp or a checksum tool fails.
        OSError: if one of the external executables cannot be started.
        SystemExit: if a checksum still mismatches after copying, or if any
            helper output cannot be parsed.
    """
    # All external commands are run from argument lists (no shell), so local
    # and remote paths are passed through verbatim even when they contain
    # spaces or shell metacharacters.
    if not os.path.isdir(path_local):
        subprocess.check_call(["mkdir", "-p", path_local])
    file_details = GetListOfFilesInDirectory(xrd_prefix_remote, remote_path_without_xrd_prefix, print_verbose)
    n_files = len(file_details)
    progressBar = tmProgressBar.tmProgressBar(counterMaxValue=n_files)
    file_index_refresh_freq = max(1, n_files//100)
    progressBar.initializeTimer()
    for file_index, (relative_path, checksum_value) in enumerate(file_details, start=1):
        local_file_path = "{o}/{r}".format(o=path_local, r=relative_path)
        if print_verbose: print("Copying: {r}".format(r=relative_path))
        # A file is up to date only if it exists locally with a matching checksum.
        needs_update = not (os.path.isfile(local_file_path) and
                            (GetLocal_adler32(local_file_path) == checksum_value))
        if print_verbose:
            if needs_update:
                print("File {o}/{r} does not exist or has the wrong checksum. Copying...".format(o=path_local, r=relative_path))
            else:
                print("File {o}/{r} already exists and has the right checksum. Skipping!".format(o=path_local, r=relative_path))
        if needs_update:
            remote_url = "{pref}//{parent}/{relpath}".format(pref=xrd_prefix_remote, parent=remote_path_without_xrd_prefix, relpath=relative_path)
            subprocess.check_call(["xrdcp", "--silent", "--nopbar", "--force", "--path", "--streams", "15", remote_url, local_file_path])
            # Verify the transfer before moving on to the next file.
            if GetLocal_adler32(local_file_path) != checksum_value:
                sys.exit("ERROR: Checksums do not match after copying file with relative path: {p}".format(p=relative_path))
        # Refresh on the first file, the last file, and roughly every 1% in
        # between. Skipped files count too, so the bar reaches its final
        # state even when the last file was already up to date.
        if ((file_index == 1) or
            (file_index % file_index_refresh_freq == 0) or
            (file_index == n_files)):
            progressBar.updateBar(fractionCompleted=file_index/n_files, counterCurrentValue=file_index)
    progressBar.terminate()
def test():
    """Manual smoke test: list a fixed remote test directory, then clone it locally.

    Prints every (relative path, checksum) entry found and afterwards mirrors
    the same remote directory into ``../test_tmXrootUtils``, both in verbose mode.
    """
    server_prefix = "root://cmseos.fnal.gov"
    remote_directory = "/store/user/tmudholk/test"
    remote_entries = GetListOfFilesInDirectory(
        xrd_prefix=server_prefix,
        directory_path_without_xrd_prefix=remote_directory,
        print_verbose=True,
    )
    print("Files found:")
    for entry in remote_entries:
        print("Found: {i}".format(i=entry))
    CloneDirectoryLocally(
        xrd_prefix_remote=server_prefix,
        remote_path_without_xrd_prefix=remote_directory,
        path_local="../test_tmXrootUtils",
        print_verbose=True,
    )
# Run the manual smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test()