diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample index 6ab652435950..f0f96237e80b 100644 --- a/lib/galaxy/config/sample/datatypes_conf.xml.sample +++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample @@ -32,7 +32,7 @@ - + diff --git a/lib/galaxy/datatypes/chain.py b/lib/galaxy/datatypes/chain.py index acf451ce85c8..4e853865d042 100644 --- a/lib/galaxy/datatypes/chain.py +++ b/lib/galaxy/datatypes/chain.py @@ -9,6 +9,7 @@ from galaxy.datatypes.sniff import ( build_sniff_from_prefix, FilePrefix, + get_headers, ) from galaxy.util import ( commaify, @@ -91,41 +92,31 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool: >>> fname = get_test_fname( '1.chain' ) >>> Chain().sniff( fname ) True + >>> fname = get_test_fname( '2.chain' ) + >>> Chain().sniff( fname ) + True >>> """ - fh = file_prefix.string_io() - for line in fh: - line = line.strip() - if line: # first non-empty line - if line.startswith("chain"): - tokens = line.split() - if not ( - len(tokens) in [12, 13] - and tokens[4] in self.strands - and tokens[9] in self.strands - and tokens[3].isdigit() - and tokens[5].isdigit() - and tokens[6].isdigit() - ): - return False - prior_token_len = 0 - for line in fh: - line = line.strip() - if line == "": - break - tokens = line.split() - if prior_token_len == 1: - return False - if len(tokens) not in [1, 3]: - return False - if not all(token.isdigit() for token in tokens): - return False - prior_token_len = len(tokens) - if prior_token_len == 1: - return True - else: - return False - return False + headers = get_headers(file_prefix, None, count=2, comment_designator="#") + if not ( + len(headers) == 2 + and len(headers[0]) in [12, 13] + and headers[0][0] == "chain" + and headers[0][1].isdecimal() + and headers[0][3].isdecimal() + and headers[0][4] in self.strands + and headers[0][5].isdecimal() + and headers[0][6].isdecimal() + and headers[0][8].isdecimal() + and headers[0][9] in self.strands + and headers[0][10].isdecimal() + and headers[0][11].isdecimal() + and headers[1][0].isdecimal() + and len(headers[1]) in [1, 3] + ): + return False + else: + return True @build_sniff_from_prefix @@ -161,34 +152,21 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool: allowed_classes = ["fill", "gap"] strands = ["+", "-"] - fh = file_prefix.string_io() - for line in fh: - line = line.strip() - if line: # first non-empty line - if line.startswith("net"): - tokens = line.split() - if not (len(tokens) == 3 and tokens[2].isdigit()): - return False - for line in fh: - if line[0] != " ": # children are indented one space - return False - line = line.strip() - if line == "": - break - tokens = line.split() - if not ( - len(tokens) >= 7 # seven fixed fields - and len(tokens) <= 41 # plus seventeen optional name/value pairs - and tokens[0] in allowed_classes - and tokens[1].isdigit() - and tokens[2].isdigit() - and tokens[4] in strands - and tokens[5].isdigit() - and tokens[6].isdigit() - ): - return False - else: - return True - else: - return False - return False + headers = get_headers(file_prefix, None, count=2, comment_designator="#") + if not ( + len(headers) == 2 + and len(headers[0]) == 3 + and headers[0][0] == "net" + and headers[0][2].isdecimal() + and len(headers[1]) >= 7 # seven fixed fields + and len(headers[1]) <= 41 # plus seventeen optional name/value pairs + and headers[1][0] in allowed_classes + and headers[1][1].isdecimal() + and headers[1][2].isdecimal() + and headers[1][4] in strands + and headers[1][5].isdecimal() + and headers[1][6].isdecimal() + ): + return False + else: + return True diff --git a/lib/galaxy/datatypes/sequence.py b/lib/galaxy/datatypes/sequence.py index c88abfcde5cd..911a1a2db31a 100644 --- a/lib/galaxy/datatypes/sequence.py +++ b/lib/galaxy/datatypes/sequence.py @@ -1205,26 +1205,30 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool: >>> fname = get_test_fname( 'alignment.lav' ) >>> Axt().sniff( fname ) False + >>> fname = get_test_fname( '2.chain' ) + >>> Axt().sniff( fname ) + False """ - headers = get_headers(file_prefix, None) - if len(headers) < 4: + headers = get_headers(file_prefix, None, count=4, comment_designator="#") + if not ( + len(headers) >= 3 + and len(headers[0]) == 9 + and headers[0][0] == "0" + and headers[0][2].isdecimal() + and headers[0][3].isdecimal() + and headers[0][5].isdecimal() + and headers[0][6].isdecimal() + and headers[0][7] in data.valid_strand + and headers[0][8].isdecimal() + and len(headers[1]) == 1 + and len(headers[2]) == 1 + ): return False - for hdr in headers: - if len(hdr) > 0 and hdr[0].startswith("##matrix=axt"): - return True - if len(hdr) > 0 and not hdr[0].startswith("#"): - if len(hdr) != 9: - return False - try: - for _ in (hdr[0], hdr[2], hdr[3], hdr[5], hdr[6], hdr[8]): - int(_) - except ValueError: - return False - if hdr[7] not in data.valid_strand: - return False - else: - return True - return False + # the optional fourth non-comment line has to be empty + if len(headers) == 4 and not headers[3] == []: + return False + else: + return True @build_sniff_from_prefix diff --git a/lib/galaxy/datatypes/test/2.chain b/lib/galaxy/datatypes/test/2.chain new file mode 100644 index 000000000000..822a4ff7b052 --- /dev/null +++ b/lib/galaxy/datatypes/test/2.chain @@ -0,0 +1,10 @@ +##matrix=axtChain 16 91,-114,-31,-123,-114,100,-125,-31,-31,-125,100,-114,-123,-31,-114,91 +##gapPenalties=axtChain O=400 E=30 +chain 67224 chr22 50818468 + 26560645 26561468 chr19 61431566 - 54838449 54839272 1 +823 + +chain 48985 chr22 50818468 + 26560497 26561116 chr19 61431566 + 29160089 29160708 2 +619 + +chain 46902 chr22 50818468 + 19792341 19793000 chr19 61431566 + 59180700 59181359 3 +659