import logging
import os
import shutil
import tempfile
import tarfile
import hplc as ph
from converter_app.readers.helper.base import Reader
from converter_app.readers.helper.reader import Readers

logger = logging.getLogger(__name__)


class HplcReader(Reader):
    """
    Reads tarballed hplc files with extension .tar.gz, .tar.xz or .tar
    """
    identifier = 'hplc_reader'
    priority = 5

    # (file name suffix, tarfile open mode) pairs, tested in this order.
    _TAR_MODES = (('.gz', 'r:gz'), ('.xz', 'r:xz'), ('.tar', 'r:'))

    def __init__(self, file):
        super().__init__(file)
        # Result of ph.read_chromatograms(); stays None until check() succeeds.
        self.df = None
        # Extraction directory; only set while check() is running.
        self.temp_dir = None

    @classmethod
    def _tar_mode(cls, name):
        """
        :param name: file name of the upload
        :return: the tarfile mode matching the suffix of name, or None
        """
        for suffix, mode in cls._TAR_MODES:
            if name.endswith(suffix):
                return mode
        return None

    @staticmethod
    def _extract(archive_path, mode, target_dir):
        """
        Extracts the archive into target_dir without letting any member
        escape that directory (the archive is an untrusted upload).

        :param archive_path: path of the tarball on disk
        :param mode: tarfile open mode, e.g. 'r:gz'
        :param target_dir: existing directory to extract into
        :raises tarfile.TarError: if the file is not a valid or safe tarball
        :raises ValueError: if a member would be written outside target_dir
        """
        with tarfile.open(archive_path, mode) as archive:
            if hasattr(tarfile, 'data_filter'):
                # The stdlib filter rejects absolute paths, '..' escapes,
                # unsafe links and special files.
                archive.extractall(target_dir, filter='data')
                return

            # Fallback for interpreters without extraction filters.
            root = os.path.realpath(target_dir)
            safe_members = []
            for member in archive.getmembers():
                if not (member.isfile() or member.isdir()):
                    # Links, devices and FIFOs are never needed here.
                    continue
                destination = os.path.realpath(os.path.join(root, member.name))
                if os.path.commonpath([root, destination]) != root:
                    raise ValueError(f'unsafe path in archive: {member.name!r}')
                safe_members.append(member)
            archive.extractall(target_dir, members=safe_members)

    def check(self):
        """
        Saves the upload to a scratch directory, extracts it and hands the
        first extracted entry (in sorted order) to ph.read_chromatograms().
        The scratch directory is always removed before returning.

        :return: True if it fits, i.e. the file name has a tarball suffix,
                 the archive could be extracted and chromatograms were read
        """
        mode = self._tar_mode(self.file.name)
        if mode is None:
            return False

        self.df = None
        # mkdtemp + rmtree(ignore_errors=True) instead of TemporaryDirectory:
        # a failing cleanup must never turn check() into an exception.
        work_dir = tempfile.mkdtemp()
        try:
            # Archive and extracted content live in separate places so the
            # directory listing below only sees archive members.
            archive_path = os.path.join(work_dir, 'upload')
            extract_dir = os.path.join(work_dir, 'extracted')
            os.mkdir(extract_dir)
            self.temp_dir = extract_dir

            # Save the contents of FileStorage to the scratch file
            self.file.fp.save(archive_path)
            self._extract(archive_path, mode, extract_dir)

            # Sorted so the chosen entry does not depend on listdir order.
            entries = sorted(os.listdir(extract_dir))
            if entries:
                self.df = ph.read_chromatograms(os.path.join(extract_dir, entries[0]))
        except (ValueError, EOFError, OSError, tarfile.TarError) as error:
            # Any .gz/.xz/.tar upload reaches this reader; one that is not a
            # valid HPLC tarball simply does not fit.
            logger.debug('%s is not a readable hplc archive: %s', self.file.name, error)
            self.df = None
            return False
        finally:
            self.temp_dir = None
            shutil.rmtree(work_dir, ignore_errors=True)

        # An empty archive yields no data and therefore does not fit either.
        return self.df is not None

    def prepare_tables(self):
        """
        Builds one table per key of self.df that starts with 'Wave'. Each
        row pairs a value of self.df['time'] with the value at the same
        index of that wave.

        :return: list of tables; empty if no chromatograms were read
        """
        tables = []
        if self.df is None:
            return tables

        waves = sorted(key for key in self.df.keys() if key.startswith('Wave'))
        time = self.df['time']
        for wave_key in waves:
            wave = self.df[wave_key]
            # NOTE(review): append_table is inherited; it presumably appends a
            # new table to `tables` and returns it - confirm in Reader.
            table = self.append_table(tables)

            # 'Wave_254' -> metadata['Wave'] = '254'; a key without an
            # underscore gets an empty value instead of raising IndexError.
            parts = wave_key.split('_')
            table['metadata'][parts[0]] = str(parts[1]) if len(parts) > 1 else ''
            table['metadata']['AllWaves'] = str(waves)

            for i, t in enumerate(time):
                table['rows'].append([t, float(wave[i])])

            table['columns'] = [{
                'key': str(idx),
                'name': f'{value}'
            } for idx, value in enumerate(['Time', 'Wavelength'])]
        return tables


Readers.instance().register(HplcReader)