Skip to content

Commit

Permalink
Merge pull request #121 from ComPlat/100-new-powerxrd-reader
Browse files Browse the repository at this point in the history
100 new powerxrd reader
  • Loading branch information
StarmanMartin authored Nov 12, 2024
2 parents cb2f7ad + 088fea6 commit b9a9318
Show file tree
Hide file tree
Showing 30 changed files with 509 additions and 105 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install wheel setuptools pip pybind11 --upgrade
pip install -r ./requirements/dev.txt
pip install pylint
- name: Analysing the code with pylint
Expand Down
33 changes: 16 additions & 17 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,19 @@ jobs:
matrix:
python-version: ["3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r ./requirements/dev.txt
pip install pytest
- name: Build tests
run: |
python test_manager/__init__.py -t -g -tp
- name: Test the code with pytest
run: |
pytest ./test_manager/test_profiles.py
pytest ./test_manager/test_readers.py
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
pip install wheel setuptools pip pybind11 --upgrade
pip install -r ./requirements/dev.txt
pip install pytest
- name: Build tests
run: |
python test_manager/__init__.py -t -g -tp
- name: Test the code with pytest
run: |
pytest .
17 changes: 12 additions & 5 deletions converter_app/converters.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy
import datetime
import logging
import os
import re
Expand Down Expand Up @@ -364,15 +365,21 @@ def match_profile(cls, client_id, file_data):
"""
converter = None
matches = 0

latest_profile_uploaded = 0
for profile in Profile.list(client_id):
current_converter = cls(profile, file_data)
current_matches = current_converter.match()

try:
profile_uploaded = datetime.datetime.fromisoformat(
profile.as_dict['data']['metadata'].get('uploaded')).timestamp()
except (ValueError, TypeError):
profile_uploaded = 1
logger.info('profile=%s matches=%s', profile.id, current_matches)

if current_matches is not False and current_matches > matches:
if (current_matches is not False and
(current_matches > matches or current_matches == matches and
profile_uploaded > latest_profile_uploaded)):
matches = max(matches, current_matches)
latest_profile_uploaded = profile_uploaded
converter = current_converter
matches = current_matches

return converter
2 changes: 1 addition & 1 deletion converter_app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def list(cls, client_id):
profiles_path = Path(current_app.config['PROFILES_DIR']).joinpath(client_id)

if profiles_path.exists():
for file_path in Path.iterdir(profiles_path):
for file_path in sorted(Path.iterdir(profiles_path)):
profile_id = str(file_path.with_suffix('').name)
profile_data = cls.load(file_path)
yield cls(profile_data, client_id, profile_id)
Expand Down
2 changes: 1 addition & 1 deletion converter_app/readers/ascii.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def prepare_tables(self):
row = row.replace('n.a.', '')
float_match = self.float_pattern.findall(row)
if float_match:
float_match = [self.get_value(float_str) for float_str in float_match]
float_match = [self.get_value(float_str.strip()) for float_str in float_match]
count = len(float_match)

if table['rows'] and count != previous_count:
Expand Down
27 changes: 24 additions & 3 deletions converter_app/readers/helper/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ def __init__(self):
'rows': []
})

def add_metadata(self, key, value):
    """
    Store a single metadata entry on this table.

    The entry is handed to ``add_unique`` of the object stored under
    the table's ``'metadata'`` key.

    :param key: Key of the metadata entry
    :param value: Value of the metadata entry
    :return: None
    """
    metadata = self['metadata']
    metadata.add_unique(key, value)

def __add__(self, other):
raise NotImplementedError

Expand Down Expand Up @@ -47,7 +56,7 @@ class Reader:
"""
Base reader. Any reader needs to extend this abstract reader.
"""
float_pattern = re.compile(r'(-?\d+[,.]*\d*[eE+\-\d]*)\S*')
float_pattern = re.compile(r'[-+]?[0-9]*[.,]?[0-9]+(?:[eE][-+]?[0-9]+)?\s*')
float_de_pattern = re.compile(r'(-?[\d.]+,\d*[eE+\-\d]*)')
float_us_pattern = re.compile(r'(-?[\d,]+.\d*[eE+\-\d]*)')

Expand Down Expand Up @@ -117,7 +126,7 @@ def get_tables(self) -> list[Table]:
'name': f'Column #{idx + start_len_c}'
} for idx, value in enumerate(table['rows'][0][start_len_c:])]
table['columns'] = sorted(table['columns'], key=lambda x: int(x['key']))
for k,v in enumerate(table['columns'][:should_len_c]):
for k, v in enumerate(table['columns'][:should_len_c]):
v['key'] = f'{k}'

table['metadata']['rows'] = str(len(table['rows']))
Expand Down Expand Up @@ -168,13 +177,25 @@ def get_shape(self, row) -> list:
cell = str(cell).strip()
if cell in self.empty_values:
shape.append('')
elif self.float_pattern.match(cell):
elif self.float_pattern.fullmatch(cell):
shape.append('f')
else:
shape.append('s')

return shape

def as_number(self, value: str) -> float | int:
"""
Returns a numeric value if possible:
:raises ValueError: If not convertable
:param value: as string
:return: numeric value either int or float
"""
if re.match(r'^[+-]?\d+$', value) is not None:
return int(value)
return float(self.get_value(value))

def get_value(self, value: str) -> str:
"""
Checks if values is a stringified float and makes it to a standard.
Expand Down
87 changes: 87 additions & 0 deletions converter_app/readers/uxd_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import logging

from converter_app.models import File
from converter_app.readers.helper.reader import Readers
from converter_app.readers.helper.base import Reader

logger = logging.getLogger(__name__)


class UXDReader(Reader):
    """
    Reader for UXD files. Files from: Powder Diffraction - Diffrac Plus
    Test File: test_files/data_files/Powder Diffraction/Diffrac Plus/XCH-UXD/PD-01-02(2).UXD

    Lines starting with ``_`` or ``;`` are collected as header lines and
    ``_KEY = VALUE`` lines are additionally stored as metadata. All other
    lines are parsed as data rows, depending on the ``FILEVERSION``
    metadata entry (2: space separated, 3: tab separated).
    """

    identifier = 'uxd_reader'
    priority = 10

    def __init__(self, file: File):
        super().__init__(file)
        self._file_extensions = ['.uxd']
        self._table = None
        self._version = 2
        self._max_table_length = 0

    def check(self):
        """
        :return: True if the file suffix (case-insensitive) is ``.uxd``
        """
        return self.file.suffix.lower() in self._file_extensions

    def _read_data(self, line: str):
        """
        Parse one data line and append it to the current table.

        Lines containing a value that cannot be converted to a number are
        skipped. For any version other than 2 or 3 nothing is read.

        :param line: one decoded line of the data section
        """
        if self._version == 2:
            try:
                new_row = [self.as_number(x.strip()) for x in line.split(' ') if x != '']
            except ValueError:
                return
            if new_row:
                self._max_table_length = max(self._max_table_length, len(new_row))
                self._table['rows'].append(new_row)
        elif self._version == 3:
            try:
                value = [self.as_number(x.strip()) for x in line.split('\t')]
            except ValueError:
                return
            # A row needs at least two columns; shorter lines used to raise
            # an uncaught IndexError and are now skipped like other
            # unparsable lines.
            if len(value) >= 2:
                self._table['rows'].append([value[0], value[1]])

    def _add_metadata(self, key, val):
        """
        Add a metadata entry; float-like values are normalised first.

        :param key: Key of the metadata
        :param val: Value of the metadata as string
        """
        if self.float_pattern.fullmatch(val):
            val = self.get_value(val)
        self._table.add_metadata(key, val)

    def prepare_tables(self):
        """
        Read the file and build the table.

        :return: list of tables created via ``append_table``
        """
        tables = []
        self._table = self.append_table(tables)
        data_rows = []
        for row in self.file.fp.readlines():
            line = row.decode(self.file.encoding).rstrip()

            if len(line) > 1 and line[0] in ('_', ';'):
                self._table['header'].append(line)
                if line[0] == '_' and line[1] != '+' and '=' in line:
                    # Split on the first '=' only, so values that contain
                    # '=' themselves are not truncated.
                    raw_key, _, raw_value = line.partition('=')
                    key = raw_key.strip()[1:]
                    value = raw_value.strip().replace('\n', '')
                    self._add_metadata(key, value)
            else:
                data_rows.append(line)

        try:
            self._version = int(self._table['metadata'].get('FILEVERSION'))
        except (TypeError, ValueError):
            # TypeError covers int(None), i.e. a lookup that yields None
            # because the file has no FILEVERSION entry. It gets the same
            # fallback as an unparsable version.
            self._version = 0

        for row in data_rows:
            self._read_data(row)

        # Pad short rows so that all rows have the same number of columns.
        for row in self._table['rows']:
            while len(row) < self._max_table_length:
                row.append('')

        metadata = self._table['metadata']
        row_count = len(self._table['rows'])
        # END is only meaningful if there is at least one data row.
        if row_count > 0 and 'START' in metadata and 'STEPSIZE' in metadata:
            try:
                start = self.as_number(metadata['START'])
                step_size = self.as_number(metadata['STEPSIZE'])
            except ValueError:
                # Non-numeric START/STEPSIZE: END cannot be derived, the
                # parsed table is still returned.
                pass
            else:
                self._table.add_metadata("END", start + step_size * (row_count - 1))

        return tables


# Register the UXD reader class with the shared Readers instance on import.
Readers.instance().register(UXDReader)
125 changes: 125 additions & 0 deletions converter_app/readers/xml_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import logging

import xml.etree.ElementTree as ET

from converter_app.models import File
from converter_app.readers.helper.reader import Readers
from converter_app.readers.helper.base import Reader

logger = logging.getLogger(__name__)


class XMLReader(Reader):
    """
    Reader for XML files.

    Element texts that consist of more than one number (space separated)
    become table columns. All other texts and all attributes are stored as
    metadata of the first table. Numeric values that occur repeatedly
    under the same XML path are additionally collected as columns.
    """

    identifier = 'xml_reader'
    priority = 10

    def __init__(self, file: File):
        super().__init__(file)
        self._file_extensions = ['.xml']
        self._table = None
        self._data_tables = []
        # Maps XML path -> column dict, or None once a non-numeric value
        # was seen for that path.
        self._potential_data_tables = {}

    def check(self):
        """
        :return: True if the file suffix (case-insensitive) is ``.xml``
        """
        return self.file.suffix.lower() in self._file_extensions

    def _get_tag_name(self, node: ET.Element):
        """
        :param node: XML node Object
        :return: tag of the node without a leading ``{namespace}`` part
        """
        return node.tag.split('}', 1)[-1]

    def _filter_data_rows(self, node: ET.Element, text: str, xml_path: str) -> bool:
        """
        Register the text of a node as data column if it consists of more
        than one value and all values are numeric.

        :param node: XML node Object the text belongs to
        :param text: text content of the node
        :param xml_path: Path in global XML-file to this node
        :return: True if the text was registered as data column
        """
        text_array = [x for x in text.strip().split(' ') if x != '']
        shape = self.get_shape(text_array)
        if len(shape) > 1 and all(x == 'f' for x in shape):
            self._data_tables.append(self._generate_data_table(shape, xml_path, text_array, node))
            return True
        return False

    def _generate_data_table(self, shape: list[str], xml_path: str, text_array: list[str], node: ET.Element):
        """
        Build the dict describing one data column.

        :raises ValueError: If a value is not convertable to a number
        """
        return {
            'shape': ''.join(shape),
            'path': xml_path,
            'values': [self.as_number(x) for x in text_array],
            'node': node
        }

    def handle_node(self, node: ET.Element, xml_path: str, node_name: str):
        """
        This method can be overridden to handle special nodes separately.
        :param node: XML node Object
        :param xml_path: Path in global XML-file of the parent of this node
        :param node_name: Name of the Node
        """

    def _add_metadata(self, key: str, val: str, node: ET.Element):
        """
        Add a metadata entry and track numeric values per key, so that
        keys occurring repeatedly with numeric values can become columns.

        :param key: Key of the metadata (XML path)
        :param val: Value of the metadata as string
        :param node: XML node stored with the potential data column
        """
        m = self.float_pattern.fullmatch(val)
        if key in self._potential_data_tables:
            if m and self._potential_data_tables[key] is not None:
                self._potential_data_tables[key]['values'].append(self.as_number(val))
                self._potential_data_tables[key]['shape'] += 'f'
            else:
                # A non-numeric value disqualifies this key permanently.
                self._potential_data_tables[key] = None
        elif m:
            self._potential_data_tables[key] = self._generate_data_table(['f'], key, [val], node)
        self._table.add_metadata(key, val)

    def _read_node(self, node: ET.Element, xml_path: str = '#'):
        """
        Recursively walk through all children of the node.

        :param node: XML node Object whose children are read
        :param xml_path: Path in global XML-file to this node
        """
        for child in node:
            text = child.text

            try:
                local_name = self._get_tag_name(child)
                new_path = f'{xml_path}.{local_name}'
            except ValueError:
                new_path = 'Unknown'
                local_name = ''

            self.handle_node(child, xml_path, local_name)

            # NOTE(review): the parent ``node`` (not ``child``) is passed to
            # _add_metadata, so columns built from repeated metadata are
            # named after the parent tag - confirm this is intended.
            if text is not None and not self._filter_data_rows(child, text, new_path):
                self._add_metadata(new_path, text.strip(), node)
            # NOTE(review): nesting of this loop was not recoverable from
            # the reviewed source; attributes are recorded for every child
            # here - confirm against the repository.
            for k, v in child.attrib.items():
                self._add_metadata(f'{new_path}.{k}', v, node)

            self._read_node(child, new_path)

    def prepare_tables(self):
        """
        Parse the XML content and build the tables.

        :return: list of tables created via ``append_table``
        """
        tables = []
        self._table = self.append_table(tables)
        # NOTE(review): xml.etree.ElementTree is not secured against
        # maliciously constructed XML; consider a hardened parser if the
        # uploaded files are untrusted.
        root = ET.XML(self.file.content)
        self._read_node(root)
        self._merge_tables(self._data_tables, tables)

        # Entries are set to None by _add_metadata once a non-numeric value
        # was seen; they have to be skipped before accessing 'values'.
        potential_tables = [
            x for x in self._potential_data_tables.values()
            if x is not None and len(x['values']) > 1
        ]
        potential_tables.sort(key=lambda x: len(x['values']))
        self._merge_tables(potential_tables, tables)

        return tables

    def _merge_tables(self, data_tables: list, tables):
        """
        Append the columns to tables. Consecutive columns with the same
        shape share one table, a change of the shape starts a new table.

        :param data_tables: column dicts as built by _generate_data_table
        :param tables: list the new tables are appended to
        """
        current_shape = ''
        for table_col in data_tables:
            if current_shape != table_col['shape']:
                current_shape = table_col['shape']
                self._table = self.append_table(tables)
                self._table['rows'] = [[] for _ in range(len(table_col['values']))]

            tag_name = self._get_tag_name(table_col['node'])
            # The length of the first row is the index of the new column.
            self._table.add_metadata(f"COL #{len(self._table['rows'][0])}", tag_name)
            self._table.add_metadata(f"COL #{len(self._table['rows'][0])} XML PATH", table_col['path'])

            for i, v in enumerate(table_col['values']):
                self._table['rows'][i].append(v)

            for k, v in table_col['node'].attrib.items():
                self._table.add_metadata(f'{tag_name}.{k}', v)


# Register the XML reader class with the shared Readers instance on import.
Readers.instance().register(XMLReader)
Loading

0 comments on commit b9a9318

Please sign in to comment.