diff --git a/.appveyor.yml b/.appveyor.yml index 57be9e6..e3cfa2a 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -4,6 +4,15 @@ environment: matrix: - PYTHON_VERSION: 3.7 MINICONDA: C:\Miniconda3 + - PYTHON_VERSION: 3.8 + MINICONDA: C:\Miniconda3 + - PYTHON_VERSION: 3.9 + MINICONDA: C:\Miniconda3 + - PYTHON_VERSION: "3.10" + MINICONDA: C:\Miniconda3 + - PYTHON_VERSION: 3.11 + MINICONDA: C:\Miniconda3 + init: - ECHO %PYTHON_VERSION% %MINICONDA% diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml new file mode 100644 index 0000000..954bd73 --- /dev/null +++ b/.github/workflows/tests.yaml @@ -0,0 +1,44 @@ +name: tests + +on: + push: + paths-ignore: + - "README.md" + - "docs/**" + - "CHANGELOG.md" + + pull_request: + paths-ignore: + - "README.md" + - "docs/*" + - "CHANGELOG.md" + +jobs: + build_biopandas: + runs-on: ubuntu-latest + # https://github.com/marketplace/actions/setup-miniconda#use-a-default-shell + defaults: + run: + shell: bash -l {0} + + strategy: + matrix: + python-version: [3.7, 3.8, 3.9, "3.10", 3.11] + steps: + - name: Checkout repository + uses: actions/checkout@v3 + # See: https://github.com/marketplace/actions/setup-miniconda + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + miniforge-variant: Mambaforge + conda-channels: conda-forge + python-version: ${{ matrix.python-version }} + use-mamba: true + - name: Install BioPandas + run: pip install -e . + - name: Install Dev Dependencies + run: pip install mmtf-python numpy scipy pandas pytest looseversion importlib_resources + - name: Run unit tests and generate coverage report + run: pytest -s -v diff --git a/biopandas/__init__.py b/biopandas/__init__.py index ea87b48..0a00907 100644 --- a/biopandas/__init__.py +++ b/biopandas/__init__.py @@ -24,5 +24,5 @@ # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.5.1dev" +__version__ = "0.5.1" __author__ = "Sebastian Raschka " diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py index 6eb53bf..94c2ca0 100644 --- a/biopandas/pdb/pandas_pdb.py +++ b/biopandas/pdb/pandas_pdb.py @@ -9,8 +9,8 @@ import gzip import sys -import warnings import textwrap +import warnings from copy import deepcopy from io import StringIO from typing import List, Optional @@ -22,9 +22,10 @@ import pandas as pd from looseversion import LooseVersion -from .engines import amino3to1dict, pdb_df_columns, pdb_records from biopandas.constants import ATOMIC_MASSES +from .engines import amino3to1dict, pdb_df_columns, pdb_records + pd_version = LooseVersion(pd.__version__) @@ -115,29 +116,34 @@ def read_pdb_from_list(self, pdb_lines): self.header, self.code = self._parse_header_code() return self - def fetch_pdb(self, pdb_code: Optional[str] = None, uniprot_id: Optional[str] = None, source: str = "pdb"): + def fetch_pdb( + self, + pdb_code: Optional[str] = None, + uniprot_id: Optional[str] = None, + source: str = "pdb", + ): """Fetches PDB file contents from the Protein Databank at rcsb.org or AlphaFold database - at https://alphafold.ebi.ac.uk/. -. + at https://alphafold.ebi.ac.uk/. + . - Parameters - ---------- - pdb_code : str, optional - A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from the PDB. - Defaults to `None`. + Parameters + ---------- + pdb_code : str, optional + A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from the PDB. + Defaults to `None`. - uniprot_id : str, optional - A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures from the AF2 database. - Defaults to `None`. + uniprot_id : str, optional + A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures from the AF2 database. + Defaults to `None`. - source : str - The source to retrieve the structure from - (`"pdb"`, `"alphafold2-v3"`, `"alphafold2-v4"`(latest)). - Defaults to `"pdb"`. + source : str + The source to retrieve the structure from + (`"pdb"`, `"alphafold2-v3"`, `"alphafold2-v4"`(latest)). + Defaults to `"pdb"`. - Returns - --------- - self + Returns + --------- + self """ # Sanitize input @@ -145,15 +151,21 @@ def fetch_pdb(self, pdb_code: Optional[str] = None, uniprot_id: Optional[str] = invalid_input_identifier_2 = pdb_code is not None and uniprot_id is not None invalid_input_combination_1 = uniprot_id is not None and source == "pdb" invalid_input_combination_2 = pdb_code is not None and source in { - "alphafold2-v3", "alphafold2-v4"} + "alphafold2-v3", + "alphafold2-v4", + } if invalid_input_identifier_1 or invalid_input_identifier_2: raise ValueError("Please provide either a PDB code or a UniProt ID.") if invalid_input_combination_1: - raise ValueError("Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'.") + raise ValueError( + "Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'." + ) elif invalid_input_combination_2: - raise ValueError(f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}.") + raise ValueError( + f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}." + ) if source == "alphafold2-v3": af2_version = 3 @@ -164,8 +176,10 @@ def fetch_pdb(self, pdb_code: Optional[str] = None, uniprot_id: Optional[str] = elif source == "pdb": self.pdb_path, self.pdb_text = self._fetch_pdb(pdb_code) else: - raise ValueError(f"Invalid source: {source}." - " Please use one of 'pdb' or 'alphafold2-v3' or 'alphafold2-v4'.") + raise ValueError( + f"Invalid source: {source}." + " Please use one of 'pdb' or 'alphafold2-v3' or 'alphafold2-v4'." + ) self._df = self._construct_df(pdb_lines=self.pdb_text.splitlines(True)) return self @@ -248,7 +262,7 @@ def impute_element(self, records=("ATOM", "HETATM"), inplace=False): ) return t - def add_remark(self, code, text='', indent=0): + def add_remark(self, code, text="", indent=0): """Add custom REMARK entry. The remark will be inserted to preserve the ordering of REMARK codes, i.e. if the code is @@ -275,57 +289,65 @@ def add_remark(self, code, text='', indent=0): """ # Prepare info from self - if 'OTHERS' in self.df: - df_others = self.df['OTHERS'] + if "OTHERS" in self.df: + df_others = self.df["OTHERS"] else: - df_others = pd.DataFrame(columns=['record_name', 'entry', 'line_idx']) - record_types = list(filter(lambda x: x in self.df, ['ATOM', 'HETATM', 'ANISOU'])) - remarks = df_others[df_others['record_name'] == 'REMARK']['entry'] + df_others = pd.DataFrame(columns=["record_name", "entry", "line_idx"]) + record_types = list( + filter(lambda x: x in self.df, ["ATOM", "HETATM", "ANISOU"]) + ) + remarks = df_others[df_others["record_name"] == "REMARK"]["entry"] # Find index and line_idx where to insert the remark to preserve remark code order if len(remarks): remark_codes = remarks.apply(lambda x: x.split(maxsplit=1)[0]).astype(int) - insertion_pos = remark_codes.searchsorted(code, side='right') + insertion_pos = remark_codes.searchsorted(code, side="right") if insertion_pos < len(remark_codes): # Remark in the middle insertion_idx = remark_codes.index[insertion_pos] - insertion_line_idx = df_others.loc[insertion_idx]['line_idx'] + insertion_line_idx = df_others.loc[insertion_idx]["line_idx"] else: # Last remark insertion_idx = len(remark_codes) - insertion_line_idx = df_others['line_idx'].iloc[-1] + 1 + insertion_line_idx = df_others["line_idx"].iloc[-1] + 1 else: # First remark insertion_idx = 0 - insertion_line_idx = min([self.df[r]['line_idx'].min() for r in record_types]) + insertion_line_idx = min( + [self.df[r]["line_idx"].min() for r in record_types] + ) # Wrap remark to fit into 80 characters per line and add indentation wrapper = textwrap.TextWrapper(width=80 - (11 + indent)) - lines = sum([wrapper.wrap(line.strip()) or [' '] for line in text.split('\n')], []) - lines = list(map(lambda x: f'{code:4} ' + indent*' ' + x, lines)) + lines = sum( + [wrapper.wrap(line.strip()) or [" "] for line in text.split("\n")], [] + ) + lines = list(map(lambda x: f"{code:4} " + indent * " " + x, lines)) # Shift data frame indices and row indices to create space for the remark # Create space in OTHERS - line_idx = df_others['line_idx'].copy() + line_idx = df_others["line_idx"].copy() line_idx[line_idx >= insertion_line_idx] += len(lines) - df_others['line_idx'] = line_idx + df_others["line_idx"] = line_idx index = pd.Series(df_others.index.copy()) index[index >= insertion_idx] += len(lines) df_others.index = index # Shift all other record types that follow inserted remark for records in record_types: df_records = self.df[records] - if not insertion_line_idx > df_records['line_idx'].max(): - df_records['line_idx'] += len(lines) + if not insertion_line_idx > df_records["line_idx"].max(): + df_records["line_idx"] += len(lines) # Put remark into 'OTHERS' data frame df_remark = { - idx: ['REMARK', line, line_idx] + idx: ["REMARK", line, line_idx] for idx, line, line_idx in zip( range(insertion_idx, insertion_idx + len(lines)), lines, range(insertion_line_idx, insertion_line_idx + len(lines)), ) } - df_remark = pd.DataFrame.from_dict(df_remark, orient='index', columns=df_others.columns) - self.df['OTHERS'] = pd.concat([df_others, df_remark]).sort_index() + df_remark = pd.DataFrame.from_dict( + df_remark, orient="index", columns=df_others.columns + ) + self.df["OTHERS"] = pd.concat([df_others, df_remark]).sort_index() @staticmethod def rmsd(df1, df2, s=None, invert=False, decimals=4): @@ -435,11 +457,13 @@ def _fetch_af2(uniprot_id: str, af2_version: int = 3): try: response = urlopen(url) txt = response.read() - txt = txt.decode('utf-8') if sys.version_info[0] >= 3 else txt.encode('ascii') + txt = ( + txt.decode("utf-8") if sys.version_info[0] >= 3 else txt.encode("ascii") + ) except HTTPError as e: - print(f'HTTP Error {e.code}') + print(f"HTTP Error {e.code}") except URLError as e: - print(f'URL Error {e.args}') + print(f"URL Error {e.args}") return url, txt def _parse_header_code(self): @@ -518,7 +542,7 @@ def _construct_df(pdb_lines): record = line[:6].rstrip() line_ele = ["" for _ in range(len(pdb_records[record]) + 1)] for idx, ele in enumerate(pdb_records[record]): - line_ele[idx] = line[ele["line"][0]: ele["line"][1]].strip() + line_ele[idx] = line[ele["line"][0] : ele["line"][1]].strip() line_ele[-1] = line_num line_lists[record].append(line_ele) else: @@ -847,7 +871,9 @@ def get_model(self, model_index: int) -> PandasPdb: biopandas_structure.label_models() if "ATOM" in biopandas_structure.df.keys(): - biopandas_structure.df["ATOM"] = biopandas_structure.df["ATOM"].loc[biopandas_structure.df["ATOM"]["model_id"] == model_index] + biopandas_structure.df["ATOM"] = biopandas_structure.df["ATOM"].loc[ + biopandas_structure.df["ATOM"]["model_id"] == model_index + ] if "HETATM" in biopandas_structure.df.keys(): biopandas_structure.df["HETATM"] = biopandas_structure.df["HETATM"].loc[ biopandas_structure.df["HETATM"]["model_id"] == model_index @@ -877,15 +903,24 @@ def get_models(self, model_indices: List[int]) -> PandasPdb: if "ATOM" in biopandas_structure.df.keys(): biopandas_structure.df["ATOM"] = biopandas_structure.df["ATOM"].loc[ - [x in model_indices for x in biopandas_structure.df["ATOM"]["model_id"].tolist()] + [ + x in model_indices + for x in biopandas_structure.df["ATOM"]["model_id"].tolist() + ] ] if "HETATM" in biopandas_structure.df.keys(): biopandas_structure.df["HETATM"] = biopandas_structure.df["HETATM"].loc[ - [x in model_indices for x in biopandas_structure.df["HETATM"]["model_id"].tolist()] + [ + x in model_indices + for x in biopandas_structure.df["HETATM"]["model_id"].tolist() + ] ] if "ANISOU" in biopandas_structure.df.keys(): biopandas_structure.df["ANISOU"] = biopandas_structure.df["ANISOU"].loc[ - [x in model_indices for x in biopandas_structure.df["ANISOU"]["model_id"].tolist()] + [ + x in model_indices + for x in biopandas_structure.df["ANISOU"]["model_id"].tolist() + ] ] return biopandas_structure @@ -906,7 +941,7 @@ def to_pdb_stream(self, records: tuple[str] = ("ATOM", "HETATM")) -> StringIO: df = pd.concat([df[a] for a in records]) if "model_id" in df.columns: df = df.drop(columns=["model_id"]) - df.residue_number = df.residue_number.astype(int) + df["residue_number"] = pd.to_numeric(df.residue_number, errors="coerce") records = [r.strip() for r in list(set(df.record_name))] dfs = {r: df.loc[df.record_name == r] for r in records} @@ -921,8 +956,7 @@ def to_pdb_stream(self, records: tuple[str] = ("ATOM", "HETATM")) -> StringIO: if c in {"x_coord", "y_coord", "z_coord"}: for idx in range(dfs[r][c].values.shape[0]): if len(dfs[r][c].values[idx]) > 8: - dfs[r][c].values[idx] = str( - dfs[r][c].values[idx]).strip() + dfs[r][c].values[idx] = str(dfs[r][c].values[idx]).strip() if c not in {"line_idx", "OUT"}: dfs[r]["OUT"] = dfs[r]["OUT"] + dfs[r][c] @@ -941,7 +975,7 @@ def to_pdb_stream(self, records: tuple[str] = ("ATOM", "HETATM")) -> StringIO: output.seek(0) return output - def gyradius(self, records: tuple[str] = ("ATOM",), decimals: int = 4) -> float: + def gyradius(self, records: tuple[str] = ("ATOM",), decimals: int = 4) -> float: """Compute the Radius of Gyration of a molecule Parameters @@ -958,7 +992,7 @@ def gyradius(self, records: tuple[str] = ("ATOM",), decimals: int = 4) -> float rg : float Radius of Gyration of df in Angstrom - """ + """ if isinstance(records, str): warnings.warn( "Using a string as `records` argument is " @@ -970,16 +1004,19 @@ def gyradius(self, records: tuple[str] = ("ATOM",), decimals: int = 4) -> float records = (records,) if len(records) > 1: - df = pd.concat(objs=[self.df[record][["x_coord", - "y_coord", - "z_coord", - "element_symbol"]] - for record in records]) + df = pd.concat( + objs=[ + self.df[record][["x_coord", "y_coord", "z_coord", "element_symbol"]] + for record in records + ] + ) else: df = self.df[records[0]] coords = df[["x_coord", "y_coord", "z_coord"]].to_numpy() - masses = df["element_symbol"].map(lambda atom: ATOMIC_MASSES.get(atom, 0)).to_numpy() + masses = ( + df["element_symbol"].map(lambda atom: ATOMIC_MASSES.get(atom, 0)).to_numpy() + ) total_mass = masses.sum() center_of_mass = (masses[:, None] * coords).sum(axis=0) / total_mass distances = np.linalg.norm(coords - center_of_mass, axis=1) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 56db5e0..0126fd7 100755 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,17 +1,20 @@ # Release Notes ![](img/logos/3eiy_120.png) -- Supports `mol` files that have empty lines between blocks, (Via [Ruibin Liu](https://github.com/Ruibin-Liu) PR #[140](https://github.com/BioPandas/biopandas/pull/140#)) + +- Supports `mol` files that have empty lines between blocks, (Via [Ruibin Liu](https://github.com/Ruibin-Liu) PR #[140](https://github.com/BioPandas/biopandas/pull/140#)) The CHANGELOG for the current development version is available at [https://github.com/rasbt/biopandas/blob/main/docs/sources/CHANGELOG.md](https://github.com/rasbt/biopandas/blob/main/docs/sources/CHANGELOG.md). -### 0.5.1dev1 (UNRELEASED) - -- Feature: added method to `PandasMmcif` that allow to select by model ids. PR #[145](https://github.com/BioPandas/biopandas/pull/145)) -- Dev: switched testing framework entirely to pytest. Drops nose dependency due to version conflicts with Python 3.12 (`nose`) and 3.8 (`nose`) PR #[146](https://github.com/BioPandas/biopandas/pull/146)) -- Avoid inclusion of test scripts and test data in the PyPI release of the Biopandas library. PR #[148](https://github.com/BioPandas/biopandas/pull/148). Addresses issue [#147](https://github.com/BioPandas/biopandas/issues/147) +### 0.5.1 (01/08/2024) +- Fix: improves support for writing PDBs with `OTHERS` records to stream. PR [#149](https://github.com/BioPandas/biopandas/pull/149). Addresses issue [#141](https://github.com/BioPandas/biopandas/issues/141). +- Feature: added method to `PandasMmcif` that allow to select by model ids. PR #[145](https://github.com/BioPandas/biopandas/pull/145) +- Dev: switched testing framework entirely to pytest. Drops nose dependency due to version conflicts with Python 3.12 (`nose`) and 3.8 (`nose`) PR #[146](https://github.com/BioPandas/biopandas/pull/146) +- Dev: adds GitHub actions-based CI. PR [#149](https://github.com/BioPandas/biopandas/pull/149). +- Avoid inclusion of test scripts and test data in the PyPI release of the Biopandas library. PR #[148](https://github.com/BioPandas/biopandas/pull/148). Addresses issue [#147](https://github.com/BioPandas/biopandas/issues/147) ### 0.5.0dev1 (31/7/2023) + - Implement add_remark for PandasPdb, (Via [Anton Bushuiev](https://github.com/anton-bushuiev) PR #[129](https://github.com/BioPandas/biopandas/pull/129)) - B_factor shifting one white space issue fix. (Via [Zehra Sarica](https://github.com/zehraacarsarica), PR #[134](https://github.com/BioPandas/biopandas/pull/134)) - Adds support for pathlib. (Via [Anton Bushuiev](https://github.com/anton-bushuiev), PR #[128](https://github.com/BioPandas/biopandas/pull/128)) @@ -20,7 +23,6 @@ The CHANGELOG for the current development version is available at - Improves reliability of parsing multicharacter chain IDs from MMTF files. (Via [Arian Jamasb](https://github.com/a-r-j), PR #[123](https://github.com/rasbt/biopandas/pull/123/files)) - Replaces null terminator chars in parsed MMTF dataframe with the empty string. (Via [Arian Jamasb](https://github.com/a-r-j), PR #[123](https://github.com/rasbt/biopandas/pull/123/files)) - ### 0.5.0dev0 (3/4/2023) ##### Downloads @@ -37,6 +39,7 @@ The CHANGELOG for the current development version is available at - Adds support for parsing [MMTF](https://mmtf.rcsb.org/) files. (via [Arian Jamasb](https://github.com/a-r-j), PR #[111](https://github.com/rasbt/biopandas/pull/111/files)) - Adds primitive functions for parsing PDB, mmCIF, and MMTF into dataframes. (via [Arian Jamasb](https://github.com/a-r-j), PR #[111](https://github.com/rasbt/biopandas/pull/111/files)) - Added support for [AlphaFolds 200M+ structures](https://www.deepmind.com/blog/alphafold-reveals-the-structure-of-the-protein-universe) via `PandasMmcif().fetch_mmcif(uniprot_id='Q5VSL9', source='alphafold2-v3')` and `PandasPdb().fetch_pdb(uniprot_id='Q5VSL9', source='alphafold2-v3')`. (Via [Arian Jamasb](https://github.com/a-r-j), PR #[102](https://github.com/rasbt/biopandas/pull/102/files)) + ##### Bug Fixes - Fix the `return` statement in `PandasPdb.to_pdb_stream()` to return `output` instead of `output.seek(0)`. (via [goniochromatic](https://github.com/github.com/goniochromatic/), PR #[116](https://github.com/rasbt/biopandas/pull/116/files)) @@ -44,7 +47,6 @@ The CHANGELOG for the current development version is available at - Fix some typos and general style issues. (via [goniochromatic](https://github.com/github.com/goniochromatic/), PR #[116](https://github.com/rasbt/biopandas/pull/116/files)) - Fix link for "How to contribute" in `README.md`. (via [goniochromatic](https://github.com/github.com/goniochromatic/), PR #[116](https://github.com/rasbt/biopandas/pull/116/files)) - ### 0.4.1 (05-13-2022) ##### Downloads @@ -56,7 +58,6 @@ The CHANGELOG for the current development version is available at - Remove walrus operator for Python 3.7 compatibility. - ### 0.4.0 (05-11-2022) ##### Downloads @@ -108,7 +109,7 @@ The CHANGELOG for the current development version is available at ##### New Features - - - +- - ##### Changes @@ -117,7 +118,7 @@ The CHANGELOG for the current development version is available at ##### Bug Fixes - - - +- - ### 0.2.8 (03-30-2021) @@ -136,7 +137,7 @@ The CHANGELOG for the current development version is available at ##### Bug Fixes - - - +- - ### 0.2.7 (08-04-2020) @@ -147,11 +148,11 @@ The CHANGELOG for the current development version is available at ##### New Features - - - +- - ##### Changes - - - +- - ##### Bug Fixes @@ -166,7 +167,7 @@ The CHANGELOG for the current development version is available at ##### New Features - - - +- - ##### Changes @@ -175,7 +176,7 @@ The CHANGELOG for the current development version is available at ##### Bug Fixes - - - +- - ### 0.2.5 (07-09-2019) @@ -186,11 +187,11 @@ The CHANGELOG for the current development version is available at ##### New Features - - - +- - ##### Changes - - - +- - ##### Bug Fixes @@ -205,7 +206,7 @@ The CHANGELOG for the current development version is available at ##### New Features - - - +- - ##### Changes @@ -213,7 +214,7 @@ The CHANGELOG for the current development version is available at ##### Bug Fixes - - - +- - ### 0.2.3 (03-29-2018) @@ -224,7 +225,7 @@ The CHANGELOG for the current development version is available at ##### New Features - - - +- - ##### Changes @@ -247,7 +248,7 @@ The CHANGELOG for the current development version is available at ##### New Features - - - +- - ##### Changes @@ -267,7 +268,7 @@ The CHANGELOG for the current development version is available at ##### New Features - - - +- - ##### Changes @@ -299,7 +300,7 @@ The CHANGELOG for the current development version is available at ##### Bug Fixes - - - +- - ### 0.1.5 (2016-11-19) @@ -321,7 +322,7 @@ The CHANGELOG for the current development version is available at ##### Bug Fixes - - - +- - ### 0.1.4 (2015-11-24) diff --git a/tests/mmcif/data/__init__.py b/tests/mmcif/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/mmcif/test_read_mmcif.py b/tests/mmcif/test_read_mmcif.py index e2a907b..a9888ef 100644 --- a/tests/mmcif/test_read_mmcif.py +++ b/tests/mmcif/test_read_mmcif.py @@ -11,6 +11,7 @@ import importlib.resources as pkg_resources else: import importlib_resources as pkg_resources + from pathlib import Path from urllib.error import HTTPError @@ -97,7 +98,6 @@ def test__read_pdb(): """Test private _read_pdb""" ppdb = PandasMmcif() _, txt = ppdb._read_mmcif(TESTDATA_FILENAME) - print(txt) assert txt == three_eiy @@ -336,9 +336,7 @@ def test_mmcif_pdb_conversion(): ) assert_frame_equal( pdb.df["HETATM"].drop(columns=["line_idx"]), - mmcif_pdb.df["HETATM"] - .drop(columns=["line_idx"]) - .reset_index(drop=True), + mmcif_pdb.df["HETATM"].drop(columns=["line_idx"]).reset_index(drop=True), ) # single chain test @@ -352,7 +350,5 @@ def test_mmcif_pdb_conversion(): ) assert_frame_equal( pdb.df["HETATM"].drop(columns=["line_idx"]), - mmcif_pdb.df["HETATM"] - .drop(columns=["line_idx"]) - .reset_index(drop=True), + mmcif_pdb.df["HETATM"].drop(columns=["line_idx"]).reset_index(drop=True), ) diff --git a/tests/mmtf/data/__init__.py b/tests/mmtf/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/mmtf/test_write_mmtf.py b/tests/mmtf/test_write_mmtf.py index f516f6c..0aa930b 100644 --- a/tests/mmtf/test_write_mmtf.py +++ b/tests/mmtf/test_write_mmtf.py @@ -20,7 +20,6 @@ def test_write_mmtf_bp(): "1rcf", ] for pdb in PDB_CODES: - print(pdb) pm1 = PandasMmtf().fetch_mmtf(pdb) pm1.to_mmtf("test.mmtf") assert os.path.exists("test.mmtf") @@ -51,7 +50,6 @@ def test_write_mmtf(): "1rcf", ] for pdb in PDB_CODES: - print(pdb) pm1 = PandasMmtf().fetch_mmtf(pdb) write_mmtf(pd.concat([pm1.df["ATOM"], pm1.df["HETATM"]]), "test.mmtf") assert os.path.exists("test.mmtf") diff --git a/tests/mol2/data/__init__.py b/tests/mol2/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/pdb/data/__init__.py b/tests/pdb/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/pdb/test_amino3to1.py b/tests/pdb/test_amino3to1.py index 276e2e3..4691906 100644 --- a/tests/pdb/test_amino3to1.py +++ b/tests/pdb/test_amino3to1.py @@ -159,7 +159,6 @@ def test_sameindex(): TESTDATA_1t48 = str(TEST_DATA.joinpath("1t48_995.pdb")) p1t48 = PandasPdb() p1t48.read_pdb(TESTDATA_1t48) - print(p1t48) p1t48.df["ATOM"].index = np.zeros(p1t48.df["ATOM"].shape[0], dtype=int) expect_res = [ @@ -490,12 +489,8 @@ def test_multichain(): expect_chain = ["A" for _ in range(88)] + ["B" for _ in range(94)] got_chain = list(transl["chain_id"].values) - got_res_a = list( - transl.loc[transl["chain_id"] == "A", "residue_name"].values - ) - got_res_b = list( - transl.loc[transl["chain_id"] == "B", "residue_name"].values - ) + got_res_a = list(transl.loc[transl["chain_id"] == "A", "residue_name"].values) + got_res_b = list(transl.loc[transl["chain_id"] == "B", "residue_name"].values) assert expect_chain == got_chain assert expect_res_a == got_res_a diff --git a/tests/pdb/test_read_pdb.py b/tests/pdb/test_read_pdb.py index abb5cdc..4462051 100644 --- a/tests/pdb/test_read_pdb.py +++ b/tests/pdb/test_read_pdb.py @@ -11,6 +11,7 @@ import importlib.resources as pkg_resources else: import importlib_resources as pkg_resources + from urllib.error import HTTPError import numpy as np @@ -94,8 +95,7 @@ def test__read_pdb(): """Test private _read_pdb""" ppdb = PandasPdb() - path, txt = ppdb._read_pdb(TESTDATA_FILENAME) - print(txt) + _, txt = ppdb._read_pdb(TESTDATA_FILENAME) assert txt == three_eiy @@ -104,8 +104,7 @@ def test__read_pdb_raises(): Test if ValueError is raised for wrong file formats.""" expect = ( - "Wrong file format; allowed file formats are " - ".pdb, .pdb.gz, .ent, .ent.gz" + "Wrong file format; allowed file formats are " ".pdb, .pdb.gz, .ent, .ent.gz" ) def run_code_1(): diff --git a/tests/pdb/test_write_pdb.py b/tests/pdb/test_write_pdb.py index f16e5ad..7fcc39b 100644 --- a/tests/pdb/test_write_pdb.py +++ b/tests/pdb/test_write_pdb.py @@ -10,6 +10,7 @@ import importlib.resources as pkg_resources else: import importlib_resources as pkg_resources + import os import warnings @@ -51,9 +52,7 @@ def test_defaults(): def test_nonexpected_column(): ppdb = PandasPdb() ppdb.read_pdb(TESTDATA_FILENAME) - ppdb.df["HETATM"]["test"] = pd.Series( - "test", index=ppdb.df["HETATM"].index - ) + ppdb.df["HETATM"]["test"] = pd.Series("test", index=ppdb.df["HETATM"].index) with warnings.catch_warnings(record=True) as w: ppdb.to_pdb(path=OUTFILE, records=["HETATM"]) with open(OUTFILE, "r") as f: @@ -159,6 +158,20 @@ def test_b_factor_shift(): assert tmp_df[ tmp_df["element_symbol"].isnull() | (tmp_df["element_symbol"] == "") ].empty - assert not tmp_df[ - tmp_df["blank_4"].isnull() | (tmp_df["blank_4"] == "") - ].empty + assert not tmp_df[tmp_df["blank_4"].isnull() | (tmp_df["blank_4"] == "")].empty + + +def test_to_pdb_stream(): + """Test public write_pdb_stream""" + ppdb = PandasPdb() + ppdb.read_pdb(TESTDATA_FILENAME) + stream = ppdb.to_pdb_stream() + + lines_to_check = open(TESTDATA_FILENAME).read().split("\n") + lines_to_check = [ + line for line in lines_to_check if line.startswith(("ATOM", "HETATM")) + ] + lines_to_check.append("") + + source_pdb = "\n".join(lines_to_check) + assert stream.read() == source_pdb