Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/aptanet_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"id": "3737da88",
"metadata": {},
"outputs": [],
Expand All @@ -49,7 +49,7 @@
"import torch\n",
"\n",
"from pyaptamer.datasets import load_1gnh_structure\n",
"from pyaptamer.utils.struct_to_aaseq import struct_to_aaseq"
"from pyaptamer.utils import struct_to_aaseq"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion pyaptamer/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pyaptamer.datasets._loaders._one_gnh import load_1gnh_structure
from pyaptamer.datasets._loaders._online_databank import load_from_rcsb
from pyaptamer.datasets._loaders._pfoa_loader import load_pfoa_structure
from pyaptamer.datasets._loaders._pfoa import load_pfoa_structure

__all__ = [
"load_pfoa_structure",
Expand Down
2 changes: 1 addition & 1 deletion pyaptamer/datasets/_loaders/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Loaders for different data structures."""

from pyaptamer.datasets._loaders._one_gnh import load_1gnh_structure
from pyaptamer.datasets._loaders._pfoa_loader import load_pfoa_structure
from pyaptamer.datasets._loaders._pfoa import load_pfoa_structure

__all__ = ["load_pfoa_structure", "load_1gnh_structure"]
2 changes: 1 addition & 1 deletion pyaptamer/datasets/_loaders/_online_databank.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from Bio.PDB import PDBList

from pyaptamer.utils.pdb_to_struct import pdb_to_struct
from pyaptamer.utils import pdb_to_struct


def load_from_rcsb(pdb_id, overwrite=False):
Expand Down
6 changes: 4 additions & 2 deletions pyaptamer/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
"rna2vec",
"pdb_to_struct",
"struct_to_aaseq",
"pdb_to_aaseq",
]

from pyaptamer.utils._pdb_to_aaseq import pdb_to_aaseq
from pyaptamer.utils._pdb_to_struct import pdb_to_struct
from pyaptamer.utils._rna import (
dna2rna,
encode_rna,
generate_all_aptamer_triplets,
rna2vec,
)
from pyaptamer.utils.pdb_to_struct import pdb_to_struct
from pyaptamer.utils.struct_to_aaseq import struct_to_aaseq
from pyaptamer.utils._struct_to_aaseq import struct_to_aaseq
49 changes: 49 additions & 0 deletions pyaptamer/utils/_pdb_to_aaseq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
__author__ = "satvshr"
__all__ = ["pdb_to_aaseq"]

import os

import pandas as pd
from Bio import SeqIO


def pdb_to_aaseq(pdb_file_path, return_type="list"):
    """
    Extract amino-acid sequences (SEQRES) from a PDB file.

    Parameters
    ----------
    pdb_file_path : str or os.PathLike
        Path to a PDB file.
    return_type : {'list', 'pd.df'}, optional, default='list'
        Format of the returned value:

        - ``'list'`` : return a Python list of sequence strings (one per
          SEQRES record yielded by the parser).
        - ``'pd.df'`` : return a pandas.DataFrame with a single column
          ``'sequence'``, indexed by the ``id`` of each parsed record
          (index name ``'chain'``).

    Returns
    -------
    list of str or pandas.DataFrame
        Depending on ``return_type``. If the parser yields no records, the
        result is an empty list or an empty DataFrame respectively.

    Raises
    ------
    ValueError
        If ``return_type`` is neither ``'list'`` nor ``'pd.df'``.
    """
    # Validate first: previously an unsupported value fell through both
    # branches below and silently returned None after parsing the file.
    if return_type not in ("list", "pd.df"):
        raise ValueError(
            f"Unsupported return_type {return_type!r}; expected 'list' or 'pd.df'."
        )

    pdb_path = os.fspath(pdb_file_path)
    sequences = []
    chains = []

    with open(pdb_path) as handle:
        for record in SeqIO.parse(handle, "pdb-seqres"):
            sequences.append(str(record.seq))
            # NOTE(review): the id is whatever Biopython assigns to the
            # record (may include a PDB-id prefix) -- confirm if a bare
            # chain letter is required. Falls back to None if absent.
            chains.append(getattr(record, "id", None))

    if return_type == "list":
        return sequences

    # Only 'pd.df' can reach this point thanks to the validation above.
    df = pd.DataFrame({"chain": chains, "sequence": sequences})
    return df.set_index("chain")
File renamed without changes.
File renamed without changes.
32 changes: 32 additions & 0 deletions pyaptamer/utils/tests/test_pdb_to_aaseq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
__author__ = "satvshr"

import os

from pyaptamer.utils import pdb_to_aaseq


def test_pdb_to_aaseq():
    """
    Check `pdb_to_aaseq` on the bundled ``1gnh.pdb`` file.

    The default call must give a non-empty list of non-empty strings, and the
    ``return_type="pd.df"`` call must give a non-empty DataFrame whose
    ``'sequence'`` column holds non-empty strings.
    """
    tests_dir = os.path.dirname(__file__)
    pdb_file_path = os.path.join(
        tests_dir, "..", "..", "datasets", "data", "1gnh.pdb"
    )

    # Default return type: plain list of sequence strings.
    as_list = pdb_to_aaseq(pdb_file_path)
    assert isinstance(as_list, list), "pdb_to_aaseq should return a list"
    assert len(as_list) > 0, "Returned list should not be empty"
    for entry in as_list:
        assert isinstance(entry, str), "Each entry should be a string"
        assert len(entry) > 0, "Each sequence string should not be empty"

    # DataFrame return type: same sequences in a 'sequence' column.
    as_frame = pdb_to_aaseq(pdb_file_path, return_type="pd.df")
    assert not as_frame.empty, "Returned DataFrame should not be empty"
    assert "sequence" in as_frame.columns, "DataFrame should have a 'sequence' column"
    for entry in as_frame["sequence"]:
        assert isinstance(entry, str), "Each entry should be a string"
        assert len(entry) > 0, "Each sequence string should not be empty"
2 changes: 1 addition & 1 deletion pyaptamer/utils/tests/test_pdb_to_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import os

from pyaptamer.utils.pdb_to_struct import pdb_to_struct
from pyaptamer.utils import pdb_to_struct


def test_pdb_to_struct():
Expand Down
2 changes: 1 addition & 1 deletion pyaptamer/utils/tests/test_struct_to_aaseq.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
__author__ = "satvshr"

from pyaptamer.datasets import load_1gnh_structure
from pyaptamer.utils.struct_to_aaseq import struct_to_aaseq
from pyaptamer.utils import struct_to_aaseq


def test_struct_to_aaseq():
Expand Down
Loading