Skip to content

Commit

Permalink
Merge pull request #18 from Zsailer/clean-api
Browse files Browse the repository at this point in the history
clean api
  • Loading branch information
Zsailer authored Apr 25, 2018
2 parents 7c7ab36 + ffbfb35 commit 0b05124
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 182 deletions.
2 changes: 1 addition & 1 deletion phylopandas/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.5.0'
__version__ = '0.6.0'
82 changes: 18 additions & 64 deletions phylopandas/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,55 +2,29 @@
from pandas_flavor import register_dataframe_accessor, register_series_accessor

from functools import wraps
from .seqio.write import _write_method
from . import seqio
from . import treeio


def verify_phylopandas_function(f):
    """Decorate ``f`` so it only runs on objects exposing a ``phylo`` attribute.

    Parameters
    ----------
    f : callable
        Function whose first positional argument is the data object.

    Returns
    -------
    callable
        Wrapper that checks the first argument and then calls ``f`` with
        the original arguments.

    Raises
    ------
    Exception
        If the first argument has no ``phylo`` attribute.
    """
    @wraps(f)
    def inner(data, *args, **kwargs):
        # Sanity check.
        if not hasattr(data, 'phylo'):
            raise Exception("Object is not a PhyloPandas dataframe.")
        # Forward the checked object and unpack the remaining arguments;
        # passing `args`/`kwargs` as two positional values would drop `data`.
        return f(data, *args, **kwargs)
    return inner


@register_series_accessor('phylo')
class PhyloPandasSeriesMethods(object):
    """Methods exposed under the ``phylo`` accessor of a Series.

    The wrapped object is stored on ``self._data``. Each ``to_*`` writer
    below is built by ``_write_method`` for one sequence schema.
    """
    def __init__(self, data):
        self._data = data

    # -----------------------------------------------------------
    # Extra read/write methods.
    # -----------------------------------------------------------

    # The hand-written ``to_*`` wrappers that used to precede these
    # assignments were rebound here anyway, so only the generated
    # methods are kept.
    to_fasta = _write_method('fasta')
    to_phylip = _write_method('phylip')
    to_clustal = _write_method('clustal')
    to_embl = _write_method('embl')
    to_nexus = _write_method('nexus')
    to_swiss = _write_method('swiss')
    to_fastq = _write_method('fastq')


@register_dataframe_accessor('phylo')
Expand All @@ -67,33 +41,13 @@ def __init__(self, data):
# Extra read/write methods.
# -----------------------------------------------------------

@wraps(seqio.write.to_fasta)
def to_fasta(self, *args, **kwargs):
return seqio.write.to_fasta(self._data, *args, **kwargs)

@wraps(seqio.write.to_phylip)
def to_phylip(self, *args, **kwargs):
return seqio.write.to_phylip(self._data, *args, **kwargs)

@wraps(seqio.write.to_clustal)
def to_clustal(self, *args, **kwargs):
return seqio.write.to_clustal(self._data, *args, **kwargs)

@wraps(seqio.write.to_embl)
def to_embl(self, *args, **kwargs):
return seqio.write.to_embl(self._data, *args, **kwargs)

@wraps(seqio.write.to_swiss)
def to_swiss(self, *args, **kwargs):
return seqio.write.to_swiss(self._data, *args, **kwargs)

@wraps(seqio.write.to_nexus)
def to_nexus(self, *args, **kwargs):
return seqio.write.to_nexus(self._data, *args, **kwargs)

@wraps(seqio.write.to_fastq)
def to_fastq(self, *args, **kwargs):
return seqio.write.to_fastq(self._data, *args, **kwargs)
to_fasta = _write_method('fasta')
to_phylip = _write_method('phylip')
to_clustal = _write_method('clustal')
to_embl = _write_method('embl')
to_nexus = _write_method('nexus')
to_swiss = _write_method('swiss')
to_fastq = _write_method('fastq')

# -----------------------------------------------------------
# Useful dataframe methods specific to sequencing data.
Expand Down
98 changes: 56 additions & 42 deletions phylopandas/seqio/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,32 @@
import pandas as pd


def _read(filename, schema, seq_label='sequence', alphabet=None, **kwargs):
def _read_doc_template(schema):
s = """Read a {} file.
Construct a PhyloPandas DataFrame with columns:
- name
- id
- description
- sequence
Parameters
----------
filename : str
File name of {} file.
seq_label : str (default='sequence')
Sequence column name in DataFrame.
""".format(schema, schema, schema)
return s


def _read(
filename,
schema,
seq_label='sequence',
alphabet=None,
**kwargs):
"""Use BioPython's sequence parsing module to convert any file format to
a Pandas DataFrame.
Expand All @@ -26,8 +51,10 @@ def _read(filename, schema, seq_label='sequence', alphabet=None, **kwargs):
# Check Alphabet if given
if alphabet is None:
alphabet = Bio.Alphabet.Alphabet()

elif alphabet in ['dna', 'rna', 'protein', 'nucleotide']:
alphabet = getattr(Bio.Alphabet, 'generic_{}'.format(alphabet))

else:
raise Exception(
"The alphabet is not recognized. Must be 'dna', 'rna', "
Expand All @@ -46,49 +73,36 @@ def _read(filename, schema, seq_label='sequence', alphabet=None, **kwargs):
data['name'].append(s.name)

# Port to DataFrame.
return data


def read_fasta(filename, **kwargs):
    """Parse a fasta file and return its records as a DataFrame."""
    return pd.DataFrame(_read(filename, schema='fasta', **kwargs))


def read_phylip(filename, **kwargs):
    """Parse a phylip file and return its records as a DataFrame."""
    return pd.DataFrame(_read(filename, schema='phylip', **kwargs))


def read_clustal(filename, **kwargs):
    """Parse a clustal file and return its records as a DataFrame."""
    return pd.DataFrame(_read(filename, schema='clustal', **kwargs))


def read_embl(filename, **kwargs):
    """Parse an EMBL flat file and return its records as a DataFrame."""
    return pd.DataFrame(_read(filename, schema='embl', **kwargs))


def read_nexus(filename, **kwargs):
    """Parse a nexus file and return its records as a DataFrame."""
    return pd.DataFrame(_read(filename, schema='nexus', **kwargs))


def read_swiss(filename, **kwargs):
    """Read Swiss-Prot aka UniProt format.

    Parameters
    ----------
    filename : str
        File to read.
    **kwargs
        Extra keyword arguments forwarded to ``_read``.

    Returns
    -------
    pandas.DataFrame
        DataFrame built from the data returned by ``_read``.
    """
    # The schema must be 'swiss'; 'nexus' here parsed the file with the
    # wrong format.
    data = _read(filename, schema='swiss', **kwargs)
    return pd.DataFrame(data)


def read_fastq(filename, **kwargs):
    """Parse a FASTQ file and return its records as a DataFrame."""
    return pd.DataFrame(_read(filename, schema='fastq', **kwargs))
def _read_function(schema):
    """Build a reader function for the named schema.

    Parameters
    ----------
    schema : str
        Format name forwarded to ``_read`` (e.g. ``'fasta'``).

    Returns
    -------
    callable
        Function with signature ``(filename, seq_label='sequence',
        alphabet=None, **kwargs)`` that returns whatever ``_read`` returns.
    """
    def func(
            filename,
            seq_label='sequence',
            alphabet=None,
            **kwargs):
        # Delegate to the generic reader with the schema fixed.
        return _read(
            filename=filename,
            schema=schema,
            seq_label=seq_label,
            alphabet=alphabet,
            **kwargs
        )
    # Give the generated function a schema-specific name and docstring so
    # help() and introspection do not report the generic name ``func``.
    func.__name__ = 'read_{}'.format(schema)
    func.__doc__ = _read_doc_template(schema)
    return func

# Public reader functions, one per supported schema. Each is generated by
# `_read_function` with the schema name fixed.
read_fasta = _read_function('fasta')
read_phylip = _read_function('phylip')
read_clustal = _read_function('clustal')
read_embl = _read_function('embl')
read_nexus = _read_function('nexus')
read_swiss = _read_function('swiss')
read_fastq = _read_function('fastq')


def read_blast_xml(filename, **kwargs):
Expand Down
135 changes: 69 additions & 66 deletions phylopandas/seqio/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,24 @@
import Bio.Alphabet


def _seqio_doc_template(schema):
def _write_doc_template(schema):
s = """Write to {} format.
Parameters
----------
filename : str
File to write {} string to. If no filename is given, a fasta string
File to write {} string to. If no filename is given, a {} string
will be returned.
sequence_col : str (default='sequence')
Sequence column name in DataFrame.
""".format(schema, schema)
id_col : str (default='id')
ID column name in DataFrame
id_only : bool (default=False)
If True, use only the ID column to label sequences in fasta.
""".format(schema, schema, schema)
return s


Expand Down Expand Up @@ -114,69 +121,65 @@ def _write(
else:
return "".join([s.format(schema) for s in seq_records])

def _write_method(schema):
    """Build a ``to_<schema>`` method for an accessor class.

    The returned function takes ``self`` and writes ``self._data``, so it
    is meant to be assigned as a class attribute.

    Parameters
    ----------
    schema : str
        Format name forwarded to ``_write`` (e.g. ``'fasta'``).

    Returns
    -------
    callable
        Method with signature ``(self, filename=None,
        sequence_col='sequence', id_col='id', id_only=False,
        alphabet=None, **kwargs)`` that returns whatever ``_write`` returns.
    """
    def method(
            self,
            filename=None,
            sequence_col='sequence',
            id_col='id',
            id_only=False,
            alphabet=None,
            **kwargs):
        # Write the accessor's wrapped data with the schema fixed.
        return _write(
            self._data,
            filename=filename,
            schema=schema,
            sequence_col=sequence_col,
            id_col=id_col,
            id_only=id_only,
            alphabet=alphabet,
            **kwargs
        )
    # Give the generated method a schema-specific name and docstring so
    # help() and introspection do not report the generic name ``method``.
    method.__name__ = 'to_{}'.format(schema)
    method.__doc__ = _write_doc_template(schema)
    return method

def to_fasta(df, filename=None, sequence_col='sequence',
id_col='id', id_only=False, alphabet=None, **kwargs):
"""Write to fasta format.

Parameters
----------
filename : str
File to write fasta string to. If no filename is given, a fasta string
will be returned.
sequence_col : str (default='sequence')
Sequence column name in DataFrame.
id_col : str (default='id')
ID column name in DataFrame
id_only : bool (default=False)
If True, use only the ID column to label sequences in fasta.
def _write_function(schema):
"""Add a write method for named schema to a class.
"""
return _write(df, filename=filename, schema='fasta',
sequence_col=sequence_col, id_col=id_col, id_only=id_only,
alphabet=None, **kwargs)


def to_phylip(df, filename=None, sequence_col='sequence',
              id_col='id', alphabet=None, **kwargs):
    """Write to phylip format.

    Parameters
    ----------
    df : object
        Data to write; passed as the first argument to ``_write``.
    filename : str
        File to write phylip string to. If no filename is given, a phylip
        string will be returned.
    sequence_col : str (default='sequence')
        Sequence column name in DataFrame.
    id_col : str (default='id')
        ID column name in DataFrame.
    alphabet : (default=None)
        Forwarded unchanged to ``_write``.
    """
    # `id_only` is always True for this format. The caller's `alphabet` is
    # now forwarded instead of being replaced by None.
    return _write(df, filename=filename, schema='phylip',
                  sequence_col=sequence_col, id_col=id_col, id_only=True,
                  alphabet=alphabet, **kwargs)


def to_clustal(df, filename=None, sequence_col='sequence',
               id_col='id', alphabet=None, **kwargs):
    """Write to clustal format.

    Parameters
    ----------
    df : object
        Data to write; passed as the first argument to ``_write``.
    filename : str
        File to write clustal string to. If no filename is given, a clustal
        string will be returned.
    sequence_col : str (default='sequence')
        Sequence column name in DataFrame.
    id_col : str (default='id')
        ID column name in DataFrame.
    alphabet : (default=None)
        Forwarded unchanged to ``_write``.
    """
    # `id_only` is always True for this format. The caller's `alphabet` is
    # now forwarded instead of being replaced by None.
    return _write(df, filename=filename, schema='clustal',
                  sequence_col=sequence_col, id_col=id_col, id_only=True,
                  alphabet=alphabet, **kwargs)

def to_embl(df, alphabet, filename=None, sequence_col='sequence',
            id_col='id', **kwargs):
    """Write to embl format.

    Parameters
    ----------
    df : object
        Data to write; passed as the first argument to ``_write``.
    alphabet :
        Required; forwarded unchanged to ``_write``.
    filename : str
        File to write embl string to. If no filename is given, an embl
        string will be returned.
    sequence_col : str (default='sequence')
        Sequence column name in DataFrame.
    id_col : str (default='id')
        ID column name in DataFrame.
    """
    # A real docstring replaces the former local `__doc__ = ...` assignment,
    # which never reached the function object.
    return _write(df, filename=filename, schema='embl',
                  sequence_col=sequence_col, id_col=id_col, id_only=True,
                  alphabet=alphabet, **kwargs)


def to_nexus(df, alphabet, filename=None, sequence_col='sequence',
             id_col='id', id_only=False, **kwargs):
    """Write to nexus format.

    Parameters
    ----------
    df : object
        Data to write; passed as the first argument to ``_write``.
    alphabet :
        Required; forwarded unchanged to ``_write``.
    filename : str
        File to write nexus string to. If no filename is given, a nexus
        string will be returned.
    sequence_col : str (default='sequence')
        Sequence column name in DataFrame.
    id_col : str (default='id')
        ID column name in DataFrame.
    id_only : bool (default=False)
        Accepted for signature compatibility but not used; ``_write`` is
        always called with ``id_only=True``.
    """
    # The caller's `id_col` is now forwarded instead of the literal 'id'.
    # NOTE(review): `id_only` stays forced to True to keep existing output;
    # confirm whether the parameter was meant to be honoured.
    return _write(df, alphabet=alphabet, filename=filename, schema='nexus',
                  sequence_col=sequence_col, id_col=id_col,
                  id_only=True, **kwargs)


def to_swiss(df, filename=None, sequence_col='sequence',
             id_col='id', id_only=False, alphabet=None, **kwargs):
    """Write to swiss format.

    Parameters
    ----------
    df : object
        Data to write; passed as the first argument to ``_write``.
    filename : str
        File to write swiss string to. If no filename is given, a swiss
        string will be returned.
    sequence_col : str (default='sequence')
        Sequence column name in DataFrame.
    id_col : str (default='id')
        ID column name in DataFrame.
    id_only : bool (default=False)
        Accepted for signature compatibility but not used; ``_write`` is
        always called with ``id_only=True``.
    alphabet : (default=None)
        Forwarded unchanged to ``_write``.
    """
    # The caller's `id_col` is now forwarded instead of the literal 'id'.
    # NOTE(review): `id_only` stays forced to True to keep existing output;
    # confirm whether the parameter was meant to be honoured.
    return _write(df, alphabet=alphabet, filename=filename, schema='swiss',
                  sequence_col=sequence_col, id_col=id_col, id_only=True,
                  **kwargs)


def to_fastq(df, filename=None, sequence_col='sequence',
             id_col='id', id_only=False, alphabet=None, **kwargs):
    """Write to fastq format.

    Parameters
    ----------
    df : object
        Data to write; passed as the first argument to ``_write``.
    filename : str
        File to write fastq string to. If no filename is given, a fastq
        string will be returned.
    sequence_col : str (default='sequence')
        Sequence column name in DataFrame.
    id_col : str (default='id')
        ID column name in DataFrame.
    id_only : bool (default=False)
        Accepted for signature compatibility but not used; ``_write`` is
        always called with ``id_only=True``.
    alphabet : (default=None)
        Forwarded unchanged to ``_write``.
    """
    # The caller's `id_col` and `alphabet` are now forwarded instead of the
    # literals 'id' and None.
    # NOTE(review): `id_only` stays forced to True to keep existing output;
    # confirm whether the parameter was meant to be honoured.
    return _write(df, filename=filename, schema='fastq',
                  sequence_col=sequence_col, id_col=id_col, id_only=True,
                  alphabet=alphabet, **kwargs)
def func(
data,
filename=None,
sequence_col='sequence',
id_col='id',
id_only=False,
alphabet=None,
**kwargs):
# Use generic write class to write data.
return _write(
data,
filename=filename,
schema=schema,
sequence_col=sequence_col,
id_col=id_col,
id_only=id_only,
alphabet=alphabet,
**kwargs
)
# Update docs
func.__doc__ = _write_doc_template(schema)
return func


# Public writer functions, one per supported schema. Each is generated by
# `_write_function` with the schema name fixed.
to_fasta = _write_function('fasta')
to_phylip = _write_function('phylip')
to_clustal = _write_function('clustal')
to_embl = _write_function('embl')
to_nexus = _write_function('nexus')
to_swiss = _write_function('swiss')
to_fastq = _write_function('fastq')
Loading

0 comments on commit 0b05124

Please sign in to comment.