Merge pull request #18 from Zsailer/clean-api

clean api
Zsailer · Apr 25, 2018 · 0b05124 · 0b05124
2 parents 7c7ab36 + ffbfb35
commit 0b05124
Show file tree

Hide file tree

Showing 5 changed files with 145 additions and 182 deletions.
diff --git a/phylopandas/__version__.py b/phylopandas/__version__.py
@@ -1 +1 @@
-__version__ = '0.5.0'
+__version__ = '0.6.0'
diff --git a/phylopandas/core.py b/phylopandas/core.py
@@ -2,55 +2,29 @@
 from pandas_flavor import register_dataframe_accessor, register_series_accessor
 
 from functools import wraps
+from .seqio.write import _write_method
 from . import seqio
 from . import treeio
 
 
-def verify_phylopandas_function(f):
-    """"""
-    @wraps(f)
-    def inner(data, *args, **kwargs):
-        # Sanity check.
-        if not hasattr(data, 'phylo'):
-            raise Exception("Object is not a PhyloPandas dataframe.")
-        return f(args, kwargs)
-    return inner
-
-
 @register_series_accessor('phylo')
 class PhyloPandasSeriesMethods(object):
     """
     """
     def __init__(self, data):
         self._data = data
 
-    @wraps(seqio.write.to_fasta)
-    def to_fasta(self, *args, **kwargs):
-        return seqio.write.to_fasta(self._data, *args, **kwargs)
-
-    @wraps(seqio.write.to_phylip)
-    def to_phylip(self, *args, **kwargs):
-        return seqio.write.to_phylip(self._data, *args, **kwargs)
-
-    @wraps(seqio.write.to_clustal)
-    def to_clustal(self, *args, **kwargs):
-        return seqio.write.to_clustal(self._data, *args, **kwargs)
-
-    @wraps(seqio.write.to_embl)
-    def to_embl(self, *args, **kwargs):
-        return seqio.write.to_embl(self._data, *args, **kwargs)
-
-    @wraps(seqio.write.to_swiss)
-    def to_swiss(self, *args, **kwargs):
-        return seqio.write.to_swiss(self._data, *args, **kwargs)
-
-    @wraps(seqio.write.to_nexus)
-    def to_nexus(self, *args, **kwargs):
-        return seqio.write.to_nexus(self._data, *args, **kwargs)
+    # -----------------------------------------------------------
+    # Extra read/write methods.
+    # -----------------------------------------------------------
 
-    @wraps(seqio.write.to_fastq)
-    def to_fastq(self, *args, **kwargs):
-        return seqio.write.to_fastq(self._data, *args, **kwargs)
+    to_fasta = _write_method('fasta')
+    to_phylip = _write_method('phylip')
+    to_clustal = _write_method('clustal')
+    to_embl = _write_method('embl')
+    to_nexus = _write_method('nexus')
+    to_swiss = _write_method('swiss')
+    to_fastq = _write_method('fastq')
 
 
 @register_dataframe_accessor('phylo')
@@ -67,33 +41,13 @@ def __init__(self, data):
     # Extra read/write methods.
     # -----------------------------------------------------------
 
-    @wraps(seqio.write.to_fasta)
-    def to_fasta(self, *args, **kwargs):
-        return seqio.write.to_fasta(self._data, *args, **kwargs)
-
-    @wraps(seqio.write.to_phylip)
-    def to_phylip(self, *args, **kwargs):
-        return seqio.write.to_phylip(self._data, *args, **kwargs)
-
-    @wraps(seqio.write.to_clustal)
-    def to_clustal(self, *args, **kwargs):
-        return seqio.write.to_clustal(self._data, *args, **kwargs)
-
-    @wraps(seqio.write.to_embl)
-    def to_embl(self, *args, **kwargs):
-        return seqio.write.to_embl(self._data, *args, **kwargs)
-
-    @wraps(seqio.write.to_swiss)
-    def to_swiss(self, *args, **kwargs):
-        return seqio.write.to_swiss(self._data, *args, **kwargs)
-
-    @wraps(seqio.write.to_nexus)
-    def to_nexus(self, *args, **kwargs):
-        return seqio.write.to_nexus(self._data, *args, **kwargs)
-
-    @wraps(seqio.write.to_fastq)
-    def to_fastq(self, *args, **kwargs):
-        return seqio.write.to_fastq(self._data, *args, **kwargs)
+    to_fasta = _write_method('fasta')
+    to_phylip = _write_method('phylip')
+    to_clustal = _write_method('clustal')
+    to_embl = _write_method('embl')
+    to_nexus = _write_method('nexus')
+    to_swiss = _write_method('swiss')
+    to_fastq = _write_method('fastq')
 
     # -----------------------------------------------------------
     # Useful dataframe methods specific to sequencing data.

diff --git a/phylopandas/seqio/read.py b/phylopandas/seqio/read.py
@@ -13,7 +13,32 @@
 import pandas as pd
 
 
-def _read(filename, schema, seq_label='sequence', alphabet=None, **kwargs):
+def _read_doc_template(schema):
+    s = """Read a {} file.
+
+    Construct a PhyloPandas DataFrame with columns:
+        - name
+        - id
+        - description
+        - sequence
+
+    Parameters
+    ----------
+    filename : str
+        File name of {} file. 
+
+    seq_label : str (default='sequence')
+        Sequence column name in DataFrame.
+    """.format(schema, schema, schema)
+    return s
+
+
+def _read(
+    filename,
+    schema,
+    seq_label='sequence',
+    alphabet=None,
+    **kwargs):
     """Use BioPython's sequence parsing module to convert any file format to
     a Pandas DataFrame.
 
@@ -26,8 +51,10 @@ def _read(filename, schema, seq_label='sequence', alphabet=None, **kwargs):
     # Check Alphabet if given
     if alphabet is None:
         alphabet = Bio.Alphabet.Alphabet()
+
     elif alphabet in ['dna', 'rna', 'protein', 'nucleotide']:
         alphabet = getattr(Bio.Alphabet, 'generic_{}'.format(alphabet))
+
     else:
         raise Exception(
             "The alphabet is not recognized. Must be 'dna', 'rna', "
@@ -46,49 +73,36 @@ def _read(filename, schema, seq_label='sequence', alphabet=None, **kwargs):
         data['name'].append(s.name)
 
     # Port to DataFrame.
-    return data
-
-
-def read_fasta(filename, **kwargs):
-    """Read fasta format."""
-    data = _read(filename, schema='fasta', **kwargs)
-    return pd.DataFrame(data)
-
-
-def read_phylip(filename, **kwargs):
-    """Read phylip format."""
-    data = _read(filename, schema='phylip', **kwargs)
     return pd.DataFrame(data)
 
-
-def read_clustal(filename, **kwargs):
-    """Read clustal format."""
-    data = _read(filename, schema='clustal', **kwargs)
-    return pd.DataFrame(data)
-
-
-def read_embl(filename, **kwargs):
-    """Read the EMBL flat file format."""
-    data = _read(filename, schema='embl', **kwargs)
-    return pd.DataFrame(data)
-
-
-def read_nexus(filename, **kwargs):
-    """Read the EMBL flat file format."""
-    data = _read(filename, schema='nexus', **kwargs)
-    return pd.DataFrame(data)
-
-
-def read_swiss(filename, **kwargs):
-    """Read Swiss-Prot aka UniProt format."""
-    data = _read(filename, schema='nexus', **kwargs)
-    return pd.DataFrame(data)
-
-
-def read_fastq(filename, **kwargs):
-    """Read FASTQ format."""
-    data = _read(filename, schema='fastq', **kwargs)
-    return pd.DataFrame(data)
+def _read_function(schema):
+    """Create a read function for the named schema.
+    """
+    def func(
+        filename,
+        seq_label='sequence',
+        alphabet=None,
+        **kwargs):
+        # Use the generic _read function to read the data.
+        return _read(
+            filename=filename,
+            schema=schema,
+            seq_label=seq_label,
+            alphabet=alphabet,
+            **kwargs
+        )
+    # Update docs
+    func.__doc__ = _read_doc_template(schema)
+    return func
+
+# Various read functions to various formats.
+read_fasta = _read_function('fasta')
+read_phylip = _read_function('phylip')
+read_clustal = _read_function('clustal')
+read_embl = _read_function('embl')
+read_nexus = _read_function('nexus')
+read_swiss = _read_function('swiss')
+read_fastq = _read_function('fastq')
 
 
 def read_blast_xml(filename, **kwargs):

diff --git a/phylopandas/seqio/write.py b/phylopandas/seqio/write.py
@@ -10,17 +10,24 @@
 import Bio.Alphabet
 
 
-def _seqio_doc_template(schema):
+def _write_doc_template(schema):
     s = """Write to {} format.
 
     Parameters
     ----------
     filename : str
-        File to write {} string to. If no filename is given, a fasta string
+        File to write {} string to. If no filename is given, a {} string
         will be returned.
+
     sequence_col : str (default='sequence')
         Sequence column name in DataFrame.
-    """.format(schema, schema)
+
+    id_col : str (default='id')
+        ID column name in DataFrame
+
+    id_only : bool (default=False)
+        If True, use only the ID column to label sequences in fasta.
+    """.format(schema, schema, schema)
     return s
 
 
@@ -114,69 +121,65 @@ def _write(
     else:
         return "".join([s.format(schema) for s in seq_records])
 
+def _write_method(schema):
+    """Add a write method for named schema to a class.
+    """
+    def method(
+        self,
+        filename=None,
+        sequence_col='sequence',
+        id_col='id',
+        id_only=False,
+        alphabet=None,
+        **kwargs):
+        # Use the generic _write function to write the data.
+        return _write(
+            self._data,
+            filename=filename,
+            schema=schema,
+            sequence_col=sequence_col,
+            id_col=id_col,
+            id_only=id_only,
+            alphabet=alphabet,
+            **kwargs
+        )
+    # Update docs
+    method.__doc__ = _write_doc_template(schema)
+    return method
 
-def to_fasta(df, filename=None, sequence_col='sequence',
-             id_col='id', id_only=False, alphabet=None, **kwargs):
-    """Write to fasta format.
 
-    Parameters
-    ----------
-    filename : str
-        File to write fasta string to. If no filename is given, a fasta string
-        will be returned.
-    sequence_col : str (default='sequence')
-        Sequence column name in DataFrame.
-    id_col : str (default='id')
-        ID column name in DataFrame
-    id_only : bool (default=False)
-        If True, use only the ID column to label sequences in fasta.
+def _write_function(schema):
+    """Create a standalone write function for the named schema.
     """
-    return _write(df, filename=filename, schema='fasta',
-                  sequence_col=sequence_col, id_col=id_col, id_only=id_only,
-                  alphabet=None, **kwargs)
-
-
-def to_phylip(df, filename=None, sequence_col='sequence',
-             id_col='id', alphabet=None, **kwargs):
-    __doc__ = _seqio_doc_template('phylip')
-    return _write(df, filename=filename, schema='phylip',
-                  sequence_col=sequence_col, id_col=id_col, id_only=True,
-                  alphabet=None, **kwargs)
-
-
-def to_clustal(df, filename=None, sequence_col='sequence',
-             id_col='id', alphabet=None, **kwargs):
-    __doc__ = _seqio_doc_template('clustal')
-    return _write(df, filename=filename, schema='clustal',
-                  sequence_col=sequence_col, id_col=id_col, id_only=True,
-                  alphabet=None, **kwargs)
-
-def to_embl(df, alphabet, filename=None, sequence_col='sequence',
-             id_col='id', **kwargs):
-    __doc__ = _seqio_doc_template('embl')
-    return _write(df, filename=filename, schema='embl', sequence_col=sequence_col,
-                  id_col=id_col, id_only=True, alphabet=alphabet, **kwargs)
-
-
-def to_nexus(df, alphabet, filename=None, sequence_col='sequence',
-             id_col='id', id_only=False, **kwargs):
-    __doc__ = _seqio_doc_template('nexus')
-    return _write(df, alphabet=alphabet, filename=filename, schema='nexus',
-                  sequence_col=sequence_col, id_col='id',
-                  id_only=True, **kwargs)
-
-
-def to_swiss(df, filename=None, sequence_col='sequence',
-             id_col='id', id_only=False, alphabet=None, **kwargs):
-    __doc__ = _seqio_doc_template('swiss')
-    return _write(df, alphabet=alphabet, filename=filename, schema='swiss',
-                  sequence_col=sequence_col, id_col='id', id_only=True,
-                  **kwargs)
-
-
-def to_fastq(df, filename=None, sequence_col='sequence',
-             id_col='id', id_only=False, alphabet=None, **kwargs):
-    __doc__ = _seqio_doc_template('fastq')
-    return _write(df, filename=filename, schema='fastq',
-                  sequence_col=sequence_col, id_col='id', id_only=True,
-                  alphabet=None, **kwargs)
+    def func(
+        data,
+        filename=None,
+        sequence_col='sequence',
+        id_col='id',
+        id_only=False,
+        alphabet=None,
+        **kwargs):
+        # Use the generic _write function to write the data.
+        return _write(
+            data,
+            filename=filename,
+            schema=schema,
+            sequence_col=sequence_col,
+            id_col=id_col,
+            id_only=id_only,
+            alphabet=alphabet,
+            **kwargs
+        )
+    # Update docs
+    func.__doc__ = _write_doc_template(schema)
+    return func
+
+
+# Write functions to various formats.
+to_fasta = _write_function('fasta')
+to_phylip = _write_function('phylip')
+to_clustal = _write_function('clustal')
+to_embl = _write_function('embl')
+to_nexus = _write_function('nexus')
+to_swiss = _write_function('swiss')
+to_fastq = _write_function('fastq')