Source code for modlamp.plot

# -*- coding: utf-8 -*-
"""
.. currentmodule:: modlamp.plot

.. moduleauthor:: modlab Alex Mueller ETH Zurich <alex.mueller@pharma.ethz.ch>

This module incorporates functions to plot different feature plots. The following functions are available:

============================        ==============================================================================
Function                            Characteristics
============================        ==============================================================================
:py:func:`plot_feature`             Generate a box plot for visualizing the distribution of a given feature.
:py:func:`plot_2_features`          Generate a 2D scatter plot of 2 given features.
:py:func:`plot_3_features`          Generate a 3D scatter plot of 3 given features.
:py:func:`plot_profile`             Generates a profile plot of a sequence to visualize potential linear gradients.
:py:func:`helical_wheel`            Generates a helical wheel projection plot of a given sequence.
:py:func:`plot_pde`                 Generates a probability density estimation plot of given data arrays.
:py:func:`plot_violin`              Generates a violin plot for given classes and corresponding distributions.
:py:func:`plot_aa_distr`            Generates an amino acid frequency plot for all 20 natural amino acids.
============================        ==============================================================================

"""
import matplotlib.lines as lines
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from scipy.stats.kde import gaussian_kde

from modlamp.core import count_aas, load_scale
from modlamp.descriptors import PeptideDescriptor, GlobalDescriptor

__author__ = "Alex Müller, Gisela Gabernet"
__docformat__ = "restructuredtext en"


[docs]def plot_feature(y_values, targets=None, y_label='feature values', x_tick_labels=None, filename=None, colors=None):
    """
    Function to generate a box plot of 1 given feature. The different target classes given in **targets** are plottet
    as separate boxes.

    :param y_values: Array of feature values to be plotted.
    :param targets: List of target class values [string/binary] for the given feature data.
    :param y_label: Axis label.
    :param x_tick_labels: list of labels to be assigned to the ticks on the x-axis. Must match the number of targets.
    :param filename: filename where to safe the plot. *default = None*
    :param colors: {list} colors to take for plotting (strings in HEX formats).
    :return: A feature box plot.
    :Example:

    >>> plot_feature(desc.descriptor,y_label='uH Eisenberg')  # desc: PeptideDescriptor instance

    .. image:: ../docs/static/uH_Eisenberg.png
        :height: 300px
    
    The same procedure also works for comparing two data sets:
    
    >>> plot_feature((p.descriptor, apd.descriptor), y_label='uH Eisenberg', x_tick_labels=['Library', 'APD3'])
    
    .. image:: ../docs/static/uH_APD3.png
        :height: 300px
    """
    if not colors:
        colors = ['#69D2E7', '#FA6900', '#E0E4CC', '#542437', '#53777A', 'black', '#C02942', '#031634']

    if type(y_values) == list:
        y_values = np.array(y_values)

    if len(targets) >= 1:
        data = []
        cntr = 0
        for n in set(targets):  # finding indices of the different targets in "targets" and plotting
            data.append(y_values[np.where(targets == n)])
            cntr += 1

        if x_tick_labels:
            labels = x_tick_labels
        else:
            labels = [str(i) for i in range(cntr)]

        colors = colors[:cntr]
    
    else:
        if x_tick_labels:
            labels = x_tick_labels
        else:
            labels = ['all data']
        data = y_values

    fig, ax = plt.subplots()
    # coloring faces of boxes
    median_props = dict(linestyle='-', linewidth='1', color='black')
    box = ax.boxplot(data, notch=True, patch_artist=True, medianprops=median_props, labels=labels)
    plt.setp(box['whiskers'], color='black')
    
    for patch, color in zip(box['boxes'], colors):
        patch.set(facecolor=color, edgecolor='black', alpha=0.8)
    
    ax.set_xlabel('Classes', fontweight='bold')
    ax.set_ylabel(y_label, fontweight='bold')
    ax.set_title('Feature Box-Plot', fontsize=16, fontweight='bold')
    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')
    
    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()


[docs]def plot_2_features(x_values, y_values, targets=None, x_label='', y_label='', filename=None, colors=None):
    """
    Function to generate a feature scatter plot of 2 given features. The different target classes given in **targets**
    are plottet in different colors.

    :param x_values: Array of values of the feature to be plotted on the x-axis.
    :param y_values: Array of values of the feature to be plotted on the y-axis.
    :param targets: List of target class values [string/binary] for the given feature data.
    :param x_label: X-axis label.
    :param y_label: Y-axis label.
    :param filename: filename where to safe the plot. *default = None*
    :param colors: {list} colors to take for plotting (strings in HEX formats).
    :return: A 2D feature scatter plot.
    :Example:

    >>> plot_2_features(a.descriptor,b.descriptor,x_label='uH',y_label='pI',targets=targs)

    .. image:: ../docs/static/2D_scatter.png
        :height: 300px
    """
    if not colors:
        colors = ['#69D2E7', '#FA6900', '#B5B8AB', '#542437', '#53777A', 'black', '#C02942', '#031634']
    
    fig, ax = plt.subplots()
    
    if len(targets) >= 1:
        for n in list(set(targets)):  # finding indices of the different targets in "targets" and plotting
            t = np.array([i for i, j in enumerate(targets) if j == n])
            xt = x_values[t]  # find all values in x for the given target
            yt = y_values[t]  # find all values in y for the given target
            ax.scatter(xt, yt, c=colors[n], alpha=1., s=25,
                       label='class ' + str(n))  # plot scatter for this target group
            ax.legend(loc='best')
    
    else:
        ax.scatter(x_values, y_values, c=colors[0], alpha=1., s=25)
    
    ax.set_xlabel(x_label, fontweight='bold')
    ax.set_ylabel(y_label, fontweight='bold')
    ax.set_title('2D Feature Plot', fontsize=16, fontweight='bold')
    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')
    
    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()


[docs]def plot_3_features(x_values, y_values, z_values, targets=None, x_label='', y_label='', z_label='', filename=None,
                    colors=None):
    """
    Function to generate a 3D feature scatter plot of 3 given features. The different target classes given in **targets**
    are plottet in different colors.

    :param x_values: Array of values of the feature to be plotted on the x-axis.
    :param y_values: Array of values of the feature to be plotted on the y-axis.
    :param z_values: Array of values of the feature to be plotted on the z-axis.
    :param targets: List of target class values {string/binary} for the given feature data.
    :param x_label: {str} X-axis label.
    :param y_label: {str} Y-axis label.
    :param z_label: {str} Z-axis label.
    :param filename: {str} filename where to safe the plot. *default = None* -> show the plot
    :param colors: {list} colors to take for plotting (strings in HEX formats).
    :return: A 3D feature scatter plot.
    :Example:

    >>> plot_3_features(a.descriptor,b.descriptor,c.descriptor,x_label='uH',y_label='pI',z_label='length')

    .. image:: ../docs/static/3D_scatter.png
        :height: 300px
    """
    
    if not colors:
        colors = ['#69D2E7', '#FA6900', '#E0E4CC', '#542437', '#53777A', 'black', '#C02942', '#031634']
    
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    
    if len(targets) >= 1:
        for n in list(set(targets)):  # finding indices of the different targets in "targets" and plotting
            t = np.array([i for i, j in enumerate(targets) if j == n])
            xt = x_values[t]  # find all values in x for the given target
            yt = y_values[t]  # find all values in y for the given target
            zt = z_values[t]  # find all values in y for the given target
            ax.scatter(xt, yt, zt, c=colors[n], alpha=1., s=25,
                       label='class ' + str(n))  # plot 3Dscatter for this target
            ax.legend(loc='best')
    
    else:  # plot 3D scatter for this target group
        ax.scatter(x_values, y_values, z_values, c=colors[0], alpha=1., s=25)
    
    ax.set_xlabel(x_label, fontweight='bold')
    ax.set_ylabel(y_label, fontweight='bold')
    ax.set_zlabel(z_label, fontweight='bold')
    ax.set_title('3D Feature Plot', fontsize=16, fontweight='bold')
    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('bottom')
    
    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()


[docs]def plot_profile(sequence, window=5, scalename='Eisenberg', filename=None, color='red', seq=False, ylim=None):
    """ Function to generate sequence profile plots of a given amino acid scale or a moment thereof.

    .. note::
        :func:`plot_profile` can only plot one-dimensional amino acid scales given in
        :class:`modlamp.descriptors.PeptideDescriptor`.

    :param sequence: {str} Peptide sequence for which the profile should be plotted.
    :param window: {int, uneven} Window size for which the average value is plotted for the center amino acid.
    :param scalename: {str} Amino acid scale to be used to describe the sequence.
    :param filename: {str} Filename  where to safe the plot. *default = None* --> show the plot
    :param color: {str} Color of the plot line.
    :param seq: {bool} Whether the amino acid sequence should be plotted as the title.
    :param ylim: {tuple of float} Y-Axis limits. Provide as tuple, e.g. (0.5, -0.2)
    :return: a profile plot of the input sequence interactively or with the specified *filename*
    :Example:

    >>> plot_profile('GLFDIVKKVVGALGSL', scalename='eisenberg')

    .. image:: ../docs/static/profileplot.png
        :height: 300px

    .. versionadded:: v2.1.5
    """
    # check if given scale is defined in PeptideDescriptor
    d = PeptideDescriptor(sequence, scalename)
    if len(d.scale['A']) > 1:
        raise KeyError("\nSorry\nThis function can only calculate profiles for 1D scales. '%s' has more than one "
                       "dimension" % scalename)
    seq_data = list()
    seq_profile = list()
    for a in sequence:
        seq_data.append(d.scale[a])  # describe sequence by given scale
    i = 0  # AA index
    while (i + window) < len(sequence):
        seq_profile.append(np.mean(seq_data[i:(i + window + 1)]))  # append average value for given window
        i += 1

    # plot
    fig, ax = plt.subplots()
    x_range = range(int(window / 2), int(len(sequence) - int(window) / 2))
    line = ax.plot(x_range, seq_profile)
    plt.setp(line, color=color, linewidth=2.0)

    # axis labes and title
    ax.set_xlabel('sequence position', fontweight='bold')
    ax.set_ylabel(scalename + ' value', fontweight='bold')
    ax.text(max(x_range) / 2 + 1, 1.05 * max(seq_profile), 'window size: ' + str(window),
            fontsize=16, fontweight='bold')
    if seq:
        ax.set_title(sequence, fontsize=16, fontweight='bold', y=1.02)
    if ylim:
        ax.set_ylim(ylim)
    else:
        ax.set_ylim(1.2 * max(seq_profile), 1.2 * min(seq_profile))

    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    # show or save plot
    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()


[docs]def helical_wheel(sequence, colorcoding='rainbow', lineweights=True, filename=None, seq=False, moment=False):
    """A function to project a given peptide sequence onto a helical wheel plot. It can be useful to illustrate the
    properties of alpha-helices, like positioning of charged and hydrophobic residues along the sequence.

    :param sequence: {str} the peptide sequence for which the helical wheel should be drawn
    :param colorcoding: {str} the color coding to be used, available: *rainbow*, *charge*, *polar*, *simple*,
        *amphipathic*, *none*
    :param lineweights: {boolean} defines whether connection lines decrease in thickness along the sequence
    :param filename: {str} filename  where to safe the plot. *default = None* --> show the plot
    :param seq: {bool} whether the amino acid sequence should be plotted as a title
    :param moment: {bool} whether the Eisenberg hydrophobic moment should be calculated and plotted
    :return: a helical wheel projection plot of the given sequence (interactively or in **filename**)
    :Example:

    >>> helical_wheel('GLFDIVKKVVGALG')
    >>> helical_wheel('KLLKLLKKLLKLLK', colorcoding='charge')
    >>> helical_wheel('AKLWLKAGRGFGRG', colorcoding='none', lineweights=False)
    >>> helical_wheel('ACDEFGHIKLMNPQRSTVWY')

    .. image:: ../docs/static/wheel1.png
        :height: 300px
    .. image:: ../docs/static/wheel2.png
        :height: 300px
    .. image:: ../docs/static/wheel3.png
        :height: 300px
    .. image:: ../docs/static/wheel4.png
        :height: 300px

    .. versionadded:: v2.1.5
    """
    # color mappings
    aa = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
    f_rainbow = ['#3e3e28', '#ffcc33', '#b30047', '#b30047', '#ffcc33', '#3e3e28', '#80d4ff', '#ffcc33', '#0047b3',
                 '#ffcc33', '#ffcc33', '#b366ff', '#29a329', '#b366ff', '#0047b3', '#ff66cc', '#ff66cc', '#ffcc33',
                 '#ffcc33', '#ffcc33']
    f_charge = ['#000000', '#000000', '#ff4d94', '#ff4d94', '#000000', '#000000', '#80d4ff', '#000000', '#80d4ff',
                '#000000', '#000000', '#000000', '#000000', '#000000', '#80d4ff', '#000000', '#000000', '#000000',
                '#000000', '#000000']
    f_polar = ['#000000', '#000000', '#80d4ff', '#80d4ff', '#000000', '#000000', '#80d4ff', '#000000', '#80d4ff',
               '#000000', '#000000', '#80d4ff', '#000000', '#80d4ff', '#80d4ff', '#80d4ff', '#80d4ff', '#000000',
               '#000000', '#000000']
    f_simple = ['#ffcc33', '#ffcc33', '#0047b3', '#0047b3', '#ffcc33', '#7f7f7f', '#0047b3', '#ffcc33', '#0047b3',
                '#ffcc33', '#ffcc33', '#0047b3', '#ffcc33', '#0047b3', '#0047b3', '#0047b3', '#0047b3', '#ffcc33',
                '#ffcc33', '#ffcc33']
    f_none = ['#ffffff'] * 20
    f_amphi = ['#ffcc33', '#29a329', '#b30047', '#b30047', '#f79318', '#80d4ff', '#0047b3', '#ffcc33', '#0047b3',
               '#ffcc33', '#ffcc33', '#80d4ff', '#29a329', '#80d4ff', '#0047b3', '#80d4ff', '#80d4ff', '#ffcc33',
               '#f79318', '#f79318']
    t_rainbow = ['w', 'k', 'w', 'w', 'k', 'w', 'k', 'k', 'w', 'k', 'k', 'k', 'k', 'k', 'w', 'k', 'k', 'k', 'k', 'k']
    t_charge = ['w', 'w', 'k', 'k', 'w', 'w', 'k', 'w', 'k', 'w', 'w', 'w', 'w', 'w', 'k', 'w', 'w', 'w', 'w', 'w']
    t_polar = ['w', 'w', 'k', 'k', 'w', 'w', 'k', 'w', 'k', 'w', 'w', 'k', 'w', 'k', 'k', 'k', 'k', 'w', 'w', 'w']
    t_simple = ['k', 'k', 'w', 'w', 'k', 'w', 'w', 'k', 'w', 'k', 'k', 'k', 'k', 'w', 'w', 'w', 'w', 'k', 'k', 'k']
    t_none = ['k'] * 20
    t_amphi = ['k', 'k', 'w', 'w', 'w', 'k', 'w', 'k', 'w', 'k', 'k', 'k', 'w', 'k', 'w', 'k', 'k', 'k', 'w', 'w']
    d_eisberg = load_scale('eisenberg')[1]  # eisenberg hydrophobicity values for HM
    
    if lineweights:
        lw = np.arange(0.1, 5.5, 5. / (len(sequence) - 1))  # line thickness array
        lw = lw[::-1]  # inverse order
    else:
        lw = [2.] * (len(sequence) - 1)
    
    # check which color coding to use
    if colorcoding == 'rainbow':
        df = dict(zip(aa, f_rainbow))
        dt = dict(zip(aa, t_rainbow))
    elif colorcoding == 'charge':
        df = dict(zip(aa, f_charge))
        dt = dict(zip(aa, t_charge))
    elif colorcoding == 'polar':
        df = dict(zip(aa, f_polar))
        dt = dict(zip(aa, t_polar))
    elif colorcoding == 'simple':
        df = dict(zip(aa, f_simple))
        dt = dict(zip(aa, t_simple))
    elif colorcoding == 'none':
        df = dict(zip(aa, f_none))
        dt = dict(zip(aa, t_none))
    elif colorcoding == 'amphipathic':
        df = dict(zip(aa, f_amphi))
        dt = dict(zip(aa, t_amphi))
    else:
        print("Unknown color coding, 'rainbow' used instead")
        df = dict(zip(aa, f_rainbow))
        dt = dict(zip(aa, t_rainbow))
    
    # degree to radian
    deg = np.arange(float(len(sequence))) * -100.
    deg = [d + 90. for d in deg]  # start at 270 degree in unit circle (on top)
    rad = np.radians(deg)
    
    # dict for coordinates and eisenberg values
    d_hydro = dict(zip(rad, [0.] * len(rad)))
    
    # create figure
    fig = plt.figure(frameon=False, figsize=(10, 10))
    ax = fig.add_subplot(111)
    old = None
    hm = list()
    
    # iterate over sequence
    for i, r in enumerate(rad):
        new = (np.cos(r), np.sin(r))  # new AA coordinates
        if i < 18:
            # plot the connecting lines
            if old is not None:
                line = lines.Line2D((old[0], new[0]), (old[1], new[1]), transform=ax.transData, color='k',
                                    linewidth=lw[i - 1])
                line.set_zorder(1)  # 1 = level behind circles
                ax.add_line(line)
        elif 17 < i < 36:
            line = lines.Line2D((old[0], new[0]), (old[1], new[1]), transform=ax.transData, color='k',
                                linewidth=lw[i - 1])
            line.set_zorder(1)  # 1 = level behind circles
            ax.add_line(line)
            new = (np.cos(r) * 1.2, np.sin(r) * 1.2)
        elif i == 36:
            line = lines.Line2D((old[0], new[0]), (old[1], new[1]), transform=ax.transData, color='k',
                                linewidth=lw[i - 1])
            line.set_zorder(1)  # 1 = level behind circles
            ax.add_line(line)
            new = (np.cos(r) * 1.4, np.sin(r) * 1.4)
        else:
            new = (np.cos(r) * 1.4, np.sin(r) * 1.4)
        
        # plot circles
        circ = patches.Circle(new, radius=0.1, transform=ax.transData, edgecolor='k', facecolor=df[sequence[i]])
        circ.set_zorder(2)  # level in front of lines
        ax.add_patch(circ)
        
        # check if N- or C-terminus and add subscript, then plot AA letter
        if i == 0:
            ax.text(new[0], new[1], sequence[i] + '$_N$', va='center', ha='center', transform=ax.transData,
                    size=32, color=dt[sequence[i]], fontweight='bold')
        elif i == len(sequence) - 1:
            ax.text(new[0], new[1], sequence[i] + '$_C$', va='center', ha='center', transform=ax.transData,
                    size=32, color=dt[sequence[i]], fontweight='bold')
        else:
            ax.text(new[0], new[1], sequence[i], va='center', ha='center', transform=ax.transData,
                    size=36, color=dt[sequence[i]], fontweight='bold')
        
        eb = d_eisberg[sequence[i]][0]  # eisenberg value for this AA
        hm.append([eb * new[0], eb * new[1]])  # save eisenberg hydrophobicity vector value to later calculate HM
        
        old = (np.cos(r), np.sin(r))  # save as previous coordinates
    
    # draw hydrophobic moment arrow if moment option
    if moment:
        v_hm = np.sum(np.array(hm), 0)
        x = .0333 * v_hm[0]
        y = .0333 * v_hm[1]
        ax.arrow(0., 0., x, y, head_width=0.04, head_length=0.03, transform=ax.transData,
                 color='k', linewidth=6.)
        desc = PeptideDescriptor(sequence)  # calculate hydrophobic moment
        desc.calculate_moment()
        if abs(x) < 0.2 and y > 0.:  # right positioning of HM text so arrow does not cover it
            z = -0.2
        else:
            z = 0.2
        plt.text(0., z, str(round(desc.descriptor[0][0], 3)), fontdict={'fontsize': 20, 'fontweight': 'bold',
                                                                        'ha': 'center'})
    
    # plot shape
    if len(sequence) < 19:
        ax.set_xlim(-1.2, 1.2)
        ax.set_ylim(-1.2, 1.2)
    else:
        ax.set_xlim(-1.4, 1.4)
        ax.set_ylim(-1.4, 1.4)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    cur_axes = plt.gca()
    cur_axes.axes.get_xaxis().set_visible(False)
    cur_axes.axes.get_yaxis().set_visible(False)
    plt.tight_layout()
    
    if seq:
        plt.title(sequence, fontweight='bold', fontsize=20)
    
    # show or save plot
    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()


[docs]def plot_pde(data, title=None, axlabels=None, filename=None, legendloc=2, x_min=0, x_max=1, colors=None, alpha=0.2):
    """A function to plot probability density estimations of given data vectors / matrices (row wise)

    :param data: {list / array} data of which underlying probability density function should be estimated and plotted.
    :param title: {str} plot title
    :param axlabels: {list of str} list containing the axis labels for the plot
    :param filename: {str} filename  where to safe the plot. *default = None* --> show the plot
    :param legendloc: {int} location of the figures legend. 1 = top right, 2 = top left ...
    :param x_min: {number} x-axis minimum
    :param x_max: {number} x-axis maximum
    :param colors: {list} list of colors (readable by matplotlib, e.g. hex) to be used to plot different data classes
    :param alpha: {float} color alpha for filling pde curve
    :Example:

    >>> data = np.random.random((3,100))
    >>> plot_pde(data)

    .. image:: ../docs/static/pde.png
        :height: 300px

    .. versionadded:: v2.2.1
    """
    if not axlabels:
        axlabels = ['Data', 'Estimated Density']
    if not title:
        title = ""
    
    # transform input to numpy array and reshape if it only contains one data row
    data = np.array(data)
    
    if len(data.shape) == 1:
        data = data.reshape((1, -1))
    shp = data.shape
    
    # colors
    if not colors:
        colors = ['#0B486B', '#3B8686', '#79BD9A', '#A8DBA8', '#CFF09E', '#0000ff', '#bf00ff', '#ff0040', '#009900']
    elif len(colors) != len(data) and shp != 1:  # if not enough colors for all data subtypes
        colors *= len(data)
    
    # prepare figure
    fig, ax = plt.subplots()
    
    # set axis labels and limits
    if axlabels is None:
        axlabels = ['', '']
    ax.set_xlabel(axlabels[0], fontsize=18)
    ax.set_ylabel(axlabels[1], fontsize=18)
    fig.suptitle(title, fontsize=16, fontweight='bold')
    
    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')
    
    # plot PDE for every data row
    # if one row only
    if shp[0] == 1:
        kde = gaussian_kde(
                data)  # this creates the kernel, given an array it will estimate the probability over that values
        space = np.linspace(x_min, x_max, 1000)  # these are the values over which the kernel will be evaluated
        line = ax.plot(space, kde(space), label='Data')  # plot line
        plt.setp(line, color=colors[0], linewidth=2.0, alpha=.9)  # set line width and color
        ax.fill_between(space, 0, kde(space), color=colors[0], alpha=alpha)  # fill area under line
    
    # if multiple rows
    else:
        for i, row in enumerate(data):
            kde = gaussian_kde(
                    row)  # this creates the kernel, given an array it will estimate the probability over that values
            space = np.linspace(x_min, x_max, 1000)  # these are the values over which the kernel will be evaluated
            line = ax.plot(space, kde(space), label='Run ' + str(i))  # plot line
            plt.setp(line, color=colors[i], linewidth=2.0, alpha=.9)  # set line width and color
            ax.fill_between(space, 0, kde(space), color=colors[i], alpha=alpha)  # fill area under line
    
    # show or save plot
    ax.legend(loc=legendloc)
    ax.set_xlim((x_min, x_max))
    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()


[docs]def plot_violin(x, colors=None, bp=False, filename=None, title=None, axlabels=None, y_min=0, y_max=1):
    """    create violin plots out of given data array
    (adapted from `Flavio Coelho <https://pyinsci.blogspot.ch/2009/09/violin-plot-with-matplotlib.html>`_.)

    :param x: {numpy.array} data to be plotted
    :param colors: {str or list} face color of the violin plots, can also be list of colors with same dimension as **x**
    :param bp: {bool} print a box blot inside violin
    :param filename: {str} location / filename where to save the plot to. *default = None* --> show the plot
    :param title: {str} Title of the plot.
    :param axlabels: {list of str} list containing the axis labels for the plot
    :param y_min: {number} y-axis minimum.
    :param y_max: {number} y-axis maximum.
    :Example:

    >>> data = np.random.normal(size=[5, 100])
    >>> plot_violin(data, colors=['#0B486B', '#0B486B', '#0B486B', '#CFF09E', '#CFF09E'], bp=True, y_min=-3, y_max=3)

    .. image:: ../docs/static/violins.png
        :height: 300px

    .. versionadded:: v2.2.2
    """
    
    # transform input to list of arrays (better handled by plotting functions)
    x = np.array(x)
    
    # check color input and transform to list of right length
    if not colors:
        colors = ['#0B486B', '#3B8686', '#79BD9A', '#A8DBA8', '#CFF09E', '#0000ff', '#bf00ff', '#ff0040', '#009900']
    
    if isinstance(colors, str):
        colors = [colors] * len(x)
    
    # scaling for available space
    dist = len(x) - 1
    w = min(0.15 * max(dist, 1.0), 0.5)
    
    fig, ax = plt.subplots()
    if len(np.array(x).shape) == 1:  # if only one dimensional data
        k = gaussian_kde(x)  # kernel density estimation
        mi = k.dataset.min()  # lower bound of violin
        ma = k.dataset.max()  # upper bound of violin
        rng = np.arange(mi, ma, (ma - mi) / 100.)  # range over which the PDE is performed
        v = k.evaluate(rng)  # violin profile (density curve)
        v = v / v.max() * 0.3  # scaling the violin to the available space
        ax.fill_betweenx(rng, 1, v + 1, facecolor=colors[0], alpha=0.6)
        ax.fill_betweenx(rng, 1, -v + 1, facecolor=colors[0], alpha=0.6)
        
        if bp:  # print box plot if option is given
            medprops = dict(linestyle='-', linewidth='1', color='black')
            box = ax.boxplot(x, notch=1, positions=[1.], vert=1, patch_artist=True, medianprops=medprops)
            plt.setp(box['whiskers'], color='black')
            box['boxes'][0].set(facecolor=colors[0], edgecolor='black', alpha=0.7)
    
    else:  # one violin for every data element if multidimensional
        for p, d in enumerate(x):
            loc = p + 1
            k = gaussian_kde(d)  # kernel density estimation
            mi = k.dataset.min()  # lower bound of violin
            ma = k.dataset.max()  # upper bound of violin
            rng = np.arange(mi, ma, (ma - mi) / 100.)  # range over which the PDE is performed
            v = k.evaluate(rng)  # violin profile (density curve)
            v = v / v.max() * w  # scaling the violin to the available space
            ax.fill_betweenx(rng, loc, v + loc, facecolor=colors[p], alpha=0.6)
            ax.fill_betweenx(rng, loc, -v + loc, facecolor=colors[p], alpha=0.6)
        
        if bp:  # print box plots if option is given
            box = ax.boxplot(x.T, notch=1, vert=1, patch_artist=True)
            plt.setp(box['whiskers'], color='black')
            plt.setp(box['medians'], linestyle='-', linewidth=1.5, color='black')
            for p, patch in enumerate(box['boxes']):
                patch.set(facecolor=colors[p], edgecolor='black', alpha=0.7)
    
    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    plt.tick_params(axis='x', which='both', top='off')
    ax.yaxis.set_ticks_position('left')
    ax.set_ylim((y_min, y_max))
    if axlabels is None:
        axlabels = ['', '']
    ax.set_xlabel(axlabels[0], fontsize=18)
    ax.set_ylabel(axlabels[1], fontsize=18)
    
    if title:
        ax.set_title(title, fontsize=16, fontweight='bold')
    else:
        ax.set_title('Violin Plots', fontsize=16, fontweight='bold')
    
    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()


[docs]def plot_aa_distr(sequences, color='#83AF9B', filename=None):
    """Method to plot the amino acid distribution of a given list of sequences

    :param sequences: {list} list of sequences to calculate the amino acid distribution fore
    :param color: {str} color to be used (matplotlib style / hex)
    :param filename: {str} location / filename where to save the plot to. *default = None* --> show the plot
    :Example:

    >>> plot_aa_distr(['KLLKLLKKLLKLLK', 'WWRRWWRAARWWRRWWRR', 'ACDEFGHKLCMNPQRSTVWY', 'GGGGGIIKLWGGGGGGGGGGGGG'])

    .. image:: ../docs/static/AA_dist.png
        :height: 300px

    .. versionadded:: v2.2.5
    """
    concatseq = ''.join(sequences)
    aa = count_aas(concatseq, scale='relative')
    
    fig, ax = plt.subplots()
    for a in range(20):
        plt.bar(a, list(aa.values())[a], 0.9, color=color)
    
    plt.xlim([-0.75, 19.75])
    plt.ylim([0, max(aa.values()) + 0.05])
    plt.xticks(range(20), aa.keys(), fontweight='bold')
    plt.ylabel('Amino Acid Frequency', fontweight='bold')
    plt.title('Amino Acid Distribution', fontsize=16, fontweight='bold')
    
    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')
    
    if filename:
        plt.savefig(filename, dpi=300)
    else:
        plt.show()