Source code for modlamp.plot

# -*- coding: utf-8 -*-
"""
.. currentmodule:: modlamp.plot

.. moduleauthor:: modlab Alex Mueller ETH Zurich <alex.mueller@pharma.ethz.ch>

This module incorporates functions to plot different feature plots. The following functions are available:

============================        ==============================================================================
Function                            Characteristics
============================        ==============================================================================
:py:func:`plot_feature`             Generate a box plot for visualizing the distribution of a given feature.
:py:func:`plot_2_features`          Generate a 2D scatter plot of 2 given features.
:py:func:`plot_3_features`          Generate a 3D scatter plot of 3 given features.
:py:func:`plot_profile`             Generates a profile plot of a sequence to visualize potential linear gradients.
:py:func:`helical_wheel`            Generates a helical wheel projection plot of a given sequence.
:py:func:`plot_pde`                 Generates a probability density estimation plot of given data arrays.
:py:func:`plot_violin`              Generates a violin plot for given classes and corresponding distributions.
:py:func:`plot_aa_distr`            Generates an amino acid frequency plot for all 20 natural amino acids.
============================        ==============================================================================

"""
import matplotlib.lines as lines
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from scipy.stats.kde import gaussian_kde

from modlamp.core import count_aas, load_scale
from modlamp.descriptors import PeptideDescriptor, GlobalDescriptor

__author__ = "Alex Müller, Gisela Gabernet"
__docformat__ = "restructuredtext en"


def plot_feature(y_values, targets=None, y_label='feature values', x_tick_labels=None, filename=None, colors=None):
    """
    Function to generate a box plot of 1 given feature. The different target classes given in **targets** are
    plotted as separate boxes.

    :param y_values: Array of feature values to be plotted.
    :param targets: List of target class values [string/binary] for the given feature data. If ``None`` or empty,
        **y_values** is handed to the box plot unchanged.
    :param y_label: Axis label.
    :param x_tick_labels: list of labels to be assigned to the ticks on the x-axis. Must match the number of targets.
    :param filename: filename where to save the plot. *default = None* --> show the plot
    :param colors: {list} colors to take for plotting (strings in HEX formats).
    :return: None; the box plot is shown interactively or saved to **filename**.
    :Example:

    >>> plot_feature(desc.descriptor,y_label='uH Eisenberg')  # desc: PeptideDescriptor instance

    .. image:: ../docs/static/uH_Eisenberg.png
        :height: 300px

    The same procedure also works for comparing two data sets:

    >>> plot_feature((p.descriptor, apd.descriptor), y_label='uH Eisenberg', x_tick_labels=['Library', 'APD3'])

    .. image:: ../docs/static/uH_APD3.png
        :height: 300px
    """
    if not colors:
        colors = ['#69D2E7', '#FA6900', '#E0E4CC', '#542437', '#53777A', 'black', '#C02942', '#031634']

    if isinstance(y_values, list):
        y_values = np.array(y_values)

    # "targets" defaults to None, so it must be checked before taking its length
    if targets is not None and len(targets) >= 1:
        values = np.asarray(y_values)
        # an array is required for the element-wise comparison below; comparing a plain list
        # to a class value yields a single False and would select nothing
        targets = np.asarray(targets)
        classes = np.unique(targets)  # sorted, so box order is reproducible between runs
        # one flat value vector per class; flattening keeps (n, 1) shaped inputs usable in a list
        data = [np.ravel(values[np.where(targets == n)]) for n in classes]
        labels = x_tick_labels if x_tick_labels else [str(i) for i in range(len(classes))]
        colors = colors[:len(classes)]
    else:
        labels = x_tick_labels if x_tick_labels else ['all data']
        data = y_values

    fig, ax = plt.subplots()

    # coloring faces of boxes
    median_props = dict(linestyle='-', linewidth=1, color='black')
    box = ax.boxplot(data, notch=True, patch_artist=True, medianprops=median_props, labels=labels)
    plt.setp(box['whiskers'], color='black')

    # boxes beyond the number of available colors keep the matplotlib default face
    for patch, color in zip(box['boxes'], colors):
        patch.set(facecolor=color, edgecolor='black', alpha=0.8)

    ax.set_xlabel('Classes', fontweight='bold')
    ax.set_ylabel(y_label, fontweight='bold')
    ax.set_title('Feature Box-Plot', fontsize=16, fontweight='bold')

    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()
def plot_2_features(x_values, y_values, targets=None, x_label='', y_label='', filename=None, colors=None):
    """
    Function to generate a feature scatter plot of 2 given features. The different target classes given in
    **targets** are plotted in different colors.

    :param x_values: Array of values of the feature to be plotted on the x-axis.
    :param y_values: Array of values of the feature to be plotted on the y-axis.
    :param targets: List of target class values [string/binary] for the given feature data. If ``None`` or empty,
        all points are drawn as one group in the first color.
    :param x_label: X-axis label.
    :param y_label: Y-axis label.
    :param filename: filename where to save the plot. *default = None* --> show the plot
    :param colors: {list} colors to take for plotting (strings in HEX formats). Integer class values select the
        color at that index; any other class value gets a color by its position among the classes.
    :return: None; the 2D scatter plot is shown interactively or saved to **filename**.
    :Example:

    >>> plot_2_features(a.descriptor,b.descriptor,x_label='uH',y_label='pI',targets=targs)

    .. image:: ../docs/static/2D_scatter.png
        :height: 300px
    """
    if not colors:
        colors = ['#69D2E7', '#FA6900', '#B5B8AB', '#542437', '#53777A', 'black', '#C02942', '#031634']

    fig, ax = plt.subplots()

    # "targets" defaults to None, so it must be checked before taking its length
    if targets is not None and len(targets) >= 1:
        # index arrays are used below, which plain lists do not support
        x_values = np.asarray(x_values)
        y_values = np.asarray(y_values)

        classes = set(targets)
        try:
            classes = sorted(classes)  # reproducible legend order
        except TypeError:  # class values that cannot be ordered against each other
            classes = list(classes)

        for position, n in enumerate(classes):
            # indices of all samples belonging to this class
            t = np.array([i for i, j in enumerate(targets) if j == n])
            try:
                color = colors[n]  # integer class values index the color list directly
            except (TypeError, IndexError):
                # string labels or labels outside the color list: pick by position instead
                color = colors[position % len(colors)]
            ax.scatter(x_values[t], y_values[t], c=color, alpha=1., s=25, label='class ' + str(n))
        ax.legend(loc='best')
    else:
        ax.scatter(x_values, y_values, c=colors[0], alpha=1., s=25)

    ax.set_xlabel(x_label, fontweight='bold')
    ax.set_ylabel(y_label, fontweight='bold')
    ax.set_title('2D Feature Plot', fontsize=16, fontweight='bold')

    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()
def plot_3_features(x_values, y_values, z_values, targets=None, x_label='', y_label='', z_label='', filename=None,
                    colors=None):
    """
    Function to generate a 3D feature scatter plot of 3 given features. The different target classes given in
    **targets** are plotted in different colors.

    :param x_values: Array of values of the feature to be plotted on the x-axis.
    :param y_values: Array of values of the feature to be plotted on the y-axis.
    :param z_values: Array of values of the feature to be plotted on the z-axis.
    :param targets: List of target class values {string/binary} for the given feature data. If ``None`` or empty,
        all points are drawn as one group in the first color.
    :param x_label: {str} X-axis label.
    :param y_label: {str} Y-axis label.
    :param z_label: {str} Z-axis label.
    :param filename: {str} filename where to save the plot. *default = None* --> show the plot
    :param colors: {list} colors to take for plotting (strings in HEX formats). Integer class values select the
        color at that index; any other class value gets a color by its position among the classes.
    :return: None; the 3D scatter plot is shown interactively or saved to **filename**.
    :Example:

    >>> plot_3_features(a.descriptor,b.descriptor,c.descriptor,x_label='uH',y_label='pI',z_label='length')

    .. image:: ../docs/static/3D_scatter.png
        :height: 300px
    """
    if not colors:
        colors = ['#69D2E7', '#FA6900', '#E0E4CC', '#542437', '#53777A', 'black', '#C02942', '#031634']

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    # "targets" defaults to None, so it must be checked before taking its length
    if targets is not None and len(targets) >= 1:
        # index arrays are used below, which plain lists do not support
        x_values = np.asarray(x_values)
        y_values = np.asarray(y_values)
        z_values = np.asarray(z_values)

        classes = set(targets)
        try:
            classes = sorted(classes)  # reproducible legend order
        except TypeError:  # class values that cannot be ordered against each other
            classes = list(classes)

        for position, n in enumerate(classes):
            # indices of all samples belonging to this class
            t = np.array([i for i, j in enumerate(targets) if j == n])
            try:
                color = colors[n]  # integer class values index the color list directly
            except (TypeError, IndexError):
                # string labels or labels outside the color list: pick by position instead
                color = colors[position % len(colors)]
            ax.scatter(x_values[t], y_values[t], z_values[t], c=color, alpha=1., s=25, label='class ' + str(n))
        ax.legend(loc='best')
    else:
        ax.scatter(x_values, y_values, z_values, c=colors[0], alpha=1., s=25)

    ax.set_xlabel(x_label, fontweight='bold')
    ax.set_ylabel(y_label, fontweight='bold')
    ax.set_zlabel(z_label, fontweight='bold')
    ax.set_title('3D Feature Plot', fontsize=16, fontweight='bold')

    # same axis cosmetics as the 2D plots
    # NOTE(review): recent matplotlib 3D axes may not accept 'bottom' here - confirm against supported versions
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('bottom')

    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()
def plot_profile(sequence, window=5, scalename='Eisenberg', filename=None, color='red', seq=False, ylim=None):
    """ Function to generate sequence profile plots of a given amino acid scale or a moment thereof.

    .. note::
        :func:`plot_profile` can only plot one-dimensional amino acid scales given in
        :class:`modlamp.descriptors.PeptideDescriptor`.

    :param sequence: {str} Peptide sequence for which the profile should be plotted.
    :param window: {int, uneven} Window size for which the average value is plotted for the center amino acid.
        Must be between 1 and the sequence length.
    :param scalename: {str} Amino acid scale to be used to describe the sequence.
    :param filename: {str} Filename where to save the plot. *default = None* --> show the plot
    :param color: {str} Color of the plot line.
    :param seq: {bool} Whether the amino acid sequence should be plotted as the title.
    :param ylim: {tuple of float} Y-Axis limits. Provide as tuple, e.g. (0.5, -0.2). If not given, the limits are
        set to 1.2 times the largest and 1.2 times the smallest profile value, in that order.
    :return: None; the profile plot is shown interactively or saved to **filename**.
    :raises ValueError: if **window** is smaller than 1 or larger than the sequence length.
    :raises KeyError: if the chosen scale has more than one dimension.
    :Example:

    >>> plot_profile('GLFDIVKKVVGALGSL', scalename='eisenberg')

    .. image:: ../docs/static/profileplot.png
        :height: 300px

    .. versionadded:: v2.1.5
    """
    window = int(window)
    if window < 1 or window > len(sequence):
        raise ValueError("window must be between 1 and the sequence length (%d), got %d" % (len(sequence), window))

    # check if given scale is defined in PeptideDescriptor
    d = PeptideDescriptor(sequence, scalename)
    if len(d.scale['A']) > 1:
        raise KeyError("\nSorry\nThis function can only calculate profiles for 1D scales. '%s' has more than one "
                       "dimension" % scalename)

    seq_data = [d.scale[a] for a in sequence]  # describe sequence by given scale

    # sliding average over exactly "window" residues; the last full window starts at len(sequence) - window
    seq_profile = [np.mean(seq_data[i:i + window]) for i in range(len(sequence) - window + 1)]

    # every average is plotted at the position of its window's central residue
    half = window // 2
    x_range = range(half, half + len(seq_profile))

    # plot
    fig, ax = plt.subplots()
    line = ax.plot(x_range, seq_profile)
    plt.setp(line, color=color, linewidth=2.0)

    # axis labels and title
    ax.set_xlabel('sequence position', fontweight='bold')
    ax.set_ylabel(scalename + ' value', fontweight='bold')
    ax.text(max(x_range) / 2 + 1, 1.05 * max(seq_profile), 'window size: ' + str(window),
            fontsize=16, fontweight='bold')
    if seq:
        ax.set_title(sequence, fontsize=16, fontweight='bold', y=1.02)
    if ylim:
        ax.set_ylim(ylim)
    else:
        # larger value is passed first, as in the documented ylim example
        ax.set_ylim(1.2 * max(seq_profile), 1.2 * min(seq_profile))

    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    # show or save plot
    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()
def helical_wheel(sequence, colorcoding='rainbow', lineweights=True, filename=None, seq=False, moment=False):
    """A function to project a given peptide sequence onto a helical wheel plot. It can be useful to illustrate the
    properties of alpha-helices, like positioning of charged and hydrophobic residues along the sequence.

    :param sequence: {str} the peptide sequence for which the helical wheel should be drawn
    :param colorcoding: {str} the color coding to be used, available: *rainbow*, *charge*, *polar*, *simple*,
        *amphipathic*, *none*. Any other value prints a notice and falls back to *rainbow*.
    :param lineweights: {boolean} defines whether connection lines decrease in thickness along the sequence
    :param filename: {str} filename where to save the plot. *default = None* --> show the plot
    :param seq: {bool} whether the amino acid sequence should be plotted as a title
    :param moment: {bool} whether the Eisenberg hydrophobic moment should be calculated and plotted
    :return: None; the helical wheel projection is shown interactively or saved to **filename**
    :Example:

    >>> helical_wheel('GLFDIVKKVVGALG')
    >>> helical_wheel('KLLKLLKKLLKLLK', colorcoding='charge')
    >>> helical_wheel('AKLWLKAGRGFGRG', colorcoding='none', lineweights=False)
    >>> helical_wheel('ACDEFGHIKLMNPQRSTVWY')

    .. image:: ../docs/static/wheel1.png
        :height: 300px
    .. image:: ../docs/static/wheel2.png
        :height: 300px
    .. image:: ../docs/static/wheel3.png
        :height: 300px
    .. image:: ../docs/static/wheel4.png
        :height: 300px

    .. versionadded:: v2.1.5
    """
    # color mappings: f_* are circle face colors, t_* the matching letter colors, both ordered like "aa"
    aa = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
    f_rainbow = ['#3e3e28', '#ffcc33', '#b30047', '#b30047', '#ffcc33', '#3e3e28', '#80d4ff', '#ffcc33', '#0047b3',
                 '#ffcc33', '#ffcc33', '#b366ff', '#29a329', '#b366ff', '#0047b3', '#ff66cc', '#ff66cc', '#ffcc33',
                 '#ffcc33', '#ffcc33']
    f_charge = ['#000000', '#000000', '#ff4d94', '#ff4d94', '#000000', '#000000', '#80d4ff', '#000000', '#80d4ff',
                '#000000', '#000000', '#000000', '#000000', '#000000', '#80d4ff', '#000000', '#000000', '#000000',
                '#000000', '#000000']
    f_polar = ['#000000', '#000000', '#80d4ff', '#80d4ff', '#000000', '#000000', '#80d4ff', '#000000', '#80d4ff',
               '#000000', '#000000', '#80d4ff', '#000000', '#80d4ff', '#80d4ff', '#80d4ff', '#80d4ff', '#000000',
               '#000000', '#000000']
    f_simple = ['#ffcc33', '#ffcc33', '#0047b3', '#0047b3', '#ffcc33', '#7f7f7f', '#0047b3', '#ffcc33', '#0047b3',
                '#ffcc33', '#ffcc33', '#0047b3', '#ffcc33', '#0047b3', '#0047b3', '#0047b3', '#0047b3', '#ffcc33',
                '#ffcc33', '#ffcc33']
    f_none = ['#ffffff'] * 20
    f_amphi = ['#ffcc33', '#29a329', '#b30047', '#b30047', '#f79318', '#80d4ff', '#0047b3', '#ffcc33', '#0047b3',
               '#ffcc33', '#ffcc33', '#80d4ff', '#29a329', '#80d4ff', '#0047b3', '#80d4ff', '#80d4ff', '#ffcc33',
               '#f79318', '#f79318']
    t_rainbow = ['w', 'k', 'w', 'w', 'k', 'w', 'k', 'k', 'w', 'k', 'k', 'k', 'k', 'k', 'w', 'k', 'k', 'k', 'k', 'k']
    t_charge = ['w', 'w', 'k', 'k', 'w', 'w', 'k', 'w', 'k', 'w', 'w', 'w', 'w', 'w', 'k', 'w', 'w', 'w', 'w', 'w']
    t_polar = ['w', 'w', 'k', 'k', 'w', 'w', 'k', 'w', 'k', 'w', 'w', 'k', 'w', 'k', 'k', 'k', 'k', 'w', 'w', 'w']
    t_simple = ['k', 'k', 'w', 'w', 'k', 'w', 'w', 'k', 'w', 'k', 'k', 'k', 'k', 'w', 'w', 'w', 'w', 'k', 'k', 'k']
    t_none = ['k'] * 20
    t_amphi = ['k', 'k', 'w', 'w', 'w', 'k', 'w', 'k', 'w', 'k', 'k', 'k', 'w', 'k', 'w', 'k', 'k', 'k', 'w', 'w']
    schemes = {'rainbow': (f_rainbow, t_rainbow),
               'charge': (f_charge, t_charge),
               'polar': (f_polar, t_polar),
               'simple': (f_simple, t_simple),
               'none': (f_none, t_none),
               'amphipathic': (f_amphi, t_amphi)}

    d_eisberg = load_scale('eisenberg')[1]  # eisenberg hydrophobicity values for HM

    # thickness of the connecting line in front of residue i is lw[i - 1]
    if lineweights and len(sequence) > 1:
        lw = np.arange(0.1, 5.5, 5. / (len(sequence) - 1))  # line thickness array
        lw = lw[::-1]  # inverse order: thickest line first
    elif lineweights:
        lw = []  # a single residue has no connecting lines (and would divide by zero above)
    else:
        lw = [2.] * (len(sequence) - 1)

    # check which color coding to use
    if colorcoding not in schemes:
        print("Unknown color coding, 'rainbow' used instead")
        colorcoding = 'rainbow'
    face_colors, text_colors = schemes[colorcoding]
    df = dict(zip(aa, face_colors))
    dt = dict(zip(aa, text_colors))

    # residue angles: start at 90 degrees (top of the unit circle) and advance by -100 degrees per residue
    deg = np.arange(float(len(sequence))) * -100. + 90.
    rad = np.radians(deg)

    # create figure
    fig = plt.figure(frameon=False, figsize=(10, 10))
    ax = fig.add_subplot(111)

    old = None  # unit-circle coordinates of the previous residue
    hm = list()
    last = len(sequence) - 1

    # iterate over sequence
    for i, r in enumerate(rad):
        unit = (np.cos(r), np.sin(r))

        # connecting lines always run on the unit circle and are drawn up to residue index 36
        if old is not None and i <= 36:
            line = lines.Line2D((old[0], unit[0]), (old[1], unit[1]), transform=ax.transData, color='k',
                                linewidth=lw[i - 1])
            line.set_zorder(1)  # 1 = level behind circles
            ax.add_line(line)

        # residues 0-17 sit on the unit circle, 18-35 on a ring of radius 1.2, later ones at radius 1.4
        if i < 18:
            new = unit
        elif i < 36:
            new = (unit[0] * 1.2, unit[1] * 1.2)
        else:
            new = (unit[0] * 1.4, unit[1] * 1.4)

        residue = sequence[i]

        # plot circles
        circ = patches.Circle(new, radius=0.1, transform=ax.transData, edgecolor='k', facecolor=df[residue])
        circ.set_zorder(2)  # level in front of lines
        ax.add_patch(circ)

        # check if N- or C-terminus and add subscript, then plot AA letter
        # (subscripts are text strings: concatenating bytes to str raises TypeError on Python 3)
        if i == 0:
            label, size = residue + r'$_N$', 32
        elif i == last:
            label, size = residue + r'$_C$', 32
        else:
            label, size = residue, 36
        ax.text(new[0], new[1], label, va='center', ha='center', transform=ax.transData, size=size,
                color=dt[residue], fontweight='bold')

        eb = d_eisberg[residue][0]  # eisenberg value for this AA
        hm.append([eb * new[0], eb * new[1]])  # save eisenberg hydrophobicity vector value to later calculate HM

        old = unit  # save as previous coordinates

    # draw hydrophobic moment arrow if moment option
    if moment:
        v_hm = np.sum(np.array(hm), 0)
        x = .0333 * v_hm[0]
        y = .0333 * v_hm[1]
        ax.arrow(0., 0., x, y, head_width=0.04, head_length=0.03, transform=ax.transData, color='k', linewidth=6.)
        desc = PeptideDescriptor(sequence)  # calculate hydrophobic moment
        desc.calculate_moment()
        # place the HM value below the center when the arrow points roughly upwards, so it is not covered
        z = -0.2 if (abs(x) < 0.2 and y > 0.) else 0.2
        plt.text(0., z, str(round(desc.descriptor[0][0], 3)), fontdict={'fontsize': 20, 'fontweight': 'bold',
                                                                        'ha': 'center'})

    # plot shape: widen the view once residues are placed outside the unit circle
    if len(sequence) < 19:
        ax.set_xlim(-1.2, 1.2)
        ax.set_ylim(-1.2, 1.2)
    else:
        ax.set_xlim(-1.4, 1.4)
        ax.set_ylim(-1.4, 1.4)

    for side in ('right', 'top', 'left', 'bottom'):
        ax.spines[side].set_visible(False)
    cur_axes = plt.gca()
    cur_axes.axes.get_xaxis().set_visible(False)
    cur_axes.axes.get_yaxis().set_visible(False)
    plt.tight_layout()

    if seq:
        plt.title(sequence, fontweight='bold', fontsize=20)

    # show or save plot
    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()
def plot_pde(data, title=None, axlabels=None, filename=None, legendloc=2, x_min=0, x_max=1, colors=None, alpha=0.2):
    """A function to plot probability density estimations of given data vectors / matrices (row wise)

    :param data: {list / array} data of which underlying probability density function should be estimated and
        plotted.
    :param title: {str} plot title
    :param axlabels: {list of str} list containing the axis labels for the plot
    :param filename: {str} filename where to save the plot. *default = None* --> show the plot
    :param legendloc: {int} location of the figures legend. 1 = top right, 2 = top left ...
    :param x_min: {number} x-axis minimum
    :param x_max: {number} x-axis maximum
    :param colors: {str or list} color or list of colors (readable by matplotlib, e.g. hex) to be used to plot
        different data classes. Colors are reused cyclically if there are more data rows than colors; the given
        list is not modified.
    :param alpha: {float} color alpha for filling pde curve
    :return: None; the plot is shown interactively or saved to **filename**
    :Example:

    >>> data = np.random.random((3,100))
    >>> plot_pde(data)

    .. image:: ../docs/static/pde.png
        :height: 300px

    .. versionadded:: v2.2.1
    """
    if not axlabels:
        axlabels = ['Data', 'Estimated Density']
    if not title:
        title = ""

    # transform input to numpy array and reshape if it only contains one data row
    data = np.array(data)
    if len(data.shape) == 1:
        data = data.reshape((1, -1))
    single_row = data.shape[0] == 1

    # colors: work on a private list so the caller's argument is never changed in place
    if not colors:
        palette = ['#0B486B', '#3B8686', '#79BD9A', '#A8DBA8', '#CFF09E', '#0000ff', '#bf00ff', '#ff0040', '#009900']
    elif isinstance(colors, str):
        palette = [colors]
    else:
        palette = list(colors)

    # prepare figure
    fig, ax = plt.subplots()

    # set axis labels and title
    ax.set_xlabel(axlabels[0], fontsize=18)
    ax.set_ylabel(axlabels[1], fontsize=18)
    fig.suptitle(title, fontsize=16, fontweight='bold')

    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    # these are the values over which every kernel is evaluated
    space = np.linspace(x_min, x_max, 1000)

    # plot PDE for every data row
    for i, row in enumerate(data):
        kde = gaussian_kde(row)  # kernel density estimate fitted to this row
        density = kde(space)  # evaluate once, reuse for line and fill
        color = palette[i % len(palette)]  # cycle through the palette if there are more rows than colors
        label = 'Data' if single_row else 'Run ' + str(i)
        line = ax.plot(space, density, label=label)  # plot line
        plt.setp(line, color=color, linewidth=2.0, alpha=.9)  # set line width and color
        ax.fill_between(space, 0, density, color=color, alpha=alpha)  # fill area under line

    # show or save plot
    ax.legend(loc=legendloc)
    ax.set_xlim((x_min, x_max))
    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()
def _draw_violin_body(ax, values, loc, width, color):
    """Draw one symmetric violin for **values**, centered at x = **loc** and scaled to half-width **width**."""
    k = gaussian_kde(values)  # kernel density estimation
    mi = k.dataset.min()  # lower bound of violin
    ma = k.dataset.max()  # upper bound of violin
    rng = np.arange(mi, ma, (ma - mi) / 100.)  # range over which the PDE is performed
    v = k.evaluate(rng)  # violin profile (density curve)
    v = v / v.max() * width  # scaling the violin to the available space
    ax.fill_betweenx(rng, loc, v + loc, facecolor=color, alpha=0.6)
    ax.fill_betweenx(rng, loc, -v + loc, facecolor=color, alpha=0.6)


def plot_violin(x, colors=None, bp=False, filename=None, title=None, axlabels=None, y_min=0, y_max=1):
    """Create violin plots out of given data array
    (adapted from `Flavio Coelho <https://pyinsci.blogspot.ch/2009/09/violin-plot-with-matplotlib.html>`_.)

    :param x: {numpy.array} data to be plotted; a one-dimensional input gives a single violin, otherwise one violin
        is drawn per row
    :param colors: {str or list} face color of the violin plots, can also be list of colors with same dimension as
        **x**. Colors are reused cyclically if there are more violins than colors.
    :param bp: {bool} print a box plot inside violin
    :param filename: {str} location / filename where to save the plot to. *default = None* --> show the plot
    :param title: {str} Title of the plot.
    :param axlabels: {list of str} list containing the axis labels for the plot
    :param y_min: {number} y-axis minimum.
    :param y_max: {number} y-axis maximum.
    :return: None; the plot is shown interactively or saved to **filename**
    :Example:

    >>> data = np.random.normal(size=[5, 100])
    >>> plot_violin(data, colors=['#0B486B', '#0B486B', '#0B486B', '#CFF09E', '#CFF09E'], bp=True, y_min=-3, y_max=3)

    .. image:: ../docs/static/violins.png
        :height: 300px

    .. versionadded:: v2.2.2
    """
    # transform input to array (better handled by plotting functions)
    x = np.array(x)

    # check color input and transform to list
    if not colors:
        colors = ['#0B486B', '#3B8686', '#79BD9A', '#A8DBA8', '#CFF09E', '#0000ff', '#bf00ff', '#ff0040', '#009900']
    if isinstance(colors, str):
        colors = [colors]

    # scaling for available space
    dist = len(x) - 1
    w = min(0.15 * max(dist, 1.0), 0.5)

    fig, ax = plt.subplots()

    if len(x.shape) == 1:  # if only one dimensional data
        _draw_violin_body(ax, x, 1, 0.3, colors[0])

        if bp:  # print box plot if option is given
            medprops = dict(linestyle='-', linewidth=1, color='black')
            box = ax.boxplot(x, notch=True, positions=[1.], vert=True, patch_artist=True, medianprops=medprops)
            plt.setp(box['whiskers'], color='black')
            box['boxes'][0].set(facecolor=colors[0], edgecolor='black', alpha=0.7)

    else:  # one violin for every data element if multidimensional
        for p, d in enumerate(x):
            # violins sit at x = 1, 2, ...; the palette is cycled when it is shorter than the data
            _draw_violin_body(ax, d, p + 1, w, colors[p % len(colors)])

        if bp:  # print box plots if option is given
            box = ax.boxplot(x.T, notch=True, vert=True, patch_artist=True)
            plt.setp(box['whiskers'], color='black')
            plt.setp(box['medians'], linestyle='-', linewidth=1.5, color='black')
            for p, patch in enumerate(box['boxes']):
                patch.set(facecolor=colors[p % len(colors)], edgecolor='black', alpha=0.7)

    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    # a boolean is required here: the string 'off' is truthy and would not hide the top ticks
    ax.tick_params(axis='x', which='both', top=False)
    ax.yaxis.set_ticks_position('left')
    ax.set_ylim((y_min, y_max))

    if axlabels is None:
        axlabels = ['', '']
    ax.set_xlabel(axlabels[0], fontsize=18)
    ax.set_ylabel(axlabels[1], fontsize=18)

    if title:
        ax.set_title(title, fontsize=16, fontweight='bold')
    else:
        ax.set_title('Violin Plots', fontsize=16, fontweight='bold')

    if filename:
        plt.savefig(filename, dpi=150)
    else:
        plt.show()
def plot_aa_distr(sequences, color='#83AF9B', filename=None):
    """Method to plot the amino acid distribution of a given list of sequences

    :param sequences: {list} list of sequences to calculate the amino acid distribution for
    :param color: {str} color to be used (matplotlib style / hex)
    :param filename: {str} location / filename where to save the plot to. *default = None* --> show the plot
    :return: None; the plot is shown interactively or saved to **filename**
    :Example:

    >>> plot_aa_distr(['KLLKLLKKLLKLLK', 'WWRRWWRAARWWRRWWRR', 'ACDEFGHKLCMNPQRSTVWY', 'GGGGGIIKLWGGGGGGGGGGGGG'])

    .. image:: ../docs/static/AA_dist.png
        :height: 300px

    .. versionadded:: v2.2.5
    """
    concatseq = ''.join(sequences)
    # used as a mapping below: keys become the tick labels, values the bar heights
    aa = count_aas(concatseq, scale='relative')
    letters = list(aa.keys())
    frequencies = list(aa.values())
    positions = list(range(len(letters)))

    fig, ax = plt.subplots()

    # bar heights are the frequencies (the mapping's values), not its keys
    ax.bar(positions, frequencies, 0.9, color=color)

    # same margins as for the 20 natural amino acids: [-0.75, 19.75]
    plt.xlim([-0.75, len(letters) - 0.25])
    plt.ylim([0, max(frequencies) + 0.05])
    plt.xticks(positions, letters, fontweight='bold')
    plt.ylabel('Amino Acid Frequency', fontweight='bold')
    plt.title('Amino Acid Distribution', fontsize=16, fontweight='bold')

    # only left and bottom axes, no box
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    if filename:
        plt.savefig(filename, dpi=300)
    else:
        plt.show()