diff --git a/.travis.yml b/.travis.yml index 15242668..65d608b5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,8 +22,7 @@ install: - conda config --add channels conda-forge - conda create -n testenv --yes pip python=$PYTHON matplotlib - source activate testenv - - pip install pytest==4.3 - - pip install . + - pip install .[dev] script: pytest dabest diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2a7ca10a..1350d01b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,9 +8,9 @@ ## Did you write a patch that fixes a bug? -- Open a new GitHub [pull request](https://help.github.com/en/articles/about-pull-requests)(PR for short) with the patch. +- Open a new GitHub [pull request](https://help.github.com/en/articles/about-pull-requests) (PR for short) with the patch. -- Create the PR into the development branch, which is indicated by `v{latest version number}_dev`. +- Create the PR into the development branch, which is indicated by `v{latest version number}-dev`. - Clearly state the problem and solution in the PR description. Include the relevant [issue number](https://guides.github.com/features/issues/) if applicable. diff --git a/README.md b/README.md index 7e56dc3b..8032273e 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ [![Free-to-view citation](https://zenodo.org/badge/DOI/10.1038/s41592-019-0470-3.svg)](https://rdcu.be/bHhJ4) [![License](https://img.shields.io/badge/License-BSD%203--Clause--Clear-orange.svg)](https://spdx.org/licenses/BSD-3-Clause-Clear.html) + ## About DABEST is a package for **D**ata **A**nalysis using **B**ootstrap-Coupled **EST**imation. @@ -22,21 +23,12 @@ An estimation plot has two key features. DABEST powers [estimationstats.com](https://www.estimationstats.com/), allowing everyone access to high-quality estimation plots. -## Requirements - -DABEST has been tested on Python 3.5, 3.6, and 3.7. 
- -In addition, the following packages are also required: -- [numpy](https://www.numpy.org) (1.15) -- [scipy](https://www.scipy.org) (1.2) -- [matplotlib](https://www.matplotlib.org) (3.0) -- [seaborn](https://seaborn.pydata.org) (0.9) -- [pandas](https://pandas.pydata.org) (0.24). - -To obtain these package dependencies easily, it is highly recommended to download the [Anaconda distribution](https://www.continuum.io/downloads) of Python. ## Installation +This package is tested on Python 3.5, 3.6, and 3.7. +It is highly recommended to download the [Anaconda distribution](https://www.continuum.io/downloads) of Python in order to obtain the dependencies easily. + You can install this package via `pip`. To install, at the command line run @@ -56,6 +48,7 @@ Then, navigate to the cloned repo in the command line and run pip install . ``` + ## Usage ```python3 @@ -99,6 +92,7 @@ All contributions are welcome; please read the [Guidelines for contributing](htt We also have a [Code of Conduct](https://github.com/ACCLAB/DABEST-python/blob/master/CODE_OF_CONDUCT.md) to foster an inclusive and productive space. + ## Acknowledgements We would like to thank alpha testers from the [Claridge-Chang lab](https://www.claridgechang.net/): [Sangyu Xu](https://github.com/sangyu), [Xianyuan Zhang](https://github.com/XYZfar), [Farhan Mohammad](https://github.com/farhan8igib), Jurga Mituzaitė, and Stanislav Ott. 
diff --git a/dabest/__init__.py b/dabest/__init__.py index 6dbb4c7e..ed3bceec 100644 --- a/dabest/__init__.py +++ b/dabest/__init__.py @@ -23,4 +23,4 @@ from ._stats_tools import effsize as effsize from ._classes import TwoGroupsEffectSize -__version__ = "0.2.5" +__version__ = "0.2.6" diff --git a/dabest/_classes.py b/dabest/_classes.py index 99eb2e82..f9ffbb0e 100644 --- a/dabest/_classes.py +++ b/dabest/_classes.py @@ -460,7 +460,7 @@ def __init__(self, control, test, effect_size, 'statistic_wilcoxon': nan} """ - from numpy import array, isnan + from numpy import array, isnan, isinf from numpy import sort as npsort from numpy.random import choice, seed @@ -522,6 +522,20 @@ def __init__(self, control, test, effect_size, control, test, is_paired, effect_size, resamples, random_seed) self.__bootstraps = npsort(bootstraps) + + # Added in v0.2.6. + # Raises a UserWarning if there are any infinities in the bootstraps. + num_infinities = len(self.__bootstraps[isinf(self.__bootstraps)]) + + if num_infinities > 0: + warn_msg = "There are {} bootstrap(s) that are not defined. "\ + "This is likely due to smaple sample sizes. "\ + "The values in a bootstrap for a group will be more likely "\ + "to be all equal, with a resulting variance of zero. "\ + "The computation of Cohen's d and Hedges' g thus "\ + "involved a division by zero. " + warnings.warn(warn_msg.format(num_infinities), + category=UserWarning) self.__bias_correction = ci2g.compute_meandiff_bias_correction( self.__bootstraps, self.__difference) @@ -1103,6 +1117,7 @@ def plot(self, color_col=None, fig_size=None, dpi=100, + ax=None, swarmplot_kwargs=None, violinplot_kwargs=None, @@ -1112,6 +1127,7 @@ def plot(self, color_col=None, legend_kwargs=None): """ Creates an estimation plot for the effect size of interest. + Parameters ---------- @@ -1176,6 +1192,9 @@ def plot(self, color_col=None, The desired dimensions of the figure as a (length, width) tuple. 
dpi : int, default 100 The dots per inch of the resulting figure. + ax : matplotlib.Axes, default None + Provide an existing Axes for the plots to be created. If no Axes is + specified, a new matplotlib Figure will be created. swarmplot_kwargs : dict, default None Pass any keyword arguments accepted by the seaborn `swarmplot` command here, as a dict. If None, the following keywords are @@ -1206,9 +1225,14 @@ def plot(self, color_col=None, Returns ------- - A :class:`matplotlib.figure.Figure` with 2 Axes. - + A :class:`matplotlib.figure.Figure` with 2 Axes, if ``ax = None``. + The first axes (accessible with ``FigName.axes[0]``) contains the rawdata swarmplot; the second axes (accessible with ``FigName.axes[1]``) has the bootstrap distributions and effect sizes (with confidence intervals) plotted on it. + + If ``ax`` is specified, the rawdata swarmplot is accessed at ``ax`` + itself, while the effect size axes is accessed at ``ax.contrast_axes``. + See the last example below. + Examples -------- @@ -1244,6 +1268,14 @@ def plot(self, color_col=None, ... "Test 2", "Test 3") ... ) >>> fig6 = my_shared_control.mean_diff.plot() + + Creating estimation plots in individual panels of a figure. + + >>> f, axx = plt.subplots(nrows=2, ncols=2, figsize=(15, 15)) + >>> my_data.mean_diff.plot(ax=axx.flat[0]) + >>> my_data_paired.mean_diff.plot(ax=axx.flat[1]) + >>> my_shared_control.mean_diff.plot(ax=axx.flat[2]) + >>> my_shared_control.mean_diff.plot(ax=axx.flat[3], float_contrast=False) """ @@ -1344,4 +1376,4 @@ def dabest_obj(self): Returns the `dabest` object that invoked the current EffectSizeDataFrame class. 
""" - return self.__dabest_obj + return self.__dabest_obj \ No newline at end of file diff --git a/dabest/_stats_tools/confint_2group_diff.py b/dabest/_stats_tools/confint_2group_diff.py index 282e39c8..fc01e87c 100644 --- a/dabest/_stats_tools/confint_2group_diff.py +++ b/dabest/_stats_tools/confint_2group_diff.py @@ -159,7 +159,24 @@ def compute_bootstrapped_diff(x0, x1, is_paired, effect_size, # reset seed np.random.seed() - + + # check whether there are any infinities in the bootstrap, + # which likely indicates the sample sizes are too small as + # the computation of Cohen's d and Hedges' g necessitated + # a division by zero. + # Added in v0.2.6. + + # num_infinities = len(out[np.isinf(out)]) + # print(num_infinities) + # if num_infinities > 0: + # warn_msg = "There are {} bootstraps that are not defined. "\ + # "This is likely due to smaple sample sizes. "\ + # "The values in a bootstrap for a group will be more likely "\ + # "to be all equal, with a resulting variance of zero. "\ + # "The computation of Cohen's d and Hedges' g will therefore "\ + # "involved a division by zero. " + # warnings.warn(warn_msg.format(num_infinities), category="UserWarning") + return out diff --git a/dabest/_stats_tools/effsize.py b/dabest/_stats_tools/effsize.py index 2292f2a3..69c9cd5b 100644 --- a/dabest/_stats_tools/effsize.py +++ b/dabest/_stats_tools/effsize.py @@ -217,11 +217,13 @@ def cohens_d(control, test, is_paired=False): # assume the two arrays are ordered already. 
delta = test - control M = np.mean(delta) - return M / average_sd + divisor = average_sd else: M = np.mean(test) - np.mean(control) - return M / pooled_sd + divisor = pooled_sd + + return M / divisor diff --git a/dabest/plotter.py b/dabest/plotter.py index efe73384..a28fb5af 100644 --- a/dabest/plotter.py +++ b/dabest/plotter.py @@ -31,6 +31,7 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): fig_size=None, dpi=100, + ax=None, swarmplot_kwargs=None, violinplot_kwargs=None, @@ -254,28 +255,76 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): # Initialise the figure. # sns.set(context="talk", style='ticks') - init_fig_kwargs = dict(figsize=fig_size, dpi=plot_kwargs["dpi"]) + init_fig_kwargs = dict(figsize=fig_size, dpi=plot_kwargs["dpi"], + tight_layout=True) + + width_ratios_ga = [2.5, 1] + h_space_cummings = 0.3 + if plot_kwargs["ax"] is not None: + # New in v0.2.6. + # Use inset axes to create the estimation plot inside a single axes. + # Author: Adam L Nekimken. (PR #73) + inset_contrast = True + rawdata_axes = plot_kwargs["ax"] + ax_position = rawdata_axes.get_position() # [[x0, y0], [x1, y1]] + + fig = rawdata_axes.get_figure() + + if float_contrast is True: + axins = rawdata_axes.inset_axes( + [1, 0, + width_ratios_ga[1]/width_ratios_ga[0], 1]) + rawdata_axes.set_position( # [l, b, w, h] + [ax_position.x0, + ax_position.y0, + (ax_position.x1 - ax_position.x0) * (width_ratios_ga[0] / + sum(width_ratios_ga)), + (ax_position.y1 - ax_position.y0)]) + + contrast_axes = axins - # Here, we hardcode some figure parameters. 
- if float_contrast is True: - fig, axx = plt.subplots(ncols=2, - gridspec_kw={"width_ratios": [2.5, 1], - "wspace": 0}, - **init_fig_kwargs) + else: + axins = rawdata_axes.inset_axes([0, -1 - h_space_cummings, 1, 1]) + plot_height = ((ax_position.y1 - ax_position.y0) / + (2 + h_space_cummings)) + rawdata_axes.set_position( + [ax_position.x0, + ax_position.y0 + (1 + h_space_cummings) * plot_height, + (ax_position.x1 - ax_position.x0), + plot_height]) + + # If the contrast axes are NOT floating, create lists to store + # raw ylims and raw tick intervals, so that I can normalize + # their ylims later. + contrast_ax_ylim_low = list() + contrast_ax_ylim_high = list() + contrast_ax_ylim_tickintervals = list() + contrast_axes = axins + rawdata_axes.contrast_axes = axins else: - fig, axx = plt.subplots(nrows=2, - gridspec_kw={"hspace": 0.3}, - **init_fig_kwargs) - - # If the contrast axes are NOT floating, create lists to store raw ylims - # and raw tick intervals, so that I can normalize their ylims later. - contrast_ax_ylim_low = list() - contrast_ax_ylim_high = list() - contrast_ax_ylim_tickintervals = list() + inset_contrast = False + # Here, we hardcode some figure parameters. + if float_contrast is True: + fig, axx = plt.subplots( + ncols=2, + gridspec_kw={"width_ratios": width_ratios_ga, + "wspace": 0}, + **init_fig_kwargs) - rawdata_axes = axx[0] - contrast_axes = axx[1] + else: + fig, axx = plt.subplots(nrows=2, + gridspec_kw={"hspace": 0.3}, + **init_fig_kwargs) + # If the contrast axes are NOT floating, create lists to store + # raw ylims and raw tick intervals, so that I can normalize + # their ylims later. 
+ contrast_ax_ylim_low = list() + contrast_ax_ylim_high = list() + contrast_ax_ylim_tickintervals = list() + + rawdata_axes = axx[0] + contrast_axes = axx[1] rawdata_axes.set_frame_on(False) contrast_axes.set_frame_on(False) @@ -423,7 +472,8 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): current_ci_high = results.bca_high[j] # Create the violinplot. - v = contrast_axes.violinplot(current_bootstrap, + # New in v0.2.6: drop infinities (positive or negative) before plotting. + v = contrast_axes.violinplot(current_bootstrap[~np.isinf(current_bootstrap)], positions=[tick], **violinplot_kwargs) # Turn the violinplot into half, and color it the same as the swarmplot. @@ -651,19 +701,19 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): # Compute the end of each x-axes line. rightend_ticks = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip) - - for ax in fig.axes: + + for ax in [rawdata_axes, contrast_axes]: sns.despine(ax=ax, bottom=True) - + ylim = ax.get_ylim() xlim = ax.get_xlim() redraw_axes_kwargs['y'] = ylim[0] - + for k, start_tick in enumerate(ticks_to_skip): end_tick = rightend_ticks[k] ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) - + ax.set_ylim(ylim) del redraw_axes_kwargs['y'] diff --git a/dabest/pytest.ini b/dabest/pytest.ini index 23d0e592..9ed7c623 100644 --- a/dabest/pytest.ini +++ b/dabest/pytest.ini @@ -1,4 +1,9 @@ [pytest] filterwarnings = ignore::UserWarning - ignore::DeprecationWarning \ No newline at end of file + ignore::DeprecationWarning + +addopts = --mpl --mpl-baseline-path=dabest/tests/baseline_images + +markers = + mpl_image_compare: mark a test as implementing mpl image comparison. \ No newline at end of file diff --git a/dabest/tests/README.md b/dabest/tests/README.md index 5a91e42b..e0fab09a 100644 --- a/dabest/tests/README.md +++ b/dabest/tests/README.md @@ -1,9 +1,8 @@ # Testing -We use [pytest](https://docs.pytest.org/en/latest) to execute the tests. 
More documentation of the testing paradigm will be added in the near future. +We use [pytest](https://docs.pytest.org/en/latest) to execute the tests. For testing of plot generation, we use the [mpl plugin](https://github.com/matplotlib/pytest-mpl) for pytest. A range of different plots are created, and compared against the baseline images in the `baseline_images` subfolder. To run the tests, go to the root of this repo directory and run - ```shell pytest dabest ``` diff --git a/dabest/tests/baseline_images/test_01_gardner_altman_unpaired_meandiff.png b/dabest/tests/baseline_images/test_01_gardner_altman_unpaired_meandiff.png new file mode 100644 index 00000000..4215214b Binary files /dev/null and b/dabest/tests/baseline_images/test_01_gardner_altman_unpaired_meandiff.png differ diff --git a/dabest/tests/baseline_images/test_02_gardner_altman_unpaired_mediandiff.png b/dabest/tests/baseline_images/test_02_gardner_altman_unpaired_mediandiff.png new file mode 100644 index 00000000..662e1a83 Binary files /dev/null and b/dabest/tests/baseline_images/test_02_gardner_altman_unpaired_mediandiff.png differ diff --git a/dabest/tests/baseline_images/test_03_gardner_altman_unpaired_hedges_g.png b/dabest/tests/baseline_images/test_03_gardner_altman_unpaired_hedges_g.png new file mode 100644 index 00000000..ff73cfa4 Binary files /dev/null and b/dabest/tests/baseline_images/test_03_gardner_altman_unpaired_hedges_g.png differ diff --git a/dabest/tests/baseline_images/test_04_gardner_altman_paired_hedges_g.png b/dabest/tests/baseline_images/test_04_gardner_altman_paired_hedges_g.png new file mode 100644 index 00000000..3e69e6b2 Binary files /dev/null and b/dabest/tests/baseline_images/test_04_gardner_altman_paired_hedges_g.png differ diff --git a/dabest/tests/baseline_images/test_04_gardner_altman_paired_meandiff.png b/dabest/tests/baseline_images/test_04_gardner_altman_paired_meandiff.png new file mode 100644 index 00000000..c96849f3 Binary files /dev/null and 
b/dabest/tests/baseline_images/test_04_gardner_altman_paired_meandiff.png differ diff --git a/dabest/tests/baseline_images/test_05_cummings_two_group_unpaired_meandiff.png b/dabest/tests/baseline_images/test_05_cummings_two_group_unpaired_meandiff.png new file mode 100644 index 00000000..0be82d07 Binary files /dev/null and b/dabest/tests/baseline_images/test_05_cummings_two_group_unpaired_meandiff.png differ diff --git a/dabest/tests/baseline_images/test_06_cummings_two_group_paired_meandiff.png b/dabest/tests/baseline_images/test_06_cummings_two_group_paired_meandiff.png new file mode 100644 index 00000000..74b03c9b Binary files /dev/null and b/dabest/tests/baseline_images/test_06_cummings_two_group_paired_meandiff.png differ diff --git a/dabest/tests/baseline_images/test_07_cummings_multi_group_unpaired.png b/dabest/tests/baseline_images/test_07_cummings_multi_group_unpaired.png new file mode 100644 index 00000000..d4a9b23b Binary files /dev/null and b/dabest/tests/baseline_images/test_07_cummings_multi_group_unpaired.png differ diff --git a/dabest/tests/baseline_images/test_08_cummings_multi_group_paired.png b/dabest/tests/baseline_images/test_08_cummings_multi_group_paired.png new file mode 100644 index 00000000..06196200 Binary files /dev/null and b/dabest/tests/baseline_images/test_08_cummings_multi_group_paired.png differ diff --git a/dabest/tests/baseline_images/test_09_cummings_shared_control.png b/dabest/tests/baseline_images/test_09_cummings_shared_control.png new file mode 100644 index 00000000..7f92a7df Binary files /dev/null and b/dabest/tests/baseline_images/test_09_cummings_shared_control.png differ diff --git a/dabest/tests/baseline_images/test_10_cummings_multi_groups.png b/dabest/tests/baseline_images/test_10_cummings_multi_groups.png new file mode 100644 index 00000000..488dc8f1 Binary files /dev/null and b/dabest/tests/baseline_images/test_10_cummings_multi_groups.png differ diff --git a/dabest/tests/baseline_images/test_11_inset_plots.png 
b/dabest/tests/baseline_images/test_11_inset_plots.png new file mode 100644 index 00000000..953a67cf Binary files /dev/null and b/dabest/tests/baseline_images/test_11_inset_plots.png differ diff --git a/dabest/tests/baseline_images/test_12_gardner_altman_ylabel.png b/dabest/tests/baseline_images/test_12_gardner_altman_ylabel.png new file mode 100644 index 00000000..430ba0c8 Binary files /dev/null and b/dabest/tests/baseline_images/test_12_gardner_altman_ylabel.png differ diff --git a/dabest/tests/baseline_images/test_13_multi_2group_color.png b/dabest/tests/baseline_images/test_13_multi_2group_color.png new file mode 100644 index 00000000..8261bac0 Binary files /dev/null and b/dabest/tests/baseline_images/test_13_multi_2group_color.png differ diff --git a/dabest/tests/baseline_images/test_14_gardner_altman_paired_color.png b/dabest/tests/baseline_images/test_14_gardner_altman_paired_color.png new file mode 100644 index 00000000..b003dd4a Binary files /dev/null and b/dabest/tests/baseline_images/test_14_gardner_altman_paired_color.png differ diff --git a/dabest/tests/baseline_images/test_15_change_palette_a.png b/dabest/tests/baseline_images/test_15_change_palette_a.png new file mode 100644 index 00000000..cdc37ca7 Binary files /dev/null and b/dabest/tests/baseline_images/test_15_change_palette_a.png differ diff --git a/dabest/tests/baseline_images/test_16_change_palette_b.png b/dabest/tests/baseline_images/test_16_change_palette_b.png new file mode 100644 index 00000000..a446f8cc Binary files /dev/null and b/dabest/tests/baseline_images/test_16_change_palette_b.png differ diff --git a/dabest/tests/baseline_images/test_17_change_palette_c.png b/dabest/tests/baseline_images/test_17_change_palette_c.png new file mode 100644 index 00000000..4ea64c39 Binary files /dev/null and b/dabest/tests/baseline_images/test_17_change_palette_c.png differ diff --git a/dabest/tests/baseline_images/test_18_desat.png b/dabest/tests/baseline_images/test_18_desat.png new file mode 100644 
index 00000000..a5e39e27 Binary files /dev/null and b/dabest/tests/baseline_images/test_18_desat.png differ diff --git a/dabest/tests/baseline_images/test_19_dot_sizes.png b/dabest/tests/baseline_images/test_19_dot_sizes.png new file mode 100644 index 00000000..87ad8851 Binary files /dev/null and b/dabest/tests/baseline_images/test_19_dot_sizes.png differ diff --git a/dabest/tests/baseline_images/test_20_change_ylims.png b/dabest/tests/baseline_images/test_20_change_ylims.png new file mode 100644 index 00000000..8ebd0997 Binary files /dev/null and b/dabest/tests/baseline_images/test_20_change_ylims.png differ diff --git a/dabest/tests/baseline_images/test_21_invert_ylim.png b/dabest/tests/baseline_images/test_21_invert_ylim.png new file mode 100644 index 00000000..f72e823d Binary files /dev/null and b/dabest/tests/baseline_images/test_21_invert_ylim.png differ diff --git a/dabest/tests/baseline_images/test_22_ticker_gardner_altman.png b/dabest/tests/baseline_images/test_22_ticker_gardner_altman.png new file mode 100644 index 00000000..2e57f4e2 Binary files /dev/null and b/dabest/tests/baseline_images/test_22_ticker_gardner_altman.png differ diff --git a/dabest/tests/baseline_images/test_23_ticker_cumming.png b/dabest/tests/baseline_images/test_23_ticker_cumming.png new file mode 100644 index 00000000..4486cc3a Binary files /dev/null and b/dabest/tests/baseline_images/test_23_ticker_cumming.png differ diff --git a/dabest/tests/baseline_images/test_25_style_sheets.png b/dabest/tests/baseline_images/test_25_style_sheets.png new file mode 100644 index 00000000..28c056e9 Binary files /dev/null and b/dabest/tests/baseline_images/test_25_style_sheets.png differ diff --git a/dabest/tests/test_03_plotting.py b/dabest/tests/test_03_plotting.py index 6459e583..ecdca87e 100644 --- a/dabest/tests/test_03_plotting.py +++ b/dabest/tests/test_03_plotting.py @@ -1,143 +1,270 @@ -# #! 
/usr/bin/env python +#!/usr/bin/env python3 -# Load Libraries +# -*- coding: utf-8 -*- -import pytest -import matplotlib as mpl -import matplotlib.pyplot as plt -mpl.use('Agg') +import pytest import numpy as np -import scipy as sp import pandas as pd -import seaborn as sns -from .._api import load -from .utils import create_dummy_dataset, get_swarm_yspans - - - -def test_gardner_altman_unpaired(): - - base_mean = np.random.randint(10, 101) - seed, ptp, df = create_dummy_dataset(base_mean=base_mean) - print('\nSeed = {}; base mean = {}'.format(seed, base_mean)) - - for c in df.columns[1:-1]: - print('{}...'.format(c)) - - # Create Gardner-Altman plot. - rand_swarm_ylim = (np.random.uniform(base_mean-10, base_mean, 1), - np.random.uniform(base_mean, base_mean+10, 1)) - two_group_unpaired = load(df, idx=(df.columns[0], c)) - f1 = two_group_unpaired.mean_diff.plot(swarm_ylim=rand_swarm_ylim, - swarm_label="Raw swarmplot...", - contrast_label="Contrast!") - - rawswarm_axes = f1.axes[0] - contrast_axes = f1.axes[1] - - # Check ylims match the desired ones. - assert rawswarm_axes.get_ylim()[0] == pytest.approx(rand_swarm_ylim[0]) - assert rawswarm_axes.get_ylim()[1] == pytest.approx(rand_swarm_ylim[1]) - - - # Check each swarmplot group matches canonical seaborn swarmplot. - _, swarmplt = plt.subplots(1) - swarmplt.set_ylim(rand_swarm_ylim) - sns.swarmplot(data=df[[df.columns[0], c]], ax=swarmplt) - sns_yspans = [] - for coll in swarmplt.collections: - sns_yspans.append(get_swarm_yspans(coll)) - dabest_yspans = [get_swarm_yspans(coll) - for coll in rawswarm_axes.collections] - for j, span in enumerate(sns_yspans): - assert span == pytest.approx(dabest_yspans[j]) - - # Check xtick labels. 
- swarm_xticks = [a.get_text() for a in rawswarm_axes.get_xticklabels()] - assert swarm_xticks[0] == "{}\nN = 30".format(df.columns[0]) - assert swarm_xticks[1] == "{}\nN = 30".format(c) - - contrast_xticks = [a.get_text() for a in contrast_axes.get_xticklabels()] - assert contrast_xticks[1] == "{}\nminus\n{}".format(c, df.columns[0]) - - # Check ylabels. - assert rawswarm_axes.get_ylabel() == "Raw swarmplot..." - assert contrast_axes.get_ylabel() == "Contrast!" - - - - - -def test_cummings_unpaired(): - base_mean = np.random.randint(-5, 5) - seed, ptp, df = create_dummy_dataset(base_mean=base_mean, expt_groups=7) - print('\nSeed = {}; base mean = {}'.format(seed, base_mean)) - - IDX = (('0','5'), ('3','2'), ('4', '1', '6')) - multi_2group_unpaired = load(df, idx=IDX) - - rand_swarm_ylim = (np.random.uniform(base_mean-10, base_mean, 1), - np.random.uniform(base_mean, base_mean+10, 1)) - - if base_mean == 0: - # Have to set the contrast ylim, because the way I dynamically generate - # the contrast ylims will flunk out with base_mean = 0. - rand_contrast_ylim = (-0.5, 0.5) - else: - rand_contrast_ylim = (-base_mean/3, base_mean/3) - - f1 = multi_2group_unpaired.mean_diff.plot(swarm_ylim=rand_swarm_ylim, - contrast_ylim=rand_contrast_ylim, - swarm_label="Raw swarmplot!", - contrast_label="Contrast...") - - rawswarm_axes = f1.axes[0] - contrast_axes = f1.axes[1] - - # Check swarm ylims match the desired ones. - assert rawswarm_axes.get_ylim()[0] == pytest.approx(rand_swarm_ylim[0]) - assert rawswarm_axes.get_ylim()[1] == pytest.approx(rand_swarm_ylim[1]) - - # Check contrast ylims match the desired ones. - assert contrast_axes.get_ylim()[0] == pytest.approx(rand_contrast_ylim[0]) - assert contrast_axes.get_ylim()[1] == pytest.approx(rand_contrast_ylim[1]) - - # Check xtick labels. 
- idx_flat = [g for t in IDX for g in t] - swarm_xticks = [a.get_text() for a in rawswarm_axes.get_xticklabels()] - for j, xtick in enumerate(swarm_xticks): - assert xtick == "{}\nN = 30".format(idx_flat[j]) - - contrast_xticks = [a.get_text() for a in contrast_axes.get_xticklabels()] - assert contrast_xticks[1] == "5\nminus\n0" - assert contrast_xticks[3] == "2\nminus\n3" - assert contrast_xticks[5] == "1\nminus\n4" - assert contrast_xticks[6] == "6\nminus\n4" - - # Check ylabels. - assert rawswarm_axes.get_ylabel() == "Raw swarmplot!" - assert contrast_axes.get_ylabel() == "Contrast..." - - - - - -def test_gardner_altman_paired(): - base_mean = np.random.randint(-5, 5) - seed, ptp, df = create_dummy_dataset(base_mean=base_mean) - - - # Check that the plot data matches the raw data. - two_group_paired = load(df, idx=("1", "2"), id_col="idcol", paired=True) - f1 = two_group_paired.mean_diff.plot() - rawswarm_axes = f1.axes[0] - contrast_axes = f1.axes[1] - assert df['1'].tolist() == [l.get_ydata()[0] for l in rawswarm_axes.lines] - assert df['2'].tolist() == [l.get_ydata()[1] for l in rawswarm_axes.lines] +import matplotlib as mpl +mpl.use('Agg') +import matplotlib.pyplot as plt +import matplotlib.ticker as Ticker - # Check that id_col must be specified. - err_to_catch = "`id_col` must be specified if `is_paired` is set to True." 
- with pytest.raises(IndexError, match=err_to_catch): - this_will_not_work = load(df, idx=("1", "2"), paired=True) +from .._api import load +from .utils import create_demo_dataset + + + +df = create_demo_dataset() + +two_groups_unpaired = load(df, idx=("Control 1", "Test 1")) + +two_groups_paired = load(df, idx=("Control 1", "Test 1"), + paired=True, id_col="ID") + +multi_2group = load(df, idx=(("Control 1", "Test 1",), + ("Control 2", "Test 2")) + ) + +multi_2group_paired = load(df, idx=(("Control 1", "Test 1"), + ("Control 2", "Test 2")), + paired=True, id_col="ID") + +shared_control = load(df, idx=("Control 1", "Test 1", + "Test 2", "Test 3", + "Test 4", "Test 5", "Test 6") + ) + +multi_groups = load(df, idx=(("Control 1", "Test 1",), + ("Control 2", "Test 2","Test 3"), + ("Control 3", "Test 4","Test 5", "Test 6") + )) + + + +@pytest.mark.mpl_image_compare +def test_01_gardner_altman_unpaired_meandiff(): + return two_groups_unpaired.mean_diff.plot(); + + + +@pytest.mark.mpl_image_compare +def test_02_gardner_altman_unpaired_mediandiff(): + return two_groups_unpaired.median_diff.plot(); + + + +@pytest.mark.mpl_image_compare +def test_03_gardner_altman_unpaired_hedges_g(): + return two_groups_unpaired.hedges_g.plot(); + + + +@pytest.mark.mpl_image_compare +def test_04_gardner_altman_paired_meandiff(): + return two_groups_paired.mean_diff.plot(); + + + +@pytest.mark.mpl_image_compare +def test_04_gardner_altman_paired_hedges_g(): + return two_groups_paired.hedges_g.plot(); + + + +@pytest.mark.mpl_image_compare +def test_05_cummings_two_group_unpaired_meandiff(): + return two_groups_unpaired.mean_diff.plot(fig_size=(4, 6), + float_contrast=False); + + + +@pytest.mark.mpl_image_compare +def test_06_cummings_two_group_paired_meandiff(): + return two_groups_paired.mean_diff.plot(fig_size=(6, 6), + float_contrast=False); + + + +@pytest.mark.mpl_image_compare +def test_07_cummings_multi_group_unpaired(): + return multi_2group.mean_diff.plot(); + + + + 
+@pytest.mark.mpl_image_compare +def test_08_cummings_multi_group_paired(): + return multi_2group_paired.mean_diff.plot(fig_size=(6, 6)); + + + +@pytest.mark.mpl_image_compare +def test_09_cummings_shared_control(): + return shared_control.mean_diff.plot(); + + + +@pytest.mark.mpl_image_compare +def test_10_cummings_multi_groups(): + return multi_groups.mean_diff.plot(); + + + +@pytest.mark.mpl_image_compare(tolerance=20) +def test_11_inset_plots(): + + # Load the iris dataset. Requires internet access. + iris = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/iris.csv") + iris_melt = pd.melt(iris.reset_index(), + id_vars=["species", "index"], var_name="metric") + + + + # Load the above data into `dabest`. + iris_dabest1 = load(data=iris, x="species", y="petal_width", + idx=("setosa", "versicolor", "virginica")) + + iris_dabest2 = load(data=iris, x="species", y="sepal_width", + idx=("setosa", "versicolor")) + + iris_dabest3 = load(data=iris_melt[iris_melt.species=="setosa"], + x="metric", y="value", + idx=("sepal_length", "sepal_width"), + paired=True, id_col="index") + + + + # Create Figure. 
+ fig, ax = plt.subplots(nrows=2, ncols=2, + figsize=(15, 15), + gridspec_kw={"wspace":0.5}) + + iris_dabest1.mean_diff.plot(ax=ax.flat[0]); + + iris_dabest2.mean_diff.plot(ax=ax.flat[1]); + + iris_dabest3.mean_diff.plot(ax=ax.flat[2]); + + iris_dabest3.mean_diff.plot(ax=ax.flat[3], float_contrast=False); + + return fig + + + +@pytest.mark.mpl_image_compare +def test_12_gardner_altman_ylabel(): + return two_groups_unpaired.mean_diff.plot(swarm_label="This is my\nrawdata", + contrast_label="The bootstrap\ndistribtions!"); + + + +@pytest.mark.mpl_image_compare +def test_13_multi_2group_color(): + return multi_2group.mean_diff.plot(color_col="Gender"); + + + +@pytest.mark.mpl_image_compare +def test_14_gardner_altman_paired_color(): + return two_groups_paired.mean_diff.plot(fig_size=(6, 6), + color_col="Gender"); + + +@pytest.mark.mpl_image_compare +def test_15_change_palette_a(): + return multi_2group.mean_diff.plot(fig_size=(7, 6), + color_col="Gender", + custom_palette="Dark2"); + + +@pytest.mark.mpl_image_compare +def test_16_change_palette_b(): + return multi_2group.mean_diff.plot(custom_palette="Paired"); + + + +my_color_palette = {"Control 1" : "blue", + "Test 1" : "purple", + "Control 2" : "#cb4b16", # This is a hex string. + "Test 2" : (0., 0.7, 0.2) # This is a RGB tuple. 
+ } + +@pytest.mark.mpl_image_compare +def test_17_change_palette_c(): + return multi_2group.mean_diff.plot(custom_palette=my_color_palette); + + + +@pytest.mark.mpl_image_compare +def test_18_desat(): + return multi_2group.mean_diff.plot(custom_palette=my_color_palette, + swarm_desat=0.75, + halfviolin_desat=0.25); + + + +@pytest.mark.mpl_image_compare +def test_19_dot_sizes(): + return multi_2group.mean_diff.plot(raw_marker_size=3, + es_marker_size=12); + + + + +@pytest.mark.mpl_image_compare +def test_20_change_ylims(): + return multi_2group.mean_diff.plot(swarm_ylim=(0, 5), + contrast_ylim=(-2, 2)); + + +@pytest.mark.mpl_image_compare +def test_21_invert_ylim(): + return multi_2group.mean_diff.plot(contrast_ylim=(2, -2), + contrast_label="More negative is better!"); + + + +@pytest.mark.mpl_image_compare +def test_22_ticker_gardner_altman(): + + f = two_groups_unpaired.mean_diff.plot() + + rawswarm_axes = f.axes[0] + contrast_axes = f.axes[1] + + rawswarm_axes.yaxis.set_major_locator(Ticker.MultipleLocator(1)) + rawswarm_axes.yaxis.set_minor_locator(Ticker.MultipleLocator(0.5)) + + contrast_axes.yaxis.set_major_locator(Ticker.MultipleLocator(0.5)) + contrast_axes.yaxis.set_minor_locator(Ticker.MultipleLocator(0.25)) + + return f + + + +@pytest.mark.mpl_image_compare +def test_23_ticker_cumming(): + f = multi_2group.mean_diff.plot(swarm_ylim=(0,6), + contrast_ylim=(-3, 1)) + + rawswarm_axes = f.axes[0] + contrast_axes = f.axes[1] + + rawswarm_axes.yaxis.set_major_locator(Ticker.MultipleLocator(2)) + rawswarm_axes.yaxis.set_minor_locator(Ticker.MultipleLocator(1)) + + contrast_axes.yaxis.set_major_locator(Ticker.MultipleLocator(0.5)) + contrast_axes.yaxis.set_minor_locator(Ticker.MultipleLocator(0.25)) + + return f + + + +@pytest.mark.mpl_image_compare +def test_25_style_sheets(): + plt.style.use("dark_background") + + return multi_2group.mean_diff.plot(); \ No newline at end of file diff --git a/dabest/tests/utils.py b/dabest/tests/utils.py index 
2efd506c..6dc5da37 100644 --- a/dabest/tests/utils.py +++ b/dabest/tests/utils.py @@ -1,45 +1,42 @@ -def create_dummy_dataset(seed=None, n=30, base_mean=0, - plus_minus=5, expt_groups=7, - scale_means=1., scale_std=1.): - """ - Creates a dummy dataset for plotting. - Returns the seed used to generate the random numbers, - the maximum possible difference between mean differences, - and the dataset itself. - """ +def create_demo_dataset(seed=9999, N=20): + import numpy as np - import scipy as sp import pandas as pd + from scipy.stats import norm # Used in generation of populations. - # Set a random seed. - if seed is None: - random_seed = np.random.randint(low=1, high=1000, size=1)[0] - else: - if isinstance(seed, int): - random_seed = seed - else: - raise TypeError('{} is not an integer.'.format(seed)) + np.random.seed(9999) # Fix the seed so the results are replicable. + # pop_size = 10000 # Size of each population. + + # Create samples + c1 = norm.rvs(loc=3, scale=0.4, size=N) + c2 = norm.rvs(loc=3.5, scale=0.75, size=N) + c3 = norm.rvs(loc=3.25, scale=0.4, size=N) - # Generate a set of random means - np.random.seed(random_seed) - MEANS = np.repeat(base_mean, expt_groups) + \ - np.random.uniform(base_mean-plus_minus, base_mean+plus_minus, - expt_groups) * scale_means - SCALES = np.random.random(size=expt_groups) * scale_std + t1 = norm.rvs(loc=3.5, scale=0.5, size=N) + t2 = norm.rvs(loc=2.5, scale=0.6, size=N) + t3 = norm.rvs(loc=3, scale=0.75, size=N) + t4 = norm.rvs(loc=3.5, scale=0.75, size=N) + t5 = norm.rvs(loc=3.25, scale=0.4, size=N) + t6 = norm.rvs(loc=3.25, scale=0.4, size=N) - max_mean_diff = np.ptp(MEANS) - dataset = list() - for i, m in enumerate(MEANS): - pop = sp.stats.norm.rvs(loc=m, scale=SCALES[i], size=10000) - sample = np.random.choice(pop, size=n, replace=False) - dataset.append(sample) + # Add a `gender` column for coloring the data. 
+ females = np.repeat('Female', N/2).tolist() + males = np.repeat('Male', N/2).tolist() + gender = females + males - df = pd.DataFrame(dataset).T - df["idcol"] = pd.Series(range(1, n+1)) - df.columns = [str(c) for c in df.columns] + # Add an `id` column for paired data plotting. + id_col = pd.Series(range(1, N+1)) - return random_seed, max_mean_diff, df + # Combine samples and gender into a DataFrame. + df = pd.DataFrame({'Control 1' : c1, 'Test 1' : t1, + 'Control 2' : c2, 'Test 2' : t2, + 'Control 3' : c3, 'Test 3' : t3, + 'Test 4' : t4, 'Test 5' : t5, 'Test 6' : t6, + 'Gender' : gender, 'ID' : id_col + }) + + return df @@ -58,3 +55,48 @@ def get_swarm_yspans(coll, round_result=False, decimals=12): return y.min(), y.max() except ValueError: return None + + + +# def create_dummy_dataset(seed=None, n=30, base_mean=0, +# plus_minus=5, expt_groups=7, +# scale_means=1., scale_std=1.): +# """ +# Creates a dummy dataset for plotting. +# Returns the seed used to generate the random numbers, +# the maximum possible difference between mean differences, +# and the dataset itself. +# """ +# import numpy as np +# import scipy as sp +# import pandas as pd +# +# # Set a random seed. 
+# if seed is None: +# random_seed = np.random.randint(low=1, high=1000, size=1)[0] +# else: +# if isinstance(seed, int): +# random_seed = seed +# else: +# raise TypeError('{} is not an integer.'.format(seed)) +# +# # Generate a set of random means +# np.random.seed(random_seed) +# MEANS = np.repeat(base_mean, expt_groups) + \ +# np.random.uniform(base_mean-plus_minus, base_mean+plus_minus, +# expt_groups) * scale_means +# SCALES = np.random.random(size=expt_groups) * scale_std +# +# max_mean_diff = np.ptp(MEANS) +# +# dataset = list() +# for i, m in enumerate(MEANS): +# pop = sp.stats.norm.rvs(loc=m, scale=SCALES[i], size=10000) +# sample = np.random.choice(pop, size=n, replace=False) +# dataset.append(sample) +# +# df = pd.DataFrame(dataset).T +# df["idcol"] = pd.Series(range(1, n+1)) +# df.columns = [str(c) for c in df.columns] +# +# return random_seed, max_mean_diff, df diff --git a/docs/source/_images/tutorial_27_1.png b/docs/source/_images/tutorial_27_1.png deleted file mode 100644 index e8007116..00000000 Binary files a/docs/source/_images/tutorial_27_1.png and /dev/null differ diff --git a/docs/source/_images/tutorial_28_0.png b/docs/source/_images/tutorial_28_0.png new file mode 100644 index 00000000..cd801d51 Binary files /dev/null and b/docs/source/_images/tutorial_28_0.png differ diff --git a/docs/source/_images/tutorial_28_1.png b/docs/source/_images/tutorial_28_1.png deleted file mode 100644 index 993931a6..00000000 Binary files a/docs/source/_images/tutorial_28_1.png and /dev/null differ diff --git a/docs/source/_images/tutorial_29_0.png b/docs/source/_images/tutorial_29_0.png new file mode 100644 index 00000000..974d55dd Binary files /dev/null and b/docs/source/_images/tutorial_29_0.png differ diff --git a/docs/source/_images/tutorial_30_0.png b/docs/source/_images/tutorial_30_0.png deleted file mode 100644 index 2657922d..00000000 Binary files a/docs/source/_images/tutorial_30_0.png and /dev/null differ diff --git 
a/docs/source/_images/tutorial_31_0.png b/docs/source/_images/tutorial_31_0.png new file mode 100644 index 00000000..f121a936 Binary files /dev/null and b/docs/source/_images/tutorial_31_0.png differ diff --git a/docs/source/_images/tutorial_32_1.png b/docs/source/_images/tutorial_32_1.png deleted file mode 100644 index 1a98398d..00000000 Binary files a/docs/source/_images/tutorial_32_1.png and /dev/null differ diff --git a/docs/source/_images/tutorial_33_0.png b/docs/source/_images/tutorial_33_0.png index 98c5b41d..25b8c01c 100644 Binary files a/docs/source/_images/tutorial_33_0.png and b/docs/source/_images/tutorial_33_0.png differ diff --git a/docs/source/_images/tutorial_34_0.png b/docs/source/_images/tutorial_34_0.png new file mode 100644 index 00000000..7ff93abf Binary files /dev/null and b/docs/source/_images/tutorial_34_0.png differ diff --git a/docs/source/_images/tutorial_35_0.png b/docs/source/_images/tutorial_35_0.png deleted file mode 100644 index 6564843e..00000000 Binary files a/docs/source/_images/tutorial_35_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_36_0.png b/docs/source/_images/tutorial_36_0.png new file mode 100644 index 00000000..9ffa7426 Binary files /dev/null and b/docs/source/_images/tutorial_36_0.png differ diff --git a/docs/source/_images/tutorial_37_0.png b/docs/source/_images/tutorial_37_0.png deleted file mode 100644 index 4120c88c..00000000 Binary files a/docs/source/_images/tutorial_37_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_38_0.png b/docs/source/_images/tutorial_38_0.png new file mode 100644 index 00000000..17ecf4e8 Binary files /dev/null and b/docs/source/_images/tutorial_38_0.png differ diff --git a/docs/source/_images/tutorial_42_0.png b/docs/source/_images/tutorial_42_0.png deleted file mode 100644 index 047287b6..00000000 Binary files a/docs/source/_images/tutorial_42_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_43_0.png 
b/docs/source/_images/tutorial_43_0.png new file mode 100644 index 00000000..e01ec359 Binary files /dev/null and b/docs/source/_images/tutorial_43_0.png differ diff --git a/docs/source/_images/tutorial_47_0.png b/docs/source/_images/tutorial_47_0.png deleted file mode 100644 index 5605b28f..00000000 Binary files a/docs/source/_images/tutorial_47_0.png and /dev/null differ diff --git a/docs/source/_images/tutorial_48_0.png b/docs/source/_images/tutorial_48_0.png new file mode 100644 index 00000000..d5691bae Binary files /dev/null and b/docs/source/_images/tutorial_48_0.png differ diff --git a/docs/source/_images/tutorial_53_0.png b/docs/source/_images/tutorial_53_0.png new file mode 100644 index 00000000..66b0a542 Binary files /dev/null and b/docs/source/_images/tutorial_53_0.png differ diff --git a/docs/source/_images/tutorial_53_1.png b/docs/source/_images/tutorial_53_1.png deleted file mode 100644 index f6630c61..00000000 Binary files a/docs/source/_images/tutorial_53_1.png and /dev/null differ diff --git a/docs/source/_images/tutorial_56_0.png b/docs/source/_images/tutorial_56_0.png new file mode 100644 index 00000000..d86bfc53 Binary files /dev/null and b/docs/source/_images/tutorial_56_0.png differ diff --git a/docs/source/_images/tutorial_56_1.png b/docs/source/_images/tutorial_56_1.png deleted file mode 100644 index 93a9d4cf..00000000 Binary files a/docs/source/_images/tutorial_56_1.png and /dev/null differ diff --git a/docs/source/_images/tutorial_58_0.png b/docs/source/_images/tutorial_58_0.png index 16c8a9e6..e3d0650e 100644 Binary files a/docs/source/_images/tutorial_58_0.png and b/docs/source/_images/tutorial_58_0.png differ diff --git a/docs/source/_images/tutorial_59_0.png b/docs/source/_images/tutorial_59_0.png new file mode 100644 index 00000000..2e407ae8 Binary files /dev/null and b/docs/source/_images/tutorial_59_0.png differ diff --git a/docs/source/_images/tutorial_59_1.png b/docs/source/_images/tutorial_59_1.png deleted file mode 100644 index 
b4242212..00000000 Binary files a/docs/source/_images/tutorial_59_1.png and /dev/null differ diff --git a/docs/source/_images/tutorial_61_0.png b/docs/source/_images/tutorial_61_0.png index 4179902f..045b2e95 100644 Binary files a/docs/source/_images/tutorial_61_0.png and b/docs/source/_images/tutorial_61_0.png differ diff --git a/docs/source/_images/tutorial_62_0.png b/docs/source/_images/tutorial_62_0.png index 4dc26bef..891ed76d 100644 Binary files a/docs/source/_images/tutorial_62_0.png and b/docs/source/_images/tutorial_62_0.png differ diff --git a/docs/source/_images/tutorial_64_0.png b/docs/source/_images/tutorial_64_0.png index 02883a45..ff3fa6d5 100644 Binary files a/docs/source/_images/tutorial_64_0.png and b/docs/source/_images/tutorial_64_0.png differ diff --git a/docs/source/_images/tutorial_66_0.png b/docs/source/_images/tutorial_66_0.png index 94b4d328..0a494e37 100644 Binary files a/docs/source/_images/tutorial_66_0.png and b/docs/source/_images/tutorial_66_0.png differ diff --git a/docs/source/_images/tutorial_68_0.png b/docs/source/_images/tutorial_68_0.png index 8c219c45..d9a3de61 100644 Binary files a/docs/source/_images/tutorial_68_0.png and b/docs/source/_images/tutorial_68_0.png differ diff --git a/docs/source/_images/tutorial_70_0.png b/docs/source/_images/tutorial_70_0.png index d028824a..e9a9dde3 100644 Binary files a/docs/source/_images/tutorial_70_0.png and b/docs/source/_images/tutorial_70_0.png differ diff --git a/docs/source/_images/tutorial_72_0.png b/docs/source/_images/tutorial_72_0.png index 04dbc048..37ec0120 100644 Binary files a/docs/source/_images/tutorial_72_0.png and b/docs/source/_images/tutorial_72_0.png differ diff --git a/docs/source/_images/tutorial_74_0.png b/docs/source/_images/tutorial_74_0.png new file mode 100644 index 00000000..21a8eec3 Binary files /dev/null and b/docs/source/_images/tutorial_74_0.png differ diff --git a/docs/source/_images/tutorial_74_1.png b/docs/source/_images/tutorial_74_1.png deleted file 
mode 100644 index 2388abe0..00000000 Binary files a/docs/source/_images/tutorial_74_1.png and /dev/null differ diff --git a/docs/source/_images/tutorial_75_0.png b/docs/source/_images/tutorial_75_0.png index 5f957e66..64e60c8c 100644 Binary files a/docs/source/_images/tutorial_75_0.png and b/docs/source/_images/tutorial_75_0.png differ diff --git a/docs/source/_images/tutorial_77_0.png b/docs/source/_images/tutorial_77_0.png new file mode 100644 index 00000000..5b0334c1 Binary files /dev/null and b/docs/source/_images/tutorial_77_0.png differ diff --git a/docs/source/_images/tutorial_79_0.png b/docs/source/_images/tutorial_79_0.png new file mode 100644 index 00000000..38205d9c Binary files /dev/null and b/docs/source/_images/tutorial_79_0.png differ diff --git a/docs/source/_images/tutorial_78_0.png b/docs/source/_images/tutorial_82_0.png similarity index 56% rename from docs/source/_images/tutorial_78_0.png rename to docs/source/_images/tutorial_82_0.png index 24a93777..fe7d5ac3 100644 Binary files a/docs/source/_images/tutorial_78_0.png and b/docs/source/_images/tutorial_82_0.png differ diff --git a/docs/source/about.rst b/docs/source/about.rst index 3e4254e7..7738cf1a 100644 --- a/docs/source/about.rst +++ b/docs/source/about.rst @@ -7,10 +7,19 @@ About Authors -------- -`dabest` is written in Python by `Joses W. Ho `_, with design and input from `Adam Claridge-Chang `_ and other lab members. +DABEST is written in Python by `Joses W. Ho `_, with design and input from `Adam Claridge-Chang `_ and other lab members. To find out more about the authors’ research, please visit the `Claridge-Chang lab webpage `_. +Contributors +------------ + +- Mason Malone (`@MasonM `_) with `PR #30 `_: Fix plot error when effect size is 0. + +- Matthew Edwards (`@mje-nz `_) with `PR #71 `_: Specify dependencies correctly in ``setup.py``. 
+ +- Adam Nekimken (`@anekimken `_) with `PR #73 `_: Implement inset axes so estimation plots can be plotted on a pre-determined :py:mod:`matplotlib` :py:class:`Axes` object. + Typography ---------- @@ -20,7 +29,7 @@ This documentation uses `Spectral `_ for the bod License ------- -The `dabest` package in Python is licenced under the `BSD 3-clause Clear License `_. +The DABEST package in Python is licenced under the `BSD 3-clause Clear License `_. Copyright (c) 2016-2019, Joses W. Ho All rights reserved. diff --git a/docs/source/conf.py b/docs/source/conf.py index 3c0ded1e..c6864d07 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -77,7 +77,7 @@ # The short X.Y version. version = '0.2' # The full version, including alpha/beta/rc tags. -release = '0.2.4' +release = '0.2.6' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/source/getting-started.rst b/docs/source/getting-started.rst index 64617715..1fe95500 100644 --- a/docs/source/getting-started.rst +++ b/docs/source/getting-started.rst @@ -8,7 +8,7 @@ Getting Started Requirements ------------ -Python 3.7 is strongly recommended. ``dabest`` has also been tested with Python 3.5 and 3.6. +Python 3.7 is strongly recommended. DABEST has also been tested with Python 3.5 and 3.6. In addition, the following packages are also required: @@ -47,7 +47,7 @@ Then, navigate to the cloned repo in the command line and run Testing ------- -To test ``dabest``, you will need to install `pytest `_. +To test DABEST, you will need to install `pytest `_. Run ``pytest`` in the root directory of the source distribution. This runs the test suite in ``dabest/tests`` folder. The test suite will ensure that the bootstrapping functions and the plotting functions perform as expected. 
diff --git a/docs/source/index.rst b/docs/source/index.rst index 60a21802..db6f5dab 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -18,8 +18,11 @@ Analyze your data with estimation statistics! News ---- +October 2019: + - v0.2.6 released. This release has one new feature (plotting of estimation plot inside any :py:mod:`matplotlib` :py:class:`Axes`; see the section on :ref:`inset_plot` in the :doc:`tutorial`). There are also two bug patches for the handling of bootstrap plotting, and of dependency installation. For more information, see the :doc:`release-notes`. + September 2019: - - v0.2.5 released. This release addresses two feature requests, and also patches two bugs: one affecting the paired difference CIs, and one involving NaNs in unused/irrelevant columns. See the :doc:`release-notes`. + - v0.2.5 released. This release addresses two feature requests, and also patches two bugs: one affecting the paired difference CIs, and one involving NaNs in unused/irrelevant columns. May 2019: - v0.2.4 released. This is a patch for a set of bugs that mis-aligned Gardner-Altman plots, and also adds the capability to tweak the x-position of the Tufte gapped lines. diff --git a/docs/source/release-notes.rst b/docs/source/release-notes.rst index 7b2d1abf..48e5944f 100644 --- a/docs/source/release-notes.rst +++ b/docs/source/release-notes.rst @@ -4,11 +4,23 @@ Release Notes ============= -v0.2.5 +v0.2.6 ------ This release addresses a number of bugs and feature improvements. There are no known breaking changes to the API; hence **all users are strongly encouraged to upgrade to the latest version**. +Feature additions: + - It is now possible to specify a pre-determined :py:mod:`matplotlib` :py:class:`Axes` to create the estimation plot in. See the new section in the tutorial for more information. (`Pull request #73 `_; thanks to Adam Nekimken (`@anekimken `_)). + - + + +Bug-fixes: + - Ensure all dependencies are installed along with DABEST. 
(`Pull request #71 `_; thanks to Matthew Edwards (`@mje-nz `_)). + - Handle infinities in bootstraps during plotting. (`Issue #72 `_, `Pull request #74 `_) + +v0.2.5 +------ + Feature additions: - Adding Ns of each group to the results DataFrame. (`Issue #45 `_) - Auto-labelling the swarmplot rawdata axes y-label. (`Issue #51 `_) @@ -35,7 +47,7 @@ This release fixes a bug that did not handle when the supplied ``x`` was a :py:m v0.2.2 ------ -This release fixes a `bug `_ that has a mean difference or median difference of exactly 0. +This release fixes a `bug `_ that occurred when the mean difference or median difference was exactly 0. (`Pull request #30 `_; thanks to Mason Malone (`@MasonM `_)). v0.2.1 @@ -58,7 +70,7 @@ Now, every analysis session begins with ``dabest.load()``. my_data = dabest.load(my_dataframe, idx=("Control", "Test")) -This creates a ``dabest`` object with effect sizes as instances. +This creates a :py:class:`Dabest` object with effect sizes as instances. .. code-block:: python :linenos: @@ -112,14 +124,14 @@ v0.1.6 Several keywords have been added to allow more fine-grained control over a selection of plot elements. -* `swarm_dotsize` -* `difference_dotsize` -* `ci_linewidth` -* `summary_linewidth` +* ``swarm_dotsize`` +* ``difference_dotsize`` +* ``ci_linewidth`` +* ``summary_linewidth`` -The new keyword `context` allows you to set the plotting context as defined by seaborn's `plotting_context() `_ . +The new keyword ``context`` allows you to set the plotting context as defined by seaborn's `plotting_context() `_ . -Now, if `paired=True`, you will need to supply an `id_col`, which is a column in the DataFrame which specifies which sample the datapoint belongs to. See the :doc:`tutorial` for more details. +Now, if ``paired=True``, you will need to supply an ``id_col``, which is a column in the DataFrame which specifies which sample the datapoint belongs to. See the :doc:`tutorial` for more details. 
v0.1.5 @@ -138,7 +150,7 @@ Update dependencies to Aesthetic changes -* add `tick_length` and `tick_pad` arguments to allow tweaking of the axes tick lengths, and padding of the tick labels, respectively. +* add ``tick_length`` and ``tick_pad`` arguments to allow tweaking of the axes tick lengths, and padding of the tick labels, respectively. v0.1.3 @@ -149,7 +161,7 @@ Update dependencies to Bugfixes -* fix bug that did not label `swarm_label` if raw data was in tidy form +* fix bug that did not label ``swarm_label`` if raw data was in tidy form * fix bug that did not dropnans for unpaired diff diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 6ca1dc47..fb327fec 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -19,13 +19,13 @@ Load Libraries .. parsed-literal:: - We're using DABEST v0.2.5 + We're using DABEST v0.2.6 Create dataset for demo ----------------------- -Here, we create a dataset to illustrate how ``dabest`` functions. In +Here, we create a dataset to illustrate how DABEST functions. In this dataset, each column corresponds to a group of observations. .. code-block:: python3 @@ -59,10 +59,10 @@ this dataset, each column corresponds to a group of observations. id_col = pd.Series(range(1, Ns+1)) # Combine samples and gender into a DataFrame. - df = pd.DataFrame({'Control 1' : c1, 'Control 2' : c2, 'Control 3' : c3, - 'Test 1' : t1, 'Test 2' : t2, 'Test 3' : t3, - 'Test 4' : t4, 'Test 5' : t5, 'Test 6' : t6, - + df = pd.DataFrame({'Control 1' : c1, 'Test 1' : t1, + 'Control 2' : c2, 'Test 2' : t2, + 'Control 3' : c3, 'Test 3' : t3, + 'Test 4' : t4, 'Test 5' : t5, 'Test 6' : t6, 'Gender' : gender, 'ID' : id_col }) @@ -205,11 +205,9 @@ list. .. 
code-block:: python3 :linenos: - two_groups_unpaired = dabest.load(df, - idx=("Control 1", "Test 1"), - resamples=5000) + two_groups_unpaired = dabest.load(df, idx=("Control 1", "Test 1"), resamples=5000) -Calling this ``Dabest`` object gives you a gentle greeting, as well as +Calling this :py:class:`Dabest` object gives you a gentle greeting, as well as the comparisons that can be computed. .. code-block:: python3 @@ -222,11 +220,11 @@ the comparisons that can be computed. .. parsed-literal:: - DABEST v0.2.5 + DABEST v0.2.6 ============= Good afternoon! - The current time is Wed Sep 4 14:34:59 2019. + The current time is Thu Oct 3 17:38:00 2019. Effect size(s) with 95% confidence intervals will be computed for: 1. Test 1 minus Control 1 @@ -246,10 +244,8 @@ dataset that indicates the identity of each observation, using the .. code-block:: python3 :linenos: - two_groups_paired = dabest.load(df, - idx=("Control 1", "Test 1"), - paired=True, - id_col="ID") + two_groups_paired = dabest.load(df, idx=("Control 1", "Test 1"), + paired=True, id_col="ID") .. code-block:: python3 :linenos: @@ -261,11 +257,11 @@ dataset that indicates the identity of each observation, using the .. parsed-literal:: - DABEST v0.2.5 + DABEST v0.2.6 ============= Good afternoon! - The current time is Wed Sep 4 14:34:59 2019. + The current time is Thu Oct 3 17:38:00 2019. Paired effect size(s) with 95% confidence intervals will be computed for: 1. Test 1 minus Control 1 @@ -280,22 +276,23 @@ produced. .. code-block:: python3 :linenos: - two_groups_unpaired_ci90 = dabest.load(df, - idx=("Control 1", "Test 1"), - ci=90) - - two_groups_unpaired_ci90 # prints out the text below! + two_groups_unpaired_ci90 = dabest.load(df, idx=("Control 1", "Test 1"), ci=90) + +.. code-block:: python3 + :linenos: + + two_groups_unpaired_ci90 .. parsed-literal:: - DABEST v0.2.5 + DABEST v0.2.6 ============= Good afternoon! - The current time is Wed Sep 4 14:34:59 2019. + The current time is Thu Oct 3 17:38:00 2019. 
Effect size(s) with 90% confidence intervals will be computed for: 1. Test 1 minus Control 1 @@ -307,7 +304,7 @@ produced. Effect sizes ------------ -``dabest`` now features a range of effect sizes: - the mean difference +DABEST now features a range of effect sizes: - the mean difference (``mean_diff``) - the median difference (``median_diff``) - `Cohen's *d* `__ (``cohens_d``) - `Hedges' @@ -316,7 +313,7 @@ Effect sizes delta `__ (``cliffs_delta``) -Each of these are attributes of the ``Dabest`` object. +Each of these are attributes of the :py:class:`Dabest` object. .. code-block:: python3 :linenos: @@ -328,11 +325,11 @@ Each of these are attributes of the ``Dabest`` object. .. parsed-literal:: - DABEST v0.2.5 + DABEST v0.2.6 ============= Good afternoon! - The current time is Wed Sep 4 14:35:00 2019. + The current time is Thu Oct 3 17:38:00 2019. The unpaired mean difference between Control 1 and Test 1 is 0.48 [95%CI 0.205, 0.774]. The two-sided p-value of the Mann-Whitney test is 0.00163. @@ -366,10 +363,12 @@ tests. You can access the results as a pandas DataFrame as well. + + .. code-block:: python3 :linenos: - - pd.options.display.max_columns = 50 # to display all columns. + + pd.options.display.max_columns = 50 two_groups_unpaired.mean_diff.results @@ -537,11 +536,11 @@ Let's compute the Hedges' g for our comparison. .. parsed-literal:: - DABEST v0.2.5 + DABEST v0.2.6 ============= Good afternoon! - The current time is Wed Sep 4 14:35:01 2019. + The current time is Thu Oct 3 17:38:01 2019. The unpaired Hedges' g between Control 1 and Test 1 is 1.03 [95%CI 0.317, 1.62]. The two-sided p-value of the Mann-Whitney test is 0.00163. @@ -650,6 +649,14 @@ inspiration here. Every effect size instance has access to the ``.plot()`` method. This means you can quickly create plots for different effect sizes easily. +.. .. code-block:: python3 +.. :linenos: +.. +.. import matplotlib as mpl +.. import warnings +.. 
# warnings.filterwarnings("ignore", category=mpl.cbook.mplDeprecation) +.. warnings.filterwarnings("ignore") + .. code-block:: python3 :linenos: @@ -657,7 +664,7 @@ means you can quickly create plots for different effect sizes easily. -.. image:: _images/tutorial_27_1.png +.. image:: _images/tutorial_28_0.png .. code-block:: python3 @@ -667,9 +674,7 @@ means you can quickly create plots for different effect sizes easily. - - -.. image:: _images/tutorial_28_1.png +.. image:: _images/tutorial_29_0.png Instead of a Gardner-Altman plot, you can produce a **Cumming estimation @@ -686,7 +691,7 @@ Tufte's dictum to maximise the data-ink ratio. -.. image:: _images/tutorial_30_0.png +.. image:: _images/tutorial_31_0.png For paired data, we use @@ -701,8 +706,7 @@ Both Gardner-Altman and Cumming plots support this. - -.. image:: _images/tutorial_32_1.png +.. image:: _images/tutorial_33_0.png .. code-block:: python3 @@ -712,10 +716,10 @@ Both Gardner-Altman and Cumming plots support this. -.. image:: _images/tutorial_33_0.png +.. image:: _images/tutorial_34_0.png -The ``dabest`` package also implements a range of estimation plot +The DABEST package also implements a range of estimation plot designs aimed at depicting common experimental designs. The **multi-two-group estimation plot** tiles two or more Cumming plots @@ -737,7 +741,7 @@ meta-analyses to aggregate and compare data from different experiments. -.. image:: _images/tutorial_35_0.png +.. image:: _images/tutorial_36_0.png The multi-two-group design also accomodates paired comparisons. @@ -755,7 +759,7 @@ The multi-two-group design also accomodates paired comparisons. -.. image:: _images/tutorial_37_0.png +.. image:: _images/tutorial_38_0.png The **shared control plot** displays another common experimental @@ -783,11 +787,11 @@ to ``idx`` has more than two data columns. .. parsed-literal:: - DABEST v0.2.5 + DABEST v0.2.6 ============= Good afternoon! - The current time is Wed Sep 4 14:35:05 2019. 
+ The current time is Thu Oct 3 17:38:24 2019. Effect size(s) with 95% confidence intervals will be computed for: 1. Test 1 minus Control 1 @@ -811,11 +815,11 @@ to ``idx`` has more than two data columns. .. parsed-literal:: - DABEST v0.2.5 + DABEST v0.2.6 ============= Good afternoon! - The current time is Wed Sep 4 14:35:06 2019. + The current time is Thu Oct 3 17:38:25 2019. The unpaired mean difference between Control 1 and Test 1 is 0.48 [95%CI 0.205, 0.774]. The two-sided p-value of the Mann-Whitney test is 0.00163. @@ -850,10 +854,10 @@ to ``idx`` has more than two data columns. -.. image:: _images/tutorial_42_0.png +.. image:: _images/tutorial_43_0.png -``dabest`` thus empowers you to robustly perform and elegantly present +DABEST thus empowers you to robustly perform and elegantly present complex visualizations and statistics. .. code-block:: python3 @@ -875,11 +879,11 @@ complex visualizations and statistics. .. parsed-literal:: - DABEST v0.2.5 + DABEST v0.2.6 ============= Good afternoon! - The current time is Wed Sep 4 14:35:07 2019. + The current time is Thu Oct 3 17:38:26 2019. Effect size(s) with 95% confidence intervals will be computed for: 1. Test 1 minus Control 1 @@ -903,11 +907,11 @@ complex visualizations and statistics. .. parsed-literal:: - DABEST v0.2.5 + DABEST v0.2.6 ============= Good afternoon! - The current time is Wed Sep 4 14:35:08 2019. + The current time is Thu Oct 3 17:38:27 2019. The unpaired mean difference between Control 1 and Test 1 is 0.48 [95%CI 0.205, 0.774]. The two-sided p-value of the Mann-Whitney test is 0.00163. @@ -942,13 +946,13 @@ complex visualizations and statistics. -.. image:: _images/tutorial_47_0.png +.. image:: _images/tutorial_48_0.png Using long (aka 'melted') data frames ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``dabest.plot`` can also work with 'melted' or 'long' data. This term is +DABEST can also work with 'melted' or 'long' data. 
This term is so used because each row will now correspond to a single datapoint, with one column carrying the value and other columns carrying 'metadata' describing that datapoint. @@ -1063,11 +1067,11 @@ When your data is in this format, you will need to specify the ``x`` and .. parsed-literal:: - DABEST v0.2.5 + DABEST v0.2.6 ============= Good afternoon! - The current time is Wed Sep 4 14:35:09 2019. + The current time is Thu Oct 3 17:38:27 2019. Effect size(s) with 95% confidence intervals will be computed for: 1. Test 1 minus Control 1 @@ -1083,8 +1087,7 @@ When your data is in this format, you will need to specify the ``x`` and - -.. image:: _images/tutorial_53_1.png +.. image:: _images/tutorial_53_0.png Controlling plot aesthetics @@ -1100,8 +1103,7 @@ Changing the y-axes labels. - -.. image:: _images/tutorial_56_1.png +.. image:: _images/tutorial_56_0.png Color the rawdata according to another column in the dataframe. @@ -1123,9 +1125,7 @@ Color the rawdata according to another column in the dataframe. - - -.. image:: _images/tutorial_59_1.png +.. image:: _images/tutorial_59_0.png Changing the palette used with ``custom_palette``. Any valid matplotlib @@ -1240,7 +1240,7 @@ better outcome), you can simply invert the tuple passed to You can add minor ticks and also change the tick frequency by accessing the axes directly. -Each estimation plot produced by ``dabest`` has 2 axes. The first one +The estimation plot produced by ``dabest.plot()`` has 2 axes. The first one contains the rawdata swarmplot; the second one contains the bootstrap effect size differences. @@ -1262,8 +1262,7 @@ effect size differences. - -.. image:: _images/tutorial_74_1.png +.. image:: _images/tutorial_74_0.png .. code-block:: python3 @@ -1285,8 +1284,66 @@ effect size differences. .. image:: _images/tutorial_75_0.png +.. 
_inset_plot: + +Creating estimation plots in existing axes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*Implemented in v0.2.6 by Adam Nekimken.* + +``dabest.plot`` has an ``ax`` keyword that accepts any Matplotlib +``Axes``. The entire estimation plot will be created in the specified +``Axes``. + +.. code-block:: python3 + :linenos: + + from matplotlib import pyplot as plt + f, axx = plt.subplots(nrows=2, ncols=2, + figsize=(15, 15), + + # ensure proper width-wise spacing. + gridspec_kw={'wspace': 0.25} + ) + + two_groups_unpaired.mean_diff.plot(ax=axx.flat[0]); + + two_groups_paired.mean_diff.plot(ax=axx.flat[1]); + + multi_2group.mean_diff.plot(ax=axx.flat[2]); + + multi_2group_paired.mean_diff.plot(ax=axx.flat[3]); + + + +.. image:: _images/tutorial_77_0.png + + +In this case, to access the individual rawdata axes, use +``name_of_axes`` to manipulate the rawdata swarmplot axes, and +``name_of_axes.contrast_axes`` to gain access to the effect size axes. + +.. code-block:: python3 + :linenos: + + topleft_axes = axx.flat[0] + topleft_axes.set_ylabel("New y-axis label for rawdata") + topleft_axes.contrast_axes.set_ylabel("New y-axis label for effect size") + + + + + +.. image:: _images/tutorial_79_0.png + + + +Applying style sheets +~~~~~~~~~~~~~~~~~~~~~ -with v0.2.0, ``dabest`` can now apply `matplotlib style +*Implemented in v0.2.0*. + +DABEST can now apply `matplotlib style sheets `__ to estimation plots. You can refer to this `gallery `__ @@ -1297,10 +1354,13 @@ of style sheets for reference. import matplotlib.pyplot as plt plt.style.use("dark_background") - + +.. code-block:: python3 + :linenos: + multi_2group.mean_diff.plot(); -.. image:: _images/tutorial_78_0.png +.. image:: _images/tutorial_82_0.png diff --git a/setup.py b/setup.py index 9e1ae6bd..5365d289 100644 --- a/setup.py +++ b/setup.py @@ -27,73 +27,27 @@ """ -# Modified from from setup.py in seaborn. 
-try: - from setuptools import setup -except ImportError: - from distutils.core import setup - - - -def need_to_install(module, version): - desired_major_version = int(version.split('.')[0]) - desired_minor_version = int(version.split('.')[1]) - - INSTALLED_VERSION_MAJOR = int(module.__version__.split('.')[0]) - INSTALLED_VERSION_MINOR = int(module.__version__.split('.')[1]) - - if INSTALLED_VERSION_MAJOR < desired_major_version: - return True - - elif INSTALLED_VERSION_MAJOR == desired_major_version and \ - INSTALLED_VERSION_MINOR < desired_minor_version: - return True - - else: - return False - - - -def check_dependencies(): - from importlib import import_module - - modules = {'numpy' : '1.15', - 'scipy' : '1.2', - 'statsmodels': '0.9', - 'pandas' : '0.24', - 'matplotlib' : '3.0', - 'seaborn' : '0.9'} - to_install = [] - - for module, version in modules.items(): - try: - my_module = import_module(module) - - if need_to_install(my_module, version): - to_install.append("{}=={}".format(module, version)) - - except ImportError: - to_install.append("{}=={}".format(module, version)) - - return to_install - - - if __name__ == "__main__": - - installs = check_dependencies() - setup( name='dabest', author='Joses W. Ho', author_email='joseshowh@gmail.com', maintainer='Joses W. Ho', maintainer_email='joseshowh@gmail.com', - version='0.2.5', + version='0.2.6', description=DESCRIPTION, long_description=LONG_DESCRIPTION, packages=find_packages(), - install_requires=installs, + install_requires=[ + 'numpy~=1.15', + 'scipy~=1.2', + # 'statsmodels~=0.9', + 'pandas<0.25', + 'matplotlib~=3.0', + 'seaborn~=0.9' + ], + extras_require={'dev': ['pytest==5.2', 'pytest-mpl==0.10']}, + python_requires='>=3.5', url='https://acclab.github.io/DABEST-python-docs', download_url='https://www.github.com/ACCLAB/DABEST-python', license='BSD 3-clause Clear License'