|  | 
"""Benchmarks for ArrayProxy slicing of gzipped and non-gzipped files

Run benchmarks with::

    import nibabel as nib
    nib.bench()

If you have doctests enabled by default in nose (with a noserc file or
environment variable), and you have a numpy version <= 1.6.1, this will also
run the doctests, let's hope they pass.

Run this benchmark with::

    nosetests -s --match '(?:^|[\\b_\\.//-])[Bb]ench' /path/to/bench_arrayproxy_slicing.py
"""
|  | 16 | + | 
|  | 17 | +from timeit import timeit | 
|  | 18 | +import contextlib | 
|  | 19 | +import gc | 
|  | 20 | +import itertools as it | 
|  | 21 | +import numpy as np | 
|  | 22 | +import mock | 
|  | 23 | + | 
|  | 24 | +import nibabel as nib | 
|  | 25 | +from nibabel.tmpdirs import InTemporaryDirectory | 
|  | 26 | +from nibabel.openers import HAVE_INDEXED_GZIP | 
|  | 27 | + | 
|  | 28 | +from .butils import print_git_title | 
|  | 29 | +from ..rstutils import rst_table | 
|  | 30 | + | 
|  | 31 | +# if memory_profiler is installed, we get memory usage results | 
|  | 32 | +try: | 
|  | 33 | +    from memory_profiler import memory_usage | 
|  | 34 | +except ImportError: | 
|  | 35 | +    memory_usage = None | 
|  | 36 | + | 
|  | 37 | + | 
# Every benchmark loads an image of shape SHAPE and then slices it NITERS
# times
NITERS = 50
SHAPE = (100, 100, 100, 100)

# One benchmark is run for each combination of SLICEOBJS, KEEP_OPENS, and
# HAVE_IGZIP.
#
# Placeholders used in the slice objects:
#   ':'     is replaced with slice(None)
#   '?'     is replaced with a random index into the relevant axis
#   number  (assumed to be between 0 and 1) is scaled to the axis shape
SLICEOBJS = [
    ('?', ':', ':', ':'),
    (':', ':', ':', '?'),
    ('?', '?', '?', ':'),
]

KEEP_OPENS = [False, True]

# The True variant is only benchmarked when nibabel.openers reports
# HAVE_INDEXED_GZIP as true
HAVE_IGZIP = [False, True] if HAVE_INDEXED_GZIP else [False]
|  | 60 | + | 
|  | 61 | + | 
@contextlib.contextmanager
def patch_indexed_gzip(have_igzip):
    """Context manager overriding the ``HAVE_INDEXED_GZIP`` flags.

    While the context is active, ``HAVE_INDEXED_GZIP`` in both
    ``nibabel.openers`` and ``nibabel.arrayproxy`` is patched (with
    ``mock.patch``) to the value `have_igzip`.
    """
    openers_flag = 'nibabel.openers.HAVE_INDEXED_GZIP'
    arrayproxy_flag = 'nibabel.arrayproxy.HAVE_INDEXED_GZIP'

    with mock.patch(openers_flag, have_igzip):
        with mock.patch(arrayproxy_flag, have_igzip):
            yield
|  | 70 | + | 
|  | 71 | + | 
def bench_arrayproxy_slicing():
    """Benchmark ArrayProxy slicing of a gzipped image file.

    A random float32 image of shape ``SHAPE`` is saved into a temporary
    directory twice: once uncompressed and once gzipped.  For every
    combination of ``HAVE_IGZIP``, ``KEEP_OPENS`` and ``SLICEOBJS`` (minus
    the combinations in which the ``HAVE_INDEXED_GZIP`` flag has no effect),
    both files are loaded and slicing their ``dataobj`` is timed over
    ``NITERS`` calls.  The uncompressed image provides the baseline.

    If ``memory_profiler`` could be imported, peak memory usage of a single
    slicing call is measured as well; otherwise the memory figures are NaN.

    The results are printed as a reST table; nothing is returned.
    """

    print_git_title('\nArrayProxy gzip slicing')

    # each test is a tuple containing
    # (HAVE_INDEXED_GZIP, keep_file_open, sliceobj)
    tests = list(it.product(HAVE_IGZIP, KEEP_OPENS, SLICEOBJS))

    # remove tests where HAVE_INDEXED_GZIP is True and keep_file_open is False,
    # because if keep_file_open is False, HAVE_INDEXED_GZIP has no effect
    tests = [t for t in tests if not (t[0] and not t[1])]

    testfile = 'testfile.nii'
    testfilegz = 'test.nii.gz'

    def get_test_label(test):
        """Return the row label ('gzip' or 'indexed_gzip') for a test."""
        have_igzip, keep_open = test[0], test[1]
        if have_igzip and keep_open:
            return 'indexed_gzip'
        return 'gzip'

    def fix_sliceobj(sliceobj):
        """Turn a slice object template into a tuple usable for indexing.

        ':' becomes ``slice(None)``, '?' becomes a random index into the
        corresponding axis of ``SHAPE``, and any other value is multiplied
        by the axis length and truncated to an int.
        """
        new_sliceobj = []
        for i, s in enumerate(sliceobj):
            if s == ':':
                new_sliceobj.append(slice(None))
            elif s == '?':
                new_sliceobj.append(np.random.randint(0, SHAPE[i]))
            else:
                new_sliceobj.append(int(s * SHAPE[i]))
        return tuple(new_sliceobj)

    def fmt_sliceobj(sliceobj):
        """Return a printable '[a, b, ...]' form of a slice object template."""
        slcstr = []
        for i, s in enumerate(sliceobj):
            # Compare against the two placeholders explicitly: the substring
            # test ``s in ':?'`` raises TypeError for numeric entries and
            # also matches the empty string.
            if s in (':', '?'):
                slcstr.append(s)
            else:
                slcstr.append(str(int(s * SHAPE[i])))
        return '[{}]'.format(', '.join(slcstr))

    def time_ratio(testtime, basetime):
        """Return testtime / basetime, or NaN if the baseline time is zero."""
        try:
            return testtime / basetime
        except ZeroDivisionError:
            return np.nan

    with InTemporaryDirectory():

        print('Generating test data... ({} MB)'.format(
            int(round(np.prod(SHAPE) * 4 / 1048576.))))

        data = np.array(np.random.random(SHAPE), dtype=np.float32)

        # zero out the voxels selected by the mask (those whose random draw
        # exceeds 0.1, i.e. roughly 90% of them) so gzip has something to
        # compress
        mask = np.random.random(SHAPE[:3]) > 0.1
        if len(SHAPE) > 3:
            data[mask, :] = 0
        else:
            data[mask] = 0

        # save uncompressed and compressed versions of the image
        img = nib.nifti1.Nifti1Image(data, np.eye(4))
        nib.save(img, testfilegz)
        nib.save(img, testfile)

        # each result is a tuple containing
        # (label, keep_open, sliceobj, testtime, basetime, testmem, basemem)
        #
        # where "basetime" is the time taken to load and slice a memmapped
        # (uncompressed)image, and "basemem" is memory usage for the same
        results = []

        # We use the same random seed for each slice object.  The upper
        # bound is kept within the signed 32 bit range so that randint also
        # works where numpy's default integer type is only 32 bits wide.
        seeds = {sliceobj: np.random.randint(0, 2 ** 31 - 1)
                 for sliceobj in SLICEOBJS}

        for ti, test in enumerate(tests):

            label = get_test_label(test)
            have_igzip, keep_open, sliceobj = test
            seed = seeds[sliceobj]

            print('Running test {} of {} ({})...'.format(
                ti + 1, len(tests), label))

            # load uncompressed and compressed versions of the image
            img = nib.load(testfile, keep_file_open=keep_open)

            with patch_indexed_gzip(have_igzip):
                imggz = nib.load(testfilegz, keep_file_open=keep_open)

            def basefunc():
                img.dataobj[fix_sliceobj(sliceobj)]

            def testfunc():
                with patch_indexed_gzip(have_igzip):
                    imggz.dataobj[fix_sliceobj(sliceobj)]

            # make sure nothing is floating around from the previous test
            # iteration, so memory profiling is (hopefully) more accurate
            gc.collect()

            if memory_usage is not None:
                membaseline = max(memory_usage(lambda: None))
                testmem = max(memory_usage(testfunc)) - membaseline
                basemem = max(memory_usage(basefunc)) - membaseline
            else:
                testmem = np.nan
                basemem = np.nan

            # reset the random number generator, so test and baseline use the
            # same slices
            np.random.seed(seed)
            testtime = float(timeit(testfunc, number=NITERS)) / float(NITERS)
            np.random.seed(seed)
            basetime = float(timeit(basefunc, number=NITERS)) / float(NITERS)

            results.append((label, keep_open, sliceobj, testtime, basetime,
                            testmem, basemem))

    # One table row per test; the columns match collbls below.  A zero
    # baseline time only turns the ratio of that one row into NaN.
    data = np.zeros((len(results), 4))
    rowlbls = []
    for row, result in enumerate(results):
        (label, keep_open, sliceobj,
         testtime, basetime, testmem, basemem) = result
        data[row, 0] = testtime
        data[row, 1] = basetime
        data[row, 2] = time_ratio(testtime, basetime)
        data[row, 3] = testmem - basemem
        rowlbls.append('Type {}, keep_open {}, slice {}'.format(
            label, keep_open, fmt_sliceobj(sliceobj)))
    collbls = ['Time', 'Baseline time', 'Time ratio', 'Memory deviation']

    print(rst_table(data, rowlbls, collbls))
0 commit comments