diff --git a/CHANGES b/CHANGES index e1e7355106..35ea60f708 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,7 @@ Next release ============ +* ENH: SelectFiles: a streamlined version of DataGrabber * ENH: New interfaces: spm.ResliceToReference * FIX: Deals properly with 3d files in SPM Realign diff --git a/nipype/__init__.py b/nipype/__init__.py index 20a05c291b..c9f9684f2a 100644 --- a/nipype/__init__.py +++ b/nipype/__init__.py @@ -17,7 +17,7 @@ from pipeline import Node, MapNode, Workflow from interfaces import (fsl, spm, freesurfer, afni, ants, slicer, dipy, nipy, - mrtrix, camino, DataGrabber, DataSink, + mrtrix, camino, DataGrabber, DataSink, SelectFiles, IdentityInterface, Rename, Function, Select, Merge) diff --git a/nipype/interfaces/__init__.py b/nipype/interfaces/__init__.py index 56d6e8f376..b81deb9d70 100644 --- a/nipype/interfaces/__init__.py +++ b/nipype/interfaces/__init__.py @@ -7,6 +7,6 @@ """ __docformat__ = 'restructuredtext' -from io import DataGrabber, DataSink +from io import DataGrabber, DataSink, SelectFiles from utility import IdentityInterface, Rename, Function, Select, Merge import fsl, spm, freesurfer, afni, ants, slicer, dipy, nipy, mrtrix, camino diff --git a/nipype/interfaces/io.py b/nipype/interfaces/io.py index 2e1be6e01e..a319ca9a76 100644 --- a/nipype/interfaces/io.py +++ b/nipype/interfaces/io.py @@ -18,7 +18,9 @@ """ import glob +import string import os +import os.path as op import shutil import re import tempfile @@ -528,9 +530,9 @@ def _list_outputs(self): filledtemplate = template if argtuple: try: - filledtemplate = template%tuple(argtuple) + filledtemplate = template % tuple(argtuple) except TypeError as e: - raise TypeError(e.message + ": Template %s failed to convert with args %s"%(template, str(tuple(argtuple)))) + raise TypeError(e.message + ": Template %s failed to convert with args %s" % (template, str(tuple(argtuple)))) outfiles = glob.glob(filledtemplate) if len(outfiles) == 0: msg = 'Output key: %s Template: %s 
returned no files' % (key, filledtemplate) @@ -551,18 +553,146 @@ def _list_outputs(self): outputs[key] = outputs[key][0] return outputs -class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): + +class SelectFilesInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): + + base_directory = Directory(exists=True, + desc="Root path common to templates.") + sort_filelist = traits.Bool(True, usedefault=True, + desc="When matching multiple files, return them in sorted order.") + raise_on_empty = traits.Bool(True, usedefault=True, + desc="Raise an exception if a template pattern matches no files.") + force_lists = traits.Bool(False, usedefault=True, + desc="Return all values as lists even when matching a single file.") + + +class SelectFiles(IOBase): + """Flexibly collect data from disk to feed into workflows. + + This interface uses the {}-based string formatting syntax to plug + values (possibly known only at workflow execution time) into string + templates and collect files from persistent storage. These templates + can also be combined with glob wildcards. The field names in the + formatting template (i.e. the terms in braces) will become input + fields on the interface, and the keys in the templates dictionary + will form the output fields. + + Examples + -------- + + >>> from nipype import SelectFiles, Node + >>> templates={"T1": "{subject_id}/struct/T1.nii", + ...
"epi": "{subject_id}/func/f[0, 1].nii"} + >>> dg = Node(SelectFiles(templates), "selectfiles") + >>> dg.inputs.subject_id = "subj1" + >>> dg.outputs.get() + {'T1': , 'epi': } + + The same thing with dynamic grabbing of specific files: + + >>> templates["epi"] = "{subject_id}/func/f{run!s}.nii" + >>> dg = Node(SelectFiles(templates), "selectfiles") + >>> dg.inputs.subject_id = "subj1" + >>> dg.inputs.run = [2, 4] + + """ + input_spec = SelectFilesInputSpec + output_spec = DynamicTraitedSpec + _always_run = True + + def __init__(self, templates, **kwargs): + """Create an instance with specific input fields. + + Parameters + ---------- + templates : dictionary + Mapping from string keys to string template values. + The keys become output fields on the interface. + The templates should use {}-formatting syntax, where + the names in curly braces become inputs fields on the interface. + Format strings can also use glob wildcards to match multiple + files. At runtime, the values of the interface inputs will be + plugged into these templates, and the resulting strings will be + used to select files. 
+ + """ + super(SelectFiles, self).__init__(**kwargs) + + # Infer the infields and outfields from the template + infields = [] + for name, template in templates.iteritems(): + for _, field_name, _, _ in string.Formatter().parse(template): + if field_name is not None and field_name not in infields: + infields.append(field_name) + + self._infields = infields + self._outfields = list(templates) + self._templates = templates + + # Add the dynamic input fields + undefined_traits = {} + for field in infields: + self.inputs.add_trait(field, traits.Any) + undefined_traits[field] = Undefined + self.inputs.trait_set(trait_change_notify=False, **undefined_traits) + + def _add_output_traits(self, base): + """Add the dynamic output fields""" + return add_traits(base, self._templates.keys()) + + def _list_outputs(self): + """Find the files and expose them as interface outputs.""" + outputs = {} + info = dict([(k, v) for k, v in self.inputs.__dict__.items() + if k in self._infields]) + + for field, template in self._templates.iteritems(): + + # Build the full template path + if isdefined(self.inputs.base_directory): + template = op.abspath(op.join( + self.inputs.base_directory, template)) + else: + template = op.abspath(template) + + # Fill in the template and glob for files + filled_template = template.format(**info) + filelist = glob.glob(filled_template) + + # Handle the case where nothing matched + if not filelist: + msg = "No files were found matching %s template: %s" % ( + field, template) + if self.inputs.raise_on_empty: + raise IOError(msg) + else: + warn(msg) + + # Possibly sort the list + if self.inputs.sort_filelist: + filelist.sort() + + # Handle whether this must be a list or not + if not self.inputs.force_lists: + filelist = list_to_filename(filelist) + + outputs[field] = filelist + + return outputs + + +class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): root_paths = traits.Either(traits.List(), traits.Str(), mandatory=True,) - match_regex = 
traits.Str('(.+)', + match_regex = traits.Str('(.+)', usedefault=True, desc=("Regular expression for matching " "paths.")) ignore_regexes = traits.List(desc=("List of regular expressions, " "if any match the path it will be " "ignored.") - ) + ) max_depth = traits.Int(desc="The maximum depth to search beneath " "the root_paths") min_depth = traits.Int(desc="The minimum depth to search beneath " @@ -573,23 +703,19 @@ class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): class DataFinder(IOBase): - """Search for paths that match a given regular expression. Allows a less + """Search for paths that match a given regular expression. Allows a less proscriptive approach to gathering input files compared to DataGrabber. - Will recursively search any subdirectories by default. This can be limited - with the min/max depth options. - - Matched paths are available in the output 'out_paths'. Any named groups of - captured text from the regular expression are also available as ouputs of + Will recursively search any subdirectories by default. This can be limited + with the min/max depth options. + Matched paths are available in the output 'out_paths'. Any named groups of + captured text from the regular expression are also available as ouputs of the same name. - Examples -------- >>> from nipype.interfaces.io import DataFinder - - Look for Nifti files in directories with "ep2d_fid" or "qT1" in the name, + Look for Nifti files in directories with "ep2d_fid" or "qT1" in the name, starting in the current directory. - >>> df = DataFinder() >>> df.inputs.root_paths = '.' 
>>> df.inputs.match_regex = '.+/(?P.+(qT1|ep2d_fid_T1).+)/(?P.+)\.nii.gz' @@ -599,13 +725,11 @@ class DataFinder(IOBase): './018-ep2d_fid_T1_Gd2/acquisition.nii.gz', './016-ep2d_fid_T1_Gd1/acquisition.nii.gz', './013-ep2d_fid_T1_pre/acquisition.nii.gz'] - >>> print result.outputs.series_dir # doctest: +SKIP ['027-ep2d_fid_T1_Gd4', '018-ep2d_fid_T1_Gd2', '016-ep2d_fid_T1_Gd1', '013-ep2d_fid_T1_pre'] - >>> print result.outputs.basename # doctest: +SKIP ['acquisition', 'acquisition', @@ -613,31 +737,27 @@ class DataFinder(IOBase): 'acquisition'] """ - input_spec = DataFinderInputSpec output_spec = DynamicTraitedSpec _always_run = True - + def _match_path(self, target_path): #Check if we should ignore the path for ignore_re in self.ignore_regexes: if ignore_re.search(target_path): return - #Check if we can match the path match = self.match_regex.search(target_path) if not match is None: match_dict = match.groupdict() - if self.result is None: - self.result = {'out_paths' : []} + self.result = {'out_paths': []} for key in match_dict.keys(): self.result[key] = [] - self.result['out_paths'].append(target_path) for key, val in match_dict.iteritems(): self.result[key].append(val) - + def _run_interface(self, runtime): #Prepare some of the inputs if isinstance(self.inputs.root_paths, str): @@ -655,33 +775,27 @@ def _run_interface(self, runtime): self.ignore_regexes = [] else: self.ignore_regexes = \ - [re.compile(regex) + [re.compile(regex) for regex in self.inputs.ignore_regexes] - self.result = None for root_path in self.inputs.root_paths: #Handle tilda/env variables and remove extra seperators root_path = os.path.normpath(os.path.expandvars(os.path.expanduser(root_path))) - #Check if the root_path is a file if os.path.isfile(root_path): if min_depth == 0: self._match_path(root_path) continue - - #Walk through directory structure checking paths + #Walk through directory structure checking paths for curr_dir, sub_dirs, files in os.walk(root_path): #Determine the current 
depth from the root_path - curr_depth = (curr_dir.count(os.sep) - + curr_depth = (curr_dir.count(os.sep) - root_path.count(os.sep)) - - #If the max path depth has been reached, clear sub_dirs + #If the max path depth has been reached, clear sub_dirs #and files - if (not max_depth is None and - curr_depth >= max_depth): + if max_depth is not None and curr_depth >= max_depth: sub_dirs[:] = [] files = [] - #Test the path for the curr_dir and all files if curr_depth >= min_depth: self._match_path(curr_dir) @@ -689,22 +803,21 @@ def _run_interface(self, runtime): for infile in files: full_path = os.path.join(curr_dir, infile) self._match_path(full_path) - - if (self.inputs.unpack_single and + if (self.inputs.unpack_single and len(self.result['out_paths']) == 1 - ): + ): for key, vals in self.result.iteritems(): self.result[key] = vals[0] - if not self.result: raise RuntimeError("Regular expression did not match any files!") return runtime - + def _list_outputs(self): outputs = self._outputs().get() outputs.update(self.result) return outputs + class FSSourceInputSpec(BaseInterfaceInputSpec): subjects_dir = Directory(mandatory=True, desc='Freesurfer subjects directory.') diff --git a/nipype/interfaces/tests/test_io.py b/nipype/interfaces/tests/test_io.py index 5246f2e299..f71b896932 100644 --- a/nipype/interfaces/tests/test_io.py +++ b/nipype/interfaces/tests/test_io.py @@ -3,17 +3,53 @@ import os import glob import shutil +import os.path as op from tempfile import mkstemp, mkdtemp +import nipype from nipype.testing import assert_equal, assert_true, assert_false import nipype.interfaces.io as nio from nipype.interfaces.base import Undefined + def test_datagrabber(): dg = nio.DataGrabber() yield assert_equal, dg.inputs.template, Undefined yield assert_equal, dg.inputs.base_directory, Undefined - yield assert_equal, dg.inputs.template_args,{'outfiles': []} + yield assert_equal, dg.inputs.template_args, {'outfiles': []} + + +def test_selectfiles(): + base_dir =
op.dirname(nipype.__file__) + templates = {"model": "interfaces/{package}/model.py", + "preprocess": "interfaces/{package}/pre*.py"} + dg = nio.SelectFiles(templates, base_directory=base_dir) + yield assert_equal, dg._infields, ["package"] + yield assert_equal, sorted(dg._outfields), ["model", "preprocess"] + dg.inputs.package = "fsl" + res = dg.run() + wanted = op.join(op.dirname(nipype.__file__), "interfaces/fsl/model.py") + yield assert_equal, res.outputs.model, wanted + + dg = nio.SelectFiles(templates, + base_directory=base_dir, + force_lists=True) + outfields = sorted(dg._outputs().get()) + yield assert_equal, outfields, ["model", "preprocess"] + + dg.inputs.package = "spm" + res = dg.run() + wanted = op.join(op.dirname(nipype.__file__), + "interfaces/spm/preprocess.py") + yield assert_equal, res.outputs.preprocess, [wanted] + + templates = {"converter": "interfaces/dcm{to!s}nii.py"} + dg = nio.SelectFiles(templates, base_directory=base_dir) + dg.inputs.to = 2 + res = dg.run() + wanted = op.join(base_dir, "interfaces/dcm2nii.py") + yield assert_equal, res.outputs.converter, wanted + def test_datasink(): ds = nio.DataSink() @@ -21,11 +57,12 @@ def test_datasink(): yield assert_equal, ds.inputs.base_directory, Undefined yield assert_equal, ds.inputs.strip_dir, Undefined yield assert_equal, ds.inputs._outputs, {} - ds = nio.DataSink(base_directory = 'foo') + ds = nio.DataSink(base_directory='foo') yield assert_equal, ds.inputs.base_directory, 'foo' ds = nio.DataSink(infields=['test']) yield assert_true, 'test' in ds.inputs.copyable_trait_names() + def test_datasink_substitutions(): indir = mkdtemp(prefix='-Tmp-nipype_ds_subs_in') outdir = mkdtemp(prefix='-Tmp-nipype_ds_subs_out') @@ -36,52 +73,58 @@ def test_datasink_substitutions(): open(f, 'w') ds = nio.DataSink( parametrization=False, - base_directory = outdir, - substitutions = [('ababab', 'ABABAB')], + base_directory=outdir, + substitutions=[('ababab', 'ABABAB')], # end archoring ($) is used to assure 
operation on the filename # instead of possible temporary directories names matches # Patterns should be more comprehendable in the real-world usage # cases since paths would be quite more sensible - regexp_substitutions = [(r'xABABAB(\w*)\.n$', r'a-\1-b.n'), - ('(.*%s)[-a]([^%s]*)$' % ((os.path.sep,)*2), - r'\1!\2')] ) + regexp_substitutions=[(r'xABABAB(\w*)\.n$', r'a-\1-b.n'), + ('(.*%s)[-a]([^%s]*)$' % ((os.path.sep,) * 2), + r'\1!\2')]) setattr(ds.inputs, '@outdir', files) ds.run() yield assert_equal, \ sorted([os.path.basename(x) for x in glob.glob(os.path.join(outdir, '*'))]), \ - ['!-yz-b.n', 'ABABAB.n'] # so we got re used 2nd and both patterns + ['!-yz-b.n', 'ABABAB.n'] # so we got re used 2nd and both patterns shutil.rmtree(indir) shutil.rmtree(outdir) + def _temp_analyze_files(): """Generate temporary analyze file pair.""" - fd, orig_img = mkstemp(suffix = '.img', dir=mkdtemp()) + fd, orig_img = mkstemp(suffix='.img', dir=mkdtemp()) orig_hdr = orig_img[:-4] + '.hdr' fp = file(orig_hdr, 'w+') fp.close() return orig_img, orig_hdr + def test_datasink_copydir(): orig_img, orig_hdr = _temp_analyze_files() outdir = mkdtemp() pth, fname = os.path.split(orig_img) - ds = nio.DataSink(base_directory = outdir, parameterization=False) - setattr(ds.inputs,'@outdir',pth) + ds = nio.DataSink(base_directory=outdir, parameterization=False) + setattr(ds.inputs, '@outdir', pth) ds.run() - file_exists = lambda: os.path.exists(os.path.join(outdir, pth.split(os.path.sep)[-1], fname)) + sep = os.path.sep + file_exists = lambda: os.path.exists(os.path.join(outdir, + pth.split(sep)[-1], + fname)) yield assert_true, file_exists() shutil.rmtree(pth) orig_img, orig_hdr = _temp_analyze_files() pth, fname = os.path.split(orig_img) ds.inputs.remove_dest_dir = True - setattr(ds.inputs,'outdir',pth) + setattr(ds.inputs, 'outdir', pth) ds.run() yield assert_false, file_exists() shutil.rmtree(outdir) shutil.rmtree(pth) + def test_freesurfersource(): fss = nio.FreeSurferSource() yield 
assert_equal, fss.inputs.hemi, 'both'