
Commit 07170ae

Merge pull request #623 from mwaskom/enh/better_datagrabber

Improved alternate DataGrabber interface

2 parents: 935b865 + 89cd205

5 files changed: +212 −55 lines


CHANGES

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 Next release
 ============
 
+* ENH: SelectFiles: a streamlined version of DataGrabber
 * ENH: New interfaces: spm.ResliceToReference
 
 * FIX: Deals properly with 3d files in SPM Realign
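
For context on the changelog entry, here is a hedged sketch contrasting the pre-existing DataGrabber setup with the new SelectFiles interface for the same task. The paths and subject field are hypothetical; the DataGrabber settings follow its existing usage in nipype.interfaces.io:

    from nipype import DataGrabber, Node, SelectFiles

    # DataGrabber: %-style template plus a separate template_args mapping
    dg = Node(DataGrabber(infields=['subject_id'], outfields=['T1']),
              name='datagrabber')
    dg.inputs.base_directory = '/data'
    dg.inputs.template = '%s/struct/T1.nii'
    dg.inputs.template_args = {'T1': [['subject_id']]}
    dg.inputs.sort_filelist = True

    # SelectFiles: a single dict of {}-format templates; the input field
    # (here subject_id) is inferred from the braces automatically
    sf = Node(SelectFiles({'T1': '{subject_id}/struct/T1.nii'},
                          base_directory='/data'),
              name='selectfiles')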

nipype/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@
 
 from pipeline import Node, MapNode, Workflow
 from interfaces import (fsl, spm, freesurfer, afni, ants, slicer, dipy, nipy,
-                        mrtrix, camino, DataGrabber, DataSink,
+                        mrtrix, camino, DataGrabber, DataSink, SelectFiles,
                         IdentityInterface, Rename, Function, Select, Merge)

nipype/interfaces/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -7,6 +7,6 @@
 """
 __docformat__ = 'restructuredtext'
 
-from io import DataGrabber, DataSink
+from io import DataGrabber, DataSink, SelectFiles
 from utility import IdentityInterface, Rename, Function, Select, Merge
 import fsl, spm, freesurfer, afni, ants, slicer, dipy, nipy, mrtrix, camino
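
With both package __init__ changes applied, SelectFiles is exposed alongside DataGrabber and DataSink. A minimal sketch of the import paths this enables (assuming an installation that includes this commit):

    from nipype import SelectFiles
    from nipype.interfaces import SelectFiles as SelectFilesFromInterfaces
    from nipype.interfaces.io import SelectFiles as SelectFilesFromIO

    # All three names refer to the same class object
    assert SelectFiles is SelectFilesFromInterfaces is SelectFilesFromIO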

nipype/interfaces/io.py

Lines changed: 153 additions & 40 deletions
@@ -18,7 +18,9 @@
 
 """
 import glob
+import string
 import os
+import os.path as op
 import shutil
 import re
 import tempfile
@@ -528,9 +530,9 @@ def _list_outputs(self):
             filledtemplate = template
             if argtuple:
                 try:
-                    filledtemplate = template%tuple(argtuple)
+                    filledtemplate = template % tuple(argtuple)
                 except TypeError as e:
-                    raise TypeError(e.message + ": Template %s failed to convert with args %s"%(template, str(tuple(argtuple))))
+                    raise TypeError(e.message + ": Template %s failed to convert with args %s" % (template, str(tuple(argtuple))))
             outfiles = glob.glob(filledtemplate)
             if len(outfiles) == 0:
                 msg = 'Output key: %s Template: %s returned no files' % (key, filledtemplate)
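
The two modified lines above only add spacing around the % operator, but they mark the DataGrabber substitution step that SelectFiles replaces with {}-formatting. An illustrative sketch of that %-style substitution and the failure mode the except clause handles (template and argument values here are hypothetical):

    template = "%s/func/f%d.nii"
    argtuple = ["subj1", 3]
    try:
        filledtemplate = template % tuple(argtuple)   # -> 'subj1/func/f3.nii'
    except TypeError as e:
        # Raised when the arguments do not line up with the format specifiers,
        # e.g. too few values or a string where %d expects an integer.
        raise TypeError(str(e) + ": Template %s failed to convert with args %s"
                        % (template, str(tuple(argtuple))))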
@@ -551,18 +553,146 @@ def _list_outputs(self):
             outputs[key] = outputs[key][0]
         return outputs
 
-class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
+
+class SelectFilesInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
+
+    base_directory = Directory(exists=True,
+        desc="Root path common to templates.")
+    sort_filelist = traits.Bool(True, usedefault=True,
+        desc="When matching multiple files, return them in sorted order.")
+    raise_on_empty = traits.Bool(True, usedefault=True,
+        desc="Raise an exception if a template pattern matches no files.")
+    force_lists = traits.Bool(False, usedefault=True,
+        desc="Return all values as lists even when matching a single file.")
+
+
+class SelectFiles(IOBase):
+    """Flexibly collect data from disk to feed into workflows.
+
+    This interface uses the {}-based string formatting syntax to plug
+    values (possibly known only at workflow execution time) into string
+    templates and collect files from persistent storage. These templates
+    can also be combined with glob wildcards. The field names in the
+    formatting template (i.e. the terms in braces) will become input
+    fields on the interface, and the keys in the templates dictionary
+    will form the output fields.
+
+    Examples
+    --------
+
+    >>> from nipype import SelectFiles, Node
+    >>> templates={"T1": "{subject_id}/struct/T1.nii",
+    ...            "epi": "{subject_id}/func/f[0, 1].nii"}
+    >>> dg = Node(SelectFiles(templates), "selectfiles")
+    >>> dg.inputs.subject_id = "subj1"
+    >>> dg.outputs.get()
+    {'T1': <undefined>, 'epi': <undefined>}
+
+    The same thing with dynamic grabbing of specific files:
+
+    >>> templates["epi"] = "{subject_id}/func/f{run!s}.nii"
+    >>> dg = Node(SelectFiles(templates), "selectfiles")
+    >>> dg.inputs.subject_id = "subj1"
+    >>> dg.inputs.run = [2, 4]
+
+    """
+    input_spec = SelectFilesInputSpec
+    output_spec = DynamicTraitedSpec
+    _always_run = True
+
+    def __init__(self, templates, **kwargs):
+        """Create an instance with specific input fields.
+
+        Parameters
+        ----------
+        templates : dictionary
+            Mapping from string keys to string template values.
+            The keys become output fields on the interface.
+            The templates should use {}-formatting syntax, where
+            the names in curly braces become input fields on the interface.
+            Format strings can also use glob wildcards to match multiple
+            files. At runtime, the values of the interface inputs will be
+            plugged into these templates, and the resulting strings will be
+            used to select files.
+
+        """
+        super(SelectFiles, self).__init__(**kwargs)
+
+        # Infer the infields and outfields from the template
+        infields = []
+        for name, template in templates.iteritems():
+            for _, field_name, _, _ in string.Formatter().parse(template):
+                if field_name is not None and field_name not in infields:
+                    infields.append(field_name)
+
+        self._infields = infields
+        self._outfields = list(templates)
+        self._templates = templates
+
+        # Add the dynamic input fields
+        undefined_traits = {}
+        for field in infields:
+            self.inputs.add_trait(field, traits.Any)
+            undefined_traits[field] = Undefined
+        self.inputs.trait_set(trait_change_notify=False, **undefined_traits)
+
+    def _add_output_traits(self, base):
+        """Add the dynamic output fields"""
+        return add_traits(base, self._templates.keys())
+
+    def _list_outputs(self):
+        """Find the files and expose them as interface outputs."""
+        outputs = {}
+        info = dict([(k, v) for k, v in self.inputs.__dict__.items()
+                     if k in self._infields])
+
+        for field, template in self._templates.iteritems():
+
+            # Build the full template path
+            if isdefined(self.inputs.base_directory):
+                template = op.abspath(op.join(
+                    self.inputs.base_directory, template))
+            else:
+                template = op.abspath(template)
+
+            # Fill in the template and glob for files
+            filled_template = template.format(**info)
+            filelist = glob.glob(filled_template)
+
+            # Handle the case where nothing matched
+            if not filelist:
+                msg = "No files were found matching %s template: %s" % (
+                    field, template)
+                if self.inputs.raise_on_empty:
+                    raise IOError(msg)
+                else:
+                    warn(msg)
+
+            # Possibly sort the list
+            if self.inputs.sort_filelist:
+                filelist.sort()
+
+            # Handle whether this must be a list or not
+            if not self.inputs.force_lists:
+                filelist = list_to_filename(filelist)
+
+            outputs[field] = filelist
+
+        return outputs
+
+
+class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
     root_paths = traits.Either(traits.List(),
                                traits.Str(),
                                mandatory=True,)
-    match_regex = traits.Str('(.+)',
+    match_regex = traits.Str('(.+)',
                              usedefault=True,
                              desc=("Regular expression for matching "
                                    "paths."))
     ignore_regexes = traits.List(desc=("List of regular expressions, "
                                        "if any match the path it will be "
                                        "ignored.")
-                                       )
+                                 )
     max_depth = traits.Int(desc="The maximum depth to search beneath "
                            "the root_paths")
     min_depth = traits.Int(desc="The minimum depth to search beneath "
@@ -573,23 +703,19 @@ class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
 
 
 class DataFinder(IOBase):
-    """Search for paths that match a given regular expression. Allows a less
+    """Search for paths that match a given regular expression. Allows a less
     proscriptive approach to gathering input files compared to DataGrabber.
-    Will recursively search any subdirectories by default. This can be limited
-    with the min/max depth options.
-
-    Matched paths are available in the output 'out_paths'. Any named groups of
-    captured text from the regular expression are also available as ouputs of
+    Will recursively search any subdirectories by default. This can be limited
+    with the min/max depth options.
+    Matched paths are available in the output 'out_paths'. Any named groups of
+    captured text from the regular expression are also available as ouputs of
     the same name.
-
     Examples
     --------
 
     >>> from nipype.interfaces.io import DataFinder
-
-    Look for Nifti files in directories with "ep2d_fid" or "qT1" in the name,
+    Look for Nifti files in directories with "ep2d_fid" or "qT1" in the name,
     starting in the current directory.
-
     >>> df = DataFinder()
    >>> df.inputs.root_paths = '.'
    >>> df.inputs.match_regex = '.+/(?P<series_dir>.+(qT1|ep2d_fid_T1).+)/(?P<basename>.+)\.nii.gz'
@@ -599,45 +725,39 @@ class DataFinder(IOBase):
      './018-ep2d_fid_T1_Gd2/acquisition.nii.gz',
      './016-ep2d_fid_T1_Gd1/acquisition.nii.gz',
      './013-ep2d_fid_T1_pre/acquisition.nii.gz']
-
     >>> print result.outputs.series_dir # doctest: +SKIP
     ['027-ep2d_fid_T1_Gd4',
      '018-ep2d_fid_T1_Gd2',
      '016-ep2d_fid_T1_Gd1',
      '013-ep2d_fid_T1_pre']
-
     >>> print result.outputs.basename # doctest: +SKIP
     ['acquisition',
      'acquisition',
      'acquisition',
      'acquisition']
 
     """
-
     input_spec = DataFinderInputSpec
     output_spec = DynamicTraitedSpec
     _always_run = True
-
+
     def _match_path(self, target_path):
         #Check if we should ignore the path
         for ignore_re in self.ignore_regexes:
             if ignore_re.search(target_path):
                 return
-
         #Check if we can match the path
         match = self.match_regex.search(target_path)
         if not match is None:
             match_dict = match.groupdict()
-
             if self.result is None:
-                self.result = {'out_paths' : []}
+                self.result = {'out_paths': []}
                 for key in match_dict.keys():
                     self.result[key] = []
-
             self.result['out_paths'].append(target_path)
             for key, val in match_dict.iteritems():
                 self.result[key].append(val)
-
+
     def _run_interface(self, runtime):
         #Prepare some of the inputs
         if isinstance(self.inputs.root_paths, str):
@@ -655,56 +775,49 @@ def _run_interface(self, runtime):
             self.ignore_regexes = []
         else:
             self.ignore_regexes = \
-                [re.compile(regex)
+                [re.compile(regex)
                  for regex in self.inputs.ignore_regexes]
-
         self.result = None
         for root_path in self.inputs.root_paths:
             #Handle tilda/env variables and remove extra seperators
             root_path = os.path.normpath(os.path.expandvars(os.path.expanduser(root_path)))
-
             #Check if the root_path is a file
             if os.path.isfile(root_path):
                 if min_depth == 0:
                     self._match_path(root_path)
                 continue
-
-            #Walk through directory structure checking paths
+            #Walk through directory structure checking paths
             for curr_dir, sub_dirs, files in os.walk(root_path):
                 #Determine the current depth from the root_path
-                curr_depth = (curr_dir.count(os.sep) -
+                curr_depth = (curr_dir.count(os.sep) -
                               root_path.count(os.sep))
-
-                #If the max path depth has been reached, clear sub_dirs
+                #If the max path depth has been reached, clear sub_dirs
                 #and files
-                if (not max_depth is None and
-                    curr_depth >= max_depth):
+                if max_depth is not None and curr_depth >= max_depth:
                     sub_dirs[:] = []
                     files = []
-
                 #Test the path for the curr_dir and all files
                 if curr_depth >= min_depth:
                     self._match_path(curr_dir)
                 if curr_depth >= (min_depth - 1):
                     for infile in files:
                         full_path = os.path.join(curr_dir, infile)
                         self._match_path(full_path)
-
-        if (self.inputs.unpack_single and
+        if (self.inputs.unpack_single and
             len(self.result['out_paths']) == 1
-            ):
+            ):
             for key, vals in self.result.iteritems():
                 self.result[key] = vals[0]
-
         if not self.result:
             raise RuntimeError("Regular expression did not match any files!")
         return runtime
-
+
     def _list_outputs(self):
         outputs = self._outputs().get()
         outputs.update(self.result)
         return outputs
 
+
 class FSSourceInputSpec(BaseInterfaceInputSpec):
     subjects_dir = Directory(mandatory=True,
                              desc='Freesurfer subjects directory.')
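
Taken together, _match_path and _run_interface walk each root path, prune the walk at max_depth, and collect regex named groups as list-valued outputs. A condensed standalone sketch of that logic (the root path, depth limits, and regex below are illustrative, borrowed from the docstring example):

    import os
    import re

    root_path = '.'
    min_depth, max_depth = 0, 2
    match_regex = re.compile(r'.+/(?P<series_dir>.+(qT1|ep2d_fid_T1).+)/'
                             r'(?P<basename>.+)\.nii.gz')

    result = {'out_paths': []}
    for curr_dir, sub_dirs, files in os.walk(root_path):
        curr_depth = curr_dir.count(os.sep) - root_path.count(os.sep)
        if max_depth is not None and curr_depth >= max_depth:
            sub_dirs[:] = []          # stop os.walk from descending further
            files = []
        candidates = [curr_dir] if curr_depth >= min_depth else []
        if curr_depth >= (min_depth - 1):
            candidates += [os.path.join(curr_dir, f) for f in files]
        for path in candidates:
            match = match_regex.search(path)
            if match is not None:
                result['out_paths'].append(path)
                for key, val in match.groupdict().items():
                    result.setdefault(key, []).append(val)
    print(result['out_paths'])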
