"""
import glob
+ import string
import os
+ import os.path as op
import shutil
import re
import tempfile
@@ -528,9 +530,9 @@ def _list_outputs(self):
                    filledtemplate = template
                    if argtuple:
                        try:
-                             filledtemplate = template % tuple(argtuple)
+                             filledtemplate = template % tuple(argtuple)
                        except TypeError as e:
-                             raise TypeError(e.message + ": Template %s failed to convert with args %s" % (template, str(tuple(argtuple))))
+                             raise TypeError(e.message + ": Template %s failed to convert with args %s" % (template, str(tuple(argtuple))))
                    outfiles = glob.glob(filledtemplate)
                    if len(outfiles) == 0:
                        msg = 'Output key: %s Template: %s returned no files' % (key, filledtemplate)
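For context, a minimal sketch of the old-style %-substitution that DataGrabber performs in the hunk above before globbing; the template and argument values here are hypothetical, not taken from this commit:

    # Hypothetical values illustrating DataGrabber's %-substitution and glob.
    import glob

    template = "%s/func/f%d.nii"                 # assumed old-style template
    argtuple = ["subj1", 3]                      # assumed values gathered from inputs
    filledtemplate = template % tuple(argtuple)  # -> "subj1/func/f3.nii"
    outfiles = glob.glob(filledtemplate)         # empty list if nothing matches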
@@ -551,18 +553,146 @@ def _list_outputs(self):
                outputs[key] = outputs[key][0]
        return outputs

- class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
+
+ class SelectFilesInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
+
+     base_directory = Directory(exists=True,
+                                desc="Root path common to templates.")
+     sort_filelist = traits.Bool(True, usedefault=True,
+                                 desc="When matching multiple files, return them in sorted order.")
+     raise_on_empty = traits.Bool(True, usedefault=True,
+                                  desc="Raise an exception if a template pattern matches no files.")
+     force_lists = traits.Bool(False, usedefault=True,
+                               desc="Return all values as lists even when matching a single file.")
+
+
+ class SelectFiles(IOBase):
+     """Flexibly collect data from disk to feed into workflows.
+
+     This interface uses the {}-based string formatting syntax to plug
+     values (possibly known only at workflow execution time) into string
+     templates and collect files from persistent storage. These templates
+     can also be combined with glob wildcards. The field names in the
+     formatting template (i.e. the terms in braces) will become input
+     fields on the interface, and the keys in the templates dictionary
+     will form the output fields.
+
+     Examples
+     --------
+
+     >>> from nipype import SelectFiles, Node
+     >>> templates={"T1": "{subject_id}/struct/T1.nii",
+     ...            "epi": "{subject_id}/func/f[0, 1].nii"}
+     >>> dg = Node(SelectFiles(templates), "selectfiles")
+     >>> dg.inputs.subject_id = "subj1"
+     >>> dg.outputs.get()
+     {'T1': <undefined>, 'epi': <undefined>}
+
+     The same thing with dynamic grabbing of specific files:
+
+     >>> templates["epi"] = "{subject_id}/func/f{run!s}.nii"
+     >>> dg = Node(SelectFiles(templates), "selectfiles")
+     >>> dg.inputs.subject_id = "subj1"
+     >>> dg.inputs.run = [2, 4]
+
+     """
+     input_spec = SelectFilesInputSpec
+     output_spec = DynamicTraitedSpec
+     _always_run = True
+
+     def __init__(self, templates, **kwargs):
+         """Create an instance with specific input fields.
+
+         Parameters
+         ----------
+         templates : dictionary
+             Mapping from string keys to string template values.
+             The keys become output fields on the interface.
+             The templates should use {}-formatting syntax, where
+             the names in curly braces become input fields on the interface.
+             Format strings can also use glob wildcards to match multiple
+             files. At runtime, the values of the interface inputs will be
+             plugged into these templates, and the resulting strings will be
+             used to select files.
+
+         """
+         super(SelectFiles, self).__init__(**kwargs)
+
+         # Infer the infields and outfields from the template
+         infields = []
+         for name, template in templates.iteritems():
+             for _, field_name, _, _ in string.Formatter().parse(template):
+                 if field_name is not None and field_name not in infields:
+                     infields.append(field_name)
+
+         self._infields = infields
+         self._outfields = list(templates)
+         self._templates = templates
+
+         # Add the dynamic input fields
+         undefined_traits = {}
+         for field in infields:
+             self.inputs.add_trait(field, traits.Any)
+             undefined_traits[field] = Undefined
+         self.inputs.trait_set(trait_change_notify=False, **undefined_traits)
+
+     def _add_output_traits(self, base):
+         """Add the dynamic output fields"""
+         return add_traits(base, self._templates.keys())
+
+     def _list_outputs(self):
+         """Find the files and expose them as interface outputs."""
+         outputs = {}
+         info = dict([(k, v) for k, v in self.inputs.__dict__.items()
+                      if k in self._infields])
+
+         for field, template in self._templates.iteritems():
+
+             # Build the full template path
+             if isdefined(self.inputs.base_directory):
+                 template = op.abspath(op.join(
+                     self.inputs.base_directory, template))
+             else:
+                 template = op.abspath(template)
+
+             # Fill in the template and glob for files
+             filled_template = template.format(**info)
+             filelist = glob.glob(filled_template)
+
+             # Handle the case where nothing matched
+             if not filelist:
+                 msg = "No files were found matching %s template: %s" % (
+                     field, template)
+                 if self.inputs.raise_on_empty:
+                     raise IOError(msg)
+                 else:
+                     warn(msg)
+
+             # Possibly sort the list
+             if self.inputs.sort_filelist:
+                 filelist.sort()
+
+             # Handle whether this must be a list or not
+             if not self.inputs.force_lists:
+                 filelist = list_to_filename(filelist)
+
+             outputs[field] = filelist
+
+         return outputs
+
+
+ class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
    root_paths = traits.Either(traits.List(),
                               traits.Str(),
                               mandatory=True,)
-     match_regex = traits.Str('(.+)',
+     match_regex = traits.Str('(.+)',
                             usedefault=True,
                             desc=("Regular expression for matching "
                                   "paths."))
    ignore_regexes = traits.List(desc=("List of regular expressions, "
                                       "if any match the path it will be "
                                       "ignored.")
-                                  )
+                                  )
    max_depth = traits.Int(desc="The maximum depth to search beneath "
                           "the root_paths")
    min_depth = traits.Int(desc="The minimum depth to search beneath "
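A standalone sketch of the two steps the new SelectFiles class builds on: brace-delimited names in a {}-style template are discovered with string.Formatter and become input fields, and the filled template is then globbed for files. The templates dictionary, base directory, and input value below are made up for illustration:

    import glob
    import os.path as op
    import string

    templates = {"T1": "{subject_id}/struct/T1.nii"}   # hypothetical templates dict

    # Step 1: every name in curly braces becomes an input field
    infields = []
    for name, template in templates.items():
        for _, field_name, _, _ in string.Formatter().parse(template):
            if field_name is not None and field_name not in infields:
                infields.append(field_name)
    # infields == ['subject_id']

    # Step 2: plug input values into the template and glob for matches
    info = {"subject_id": "subj1"}                     # hypothetical input value
    filled = op.abspath(op.join("/data", templates["T1"])).format(**info)
    filelist = sorted(glob.glob(filled))               # sorted, like sort_filelist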
@@ -573,23 +703,19 @@ class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
class DataFinder(IOBase):
-     """Search for paths that match a given regular expression. Allows a less
+     """Search for paths that match a given regular expression. Allows a less
    proscriptive approach to gathering input files compared to DataGrabber.
-     Will recursively search any subdirectories by default. This can be limited
-     with the min/max depth options.
-
-     Matched paths are available in the output 'out_paths'. Any named groups of
-     captured text from the regular expression are also available as outputs of
+     Will recursively search any subdirectories by default. This can be limited
+     with the min/max depth options.
+     Matched paths are available in the output 'out_paths'. Any named groups of
+     captured text from the regular expression are also available as outputs of
    the same name.
-
    Examples
    --------

    >>> from nipype.interfaces.io import DataFinder
-
-     Look for Nifti files in directories with "ep2d_fid" or "qT1" in the name,
+     Look for Nifti files in directories with "ep2d_fid" or "qT1" in the name,
    starting in the current directory.
-
    >>> df = DataFinder()
    >>> df.inputs.root_paths = '.'
    >>> df.inputs.match_regex = '.+/(?P<series_dir>.+(qT1|ep2d_fid_T1).+)/(?P<basename>.+)\.nii.gz'
@@ -599,45 +725,39 @@ class DataFinder(IOBase):
     './018-ep2d_fid_T1_Gd2/acquisition.nii.gz',
     './016-ep2d_fid_T1_Gd1/acquisition.nii.gz',
     './013-ep2d_fid_T1_pre/acquisition.nii.gz']
-
    >>> print result.outputs.series_dir # doctest: +SKIP
    ['027-ep2d_fid_T1_Gd4',
     '018-ep2d_fid_T1_Gd2',
     '016-ep2d_fid_T1_Gd1',
     '013-ep2d_fid_T1_pre']
-
    >>> print result.outputs.basename # doctest: +SKIP
    ['acquisition',
     'acquisition',
     'acquisition',
     'acquisition']

    """
-
    input_spec = DataFinderInputSpec
    output_spec = DynamicTraitedSpec
    _always_run = True
-
+
    def _match_path(self, target_path):
        #Check if we should ignore the path
        for ignore_re in self.ignore_regexes:
            if ignore_re.search(target_path):
                return
-
        #Check if we can match the path
        match = self.match_regex.search(target_path)
        if not match is None:
            match_dict = match.groupdict()
-
            if self.result is None:
-                 self.result = {'out_paths': []}
+                 self.result = {'out_paths': []}
                for key in match_dict.keys():
                    self.result[key] = []
-
            self.result['out_paths'].append(target_path)
            for key, val in match_dict.iteritems():
                self.result[key].append(val)
-
+
    def _run_interface(self, runtime):
        #Prepare some of the inputs
        if isinstance(self.inputs.root_paths, str):
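For reference, a small sketch of the named-group matching that _match_path performs above: each named group in match_regex contributes an output list alongside 'out_paths'. The pattern and path below are hypothetical:

    import re

    match_regex = re.compile(r'.+/(?P<series_dir>.+)/(?P<basename>.+)\.nii\.gz')
    target_path = './013-ep2d_fid_T1_pre/acquisition.nii.gz'   # hypothetical path

    result = {'out_paths': []}
    match = match_regex.search(target_path)
    if match is not None:
        result['out_paths'].append(target_path)
        for key, val in match.groupdict().items():
            result.setdefault(key, []).append(val)
    # result == {'out_paths': ['./013-ep2d_fid_T1_pre/acquisition.nii.gz'],
    #            'series_dir': ['013-ep2d_fid_T1_pre'], 'basename': ['acquisition']}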
@@ -655,56 +775,49 @@ def _run_interface(self, runtime):
            self.ignore_regexes = []
        else:
            self.ignore_regexes = \
-                 [re.compile(regex)
+                 [re.compile(regex)
                 for regex in self.inputs.ignore_regexes]
-
        self.result = None
        for root_path in self.inputs.root_paths:
            #Handle tilde/env variables and remove extra separators
            root_path = os.path.normpath(os.path.expandvars(os.path.expanduser(root_path)))
-
            #Check if the root_path is a file
            if os.path.isfile(root_path):
                if min_depth == 0:
                    self._match_path(root_path)
                continue
-
-             #Walk through directory structure checking paths
+             #Walk through directory structure checking paths
            for curr_dir, sub_dirs, files in os.walk(root_path):
                #Determine the current depth from the root_path
-                 curr_depth = (curr_dir.count(os.sep) -
+                 curr_depth = (curr_dir.count(os.sep) -
                              root_path.count(os.sep))
-
-                 #If the max path depth has been reached, clear sub_dirs
+                 #If the max path depth has been reached, clear sub_dirs
                #and files
-                 if (not max_depth is None and
-                     curr_depth >= max_depth):
+                 if max_depth is not None and curr_depth >= max_depth:
                    sub_dirs[:] = []
                    files = []
-
                #Test the path for the curr_dir and all files
                if curr_depth >= min_depth:
                    self._match_path(curr_dir)
                if curr_depth >= (min_depth - 1):
                    for infile in files:
                        full_path = os.path.join(curr_dir, infile)
                        self._match_path(full_path)
-
-         if (self.inputs.unpack_single and
+         if (self.inputs.unpack_single and
            len(self.result['out_paths']) == 1
-             ):
+             ):
            for key, vals in self.result.iteritems():
                self.result[key] = vals[0]
-
        if not self.result:
            raise RuntimeError("Regular expression did not match any files!")
        return runtime
-
+
    def _list_outputs(self):
        outputs = self._outputs().get()
        outputs.update(self.result)
        return outputs

+
class FSSourceInputSpec(BaseInterfaceInputSpec):
    subjects_dir = Directory(mandatory=True,
                             desc='Freesurfer subjects directory.')
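Finally, a brief sketch of the depth bookkeeping used in _run_interface above: a directory's depth is the difference in path-separator counts relative to the root, and clearing sub_dirs in place stops os.walk from descending past max_depth. The root directory and depth limits are hypothetical:

    import os

    root_path = os.path.normpath(os.path.expanduser("~/data"))  # hypothetical root
    min_depth, max_depth = 0, 2                                  # assumed limits
    visited = []

    for curr_dir, sub_dirs, files in os.walk(root_path):
        # Depth = number of extra separators relative to the root
        curr_depth = curr_dir.count(os.sep) - root_path.count(os.sep)
        if max_depth is not None and curr_depth >= max_depth:
            sub_dirs[:] = []   # pruning in place prevents deeper traversal
            files = []
        if curr_depth >= min_depth:
            visited.append(curr_dir)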