from .base import (DistributedPluginBase, report_crash)

+
def run_node(node, updatehash):
    result = dict(result=None, traceback=None)
    try:
@@ -22,6 +23,7 @@ def run_node(node, updatehash):
        result['result'] = node.result
    return result

+
class NonDaemonProcess(Process):
    """A non-daemon process to support internal multiprocessing.
    """
@@ -66,6 +68,7 @@ def __init__(self, plugin_args=None):
        else:
            self.pool = Pool(processes=n_procs)

+
    def _get_result(self, taskid):
        if taskid not in self._taskresult:
            raise RuntimeError('Multiproc task %d not found' % taskid)
@@ -80,8 +83,7 @@ def _submit_job(self, node, updatehash=False):
                node.inputs.terminal_output = 'allatonce'
        except:
            pass
-        self._taskresult[self._taskid] = self.pool.apply_async(run_node,
-                                                               (node,
+        self._taskresult[self._taskid] = self.pool.apply_async(run_node, (node,
                                                                updatehash,))
        return self._taskid
@@ -96,3 +98,169 @@ def _report_crash(self, node, result=None):

    def _clear_task(self, taskid):
        del self._taskresult[taskid]
+
+
+
+import numpy as np
+from copy import deepcopy
+from ..engine import (MapNode, str2bool)
+import datetime
+import psutil
+from ... import logging
+import semaphore_singleton
+logger = logging.getLogger('workflow')
+
+def release_lock(args):
+    # Callback for Pool.apply_async: wakes the scheduler waiting in _wait()
+    semaphore_singleton.semaphore.release()
+
+class ResourceMultiProcPlugin(MultiProcPlugin):
+    """Execute a workflow with multiprocessing without sending more jobs
+    at once than the system can support.
+
+    The plugin_args input to run can be used to control the multiprocessing
+    execution: it defines the maximum amount of memory and the maximum
+    number of threads that may be used. When those parameters are not
+    specified, the system's total memory and thread count are used.
+
+    Resource-consuming nodes should be tagged, for example:
+        memory_consuming_node.interface.memory = 8  # GB
+        thread_consuming_node.interface.num_threads = 16
+
+    The default for a node is 1 thread and 1 GB of memory.
+
+    Currently supported options are:
+
+    - n_procs: maximum number of threads to be executed in parallel
+    - memory: maximum memory (GB) that can be used at once
+
+    """
+
+    def __init__(self, plugin_args=None):
+        super(ResourceMultiProcPlugin, self).__init__(plugin_args=plugin_args)
+        self.plugin_args = plugin_args
+        self.processors = cpu_count()
+        memory = psutil.virtual_memory()
+        self.memory = memory.total / (1024 * 1024 * 1024)  # bytes -> GB
+        if self.plugin_args:
+            if 'n_procs' in self.plugin_args:
+                self.processors = self.plugin_args['n_procs']
+            if 'memory' in self.plugin_args:
+                self.memory = self.plugin_args['memory']
+
+    def _wait(self):
+        if len(self.pending_tasks) > 0:
+            semaphore_singleton.semaphore.acquire()
+        semaphore_singleton.semaphore.release()
+
+
+    def _submit_job(self, node, updatehash=False):
+        self._taskid += 1
+        try:
+            if node.inputs.terminal_output == 'stream':
+                node.inputs.terminal_output = 'allatonce'
+        except:
+            pass
+        self._taskresult[self._taskid] = self.pool.apply_async(run_node,
+                                                               (node, updatehash,),
+                                                               callback=release_lock)
+        return self._taskid
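The release_lock callback above, together with _wait, forms a simple handshake: every finished pool task releases a shared semaphore, which wakes the scheduler blocked in _wait. Below is a minimal self-contained sketch of that pattern, using a local threading.Semaphore in place of nipype's semaphore_singleton module; the names and the work function are illustrative, not nipype API.

import threading
from multiprocessing import Pool

semaphore = threading.Semaphore(0)

def work(x):
    # stand-in for run_node; executes in a worker process
    return x * x

def release_lock(result):
    # runs in the parent process once a task finishes
    semaphore.release()

if __name__ == '__main__':
    pool = Pool(processes=2)
    async_result = pool.apply_async(work, (3,), callback=release_lock)
    semaphore.acquire()           # block until some task completes
    print(async_result.get())     # -> 9
    pool.close()
    pool.join()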
+
+    def _send_procs_to_workers(self, updatehash=False, graph=None):
+        """Send jobs to workers when system resources are available.
+        Check memory (GB) and core usage before running jobs.
+        """
+        executing_now = []
+
+        # Jobs that are currently pending execution
+        jobids = np.flatnonzero((self.proc_pending == True) &
+                                (self.depidx.sum(axis=0) == 0).__array__())
+
+        # Check available system resources by summing the threads and
+        # memory used by all pending jobs
+        busy_memory = 0
+        busy_processors = 0
+        for jobid in jobids:
+            busy_memory += self.procs[jobid]._interface.memory
+            busy_processors += self.procs[jobid]._interface.num_threads
+
+        free_memory = self.memory - busy_memory
+        free_processors = self.processors - busy_processors
+
+        # Check all jobs without dependencies that have not yet run
+        jobids = np.flatnonzero((self.proc_done == False) &
+                                (self.depidx.sum(axis=0) == 0).__array__())
+
+        # Sort the ready-to-run jobs first by memory and then by number of
+        # threads; the most resource-consuming jobs run first
+        jobids = sorted(jobids,
+                        key=lambda item: (self.procs[item]._interface.memory,
+                                          self.procs[item]._interface.num_threads),
+                        reverse=True)
+
+        logger.debug('Free memory: %d, Free processors: %d',
+                     free_memory, free_processors)
+
+        # While there are enough memory and processors for the next job on
+        # the list, submit it
+        for jobid in jobids:
+            logger.debug('Next Job: %d, memory: %d, threads: %d'
+                         % (jobid, self.procs[jobid]._interface.memory,
+                            self.procs[jobid]._interface.num_threads))
+
+            if self.procs[jobid]._interface.memory <= free_memory and \
+               self.procs[jobid]._interface.num_threads <= free_processors:
+                logger.info('Executing: %s ID: %d' %
+                            (self.procs[jobid]._id, jobid))
+                executing_now.append(self.procs[jobid])
+
+                if isinstance(self.procs[jobid], MapNode):
+                    try:
+                        num_subnodes = self.procs[jobid].num_subnodes()
+                    except Exception:
+                        self._clean_queue(jobid, graph)
+                        self.proc_pending[jobid] = False
+                        continue
+                    if num_subnodes > 1:
+                        submit = self._submit_mapnode(jobid)
+                        if not submit:
+                            continue
+
+                # Change job status in the appropriate queues
+                self.proc_done[jobid] = True
+                self.proc_pending[jobid] = True
+
+                free_memory -= self.procs[jobid]._interface.memory
+                free_processors -= self.procs[jobid]._interface.num_threads
+
+                # Send the job to the task manager and add it to pending tasks
+                if self._status_callback:
+                    self._status_callback(self.procs[jobid], 'start')
+
+                if str2bool(self.procs[jobid].config['execution']['local_hash_check']):
+                    logger.debug('checking hash locally')
+                    try:
+                        hash_exists, _, _, _ = self.procs[jobid].hash_exists()
+                        logger.debug('Hash exists %s' % str(hash_exists))
+                        if (hash_exists and
+                                (self.procs[jobid].overwrite == False or
+                                 (self.procs[jobid].overwrite == None and
+                                  not self.procs[jobid]._interface.always_run))):
+                            self._task_finished_cb(jobid)
+                            self._remove_node_dirs()
+                            continue
+                    except Exception:
+                        self._clean_queue(jobid, graph)
+                        self.proc_pending[jobid] = False
+                        continue
+                    logger.debug('Finished checking hash')
+
+                if self.procs[jobid].run_without_submitting:
+                    logger.debug('Running node %s on master thread'
+                                 % self.procs[jobid])
+                    try:
+                        self.procs[jobid].run()
+                    except Exception:
+                        self._clean_queue(jobid, graph)
+                    self._task_finished_cb(jobid)
+                    self._remove_node_dirs()
+
+                else:
+                    logger.debug('submitting %d', jobid)
+                    tid = self._submit_job(deepcopy(self.procs[jobid]),
+                                           updatehash=updatehash)
+                    if tid is None:
+                        self.proc_done[jobid] = False
+                        self.proc_pending[jobid] = False
+                    else:
+                        self.pending_tasks.insert(0, (tid, jobid))
+            else:
+                break
+
+        logger.debug('No jobs waiting to execute')
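To make the scheduling policy in _send_procs_to_workers concrete, here is a standalone sketch of the same greedy loop: sort the ready jobs so the most resource-consuming come first, submit while the next job fits the free budget, and stop at the first one that does not. The Job type and the figures are hypothetical, not nipype API.

from collections import namedtuple

Job = namedtuple('Job', ['name', 'memory', 'num_threads'])

def schedule(jobs, free_memory, free_processors):
    # most resource-consuming jobs first, as in the plugin
    ready = sorted(jobs, key=lambda j: (j.memory, j.num_threads), reverse=True)
    submitted = []
    for job in ready:
        if job.memory <= free_memory and job.num_threads <= free_processors:
            free_memory -= job.memory
            free_processors -= job.num_threads
            submitted.append(job.name)
        else:
            break  # mirrors the plugin: stop at the first job that does not fit
    return submitted

print(schedule([Job('reg', 16, 8), Job('smooth', 8, 4), Job('merge', 1, 1)],
               free_memory=16, free_processors=8))
# -> ['reg']; 'smooth' no longer fits once 'reg' takes the whole budget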
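Finally, a minimal usage sketch for the new plugin, assuming a built nipype Workflow and that the class is registered under the plugin name 'ResourceMultiProc'; the node variables and resource figures are hypothetical.

import nipype.pipeline.engine as pe

wf = pe.Workflow(name='example')
# ... add nodes and connections to wf here ...

# Tag resource-hungry nodes so the scheduler can budget for them;
# untagged nodes default to 1 thread and 1 GB:
#   heavy_node.interface.memory = 8        # GB
#   heavy_node.interface.num_threads = 16

# Cap the run at 8 concurrent threads and 12 GB, overriding system totals.
wf.run(plugin='ResourceMultiProc',
       plugin_args={'n_procs': 8, 'memory': 12})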