|
@@ -5,11 +5,113 @@ Process Pools.
|
|
|
"""
|
|
|
import multiprocessing
|
|
|
|
|
|
-from multiprocessing.pool import Pool
|
|
|
+from multiprocessing.pool import Pool, worker
|
|
|
from celery.datastructures import ExceptionInfo
|
|
|
from celery.utils import gen_unique_id
|
|
|
from functools import partial as curry
|
|
|
|
|
|
+MAX_RESTART_FREQ = 10
|
|
|
+MAX_RESTART_FREQ_TIME = 60
|
|
|
+
|
|
|
+
|
|
|
+class DynamicPool(Pool):
|
|
|
+ """Version of :class:`multiprocessing.Pool` that can dynamically grow
|
|
|
+ in size."""
|
|
|
+
|
|
|
+ def __init__(self, processes=None, initializer=None, initargs=()):
|
|
|
+ super(DynamicPool, self).__init__(processes=processes,
|
|
|
+ initializer=initializer,
|
|
|
+ initargs=initargs)
|
|
|
+ self._initializer = initializer
|
|
|
+ self._initargs = initargs
|
|
|
+
|
|
|
+ def add_worker(self):
|
|
|
+ """Add another worker to the pool."""
|
|
|
+ w = self.Process(target=worker,
|
|
|
+ args=(self._inqueue, self._outqueue,
|
|
|
+ self._initializer, self._initargs))
|
|
|
+ self._pool.append(w)
|
|
|
+ w.name = w.name.replace("Process", "PoolWorker")
|
|
|
+ w.daemon = True
|
|
|
+ w.start()
|
|
|
+
|
|
|
+ def grow(self, size=1):
|
|
|
+ """Add ``size`` new workers to the pool."""
|
|
|
+ map(self._add_worker, range(size))
|
|
|
+
|
|
|
+ def get_worker_pids(self):
|
|
|
+ """Returns the process id's of all the pool workers."""
|
|
|
+ return [process.pid for process in self.processes]
|
|
|
+
|
|
|
+ def replace_dead_workers(self):
|
|
|
+ dead = [process for process in self.processes
|
|
|
+ if not process.is_alive()]
|
|
|
+ if dead:
|
|
|
+ dead_pids = [process.pid for process in dead]
|
|
|
+ self._pool = [process for process in self._pool
|
|
|
+ if process.pid not in dead_pids]
|
|
|
+ self.grow(len(dead))
|
|
|
+
|
|
|
+ return dead
|
|
|
+
|
|
|
+ @property
|
|
|
+ def processes(self):
|
|
|
+ return self._pool
|
|
|
+
|
|
|
+
|
|
|
+class PoolSupervisor(object):
|
|
|
+ """Supervisor implementing the "one_for_one" strategy.
|
|
|
+
|
|
|
+ :param target: See :attr:`target`.
|
|
|
+ :param max_restart_freq: See :attr:`max_restart_freq`.
|
|
|
+ :param max_restart_freq_time: See :attr:`max_restart_freq_time`.
|
|
|
+
|
|
|
+ .. attribute:: target
|
|
|
+
|
|
|
+ The target pool to supervise.
|
|
|
+
|
|
|
+ .. attribute:: max_restart_freq
|
|
|
+
|
|
|
+ Limit the number of restarts which can occur in a given time interval.
|
|
|
+
|
|
|
+ The max restart frequency is the number of restarts that can occur
|
|
|
+ within the interval :attr:`max_restart_freq_time`.
|
|
|
+
|
|
|
+ The restart mechanism prevents situations where the process repeatedly
|
|
|
+ dies for the same reason. If this happens both the process and the
|
|
|
+ supervisor is terminated.
|
|
|
+
|
|
|
+ .. attribute:: max_restart_freq_time
|
|
|
+
|
|
|
+ See :attr:`max_restart_freq`.
|
|
|
+
|
|
|
+ """
|
|
|
+
|
|
|
+ def __init__(self, target, max_restart_freq=MAX_RESTART_FREQ,
|
|
|
+ max_restart_freq_time=MAX_RESTART_FREQ_TIME):
|
|
|
+ self.target = target
|
|
|
+ self.max_restart_freq = max_restart_freq * len(target.processes)
|
|
|
+ self.max_restart_freq_time = max_restart_freq_time
|
|
|
+ self.restart_frame_time = None
|
|
|
+ self.restarts_in_frame = 0
|
|
|
+
|
|
|
+ def restart_freq_exceeded(self):
|
|
|
+ if not self.restart_frame_time:
|
|
|
+ self.restart_frame_time = time.time()
|
|
|
+ return False
|
|
|
+ time_exceeded = time.time() > self.max_restart_frame_time + \
|
|
|
+ self.max_restart_freq_time
|
|
|
+ if time_exceeded and self.restarts_in_frame >= self.max_restart_freq:
|
|
|
+ return True
|
|
|
+
|
|
|
+ def supervise(self):
|
|
|
+ dead = self.target.replace_dead_workers()
|
|
|
+ if dead:
|
|
|
+ self.restarts_in_frame += len(dead)
|
|
|
+ if self.restart_freq_exceeded():
|
|
|
+ raise MaxRestartsExceededError(
|
|
|
+ "Pool supervisor: Max restart frequencey exceeded.")
|
|
|
+
|
|
|
|
|
|
class TaskPool(object):
|
|
|
"""Process Pool for processing tasks in parallel.
|
|
@@ -32,7 +134,7 @@ class TaskPool(object):
|
|
|
self.limit = limit
|
|
|
self.logger = logger or multiprocessing.get_logger()
|
|
|
self._pool = None
|
|
|
- self._processes = None
|
|
|
+ self._supervisor = None
|
|
|
|
|
|
def start(self):
|
|
|
"""Run the task pool.
|
|
@@ -40,13 +142,12 @@ class TaskPool(object):
|
|
|
Will pre-fork all workers so they're ready to accept tasks.
|
|
|
|
|
|
"""
|
|
|
- self._processes = {}
|
|
|
- self._pool = Pool(processes=self.limit)
|
|
|
+ self._pool = DynamicPool(processes=self.limit)
|
|
|
+ self._supervisor = PoolSupervisor(self._pool)
|
|
|
|
|
|
def stop(self):
|
|
|
"""Terminate the pool."""
|
|
|
self._pool.terminate()
|
|
|
- self._processes = {}
|
|
|
self._pool = None
|
|
|
|
|
|
def apply_async(self, target, args=None, kwargs=None, callbacks=None,
|
|
@@ -62,42 +163,27 @@ class TaskPool(object):
|
|
|
callbacks = callbacks or []
|
|
|
errbacks = errbacks or []
|
|
|
meta = meta or {}
|
|
|
- tid = gen_unique_id()
|
|
|
|
|
|
- on_return = curry(self.on_return, tid, callbacks, errbacks,
|
|
|
+ on_return = curry(self.on_return, callbacks, errbacks,
|
|
|
on_ack, meta)
|
|
|
|
|
|
+ self._supervisor.supervise()
|
|
|
+
|
|
|
self.logger.debug("TaskPool: Apply %s (args:%s kwargs:%s)" % (
|
|
|
target, args, kwargs))
|
|
|
- result = self._pool.apply_async(target, args, kwargs,
|
|
|
- callback=on_return)
|
|
|
|
|
|
- self._processes[tid] = [result, callbacks, errbacks, meta]
|
|
|
+ return self._pool.apply_async(target, args, kwargs,
|
|
|
+ callback=on_return)
|
|
|
|
|
|
- return result
|
|
|
|
|
|
- def on_return(self, tid, callbacks, errbacks, on_ack, meta, ret_value):
|
|
|
+ def on_return(self, callbacks, errbacks, on_ack, meta, ret_value):
|
|
|
"""What to do when the process returns."""
|
|
|
|
|
|
# Acknowledge the task as being processed.
|
|
|
if on_ack:
|
|
|
on_ack()
|
|
|
|
|
|
- try:
|
|
|
- del(self._processes[tid])
|
|
|
- except KeyError:
|
|
|
- pass
|
|
|
- else:
|
|
|
- self.on_ready(callbacks, errbacks, meta, ret_value)
|
|
|
-
|
|
|
- def full(self):
|
|
|
- """Is the pool full?
|
|
|
-
|
|
|
- :returns: ``True`` if the maximum number of concurrent processes
|
|
|
- has been reached.
|
|
|
-
|
|
|
- """
|
|
|
- return len(self._processes.values()) >= self.limit
|
|
|
+ self.on_ready(callbacks, errbacks, meta, ret_value)
|
|
|
|
|
|
def get_worker_pids(self):
|
|
|
"""Returns the process id's of all the pool workers."""
|