@@ -32,7 +32,6 @@ from weakref import WeakValueDictionary, ref
from amqp.utils import promise
from billiard import forking_enable
from billiard import pool as _pool
-from billiard.exceptions import WorkerLostError
from billiard.pool import (
    RUN, CLOSE, TERMINATE, ACK, NACK, EX_RECYCLE, WorkersJoined, CoroStop,
)
@@ -297,6 +296,16 @@ class AsynPool(_pool.Pool):
# denormalized set of all inqueues.
self._all_inqueues = set()
+
+ # Set of fds being written to (busy)
+ self._active_writes = set()
+
+ # Set of active co-routines currently writing jobs.
+ self._active_writers = set()
+
+ # Holds jobs waiting to be written to child processes.
+ self.outbound_buffer = deque()
+
super(AsynPool, self).__init__(processes, *args, **kwargs)
for proc in self._pool:
@@ -305,730 +314,715 @@ class AsynPool(_pool.Pool):
|
|
|
self._fileno_to_inq[proc.inqW_fd] = proc
|
|
|
self._fileno_to_outq[proc.outqR_fd] = proc
|
|
|
self._fileno_to_synq[proc.synqW_fd] = proc
|
|
|
+ self.on_soft_timeout = self._timeout_handler.on_soft_timeout
|
|
|
+ self.on_hard_timeout = self._timeout_handler.on_hard_timeout
|
|
|
|
|
|
- def get_process_queues(self):
|
|
|
- """Get queues for a new process.
|
|
|
-
|
|
|
- Here we will find an unused slot, as there should always
|
|
|
- be one available when we start a new process.
|
|
|
- """
|
|
|
- return next(q for q, owner in items(self._queues)
|
|
|
- if owner is None)
|
|
|
-
|
|
|
- def on_grow(self, n):
|
|
|
- """Grow the pool by ``n`` proceses."""
|
|
|
- diff = max(self._processes - len(self._queues), 0)
|
|
|
- if diff:
|
|
|
- self._queues.update(
|
|
|
- dict((self.create_process_queues(), None) for _ in range(diff))
|
|
|
- )
|
|
|
-
|
|
|
- def on_shrink(self, n):
|
|
|
- """Shrink the pool by ``n`` processes."""
|
|
|
- pass
|
|
|
+ def register_with_event_loop(self, hub):
|
|
|
+ """Registers the async pool with the current event loop."""
|
|
|
+ self._create_timelimit_handlers(hub)
|
|
|
+ self._create_process_handlers(hub)
|
|
|
+ self._create_write_handlers(hub)
|
|
|
|
|
|
- def create_process_queues(self):
|
|
|
- """Creates new in, out (and optionally syn) queues,
|
|
|
- returned as a tuple."""
|
|
|
- inq, outq, synq = _SimpleQueue(), _SimpleQueue(), None
|
|
|
- inq._writer.setblocking(0)
|
|
|
- if self.synack:
|
|
|
- synq = _SimpleQueue()
|
|
|
- synq._writer.setblocking(0)
|
|
|
- return inq, outq, synq
|
|
|
+ # Maintain_pool is called whenever a process exits.
|
|
|
+ [hub.add_reader(fd, self.maintain_pool)
|
|
|
+ for fd in self.process_sentinels]
|
|
|
+ # Handle_result_event is called whenever one of the
|
|
|
+ # result queues are readable.
|
|
|
+ [hub.add_reader(fd, self.handle_result_event)
|
|
|
+ for fd in self._fileno_to_outq]
|
|
|
|
|
|
- def on_process_alive(self, pid):
|
|
|
- """Handler called when the WORKER_UP message is received
|
|
|
- from a child process, which marks the process as ready
|
|
|
- to receive work."""
|
|
|
- try:
|
|
|
- proc = next(w for w in self._pool if w.pid == pid)
|
|
|
- except StopIteration:
|
|
|
- # process already exited :( this will be handled elsewhere.
|
|
|
- return
|
|
|
- self._fileno_to_inq[proc.inqW_fd] = proc
|
|
|
- self._fileno_to_synq[proc.synqW_fd] = proc
|
|
|
- self._all_inqueues.add(proc.inqW_fd)
|
|
|
+ # Timers include calling maintain_pool at a regular interval
|
|
|
+ # to be certain processes are restarted.
|
|
|
+ for handler, interval in items(self.timers):
|
|
|
+ hub.call_repeatedly(interval, handler)
|
|
|
|
|
|
- def on_job_process_down(self, job, pid_gone):
|
|
|
- """Handler called for each job when the process it was assigned to
|
|
|
- exits."""
|
|
|
- if job._write_to:
|
|
|
- # job was partially written
|
|
|
- self.on_partial_read(job, job._write_to)
|
|
|
- elif job._scheduled_for:
|
|
|
- # job was only scheduled to be written to this process,
|
|
|
- # but no data was sent so put it back on the outbound_buffer.
|
|
|
- self._put_back(job)
|
|
|
+ hub.on_tick.add(self.on_poll_start)
|
|
|
|
|
|
- def on_job_process_lost(self, job, pid, exitcode):
|
|
|
- """Handler called for each *started* job when the process it
|
|
|
- was assigned to exited by mysterious means (error exitcodes and
|
|
|
- signals)"""
|
|
|
- self.mark_as_worker_lost(job, exitcode)
|
|
|
+ def _create_timelimit_handlers(self, hub, now=time):
|
|
|
+ """For async pool this sets up the handlers used
|
|
|
+ to implement time limits."""
|
|
|
+ call_later = hub.call_later
|
|
|
+ trefs = self._tref_for_id = WeakValueDictionary()
|
|
|
|
|
|
- def _process_cleanup_queues(self, proc):
|
|
|
- """Handler called to clean up a processes queues after process
|
|
|
- exit."""
|
|
|
- try:
|
|
|
- self._queues[self._find_worker_queues(proc)] = None
|
|
|
- except (KeyError, ValueError):
|
|
|
- pass
|
|
|
+ def on_timeout_set(R, soft, hard):
|
|
|
+ if soft:
|
|
|
+ trefs[R._job] = call_later(
|
|
|
+ soft * 1000.0,
|
|
|
+ self._on_soft_timeout, (R._job, soft, hard, hub),
|
|
|
+ )
|
|
|
+ elif hard:
|
|
|
+ trefs[R._job] = call_later(
|
|
|
+ hard * 1000.0,
|
|
|
+ self._on_hard_timeout, (R._job, )
|
|
|
+ )
|
|
|
+ self.on_timeout_set = on_timeout_set
|
|
|
|
|
|
- @staticmethod
|
|
|
- def _stop_task_handler(task_handler):
|
|
|
- """Called at shutdown to tell processes that we are shutting down."""
|
|
|
- for proc in task_handler.pool:
|
|
|
- proc.inq._writer.setblocking(1)
|
|
|
+ def _discard_tref(job):
|
|
|
try:
|
|
|
- proc.inq.put(None)
|
|
|
- except OSError as exc:
|
|
|
- if get_errno(exc) != errno.EBADF:
|
|
|
- raise
|
|
|
+ tref = trefs.pop(job)
|
|
|
+ tref.cancel()
|
|
|
+ del(tref)
|
|
|
+ except (KeyError, AttributeError):
|
|
|
+ pass # out of scope
|
|
|
+ self._discard_tref = _discard_tref
|
|
|
|
|
|
- def create_result_handler(self):
|
|
|
- return super(AsynPool, self).create_result_handler(
|
|
|
- fileno_to_outq=self._fileno_to_outq,
|
|
|
- on_process_alive=self.on_process_alive,
|
|
|
- )
|
|
|
+ def on_timeout_cancel(R):
|
|
|
+ _discard_tref(R._job)
|
|
|
+ self.on_timeout_cancel = on_timeout_cancel
|
|
|
|
|
|
- def _process_register_queues(self, proc, queues):
|
|
|
- """Marks new ownership for ``queues`` so that the fileno indices are
|
|
|
- updated."""
|
|
|
- assert queues in self._queues
|
|
|
- b = len(self._queues)
|
|
|
- self._queues[queues] = proc
|
|
|
- assert b == len(self._queues)
|
|
|
+ def _on_soft_timeout(self, job, soft, hard, hub, now=time):
|
|
|
+ # only used by async pool.
|
|
|
+ if hard:
|
|
|
+ self._tref_for_id[job] = hub.call_at(
|
|
|
+ now() + (hard - soft),
|
|
|
+ self._on_hard_timeout, (job, ),
|
|
|
+ )
|
|
|
+ try:
|
|
|
+ result = self._cache[job]
|
|
|
+ except KeyError:
|
|
|
+ pass # job ready
|
|
|
+ else:
|
|
|
+ self.on_soft_timeout(result)
|
|
|
+ finally:
|
|
|
+ if not hard:
|
|
|
+ # remove tref
|
|
|
+ self._discard_tref(job)
|
|
|
|
|
|
- def _find_worker_queues(self, proc):
|
|
|
- """Find the queues owned by ``proc``."""
|
|
|
+ def _on_hard_timeout(self, job):
|
|
|
+ # only used by async pool.
|
|
|
try:
|
|
|
- return next(q for q, owner in items(self._queues)
|
|
|
- if owner == proc)
|
|
|
- except StopIteration:
|
|
|
- raise ValueError(proc)
|
|
|
+ result = self._cache[job]
|
|
|
+ except KeyError:
|
|
|
+ pass # job ready
|
|
|
+ else:
|
|
|
+ self.on_hard_timeout(result)
|
|
|
+ finally:
|
|
|
+ # remove tref
|
|
|
+ self._discard_tref(job)
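# Illustrative sketch (not part of the patch): the two limits chain rather
# than run in parallel. With soft=60 and hard=300 only the soft timer is
# registered up front; the hard timer is created from inside the soft handler
# for the remaining (hard - soft) interval:
#
#   on_timeout_set(R, soft=60, hard=300)   # registers _on_soft_timeout
#   # ...soft limit expires...
#   _on_soft_timeout(job, 60, 300, hub)    # calls self.on_soft_timeout(result),
#                                          # then schedules _on_hard_timeout at
#                                          # now() + (300 - 60)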
|
|
|
|
|
|
- def _setup_queues(self):
|
|
|
- # this is only used by the original pool which uses a shared
|
|
|
- # queue for all processes.
|
|
|
+ def _create_process_handlers(self, hub, READ=READ, ERR=ERR):
|
|
|
+ """For async pool this will create the handlers called
|
|
|
+ when a process is up/down, etc."""
|
|
|
+ add_reader, hub_remove = hub.add_reader, hub.remove
|
|
|
+ cache = self._cache
|
|
|
+ all_inqueues = self._all_inqueues
|
|
|
+ fileno_to_inq = self._fileno_to_inq
|
|
|
+ fileno_to_outq = self._fileno_to_outq
|
|
|
+ fileno_to_synq = self._fileno_to_synq
|
|
|
+ maintain_pool = self.maintain_pool
|
|
|
+ handle_result_event = self.handle_result_event
|
|
|
+ process_flush_queues = self.process_flush_queues
|
|
|
|
|
|
- # these attributes makes no sense for us, but we will still
|
|
|
- # have to initialize them.
|
|
|
- self._inqueue = self._outqueue = \
|
|
|
- self._quick_put = self._quick_get = self._poll_result = None
|
|
|
+ def on_process_up(proc):
|
|
|
+ """Called when a WORKER_UP message is received from process."""
|
|
|
+ # If we got the same fd as a previous process then we will also
|
|
|
+ # receive jobs in the old buffer, so we need to reset the
|
|
|
+ # job._write_to and job._scheduled_for attributes used to recover
|
|
|
+ # message boundaries when processes exit.
|
|
|
+ infd = proc.inqW_fd
|
|
|
+ for job in values(cache):
|
|
|
+ if job._write_to and job._write_to.inqW_fd == infd:
|
|
|
+ job._write_to = proc
|
|
|
+ if job._scheduled_for and job._scheduled_for.inqW_fd == infd:
|
|
|
+ job._scheduled_for = proc
|
|
|
+ fileno_to_outq[proc.outqR_fd] = proc
|
|
|
+ # maintain_pool is called whenever a process exits.
|
|
|
+ add_reader(proc.sentinel, maintain_pool)
|
|
|
+ # handle_result_event is called when the processes outqueue is
|
|
|
+ # readable.
|
|
|
+ add_reader(proc.outqR_fd, handle_result_event)
|
|
|
+ self.on_process_up = on_process_up
|
|
|
|
|
|
- def process_flush_queues(self, proc):
|
|
|
- """Flushes all queues, including the outbound buffer, so that
|
|
|
- all tasks that have not been started will be discarded.
|
|
|
+ def on_process_down(proc):
|
|
|
+ """Called when a worker process exits."""
|
|
|
+ process_flush_queues(proc)
|
|
|
+ fileno_to_outq.pop(proc.outqR_fd, None)
|
|
|
+ fileno_to_inq.pop(proc.inqW_fd, None)
|
|
|
+ fileno_to_synq.pop(proc.synqW_fd, None)
|
|
|
+ all_inqueues.discard(proc.inqW_fd)
|
|
|
+ hub_remove(proc.sentinel)
|
|
|
+ hub_remove(proc.outqR_fd)
|
|
|
+ self.on_process_down = on_process_down
|
|
|
|
|
|
- In Celery this is called whenever the transport connection is lost
|
|
|
- (consumer restart).
|
|
|
+ def _create_write_handlers(self, hub,
|
|
|
+ pack=struct.pack, dumps=_pickle.dumps,
|
|
|
+ protocol=HIGHEST_PROTOCOL):
|
|
|
+ """For async pool this creates the handlers used to write data to
|
|
|
+ child processes."""
|
|
|
+ fileno_to_inq = self._fileno_to_inq
|
|
|
+ fileno_to_synq = self._fileno_to_synq
|
|
|
+ outbound = self.outbound_buffer
|
|
|
+ pop_message = outbound.popleft
|
|
|
+ put_message = outbound.append
|
|
|
+ all_inqueues = self._all_inqueues
|
|
|
+ active_writes = self._active_writes
|
|
|
+ diff = all_inqueues.difference
|
|
|
+ add_reader, add_writer = hub.add_reader, hub.add_writer
|
|
|
+ hub_add, hub_remove = hub.add, hub.remove
|
|
|
+ mark_write_fd_as_active = active_writes.add
|
|
|
+ mark_write_gen_as_active = self._active_writers.add
|
|
|
+ write_generator_done = self._active_writers.discard
|
|
|
+ get_job = self._cache.__getitem__
|
|
|
+ # puts back at the end of the queue
|
|
|
+ self._put_back = outbound.appendleft
|
|
|
+ precalc = {ACK: self._create_payload(ACK, (0, )),
|
|
|
+ NACK: self._create_payload(NACK, (0, ))}
|
|
|
|
|
|
- """
|
|
|
- resq = proc.outq._reader
|
|
|
- on_state_change = self._result_handler.on_state_change
|
|
|
- while not resq.closed and resq.poll(0) and self._state != TERMINATE:
|
|
|
- try:
|
|
|
- task = resq.recv()
|
|
|
- except (IOError, EOFError) as exc:
|
|
|
- debug('got %r while flushing process %r',
|
|
|
- exc, proc, exc_info=1)
|
|
|
- break
|
|
|
- else:
|
|
|
- if task is not None:
|
|
|
- on_state_change(task)
|
|
|
- else:
|
|
|
- debug('got sentinel while flushing process %r', proc)
|
|
|
+ def on_poll_start():
|
|
|
+ # called for every event loop iteration, and if there
|
|
|
+ # are messages pending this will schedule writing one message
|
|
|
+ # by registering the 'schedule_writes' function for all currently
|
|
|
+ # inactive inqueues (not already being written to)
|
|
|
|
|
|
- def on_partial_read(self, job, proc):
|
|
|
- """Called when a job was only partially written to a child process
|
|
|
- and it exited."""
|
|
|
- # worker terminated by signal:
|
|
|
- # we cannot reuse the sockets again, because we don't know if
|
|
|
- # the process wrote/read anything frmo them, and if so we cannot
|
|
|
- # restore the message boundaries.
|
|
|
- if proc.exitcode != EX_RECYCLE:
|
|
|
- if not job._accepted:
|
|
|
- # job was not acked, so find another worker to send it to.
|
|
|
- self._put_back(job)
|
|
|
- writer = getattr(job, '_writer')
|
|
|
- writer = writer and writer() or None
|
|
|
- if writer:
|
|
|
- self._active_writers.discard(writer)
|
|
|
+ # consolidate means the event loop will merge them
|
|
|
+ # and call the callback once with the list of writable fds as
|
|
|
+ # argument. Using this means we minimize the risk of having
|
|
|
+ # the same fd receive every task if the pipe read buffer is not
|
|
|
+ # full.
|
|
|
+ if outbound:
|
|
|
+ [hub_add(fd, None, WRITE | ERR, consolidate=True)
|
|
|
+ for fd in diff(active_writes)]
|
|
|
+ self.on_poll_start = on_poll_start
|
|
|
|
|
|
- # Replace queues to avoid reuse
|
|
|
- before = len(self._queues)
|
|
|
- try:
|
|
|
- queues = self._find_worker_queues(proc)
|
|
|
- if self.destroy_queues(queues):
|
|
|
- self._queues[self.create_process_queues()] = None
|
|
|
- except ValueError:
|
|
|
- # Not in queue map, make sure sockets are closed.
|
|
|
- self.destroy_queues((proc.inq, proc.outq, proc.synq))
|
|
|
- assert len(self._queues) == before
|
|
|
+ def on_inqueue_close(fd):
|
|
|
+ # Makes sure the fd is removed from tracking when
|
|
|
+ # the connection is closed; this is essential as fds may be reused.
|
|
|
+ active_writes.discard(fd)
|
|
|
+ all_inqueues.discard(fd)
|
|
|
+ self.on_inqueue_close = on_inqueue_close
|
|
|
|
|
|
- def destroy_queues(self, queues):
|
|
|
- """Destroy queues that can no longer be used, so that they
|
|
|
- be replaced by new sockets."""
|
|
|
- removed = 1
|
|
|
- try:
|
|
|
- self._queues.pop(queues)
|
|
|
- except KeyError:
|
|
|
- removed = 0
|
|
|
- try:
|
|
|
- self.on_inqueue_close(queues[0]._writer.fileno())
|
|
|
- except IOError:
|
|
|
- pass
|
|
|
- for queue in queues:
|
|
|
- if queue:
|
|
|
- for sock in (queue._reader, queue._writer):
|
|
|
- if not sock.closed:
|
|
|
+ def schedule_writes(ready_fds, shuffle=random.shuffle):
|
|
|
+ # Schedule write operation to ready file descriptor.
|
|
|
+ # The file descriptor is writeable, but that does not
|
|
|
+ # mean the process is currently reading from the socket.
|
|
|
+ # The socket is buffered so writeable simply means that
|
|
|
+ # the buffer can accept at least 1 byte of data.
|
|
|
+ shuffle(ready_fds)
|
|
|
+ for ready_fd in ready_fds:
|
|
|
+ if ready_fd in active_writes:
|
|
|
+ # already writing to this fd
|
|
|
+ continue
|
|
|
+ try:
|
|
|
+ job = pop_message()
|
|
|
+ except IndexError:
|
|
|
+ # no more messages, remove all inactive fds from the hub.
|
|
|
+ # this is important since the fds are always writeable
|
|
|
+ # as long as there's 1 byte left in the buffer, and so
|
|
|
+ # this may create a spinloop where the event loop
|
|
|
+ # always wakes up.
|
|
|
+ for inqfd in diff(active_writes):
|
|
|
+ hub_remove(inqfd)
|
|
|
+ break
|
|
|
+
|
|
|
+ else:
|
|
|
+ if not job._accepted: # job not accepted by another worker
|
|
|
try:
|
|
|
- sock.close()
|
|
|
- except (IOError, OSError):
|
|
|
+ # keep track of what process the write operation
|
|
|
+ # was scheduled for.
|
|
|
+ proc = job._scheduled_for = fileno_to_inq[ready_fd]
|
|
|
+ except KeyError:
|
|
|
+ # write was scheduled for this fd but the process
|
|
|
+ # has since exited and the message must be sent to
|
|
|
+ # another process.
|
|
|
+ put_message(job)
|
|
|
+ continue
|
|
|
+ cor = _write_job(proc, ready_fd, job)
|
|
|
+ job._writer = ref(cor)
|
|
|
+ mark_write_gen_as_active(cor)
|
|
|
+ mark_write_fd_as_active(ready_fd)
|
|
|
+
|
|
|
+ # Try to write immediately, in case there's an error.
|
|
|
+ try:
|
|
|
+ next(cor)
|
|
|
+ add_writer(ready_fd, cor)
|
|
|
+ except StopIteration:
|
|
|
pass
|
|
|
- return removed
|
|
|
+ hub.consolidate_callback = schedule_writes
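# Illustrative sketch (not part of the patch; toy stand-in for the real
# _write_job): the pattern behind schedule_writes is a generator used as a
# write coroutine -- it performs non-blocking writes and yields whenever the
# pipe is full, so the event loop can resume it when the fd polls writable.

import errno
import os


def writer_gen(fd, data):
    # Keep writing from the current offset; suspend on EAGAIN/EINTR and let
    # the event loop drive the generator with next() until it is exhausted.
    written = 0
    while written < len(data):
        try:
            written += os.write(fd, data[written:])
        except OSError as exc:
            if exc.errno not in (errno.EAGAIN, errno.EINTR):
                raise
            yield  # suspend until more buffer space is available

# Usage mirrors schedule_writes: try one write immediately, then hand the
# generator to the event loop, as the patch does with add_writer(ready_fd, cor):
#   gen = writer_gen(fd, payload)
#   next(gen)
#   hub.add_writer(fd, gen)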
|
|
|
|
|
|
- def _create_payload(self, type_, args,
|
|
|
- dumps=_pickle.dumps, pack=struct.pack,
|
|
|
- protocol=HIGHEST_PROTOCOL):
|
|
|
- body = dumps((type_, args), protocol=protocol)
|
|
|
- size = len(body)
|
|
|
- header = pack('>I', size)
|
|
|
- return header, body, size
|
|
|
+ def send_job(tup):
|
|
|
+ # Schedule writing job request for when one of the process
|
|
|
+ # inqueues are writable.
|
|
|
+ body = dumps(tup, protocol=protocol)
|
|
|
+ body_size = len(body)
|
|
|
+ header = pack('>I', body_size)
|
|
|
+ # index 1,0 is the job ID.
|
|
|
+ job = get_job(tup[1][0])
|
|
|
+ job._payload = header, body, body_size
|
|
|
+ put_message(job)
|
|
|
+ self._quick_put = send_job
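# Illustrative sketch (not part of the patch): send_job frames each request as
# a 4-byte big-endian length header followed by the pickled body, so the child
# end of the pipe can recover message boundaries. A minimal reader for that
# format (hypothetical helper, blocking reads for brevity):

import pickle
import struct


def read_message(read):
    """Read one length-prefixed pickle frame via ``read(n)``."""
    header = read(4)
    (body_size,) = struct.unpack('>I', header)
    return pickle.loads(read(body_size))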
|
|
|
|
|
|
- @classmethod
|
|
|
- def _set_result_sentinel(cls, _outqueue, _pool):
|
|
|
- # unused
|
|
|
- pass
|
|
|
+ write_stats = self.write_stats = Counter()
|
|
|
|
|
|
- def _help_stuff_finish_args(self):
|
|
|
- # Pool._help_stuff_finished is a classmethod so we have to use this
|
|
|
- # trick to modify the arguments passed to it.
|
|
|
- return (self._pool, )
|
|
|
+ def on_not_recovering(proc):
|
|
|
+ # XXX Theoretically a possibility, but not seen in practice yet.
|
|
|
+ raise Exception(
|
|
|
+ 'Process writable but cannot write. Contact support!')
|
|
|
|
|
|
- @classmethod
|
|
|
- def _help_stuff_finish(cls, pool):
|
|
|
- debug(
|
|
|
- 'removing tasks from inqueue until task handler finished',
|
|
|
- )
|
|
|
- fileno_to_proc = {}
|
|
|
- inqR = set()
|
|
|
- for w in pool:
|
|
|
+ def _write_job(proc, fd, job):
|
|
|
+ # writes job to the worker process.
|
|
|
+ # Operation must complete if more than one byte of data
|
|
|
+ # was written. If the broker connection is lost
|
|
|
+ # and no data was written the operation shall be cancelled.
|
|
|
+ header, body, body_size = job._payload
|
|
|
+ errors = 0
|
|
|
try:
|
|
|
- fd = w.inq._reader.fileno()
|
|
|
- inqR.add(fd)
|
|
|
- fileno_to_proc[fd] = w
|
|
|
- except IOError:
|
|
|
- pass
|
|
|
- while inqR:
|
|
|
- readable, _, again = _select(inqR, timeout=0.5)
|
|
|
- if again:
|
|
|
- continue
|
|
|
- if not readable:
|
|
|
- break
|
|
|
- for fd in readable:
|
|
|
- fileno_to_proc[fd].inq._reader.recv()
|
|
|
- sleep(0)
|
|
|
-
|
|
|
-
|
|
|
-class TaskPool(BasePool):
|
|
|
- """Multiprocessing Pool implementation."""
|
|
|
- Pool = AsynPool
|
|
|
- BlockingPool = _pool.Pool
|
|
|
+ # job result keeps track of what process the job is sent to.
|
|
|
+ job._write_to = proc
|
|
|
+ send = proc.send_job_offset
|
|
|
|
|
|
- uses_semaphore = True
|
|
|
- write_stats = None
|
|
|
+ Hw = Bw = 0
|
|
|
+ # write header
|
|
|
+ while Hw < 4:
|
|
|
+ try:
|
|
|
+ Hw += send(header, Hw)
|
|
|
+ except Exception as exc:
|
|
|
+ if get_errno(exc) not in UNAVAIL:
|
|
|
+ raise
|
|
|
+ # suspend until more data
|
|
|
+ errors += 1
|
|
|
+ if errors > 100:
|
|
|
+ on_not_recovering(proc)
|
|
|
+ raise StopIteration()
|
|
|
+ yield
|
|
|
+ errors = 0
|
|
|
|
|
|
- def on_start(self):
|
|
|
- """Run the task pool.
|
|
|
+ # write body
|
|
|
+ while Bw < body_size:
|
|
|
+ try:
|
|
|
+ Bw += send(body, Bw)
|
|
|
+ except Exception as exc:
|
|
|
+ if get_errno(exc) not in UNAVAIL:
|
|
|
+ raise
|
|
|
+ # suspend until more data
|
|
|
+ errors += 1
|
|
|
+ if errors > 100:
|
|
|
+ on_not_recovering(proc)
|
|
|
+ raise StopIteration()
|
|
|
+ yield
|
|
|
+ errors = 0
|
|
|
+ finally:
|
|
|
+ write_stats[proc.index] += 1
|
|
|
+ # message written, so this fd is now available
|
|
|
+ active_writes.discard(fd)
|
|
|
+ write_generator_done(job._writer()) # is a weakref
|
|
|
|
|
|
- Will pre-fork all workers so they're ready to accept tasks.
|
|
|
+ def send_ack(response, pid, job, fd, WRITE=WRITE, ERR=ERR):
|
|
|
+ # Only used when synack is enabled.
|
|
|
+ # Schedule writing ack response for when the fd is writeable.
|
|
|
+ msg = Ack(job, fd, precalc[response])
|
|
|
+ callback = promise(write_generator_done)
|
|
|
+ cor = _write_ack(fd, msg, callback=callback)
|
|
|
+ mark_write_gen_as_active(cor)
|
|
|
+ mark_write_fd_as_active(fd)
|
|
|
+ callback.args = (cor, )
|
|
|
+ add_writer(fd, cor)
|
|
|
+ self.send_ack = send_ack
|
|
|
|
|
|
- """
|
|
|
- if self.options.get('maxtasksperchild'):
|
|
|
+ def _write_ack(fd, ack, callback=None):
|
|
|
+ # writes ack back to the worker if synack enabled.
|
|
|
+ # this operation *MUST* complete, otherwise
|
|
|
+ # the worker process will hang waiting for the ack.
|
|
|
+ header, body, body_size = ack[2]
|
|
|
try:
|
|
|
- from billiard.connection import Connection
|
|
|
- Connection.send_offset
|
|
|
- except (ImportError, AttributeError):
|
|
|
- # billiard C extension not installed
|
|
|
- warning(MAXTASKS_NO_BILLIARD)
|
|
|
-
|
|
|
- forking_enable(self.forking_enable)
|
|
|
- Pool = (self.BlockingPool if self.options.get('threads', True)
|
|
|
- else self.Pool)
|
|
|
- P = self._pool = Pool(processes=self.limit,
|
|
|
- initializer=process_initializer,
|
|
|
- synack=False,
|
|
|
- **self.options)
|
|
|
-
|
|
|
- # Create proxy methods
|
|
|
- self.on_apply = P.apply_async
|
|
|
- self.on_soft_timeout = P._timeout_handler.on_soft_timeout
|
|
|
- self.on_hard_timeout = P._timeout_handler.on_hard_timeout
|
|
|
- self.maintain_pool = P.maintain_pool
|
|
|
- self.terminate_job = P.terminate_job
|
|
|
- self.grow = P.grow
|
|
|
- self.shrink = P.shrink
|
|
|
- self.restart = P.restart
|
|
|
- self.maybe_handle_result = P._result_handler.handle_event
|
|
|
- self.handle_result_event = P.handle_result_event
|
|
|
-
|
|
|
- # Holds jobs waiting to be written to child processes.
|
|
|
- self.outbound_buffer = deque()
|
|
|
-
|
|
|
- # Set of fds being written to (busy)
|
|
|
- self._active_writes = set()
|
|
|
-
|
|
|
- # Set of active co-routines currently writing jobs.
|
|
|
- self._active_writers = set()
|
|
|
-
|
|
|
- def did_start_ok(self):
|
|
|
- return self._pool.did_start_ok()
|
|
|
-
|
|
|
- def on_stop(self):
|
|
|
- """Gracefully stop the pool."""
|
|
|
- if self._pool is not None and self._pool._state in (RUN, CLOSE):
|
|
|
- self._pool.close()
|
|
|
- self._pool.join()
|
|
|
- self._pool = None
|
|
|
-
|
|
|
- def on_terminate(self):
|
|
|
- """Force terminate the pool."""
|
|
|
- if self._pool is not None:
|
|
|
- self._pool.terminate()
|
|
|
- self._pool = None
|
|
|
-
|
|
|
- def on_close(self):
|
|
|
- if self._pool is not None and self._pool._state == RUN:
|
|
|
- self._pool.close()
|
|
|
+ try:
|
|
|
+ proc = fileno_to_synq[fd]
|
|
|
+ except KeyError:
|
|
|
+ # process died, we can safely discard the ack at this
|
|
|
+ # point.
|
|
|
+ raise StopIteration()
|
|
|
+ send = proc.send_syn_offset
|
|
|
|
|
|
- def _get_info(self):
|
|
|
- return {
|
|
|
- 'max-concurrency': self.limit,
|
|
|
- 'processes': [p.pid for p in self._pool._pool],
|
|
|
- 'max-tasks-per-child': self._pool._maxtasksperchild or 'N/A',
|
|
|
- 'put-guarded-by-semaphore': self.putlocks,
|
|
|
- 'timeouts': (self._pool.soft_timeout or 0,
|
|
|
- self._pool.timeout or 0),
|
|
|
- 'writes': self.human_write_stats(),
|
|
|
- }
|
|
|
+ Hw = Bw = 0
|
|
|
+ # write header
|
|
|
+ while Hw < 4:
|
|
|
+ try:
|
|
|
+ Hw += send(header, Hw)
|
|
|
+ except Exception as exc:
|
|
|
+ if get_errno(exc) not in UNAVAIL:
|
|
|
+ raise
|
|
|
+ yield
|
|
|
|
|
|
- def human_write_stats(self):
|
|
|
- if self.write_stats is None:
|
|
|
- return 'N/A'
|
|
|
- vals = list(values(self.write_stats))
|
|
|
- total = sum(vals)
|
|
|
+ # write body
|
|
|
+ while Bw < body_size:
|
|
|
+ try:
|
|
|
+ Bw += send(body, Bw)
|
|
|
+ except Exception as exc:
|
|
|
+ if get_errno(exc) not in UNAVAIL:
|
|
|
+ raise
|
|
|
+ # suspend until more data
|
|
|
+ yield
|
|
|
+ finally:
|
|
|
+ if callback:
|
|
|
+ callback()
|
|
|
+ # message written, so this fd is now available
|
|
|
+ active_writes.discard(fd)
|
|
|
|
|
|
- def per(v, total):
|
|
|
- return '{0:.2f}%'.format((float(v) / total) * 100.0 if v else 0)
|
|
|
+ def flush(self):
|
|
|
+ if self._state == TERMINATE:
|
|
|
+ return
|
|
|
+ # cancel all tasks that have not been accepted so that NACK is sent.
|
|
|
+ for job in values(self._cache):
|
|
|
+ if not job._accepted:
|
|
|
+ job._cancel()
|
|
|
|
|
|
- return {
|
|
|
- 'total': total,
|
|
|
- 'avg': per(total / len(self.write_stats) if total else 0, total),
|
|
|
- 'all': ', '.join(per(v, total) for v in vals),
|
|
|
- 'raw': ', '.join(map(str, vals)),
|
|
|
- }
|
|
|
+ # clear the outgoing buffer as the tasks will be redelivered by
|
|
|
+ # the broker anyway.
|
|
|
+ if self.outbound_buffer:
|
|
|
+ self.outbound_buffer.clear()
|
|
|
+ try:
|
|
|
+ # ...but we must continue writing the payloads we already started
|
|
|
+ # to keep message boundaries.
|
|
|
+ # The messages may be NACK'ed later if synack is enabled.
|
|
|
+ if self._state == RUN:
|
|
|
+ # flush outgoing buffers
|
|
|
+ intervals = fxrange(0.01, 0.1, 0.01, repeatlast=True)
|
|
|
+ while self._active_writers:
|
|
|
+ writers = list(self._active_writers)
|
|
|
+ for gen in writers:
|
|
|
+ if (gen.__name__ == '_write_job' and
|
|
|
+ gen_not_started(gen)):
|
|
|
+ # has not started writing the job so we can
|
|
|
+ # discard the task, but we must also remove
|
|
|
+ # it from the Pool._cache.
|
|
|
+ job_to_discard = None
|
|
|
+ for job in values(self._cache):
|
|
|
+ if job._writer() is gen: # _writer is a weakref
|
|
|
+ # removes from Pool._cache
|
|
|
+ job_to_discard = job
|
|
|
+ break
|
|
|
+ if job_to_discard:
|
|
|
+ job_to_discard.discard()
|
|
|
+ self._active_writers.discard(gen)
|
|
|
+ else:
|
|
|
+ try:
|
|
|
+ next(gen)
|
|
|
+ except StopIteration:
|
|
|
+ self._active_writers.discard(gen)
|
|
|
+ # workers may have exited in the meantime.
|
|
|
+ self.maintain_pool()
|
|
|
+ sleep(next(intervals)) # don't busyloop
|
|
|
+ finally:
|
|
|
+ self.outbound_buffer.clear()
|
|
|
+ self._active_writers.clear()
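# Note (sketch, not in the patch): the retry delay above comes from
# fxrange(0.01, 0.1, 0.01, repeatlast=True) which -- assuming the usual
# fxrange semantics -- yields 0.01, 0.02, ..., 0.1 and then repeats 0.1
# forever, so the flush loop backs off from 10ms polls to steady 100ms polls
# while the remaining writers drain.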
|
|
|
|
|
|
- def on_poll_init(self, w, hub):
|
|
|
- """Initialize async pool using the eventloop hub."""
|
|
|
- pool = self._pool
|
|
|
- pool._active_writers = self._active_writers
|
|
|
+ def get_process_queues(self):
|
|
|
+ """Get queues for a new process.
|
|
|
|
|
|
- self._create_timelimit_handlers(hub)
|
|
|
- self._create_process_handlers(hub)
|
|
|
- self._create_write_handlers(hub)
|
|
|
+ Here we will find an unused slot, as there should always
|
|
|
+ be one available when we start a new process.
|
|
|
+ """
|
|
|
+ return next(q for q, owner in items(self._queues)
|
|
|
+ if owner is None)
|
|
|
|
|
|
- # did_start_ok will verify that pool processes were able to start,
|
|
|
- # but this will only work the first time we start, as
|
|
|
- # maxtasksperchild will mess up metrics.
|
|
|
- if not w.consumer.restart_count and not pool.did_start_ok():
|
|
|
- raise WorkerLostError('Could not start worker processes')
|
|
|
+ def on_grow(self, n):
|
|
|
+ """Grow the pool by ``n`` proceses."""
|
|
|
+ diff = max(self._processes - len(self._queues), 0)
|
|
|
+ if diff:
|
|
|
+ self._queues.update(
|
|
|
+ dict((self.create_process_queues(), None) for _ in range(diff))
|
|
|
+ )
|
|
|
|
|
|
- # Maintain_pool is called whenever a process exits.
|
|
|
- hub.add(pool.process_sentinels, self.maintain_pool, READ | ERR)
|
|
|
- # Handle_result_event is called whenever one of the
|
|
|
- # result queues are readable.
|
|
|
- hub.add(pool._fileno_to_outq, self.handle_result_event, READ | ERR)
|
|
|
+ def on_shrink(self, n):
|
|
|
+ """Shrink the pool by ``n`` processes."""
|
|
|
+ pass
|
|
|
|
|
|
- # Timers include calling maintain_pool at a regular interval
|
|
|
- # to be certain processes are restarted.
|
|
|
- for handler, interval in items(self.timers):
|
|
|
- hub.timer.apply_interval(interval * 1000.0, handler)
|
|
|
+ def create_process_queues(self):
|
|
|
+ """Creates new in, out (and optionally syn) queues,
|
|
|
+ returned as a tuple."""
|
|
|
+ inq, outq, synq = _SimpleQueue(), _SimpleQueue(), None
|
|
|
+ inq._writer.setblocking(0)
|
|
|
+ if self.synack:
|
|
|
+ synq = _SimpleQueue()
|
|
|
+ synq._writer.setblocking(0)
|
|
|
+ return inq, outq, synq
|
|
|
|
|
|
- def _create_timelimit_handlers(self, hub, now=time):
|
|
|
- """For async pool this sets up the handlers used
|
|
|
- to implement time limits."""
|
|
|
- apply_after = hub.timer.apply_after
|
|
|
- trefs = self._tref_for_id = WeakValueDictionary()
|
|
|
+ def on_process_alive(self, pid):
|
|
|
+ """Handler called when the WORKER_UP message is received
|
|
|
+ from a child process, which marks the process as ready
|
|
|
+ to receive work."""
|
|
|
+ try:
|
|
|
+ proc = next(w for w in self._pool if w.pid == pid)
|
|
|
+ except StopIteration:
|
|
|
+ # process already exited :( this will be handled elsewhere.
|
|
|
+ return
|
|
|
+ self._fileno_to_inq[proc.inqW_fd] = proc
|
|
|
+ self._fileno_to_synq[proc.synqW_fd] = proc
|
|
|
+ self._all_inqueues.add(proc.inqW_fd)
|
|
|
|
|
|
- def on_timeout_set(R, soft, hard):
|
|
|
- if soft:
|
|
|
- trefs[R._job] = apply_after(
|
|
|
- soft * 1000.0,
|
|
|
- self._on_soft_timeout, (R._job, soft, hard, hub),
|
|
|
- )
|
|
|
- elif hard:
|
|
|
- trefs[R._job] = apply_after(
|
|
|
- hard * 1000.0,
|
|
|
- self._on_hard_timeout, (R._job, )
|
|
|
- )
|
|
|
- self._pool.on_timeout_set = on_timeout_set
|
|
|
+ def on_job_process_down(self, job, pid_gone):
|
|
|
+ """Handler called for each job when the process it was assigned to
|
|
|
+ exits."""
|
|
|
+ if job._write_to:
|
|
|
+ # job was partially written
|
|
|
+ self.on_partial_read(job, job._write_to)
|
|
|
+ elif job._scheduled_for:
|
|
|
+ # job was only scheduled to be written to this process,
|
|
|
+ # but no data was sent so put it back on the outbound_buffer.
|
|
|
+ self._put_back(job)
|
|
|
|
|
|
- def _discard_tref(job):
|
|
|
- try:
|
|
|
- tref = trefs.pop(job)
|
|
|
- tref.cancel()
|
|
|
- del(tref)
|
|
|
- except (KeyError, AttributeError):
|
|
|
- pass # out of scope
|
|
|
- self._discard_tref = _discard_tref
|
|
|
+ def on_job_process_lost(self, job, pid, exitcode):
|
|
|
+ """Handler called for each *started* job when the process it
|
|
|
+ was assigned to exited by mysterious means (error exitcodes and
|
|
|
+ signals)"""
|
|
|
+ self.mark_as_worker_lost(job, exitcode)
|
|
|
|
|
|
- def on_timeout_cancel(R):
|
|
|
- _discard_tref(R._job)
|
|
|
- self._pool.on_timeout_cancel = on_timeout_cancel
|
|
|
+ def human_write_stats(self):
|
|
|
+ if self.write_stats is None:
|
|
|
+ return 'N/A'
|
|
|
+ vals = list(values(self.write_stats))
|
|
|
+ total = sum(vals)
|
|
|
|
|
|
- def _on_soft_timeout(self, job, soft, hard, hub, now=time):
|
|
|
- # only used by async pool.
|
|
|
- if hard:
|
|
|
- self._tref_for_id[job] = hub.timer.apply_at(
|
|
|
- now() + (hard - soft),
|
|
|
- self._on_hard_timeout, (job, ),
|
|
|
- )
|
|
|
- try:
|
|
|
- result = self._pool._cache[job]
|
|
|
- except KeyError:
|
|
|
- pass # job ready
|
|
|
- else:
|
|
|
- self.on_soft_timeout(result)
|
|
|
- finally:
|
|
|
- if not hard:
|
|
|
- # remove tref
|
|
|
- self._discard_tref(job)
|
|
|
+ def per(v, total):
|
|
|
+ return '{0:.2f}%'.format((float(v) / total) * 100.0 if v else 0)
|
|
|
|
|
|
- def _on_hard_timeout(self, job):
|
|
|
- # only used by async pool.
|
|
|
+ return {
|
|
|
+ 'total': total,
|
|
|
+ 'avg': per(total / len(self.write_stats) if total else 0, total),
|
|
|
+ 'all': ', '.join(per(v, total) for v in vals),
|
|
|
+ 'raw': ', '.join(map(str, vals)),
|
|
|
+ }
|
|
|
+
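# Worked example (sketch, not in the patch): with two processes that have
# written 3 and 1 messages respectively, write_stats == Counter({0: 3, 1: 1})
# and this returns
#   {'total': 4, 'avg': '50.00%', 'all': '75.00%, 25.00%', 'raw': '3, 1'}
# ('avg' is total / len(write_stats) expressed as a percentage of the total).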
|
|
|
+ def _process_cleanup_queues(self, proc):
|
|
|
+ """Handler called to clean up a processes queues after process
|
|
|
+ exit."""
|
|
|
try:
|
|
|
- result = self._pool._cache[job]
|
|
|
- except KeyError:
|
|
|
- pass # job ready
|
|
|
- else:
|
|
|
- self.on_hard_timeout(result)
|
|
|
- finally:
|
|
|
- # remove tref
|
|
|
- self._discard_tref(job)
|
|
|
+ self._queues[self._find_worker_queues(proc)] = None
|
|
|
+ except (KeyError, ValueError):
|
|
|
+ pass
|
|
|
|
|
|
- def _create_process_handlers(self, hub, READ=READ, ERR=ERR):
|
|
|
- """For async pool this will create the handlers called
|
|
|
- when a process is up/down and etc."""
|
|
|
- pool = self._pool
|
|
|
- hub_add, hub_remove = hub.add, hub.remove
|
|
|
- all_inqueues = self._pool._all_inqueues
|
|
|
- fileno_to_inq = self._pool._fileno_to_inq
|
|
|
- fileno_to_outq = self._pool._fileno_to_outq
|
|
|
- fileno_to_synq = self._pool._fileno_to_synq
|
|
|
- maintain_pool = self._pool.maintain_pool
|
|
|
- handle_result_event = self._pool.handle_result_event
|
|
|
- process_flush_queues = self._pool.process_flush_queues
|
|
|
+ @staticmethod
|
|
|
+ def _stop_task_handler(task_handler):
|
|
|
+ """Called at shutdown to tell processes that we are shutting down."""
|
|
|
+ for proc in task_handler.pool:
|
|
|
+ proc.inq._writer.setblocking(1)
|
|
|
+ try:
|
|
|
+ proc.inq.put(None)
|
|
|
+ except OSError as exc:
|
|
|
+ if get_errno(exc) != errno.EBADF:
|
|
|
+ raise
|
|
|
|
|
|
- def on_process_up(proc):
|
|
|
- """Called when a WORKER_UP message is received from process."""
|
|
|
- # If we got the same fd as a previous process then we will also
|
|
|
- # receive jobs in the old buffer, so we need to reset the
|
|
|
- # job._write_to and job._scheduled_for attributes used to recover
|
|
|
- # message boundaries when processes exit.
|
|
|
- infd = proc.inqW_fd
|
|
|
- for job in values(pool._cache):
|
|
|
- if job._write_to and job._write_to.inqW_fd == infd:
|
|
|
- job._write_to = proc
|
|
|
- if job._scheduled_for and job._scheduled_for.inqW_fd == infd:
|
|
|
- job._scheduled_for = proc
|
|
|
- fileno_to_outq[proc.outqR_fd] = proc
|
|
|
- # maintain_pool is called whenever a process exits.
|
|
|
- hub_add(proc.sentinel, maintain_pool, READ | ERR)
|
|
|
- # handle_result_event is called when the processes outqueue is
|
|
|
- # readable.
|
|
|
- hub_add(proc.outqR_fd, handle_result_event, READ | ERR)
|
|
|
- self._pool.on_process_up = on_process_up
|
|
|
+ def create_result_handler(self):
|
|
|
+ return super(AsynPool, self).create_result_handler(
|
|
|
+ fileno_to_outq=self._fileno_to_outq,
|
|
|
+ on_process_alive=self.on_process_alive,
|
|
|
+ )
|
|
|
|
|
|
- def on_process_down(proc):
|
|
|
- """Called when a worker process exits."""
|
|
|
- process_flush_queues(proc)
|
|
|
- fileno_to_outq.pop(proc.outqR_fd, None)
|
|
|
- fileno_to_inq.pop(proc.inqW_fd, None)
|
|
|
- fileno_to_synq.pop(proc.synqW_fd, None)
|
|
|
- all_inqueues.discard(proc.inqW_fd)
|
|
|
- hub_remove(proc.sentinel)
|
|
|
- hub_remove(proc.outqR_fd)
|
|
|
- self._pool.on_process_down = on_process_down
|
|
|
+ def _process_register_queues(self, proc, queues):
|
|
|
+ """Marks new ownership for ``queues`` so that the fileno indices are
|
|
|
+ updated."""
|
|
|
+ assert queues in self._queues
|
|
|
+ b = len(self._queues)
|
|
|
+ self._queues[queues] = proc
|
|
|
+ assert b == len(self._queues)
|
|
|
|
|
|
- def _create_write_handlers(self, hub,
|
|
|
- pack=struct.pack, dumps=_pickle.dumps,
|
|
|
- protocol=HIGHEST_PROTOCOL):
|
|
|
- """For async pool this creates the handlers used to write data to
|
|
|
- child processes."""
|
|
|
- pool = self._pool
|
|
|
- fileno_to_inq = pool._fileno_to_inq
|
|
|
- fileno_to_synq = pool._fileno_to_synq
|
|
|
- outbound = self.outbound_buffer
|
|
|
- pop_message = outbound.popleft
|
|
|
- put_message = outbound.append
|
|
|
- all_inqueues = pool._all_inqueues
|
|
|
- active_writes = self._active_writes
|
|
|
- diff = all_inqueues.difference
|
|
|
- hub_add, hub_remove = hub.add, hub.remove
|
|
|
- mark_write_fd_as_active = active_writes.add
|
|
|
- mark_write_gen_as_active = self._active_writers.add
|
|
|
- write_generator_done = self._active_writers.discard
|
|
|
- get_job = pool._cache.__getitem__
|
|
|
- # puts back at the end of the queue
|
|
|
- pool._put_back = outbound.appendleft
|
|
|
- precalc = {ACK: pool._create_payload(ACK, (0, )),
|
|
|
- NACK: pool._create_payload(NACK, (0, ))}
|
|
|
+ def _find_worker_queues(self, proc):
|
|
|
+ """Find the queues owned by ``proc``."""
|
|
|
+ try:
|
|
|
+ return next(q for q, owner in items(self._queues)
|
|
|
+ if owner == proc)
|
|
|
+ except StopIteration:
|
|
|
+ raise ValueError(proc)
|
|
|
|
|
|
- def on_poll_start(hub):
|
|
|
- # called for every eventloop iteration, and if there
|
|
|
- # are messages pending this will schedule writing one message
|
|
|
- # by registering the 'schedule_writes' function for all currently
|
|
|
- # inactive inqueues (not already being written to)
|
|
|
+ def _setup_queues(self):
|
|
|
+ # this is only used by the original pool which uses a shared
|
|
|
+ # queue for all processes.
|
|
|
|
|
|
- # consolidate means the eventloop will merge them
|
|
|
- # and call the callback once with the list writable fds as
|
|
|
- # argument. Using this means we minimize the risk of having
|
|
|
- # the same fd receive every task if the pipe read buffer is not
|
|
|
- # full.
|
|
|
- if outbound:
|
|
|
- hub_add(
|
|
|
- diff(active_writes), None, WRITE | ERR,
|
|
|
- consolidate=True,
|
|
|
- )
|
|
|
- self.on_poll_start = on_poll_start
|
|
|
+ # these attributes make no sense for us, but we will still
|
|
|
+ # have to initialize them.
|
|
|
+ self._inqueue = self._outqueue = \
|
|
|
+ self._quick_put = self._quick_get = self._poll_result = None
|
|
|
|
|
|
- def on_inqueue_close(fd):
|
|
|
- # Makes sure the fd is removed from tracking when
|
|
|
- # the connection is closed, this is essential as fds may be reused.
|
|
|
- active_writes.discard(fd)
|
|
|
- all_inqueues.discard(fd)
|
|
|
- self._pool.on_inqueue_close = on_inqueue_close
|
|
|
+ def process_flush_queues(self, proc):
|
|
|
+ """Flushes all queues, including the outbound buffer, so that
|
|
|
+ all tasks that have not been started will be discarded.
|
|
|
|
|
|
- def schedule_writes(ready_fds, shuffle=random.shuffle):
|
|
|
- # Schedule write operation to ready file descriptor.
|
|
|
- # The file descriptor is writeable, but that does not
|
|
|
- # mean the process is currently reading from the socket.
|
|
|
- # The socket is buffered so writeable simply means that
|
|
|
- # the buffer can accept at least 1 byte of data.
|
|
|
- shuffle(ready_fds)
|
|
|
- for ready_fd in ready_fds:
|
|
|
- if ready_fd in active_writes:
|
|
|
- # already writing to this fd
|
|
|
- continue
|
|
|
- try:
|
|
|
- job = pop_message()
|
|
|
- except IndexError:
|
|
|
- # no more messages, remove all inactive fds from the hub.
|
|
|
- # this is important since the fds are always writeable
|
|
|
- # as long as there's 1 byte left in the buffer, and so
|
|
|
- # this may create a spinloop where the eventloop
|
|
|
- # always wakes up.
|
|
|
- for inqfd in diff(active_writes):
|
|
|
- hub_remove(inqfd)
|
|
|
- break
|
|
|
+ In Celery this is called whenever the transport connection is lost
|
|
|
+ (consumer restart).
|
|
|
|
|
|
+ """
|
|
|
+ resq = proc.outq._reader
|
|
|
+ on_state_change = self._result_handler.on_state_change
|
|
|
+ while not resq.closed and resq.poll(0) and self._state != TERMINATE:
|
|
|
+ try:
|
|
|
+ task = resq.recv()
|
|
|
+ except (IOError, EOFError) as exc:
|
|
|
+ debug('got %r while flushing process %r',
|
|
|
+ exc, proc, exc_info=1)
|
|
|
+ break
|
|
|
+ else:
|
|
|
+ if task is not None:
|
|
|
+ on_state_change(task)
|
|
|
else:
|
|
|
- if not job._accepted: # job not accepted by another worker
|
|
|
- try:
|
|
|
- # keep track of what process the write operation
|
|
|
- # was scheduled for.
|
|
|
- proc = job._scheduled_for = fileno_to_inq[ready_fd]
|
|
|
- except KeyError:
|
|
|
- # write was scheduled for this fd but the process
|
|
|
- # has since exited and the message must be sent to
|
|
|
- # another process.
|
|
|
- put_message(job)
|
|
|
- continue
|
|
|
- cor = _write_job(proc, ready_fd, job)
|
|
|
- job._writer = ref(cor)
|
|
|
- mark_write_gen_as_active(cor)
|
|
|
- mark_write_fd_as_active(ready_fd)
|
|
|
+ debug('got sentinel while flushing process %r', proc)
|
|
|
|
|
|
- # Try to write immediately, in case there's an error.
|
|
|
+ def on_partial_read(self, job, proc):
|
|
|
+ """Called when a job was only partially written to a child process
|
|
|
+ and it exited."""
|
|
|
+ # worker terminated by signal:
|
|
|
+ # we cannot reuse the sockets again, because we don't know if
|
|
|
+ # the process wrote/read anything from them, and if so we cannot
|
|
|
+ # restore the message boundaries.
|
|
|
+ if proc.exitcode != EX_RECYCLE:
|
|
|
+ if not job._accepted:
|
|
|
+ # job was not acked, so find another worker to send it to.
|
|
|
+ self._put_back(job)
|
|
|
+ writer = getattr(job, '_writer')
|
|
|
+ writer = writer and writer() or None
|
|
|
+ if writer:
|
|
|
+ self._active_writers.discard(writer)
|
|
|
+
|
|
|
+ # Replace queues to avoid reuse
|
|
|
+ before = len(self._queues)
|
|
|
+ try:
|
|
|
+ queues = self._find_worker_queues(proc)
|
|
|
+ if self.destroy_queues(queues):
|
|
|
+ self._queues[self.create_process_queues()] = None
|
|
|
+ except ValueError:
|
|
|
+ # Not in queue map, make sure sockets are closed.
|
|
|
+ self.destroy_queues((proc.inq, proc.outq, proc.synq))
|
|
|
+ assert len(self._queues) == before
|
|
|
+
|
|
|
+ def destroy_queues(self, queues):
|
|
|
+ """Destroy queues that can no longer be used, so that they
|
|
|
+ can be replaced by new sockets."""
|
|
|
+ removed = 1
|
|
|
+ try:
|
|
|
+ self._queues.pop(queues)
|
|
|
+ except KeyError:
|
|
|
+ removed = 0
|
|
|
+ try:
|
|
|
+ self.on_inqueue_close(queues[0]._writer.fileno())
|
|
|
+ except IOError:
|
|
|
+ pass
|
|
|
+ for queue in queues:
|
|
|
+ if queue:
|
|
|
+ for sock in (queue._reader, queue._writer):
|
|
|
+ if not sock.closed:
|
|
|
try:
|
|
|
- next(cor)
|
|
|
- hub_add((ready_fd, ), cor, WRITE | ERR)
|
|
|
- except StopIteration:
|
|
|
+ sock.close()
|
|
|
+ except (IOError, OSError):
|
|
|
pass
|
|
|
- hub.consolidate_callback = schedule_writes
|
|
|
+ return removed
|
|
|
|
|
|
- def send_job(tup):
|
|
|
- # Schedule writing job request for when one of the process
|
|
|
- # inqueues are writable.
|
|
|
- body = dumps(tup, protocol=protocol)
|
|
|
- body_size = len(body)
|
|
|
- header = pack('>I', body_size)
|
|
|
- # index 1,0 is the job ID.
|
|
|
- job = get_job(tup[1][0])
|
|
|
- job._payload = header, body, body_size
|
|
|
- put_message(job)
|
|
|
- self._pool._quick_put = send_job
|
|
|
+ def _create_payload(self, type_, args,
|
|
|
+ dumps=_pickle.dumps, pack=struct.pack,
|
|
|
+ protocol=HIGHEST_PROTOCOL):
|
|
|
+ body = dumps((type_, args), protocol=protocol)
|
|
|
+ size = len(body)
|
|
|
+ header = pack('>I', size)
|
|
|
+ return header, body, size
|
|
|
|
|
|
- write_stats = self.write_stats = Counter()
|
|
|
+ @classmethod
|
|
|
+ def _set_result_sentinel(cls, _outqueue, _pool):
|
|
|
+ # unused
|
|
|
+ pass
|
|
|
|
|
|
- def on_not_recovering(proc):
|
|
|
- # XXX Theoretically a possibility, but not seen in practice yet.
|
|
|
- raise Exception(
|
|
|
- 'Process writable but cannot write. Contact support!')
|
|
|
+ def _help_stuff_finish_args(self):
|
|
|
+ # Pool._help_stuff_finished is a classmethod so we have to use this
|
|
|
+ # trick to modify the arguments passed to it.
|
|
|
+ return (self._pool, )
|
|
|
|
|
|
- def _write_job(proc, fd, job):
|
|
|
- # writes job to the worker process.
|
|
|
- # Operation must complete if more than one byte of data
|
|
|
- # was written. If the broker connection is lost
|
|
|
- # and no data was written the operation shall be cancelled.
|
|
|
- header, body, body_size = job._payload
|
|
|
- errors = 0
|
|
|
+ @classmethod
|
|
|
+ def _help_stuff_finish(cls, pool):
|
|
|
+ debug(
|
|
|
+ 'removing tasks from inqueue until task handler finished',
|
|
|
+ )
|
|
|
+ fileno_to_proc = {}
|
|
|
+ inqR = set()
|
|
|
+ for w in pool:
|
|
|
try:
|
|
|
- # job result keeps track of what process the job is sent to.
|
|
|
- job._write_to = proc
|
|
|
- send = proc.send_job_offset
|
|
|
+ fd = w.inq._reader.fileno()
|
|
|
+ inqR.add(fd)
|
|
|
+ fileno_to_proc[fd] = w
|
|
|
+ except IOError:
|
|
|
+ pass
|
|
|
+ while inqR:
|
|
|
+ readable, _, again = _select(inqR, timeout=0.5)
|
|
|
+ if again:
|
|
|
+ continue
|
|
|
+ if not readable:
|
|
|
+ break
|
|
|
+ for fd in readable:
|
|
|
+ fileno_to_proc[fd].inq._reader.recv()
|
|
|
+ sleep(0)
|
|
|
|
|
|
- Hw = Bw = 0
|
|
|
- # write header
|
|
|
- while Hw < 4:
|
|
|
- try:
|
|
|
- Hw += send(header, Hw)
|
|
|
- except Exception as exc:
|
|
|
- if get_errno(exc) not in UNAVAIL:
|
|
|
- raise
|
|
|
- # suspend until more data
|
|
|
- errors += 1
|
|
|
- if errors > 100:
|
|
|
- on_not_recovering(proc)
|
|
|
- raise StopIteration()
|
|
|
- yield
|
|
|
- errors = 0
|
|
|
+ @property
|
|
|
+ def timers(self):
|
|
|
+ return {self.maintain_pool: 5.0}
|
|
|
|
|
|
- # write body
|
|
|
- while Bw < body_size:
|
|
|
- try:
|
|
|
- Bw += send(body, Bw)
|
|
|
- except Exception as exc:
|
|
|
- if get_errno(exc) not in UNAVAIL:
|
|
|
- raise
|
|
|
- # suspend until more data
|
|
|
- errors += 1
|
|
|
- if errors > 100:
|
|
|
- on_not_recovering(proc)
|
|
|
- raise StopIteration()
|
|
|
- yield
|
|
|
- errors = 0
|
|
|
- finally:
|
|
|
- write_stats[proc.index] += 1
|
|
|
- # message written, so this fd is now available
|
|
|
- active_writes.discard(fd)
|
|
|
- write_generator_done(job._writer()) # is a weakref
|
|
|
|
|
|
- def send_ack(response, pid, job, fd, WRITE=WRITE, ERR=ERR):
|
|
|
- # Only used when synack is enabled.
|
|
|
- # Schedule writing ack response for when the fd is writeable.
|
|
|
- msg = Ack(job, fd, precalc[response])
|
|
|
- callback = promise(write_generator_done)
|
|
|
- cor = _write_ack(fd, msg, callback=callback)
|
|
|
- mark_write_gen_as_active(cor)
|
|
|
- mark_write_fd_as_active(fd)
|
|
|
- callback.args = (cor, )
|
|
|
- hub_add((fd, ), cor, WRITE | ERR)
|
|
|
- self._pool.send_ack = send_ack
|
|
|
+class TaskPool(BasePool):
|
|
|
+ """Multiprocessing Pool implementation."""
|
|
|
+ Pool = AsynPool
|
|
|
+ BlockingPool = _pool.Pool
|
|
|
|
|
|
- def _write_ack(fd, ack, callback=None):
|
|
|
- # writes ack back to the worker if synack enabled.
|
|
|
- # this operation *MUST* complete, otherwise
|
|
|
- # the worker process will hang waiting for the ack.
|
|
|
- header, body, body_size = ack[2]
|
|
|
+ uses_semaphore = True
|
|
|
+ write_stats = None
|
|
|
+
|
|
|
+ def on_start(self):
|
|
|
+ """Run the task pool.
|
|
|
+
|
|
|
+ Will pre-fork all workers so they're ready to accept tasks.
|
|
|
+
|
|
|
+ """
|
|
|
+ if self.options.get('maxtasksperchild'):
|
|
|
try:
|
|
|
- try:
|
|
|
- proc = fileno_to_synq[fd]
|
|
|
- except KeyError:
|
|
|
- # process died, we can safely discard the ack at this
|
|
|
- # point.
|
|
|
- raise StopIteration()
|
|
|
- send = proc.send_syn_offset
|
|
|
+ from billiard.connection import Connection
|
|
|
+ Connection.send_offset
|
|
|
+ except (ImportError, AttributeError):
|
|
|
+ # billiard C extension not installed
|
|
|
+ warning(MAXTASKS_NO_BILLIARD)
|
|
|
|
|
|
- Hw = Bw = 0
|
|
|
- # write header
|
|
|
- while Hw < 4:
|
|
|
- try:
|
|
|
- Hw += send(header, Hw)
|
|
|
- except Exception as exc:
|
|
|
- if get_errno(exc) not in UNAVAIL:
|
|
|
- raise
|
|
|
- yield
|
|
|
+ forking_enable(self.forking_enable)
|
|
|
+ Pool = (self.BlockingPool if self.options.get('threads', True)
|
|
|
+ else self.Pool)
|
|
|
+ P = self._pool = Pool(processes=self.limit,
|
|
|
+ initializer=process_initializer,
|
|
|
+ synack=False,
|
|
|
+ **self.options)
|
|
|
|
|
|
- # write body
|
|
|
- while Bw < body_size:
|
|
|
- try:
|
|
|
- Bw += send(body, Bw)
|
|
|
- except Exception as exc:
|
|
|
- if get_errno(exc) not in UNAVAIL:
|
|
|
- raise
|
|
|
- # suspend until more data
|
|
|
- yield
|
|
|
- finally:
|
|
|
- if callback:
|
|
|
- callback()
|
|
|
- # message written, so this fd is now available
|
|
|
- active_writes.discard(fd)
|
|
|
+ # Create proxy methods
|
|
|
+ self.on_apply = P.apply_async
|
|
|
+ self.maintain_pool = P.maintain_pool
|
|
|
+ self.terminate_job = P.terminate_job
|
|
|
+ self.grow = P.grow
|
|
|
+ self.shrink = P.shrink
|
|
|
+ self.restart = P.restart
|
|
|
+ self.maybe_handle_result = P._result_handler.handle_event
|
|
|
+ self.handle_result_event = P.handle_result_event
|
|
|
+ self.register_with_event_loop = P.register_with_event_loop
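# Rough usage sketch (hypothetical worker-side code, not part of the patch):
#
#   pool = TaskPool(limit=4, forking_enable=True)
#   pool.on_start()                      # pre-forks the AsynPool child processes
#   pool.register_with_event_loop(hub)   # wires readers, writers and timers
#
# after which every hub iteration calls on_poll_start and, via the
# consolidate callback, schedule_writes for any pending outbound jobs.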
|
|
|
|
|
|
- def flush(self):
|
|
|
- if self._pool._state == TERMINATE:
|
|
|
- return
|
|
|
- # cancel all tasks that have not been accepted so that NACK is sent.
|
|
|
- for job in values(self._pool._cache):
|
|
|
- if not job._accepted:
|
|
|
- job._cancel()
|
|
|
+ def did_start_ok(self):
|
|
|
+ return self._pool.did_start_ok()
|
|
|
|
|
|
- # clear the outgoing buffer as the tasks will be redelivered by
|
|
|
- # the broker anyway.
|
|
|
- if self.outbound_buffer:
|
|
|
- self.outbound_buffer.clear()
|
|
|
- try:
|
|
|
- # ...but we must continue writing the payloads we already started
|
|
|
- # to keep message boundaries.
|
|
|
- # The messages may be NACK'ed later if synack is enabled.
|
|
|
- if self._pool._state == RUN:
|
|
|
- # flush outgoing buffers
|
|
|
- intervals = fxrange(0.01, 0.1, 0.01, repeatlast=True)
|
|
|
- while self._active_writers:
|
|
|
- writers = list(self._active_writers)
|
|
|
- for gen in writers:
|
|
|
- if (gen.__name__ == '_write_job' and
|
|
|
- gen_not_started(gen)):
|
|
|
- # has not started writing the job so can
|
|
|
- # discard the task, but we must also remove
|
|
|
- # it from the Pool._cache.
|
|
|
- job_to_discard = None
|
|
|
- for job in values(self._pool._cache):
|
|
|
- if job._writer() is gen: # _writer is saferef
|
|
|
- # removes from Pool._cache
|
|
|
- job_to_discard = job
|
|
|
- break
|
|
|
- if job_to_discard:
|
|
|
- job_to_discard.discard()
|
|
|
- self._active_writers.discard(gen)
|
|
|
- else:
|
|
|
- try:
|
|
|
- next(gen)
|
|
|
- except StopIteration:
|
|
|
- self._active_writers.discard(gen)
|
|
|
- # workers may have exited in the meantime.
|
|
|
- self.maintain_pool()
|
|
|
- sleep(next(intervals)) # don't busyloop
|
|
|
- finally:
|
|
|
- self.outbound_buffer.clear()
|
|
|
- self._active_writers.clear()
|
|
|
+ def on_stop(self):
|
|
|
+ """Gracefully stop the pool."""
|
|
|
+ if self._pool is not None and self._pool._state in (RUN, CLOSE):
|
|
|
+ self._pool.close()
|
|
|
+ self._pool.join()
|
|
|
+ self._pool = None
|
|
|
+
|
|
|
+ def on_terminate(self):
|
|
|
+ """Force terminate the pool."""
|
|
|
+ if self._pool is not None:
|
|
|
+ self._pool.terminate()
|
|
|
+ self._pool = None
|
|
|
+
|
|
|
+ def on_close(self):
|
|
|
+ if self._pool is not None and self._pool._state == RUN:
|
|
|
+ self._pool.close()
|
|
|
+
|
|
|
+ def _get_info(self):
|
|
|
+ return {
|
|
|
+ 'max-concurrency': self.limit,
|
|
|
+ 'processes': [p.pid for p in self._pool._pool],
|
|
|
+ 'max-tasks-per-child': self._pool._maxtasksperchild or 'N/A',
|
|
|
+ 'put-guarded-by-semaphore': self.putlocks,
|
|
|
+ 'timeouts': (self._pool.soft_timeout or 0,
|
|
|
+ self._pool.timeout or 0),
|
|
|
+ 'writes': self._pool.human_write_stats(),
|
|
|
+ }
|
|
|
|
|
|
@property
|
|
|
def num_processes(self):
|
|
|
return self._pool._processes
|
|
|
-
|
|
|
- @property
|
|
|
- def timers(self):
|
|
|
- return {self.maintain_pool: 5.0}
|