Browse Source

Can only check Pool.did_start_ok at startup. Closes #1118

Ask Solem 12 years ago
parent
commit
470be26ab4
2 changed files with 9 additions and 3 deletions
  1. 6 3
      celery/worker/__init__.py
  2. 3 0
      celery/worker/consumer.py

+ 6 - 3
celery/worker/__init__.py

@@ -96,7 +96,7 @@ class Pool(bootsteps.StartStopComponent):
             w.max_concurrency, w.min_concurrency = w.autoscale
         self.autoreload_enabled = autoreload
 
-    def on_poll_init(self, pool, hub):
+    def on_poll_init(self, pool, w, hub):
         apply_after = hub.timer.apply_after
         apply_at = hub.timer.apply_at
         on_soft_timeout = pool.on_soft_timeout
@@ -106,7 +106,10 @@ class Pool(bootsteps.StartStopComponent):
         remove = hub.remove
         now = time.time
 
-        if not pool.did_start_ok():
+        # did_start_ok will verify that pool processes were able to start,
+        # but this will only work the first time we start, as
+        # maxtasksperchild will mess up metrics.
+        if not w.consumer.restart_count and not pool.did_start_ok():
             raise WorkerLostError('Could not start worker processes')
 
         # need to handle pool results before every task
@@ -167,7 +170,7 @@ class Pool(bootsteps.StartStopComponent):
             semaphore=semaphore,
         )
         if w.hub:
-            w.hub.on_init.append(partial(self.on_poll_init, pool))
+            w.hub.on_init.append(partial(self.on_poll_init, pool, w))
         return pool
 
 

+ 3 - 0
celery/worker/consumer.py

@@ -315,6 +315,8 @@ class Consumer(object):
     # Consumer state, can be RUN or CLOSE.
     _state = None
 
+    restart_count = -1  # first start is the same as a restart
+
     def __init__(self, ready_queue,
                  init_callback=noop, send_events=False, hostname=None,
                  initial_prefetch_count=2, pool=None, app=None,
@@ -384,6 +386,7 @@ class Consumer(object):
         self.init_callback(self)
 
         while self._state != CLOSE:
+            self.restart_count += 1
             self.maybe_shutdown()
             try:
                 self.reset_connection()