|
@@ -4,21 +4,27 @@ import heapq
|
|
|
from carrot.utils import partition
|
|
|
|
|
|
from celery import states
|
|
|
+from celery.datastructures import LocalCache
|
|
|
|
|
|
HEARTBEAT_EXPIRE = 150 # 2 minutes, 30 seconds
|
|
|
|
|
|
|
|
|
-class Element(object):
|
|
|
+class Element(dict):
|
|
|
"""Base class for types."""
|
|
|
visited = False
|
|
|
|
|
|
def __init__(self, **fields):
|
|
|
- self.update(fields)
|
|
|
+ dict.__init__(self, fields)
|
|
|
|
|
|
- def update(self, fields, **extra):
|
|
|
- for field_name, field_value in dict(fields, **extra).items():
|
|
|
- setattr(self, field_name, field_value)
|
|
|
+ def __getattr__(self, key):
|
|
|
+ try:
|
|
|
+ return self[key]
|
|
|
+ except KeyError:
|
|
|
+ raise AttributeError("'%s' object has no attribute '%s'" % (
|
|
|
+ self.__class__.__name__, key))
|
|
|
|
|
|
+ def __setattr__(self, key, value):
|
|
|
+ self[key] = value
|
|
|
|
|
|
|
|
|
class Worker(Element):
|
|
@@ -28,13 +34,13 @@ class Worker(Element):
|
|
|
super(Worker, self).__init__(**fields)
|
|
|
self.heartbeats = []
|
|
|
|
|
|
- def online(self, timestamp=None, **kwargs):
|
|
|
+ def on_online(self, timestamp=None, **kwargs):
|
|
|
self._heartpush(timestamp)
|
|
|
|
|
|
- def offline(self, **kwargs):
|
|
|
+ def on_offline(self, **kwargs):
|
|
|
self.heartbeats = []
|
|
|
|
|
|
- def heartbeat(self, timestamp=None, **kwargs):
|
|
|
+ def on_heartbeat(self, timestamp=None, **kwargs):
|
|
|
self._heartpush(timestamp)
|
|
|
|
|
|
def _heartpush(self, timestamp):
|
|
@@ -44,7 +50,7 @@ class Worker(Element):
|
|
|
@property
|
|
|
def alive(self):
|
|
|
return (self.heartbeats and
|
|
|
- time.time() < self.heartbeats[0] + HEARTBEAT_EXPIRE)
|
|
|
+ time.time() < self.heartbeats[-1] + HEARTBEAT_EXPIRE)
|
|
|
|
|
|
|
|
|
class Task(Element):
|
|
@@ -52,17 +58,25 @@ class Task(Element):
|
|
|
_info_fields = ("args", "kwargs", "retries",
|
|
|
"result", "eta", "runtime",
|
|
|
"exception")
|
|
|
- uuid = None
|
|
|
- name = None
|
|
|
- state = states.PENDING
|
|
|
- received = False
|
|
|
- started = False
|
|
|
- args = None
|
|
|
- kwargs = None
|
|
|
- eta = None
|
|
|
- retries = 0
|
|
|
- worker = None
|
|
|
- timestamp = None
|
|
|
+
|
|
|
+ _defaults = dict(uuid=None,
|
|
|
+ name=None,
|
|
|
+ state=states.PENDING,
|
|
|
+ received=False,
|
|
|
+ started=False,
|
|
|
+ succeeded=False,
|
|
|
+ failed=False,
|
|
|
+ retried=False,
|
|
|
+ revoked=False,
|
|
|
+ args=None,
|
|
|
+ kwargs=None,
|
|
|
+ eta=None,
|
|
|
+ retries=None,
|
|
|
+ worker=None,
|
|
|
+ timestamp=None)
|
|
|
+
|
|
|
+ def __init__(self, **fields):
|
|
|
+ super(Task, self).__init__(**dict(self._defaults, **fields))
|
|
|
|
|
|
def info(self, fields=None, extra=[]):
|
|
|
if fields is None:
|
|
@@ -77,52 +91,58 @@ class Task(Element):
|
|
|
return self.state in states.READY_STATES
|
|
|
|
|
|
def update(self, d, **extra):
|
|
|
- d = dict(d, **extra)
|
|
|
if self.worker:
|
|
|
- self.worker.online()
|
|
|
- return super(Task, self).update(d)
|
|
|
+ self.worker.on_heartbeat(timestamp=time.time())
|
|
|
+ return super(Task, self).update(d, **extra)
|
|
|
|
|
|
- def received(self, timestamp=None, **fields):
|
|
|
+ def on_received(self, timestamp=None, **fields):
|
|
|
+ print("ON RECEIVED")
|
|
|
self.received = timestamp
|
|
|
self.state = "RECEIVED"
|
|
|
+ print(fields)
|
|
|
self.update(fields, timestamp=timestamp)
|
|
|
|
|
|
- def started(self, timestamp=None, **fields):
|
|
|
+ def on_started(self, timestamp=None, **fields):
|
|
|
self.state = states.STARTED
|
|
|
self.started = timestamp
|
|
|
- self.update(fields)
|
|
|
+ self.update(fields, timestamp=timestamp)
|
|
|
|
|
|
- def failed(self, timestamp=None, **fields):
|
|
|
+ def on_failed(self, timestamp=None, **fields):
|
|
|
self.state = states.FAILURE
|
|
|
self.failed = timestamp
|
|
|
self.update(fields, timestamp=timestamp)
|
|
|
|
|
|
- def retried(self, timestamp=None, **fields):
|
|
|
+ def on_retried(self, timestamp=None, **fields):
|
|
|
self.state = states.RETRY
|
|
|
self.retried = timestamp
|
|
|
self.update(fields, timestamp=timestamp)
|
|
|
|
|
|
- def succeeded(self, timestamp=None, **fields):
|
|
|
+ def on_succeeded(self, timestamp=None, **fields):
|
|
|
self.state = states.SUCCESS
|
|
|
- self.suceeded = timestamp
|
|
|
+ self.succeeded = timestamp
|
|
|
self.update(fields, timestamp=timestamp)
|
|
|
|
|
|
- def revoked(self, timestamp=None):
|
|
|
+ def on_revoked(self, timestamp=None, **fields):
|
|
|
self.state = states.REVOKED
|
|
|
+ self.revoked = timestamp
|
|
|
+ self.update(fields, timestamp=timestamp)
|
|
|
|
|
|
|
|
|
class State(object):
|
|
|
+ """Represents a snapshot of a clusters state."""
|
|
|
event_count = 0
|
|
|
task_count = 0
|
|
|
|
|
|
- def __init__(self, callback=None):
|
|
|
- self.workers = {}
|
|
|
- self.tasks = {}
|
|
|
- self.callback = callback
|
|
|
+ def __init__(self, callback=None,
|
|
|
+ max_workers_in_memory=5000, max_tasks_in_memory=10000):
|
|
|
+ self.workers = LocalCache(max_workers_in_memory)
|
|
|
+ self.tasks = LocalCache(max_tasks_in_memory)
|
|
|
+ self.event_callback = callback
|
|
|
self.group_handlers = {"worker": self.worker_event,
|
|
|
"task": self.task_event}
|
|
|
|
|
|
- def get_worker(self, hostname, **kwargs):
|
|
|
+ def get_or_create_worker(self, hostname, **kwargs):
|
|
|
+ """Get or create worker by hostname."""
|
|
|
try:
|
|
|
worker = self.workers[hostname]
|
|
|
worker.update(kwargs)
|
|
@@ -131,7 +151,8 @@ class State(object):
|
|
|
hostname=hostname, **kwargs)
|
|
|
return worker
|
|
|
|
|
|
- def get_task(self, uuid, **kwargs):
|
|
|
+ def get_or_create_task(self, uuid, **kwargs):
|
|
|
+ """Get or create task by uuid."""
|
|
|
try:
|
|
|
task = self.tasks[uuid]
|
|
|
task.update(kwargs)
|
|
@@ -140,34 +161,75 @@ class State(object):
|
|
|
return task
|
|
|
|
|
|
def worker_event(self, type, fields):
|
|
|
+ """Process worker event."""
|
|
|
hostname = fields.pop("hostname")
|
|
|
- worker = self.workers[hostname] = Worker(hostname=hostname)
|
|
|
- handler = getattr(worker, type)
|
|
|
+ worker = self.get_or_create_worker(hostname)
|
|
|
+ handler = getattr(worker, "on_%s" % type)
|
|
|
if handler:
|
|
|
handler(**fields)
|
|
|
|
|
|
def task_event(self, type, fields):
|
|
|
+ """Process task event."""
|
|
|
uuid = fields.pop("uuid")
|
|
|
hostname = fields.pop("hostname")
|
|
|
- worker = self.get_worker(hostname)
|
|
|
- task = self.get_task(uuid, worker=worker)
|
|
|
- handler = getattr(task, type)
|
|
|
+ worker = self.get_or_create_worker(hostname)
|
|
|
+ task = self.get_or_create_task(uuid)
|
|
|
+ handler = getattr(task, "on_%s" % type)
|
|
|
if type == "received":
|
|
|
self.task_count += 1
|
|
|
if handler:
|
|
|
handler(**fields)
|
|
|
+ task.worker = worker
|
|
|
|
|
|
def event(self, event):
|
|
|
+ """Process event."""
|
|
|
event = dict((key.encode("utf-8"), value)
|
|
|
for key, value in event.items())
|
|
|
self.event_count += 1
|
|
|
group, _, type = partition(event.pop("type"), "-")
|
|
|
self.group_handlers[group](type, event)
|
|
|
- if self.callback:
|
|
|
- self.callback(self, event)
|
|
|
+ if self.event_callback:
|
|
|
+ self.event_callback(self, event)
|
|
|
|
|
|
def tasks_by_timestamp(self):
|
|
|
- return sorted(self.tasks.items(), key=lambda t: t[1].timestamp,
|
|
|
- reverse=True)
|
|
|
+ """Get tasks by timestamp.
|
|
|
+
|
|
|
+ Returns a list of ``(uuid, task)`` tuples.
|
|
|
+
|
|
|
+ """
|
|
|
+ return self._sort_tasks_by_time(self.tasks.items())
|
|
|
+
|
|
|
+ def _sort_tasks_by_time(self, tasks):
|
|
|
+ """Sort task items by time."""
|
|
|
+ return sorted(tasks, key=lambda t: t[1].timestamp, reverse=True)
|
|
|
+
|
|
|
+ def tasks_by_type(self, name):
|
|
|
+ """Get all tasks by type.
|
|
|
+
|
|
|
+ Returns a list of ``(uuid, task)`` tuples.
|
|
|
+
|
|
|
+ """
|
|
|
+ return self._sort_tasks_by_time([(uuid, task)
|
|
|
+ for uuid, task in self.tasks.items()
|
|
|
+ if task.name == name])
|
|
|
+
|
|
|
+ def tasks_by_worker(self, hostname):
|
|
|
+ """Get all tasks by worker.
|
|
|
+
|
|
|
+ Returns a list of ``(uuid, task)`` tuples.
|
|
|
+
|
|
|
+ """
|
|
|
+ return self._sort_tasks_by_time([(uuid, task)
|
|
|
+ for uuid, task in self.tasks.items()
|
|
|
+ if task.worker.hostname == hostname])
|
|
|
+
|
|
|
+ def task_types(self):
|
|
|
+ """Returns a list of all seen task types."""
|
|
|
+ return list(set(task.name for task in self.tasks.values()))
|
|
|
+
|
|
|
+ def alive_workers(self):
|
|
|
+ """Returns a list of (seemingly) alive workers."""
|
|
|
+ return [w for w in self.workers.values() if w.alive]
|
|
|
+
|
|
|
|
|
|
state = State()
|