"""celery.backends.cassandra"""
from datetime import datetime
import functools
import itertools
import random
import socket
import time

try:
    import pycassa
    from thrift import Thrift
    C = __import__('cassandra').ttypes # FIXME: Namespace kludge
except ImportError:
    pycassa = None

from celery.backends.base import BaseDictBackend
from celery import conf
from celery.exceptions import ImproperlyConfigured
from celery.loaders import load_settings
from celery.log import setup_logger
from celery.serialization import pickle
from celery import states
  20. class CassandraBackend(BaseDictBackend):
  21. """Highly fault tolerant Cassandra backend.
  22. .. attribute:: servers
  23. List of Cassandra servers with format: "hostname:port".
  24. :raises celery.exceptions.ImproperlyConfigured: if
  25. module :mod:`pycassa` is not available.
  26. """
  27. servers = []
  28. keyspace = None
  29. column_family = None
  30. _retry_timeout = 300
  31. _retry_wait = 3
  32. _index_shards = 64
  33. _index_keys = ["celery.results.index!%02x" % i for i in range(_index_shards)]
  34. def __init__(self, servers=None, keyspace=None, column_family=None, cassandra_options=None, **kwargs):
  35. """Initialize Cassandra backend.
  36. Raises :class:`celery.exceptions.ImproperlyConfigured` if
  37. :setting:`CASSANDRA_SERVERS` is not set.
  38. """
  39. self.logger = setup_logger("celery.backends.cassandra")
  40. self.result_expires = kwargs.get("result_expires") or \
  41. conf.TASK_RESULT_EXPIRES
  42. if not pycassa:
  43. raise ImproperlyConfigured(
  44. "You need to install the pycassa library to use the "
  45. "Cassandra backend. See http://github.com/vomjom/pycassa")
  46. settings = load_settings()
  47. self.servers = servers or \
  48. getattr(settings, "CASSANDRA_SERVERS", self.servers)
  49. self.keyspace = keyspace or \
  50. getattr(settings, "CASSANDRA_KEYSPACE",
  51. self.keyspace)
  52. self.column_family = column_family or \
  53. getattr(settings, "CASSANDRA_COLUMN_FAMILY",
  54. self.column_family)
  55. self.cassandra_options = dict(cassandra_options or {},
  56. **getattr(settings, "CASSANDRA_OPTIONS", {}))
  57. if not self.servers or not self.keyspace or not self.column_family:
  58. raise ImproperlyConfigured(
  59. "Cassandra backend not configured.")
  60. super(CassandraBackend, self).__init__()
  61. self._column_family = None
  62. def _retry_on_error(func):
  63. def wrapper(*args, **kwargs):
  64. self = args[0]
  65. ts = time.time() + self._retry_timeout
  66. while 1:
  67. try:
  68. return func(*args, **kwargs)
  69. except (pycassa.InvalidRequestException,
  70. pycassa.NoServerAvailable,
  71. pycassa.TimedOutException,
  72. pycassa.UnavailableException,
  73. socket.error,
  74. socket.timeout,
  75. Thrift.TException), exc:
  76. self.logger.warn('Cassandra error: %s. Retrying...' % exc)
  77. if time.time() > ts:
  78. raise
  79. time.sleep(self._retry_wait)
  80. return wrapper
  81. def _get_column_family(self):
  82. if self._column_family is None:
  83. conn = pycassa.connect(self.servers,
  84. **self.cassandra_options)
  85. self._column_family = \
  86. pycassa.ColumnFamily(conn, self.keyspace,
  87. self.column_family,
  88. read_consistency_level=pycassa.ConsistencyLevel.DCQUORUM,
  89. write_consistency_level=pycassa.ConsistencyLevel.DCQUORUM)
  90. return self._column_family
  91. def process_cleanup(self):
  92. if self._column_family is not None:
  93. self._column_family = None
  94. @_retry_on_error
  95. def _store_result(self, task_id, result, status, traceback=None):
  96. """Store return value and status of an executed task."""
  97. cf = self._get_column_family()
  98. date_done = datetime.utcnow()
  99. index_key = 'celery.results.index!%02x' % random.randrange(self._index_shards)
  100. index_column_name = '%8x!%s' % (time.mktime(date_done.timetuple()), task_id)
  101. meta = {"status": status,
  102. "result": pickle.dumps(result),
  103. "date_done": date_done.strftime('%Y-%m-%dT%H:%M:%SZ'),
  104. "traceback": pickle.dumps(traceback)}
  105. cf.insert(task_id, meta)
  106. cf.insert(index_key, {index_column_name: status})
  107. @_retry_on_error
  108. def _get_task_meta_for(self, task_id):
  109. """Get task metadata for a task by id."""
  110. cf = self._get_column_family()
  111. try:
  112. obj = cf.get(task_id)
  113. meta = {
  114. "task_id": task_id,
  115. "status": obj["status"],
  116. "result": pickle.loads(str(obj["result"])),
  117. "date_done": obj["date_done"],
  118. "traceback": pickle.loads(str(obj["traceback"])),
  119. }
  120. except (KeyError, pycassa.NotFoundException):
  121. meta = {"status": states.PENDING, "result": None}
  122. return meta
  123. def cleanup(self):
  124. """Delete expired metadata."""
  125. self.logger.debug('Running cleanup...')
  126. expires = datetime.utcnow() - self.result_expires
  127. end_column = '%8x"' % (time.mktime(expires.timetuple()))
  128. cf = self._get_column_family()
  129. column_parent = C.ColumnParent(cf.column_family)
  130. slice_pred = C.SlicePredicate(slice_range=C.SliceRange('', end_column,
  131. count=2**30))
  132. columns = cf.client.multiget_slice(cf.keyspace, self._index_keys,
  133. column_parent, slice_pred,
  134. pycassa.ConsistencyLevel.DCQUORUM)
  135. index_cols = [c.column.name for c in itertools.chain(*columns.values())]
  136. for k in self._index_keys:
  137. cf.remove(k, index_cols)
  138. task_ids = [c[9:] for c in index_cols]
  139. for k in task_ids:
  140. cf.remove(k)
  141. self.logger.debug('Cleaned %i expired results' % len(task_ids))