# cassandra.py 5.8 KB

  1. """celery.backends.cassandra"""
  2. try:
  3. import pycassa
  4. from thrift import Thrift
  5. C = pycassa.cassandra.ttypes
  6. except ImportError:
  7. pycassa = None
  8. import socket
  9. import time
  10. from datetime import datetime
  11. from celery.backends.base import BaseDictBackend
  12. from celery.exceptions import ImproperlyConfigured
  13. from celery.utils.serialization import pickle
  14. from celery.utils.timeutils import maybe_timedelta, timedelta_seconds
  15. from celery import states
  16. class CassandraBackend(BaseDictBackend):
  17. """Highly fault tolerant Cassandra backend.
  18. .. attribute:: servers
  19. List of Cassandra servers with format: "hostname:port".
  20. :raises celery.exceptions.ImproperlyConfigured: if
  21. module :mod:`pycassa` is not available.
  22. """
  23. servers = []
  24. keyspace = None
  25. column_family = None
  26. _retry_timeout = 300
  27. _retry_wait = 3
  28. def __init__(self, servers=None, keyspace=None, column_family=None,
  29. cassandra_options=None, **kwargs):
  30. """Initialize Cassandra backend.
  31. Raises :class:`celery.exceptions.ImproperlyConfigured` if
  32. the :setting:`CASSANDRA_SERVERS` setting is not set.
  33. """
  34. super(CassandraBackend, self).__init__(**kwargs)
  35. self.logger = self.app.log.setup_logger(
  36. name="celery.backends.cassandra")
  37. self.result_expires = kwargs.get("result_expires") or \
  38. maybe_timedelta(
  39. self.app.conf.CELERY_TASK_RESULT_EXPIRES)
  40. if not pycassa:
  41. raise ImproperlyConfigured(
  42. "You need to install the pycassa library to use the "
  43. "Cassandra backend. See https://github.com/pycassa/pycassa")
  44. self.servers = servers or \
  45. self.app.conf.get("CASSANDRA_SERVERS", self.servers)
  46. self.keyspace = keyspace or \
  47. self.app.conf.get("CASSANDRA_KEYSPACE",
  48. self.keyspace)
  49. self.column_family = column_family or \
  50. self.app.conf.get("CASSANDRA_COLUMN_FAMILY",
  51. self.column_family)
  52. self.cassandra_options = dict(cassandra_options or {},
  53. **self.app.conf.get("CASSANDRA_OPTIONS",
  54. {}))
  55. read_cons = self.app.conf.get("CASSANDRA_READ_CONSISTENCY",
  56. "LOCAL_QUORUM")
  57. write_cons = self.app.conf.get("CASSANDRA_WRITE_CONSISTENCY",
  58. "LOCAL_QUORUM")
  59. try:
  60. self.read_consistency = getattr(pycassa.ConsistencyLevel,
  61. read_cons)
  62. except AttributeError:
  63. self.read_consistency = pycassa.ConsistencyLevel.LOCAL_QUORUM
  64. try:
  65. self.write_consistency = getattr(pycassa.ConsistencyLevel,
  66. write_cons)
  67. except AttributeError:
  68. self.write_consistency = pycassa.ConsistencyLevel.LOCAL_QUORUM
  69. if not self.servers or not self.keyspace or not self.column_family:
  70. raise ImproperlyConfigured(
  71. "Cassandra backend not configured.")
  72. self._column_family = None
  73. def _retry_on_error(self, fun):
  74. self = args[0]
  75. ts = time.time() + self._retry_timeout
  76. while 1:
  77. try:
  78. return fun(*args, **kwargs)
  79. except (pycassa.InvalidRequestException,
  80. pycassa.TimedOutException,
  81. pycassa.UnavailableException,
  82. socket.error,
  83. socket.timeout,
  84. Thrift.TException), exc:
  85. if time.time() > ts:
  86. raise
  87. self.logger.warn('Cassandra error: %r. Retrying...' % (exc, ))
  88. time.sleep(self._retry_wait)
  89. def _get_column_family(self):
  90. if self._column_family is None:
  91. conn = pycassa.connect(self.keyspace, servers=self.servers,
  92. **self.cassandra_options)
  93. self._column_family = \
  94. pycassa.ColumnFamily(conn, self.column_family,
  95. read_consistency_level=self.read_consistency,
  96. write_consistency_level=self.write_consistency)
  97. return self._column_family
  98. def process_cleanup(self):
  99. if self._column_family is not None:
  100. self._column_family = None
  101. def _store_result(self, task_id, result, status, traceback=None):
  102. """Store return value and status of an executed task."""
  103. def _do_store():
  104. cf = self._get_column_family()
  105. date_done = datetime.utcnow()
  106. meta = {"status": status,
  107. "result": pickle.dumps(result),
  108. "date_done": date_done.strftime('%Y-%m-%dT%H:%M:%SZ'),
  109. "traceback": pickle.dumps(traceback)}
  110. cf.insert(task_id, meta,
  111. ttl=timedelta_seconds(self.result_expires))
  112. return self._retry_on_error(_do_store)
  113. def _get_task_meta_for(self, task_id):
  114. """Get task metadata for a task by id."""
  115. def _do_get():
  116. cf = self._get_column_family()
  117. try:
  118. obj = cf.get(task_id)
  119. meta = {
  120. "task_id": task_id,
  121. "status": obj["status"],
  122. "result": pickle.loads(str(obj["result"])),
  123. "date_done": obj["date_done"],
  124. "traceback": pickle.loads(str(obj["traceback"])),
  125. }
  126. except (KeyError, pycassa.NotFoundException):
  127. meta = {"status": states.PENDING, "result": None}
  128. return meta
  129. return self._retry_on_error(_do_get)