# base.py (22 KB)
  1. # -*- coding: utf-8 -*-
  2. """
  3. celery.backends.base
  4. ~~~~~~~~~~~~~~~~~~~~
  5. Result backend base classes.
  6. - :class:`BaseBackend` defines the interface.
  7. - :class:`KeyValueStoreBackend` is a common base class
  8. using K/V semantics like _get and _put.
  9. """
  10. from __future__ import absolute_import
  11. import time
  12. import sys
  13. from datetime import timedelta
  14. from billiard.einfo import ExceptionInfo
  15. from kombu.serialization import (
  16. dumps, loads, prepare_accept_content,
  17. registry as serializer_registry,
  18. )
  19. from kombu.utils.encoding import bytes_to_str, ensure_bytes, from_utf8
  20. from celery import states
  21. from celery import current_app, maybe_signature
  22. from celery.app import current_task
  23. from celery.exceptions import ChordError, TimeoutError, TaskRevokedError
  24. from celery.five import items
  25. from celery.result import (
  26. GroupResult, ResultBase, allow_join_result, result_from_tuple,
  27. )
  28. from celery.utils.functional import LRUCache
  29. from celery.utils.log import get_logger
  30. from celery.utils.serialization import (
  31. get_pickled_exception,
  32. get_pickleable_exception,
  33. create_exception_cls,
  34. )
  35. __all__ = ['BaseBackend', 'KeyValueStoreBackend', 'DisabledBackend']
  36. EXCEPTION_ABLE_CODECS = frozenset({'pickle'})
  37. PY3 = sys.version_info >= (3, 0)
  38. logger = get_logger(__name__)
  39. def unpickle_backend(cls, args, kwargs):
  40. """Return an unpickled backend."""
  41. return cls(*args, app=current_app._get_current_object(), **kwargs)
  42. class _nulldict(dict):
  43. def ignore(self, *a, **kw):
  44. pass
  45. __setitem__ = update = setdefault = ignore
  46. class BaseBackend(object):
  47. READY_STATES = states.READY_STATES
  48. UNREADY_STATES = states.UNREADY_STATES
  49. EXCEPTION_STATES = states.EXCEPTION_STATES
  50. TimeoutError = TimeoutError
  51. #: Time to sleep between polling each individual item
  52. #: in `ResultSet.iterate`. as opposed to the `interval`
  53. #: argument which is for each pass.
  54. subpolling_interval = None
  55. #: If true the backend must implement :meth:`get_many`.
  56. supports_native_join = False
  57. #: If true the backend must automatically expire results.
  58. #: The daily backend_cleanup periodic task will not be triggered
  59. #: in this case.
  60. supports_autoexpire = False
  61. #: Set to true if the backend is peristent by default.
  62. persistent = True
  63. retry_policy = {
  64. 'max_retries': 20,
  65. 'interval_start': 0,
  66. 'interval_step': 1,
  67. 'interval_max': 1,
  68. }
  69. def __init__(self, app,
  70. serializer=None, max_cached_results=None, accept=None,
  71. expires=None, expires_type=None, **kwargs):
  72. self.app = app
  73. conf = self.app.conf
  74. self.serializer = serializer or conf.CELERY_RESULT_SERIALIZER
  75. (self.content_type,
  76. self.content_encoding,
  77. self.encoder) = serializer_registry._encoders[self.serializer]
  78. cmax = max_cached_results or conf.CELERY_MAX_CACHED_RESULTS
  79. self._cache = _nulldict() if cmax == -1 else LRUCache(limit=cmax)
  80. self.expires = self.prepare_expires(expires, expires_type)
  81. self.accept = prepare_accept_content(
  82. conf.CELERY_ACCEPT_CONTENT if accept is None else accept,
  83. )
  84. def mark_as_started(self, task_id, **meta):
  85. """Mark a task as started"""
  86. return self.store_result(task_id, meta, status=states.STARTED)
  87. def mark_as_done(self, task_id, result, request=None, state=states.SUCCESS):
  88. """Mark task as successfully executed."""
  89. self.store_result(task_id, result, status=state, request=request)
  90. if request and request.chord:
  91. self.on_chord_part_return(request, state)
  92. def mark_as_failure(self, task_id, exc,
  93. traceback=None, request=None, state=states.FAILURE):
  94. """Mark task as executed with failure. Stores the exception."""
  95. self.store_result(task_id, exc, status=state,
  96. traceback=traceback, request=request)
  97. if request and request.chord:
  98. self.on_chord_part_return(request, state, exc)
  99. def chord_error_from_stack(self, callback, exc=None):
  100. from celery import group
  101. app = self.app
  102. backend = app._tasks[callback.task].backend
  103. try:
  104. group(
  105. [app.signature(errback)
  106. for errback in callback.options.get('link_error') or []],
  107. app=app,
  108. ).apply_async((callback.id,))
  109. except Exception as eb_exc:
  110. return backend.fail_from_current_stack(callback.id, exc=eb_exc)
  111. else:
  112. return backend.fail_from_current_stack(callback.id, exc=exc)
  113. def fail_from_current_stack(self, task_id, exc=None):
  114. type_, real_exc, tb = sys.exc_info()
  115. try:
  116. exc = real_exc if exc is None else exc
  117. ei = ExceptionInfo((type_, exc, tb))
  118. self.mark_as_failure(task_id, exc, ei.traceback)
  119. return ei
  120. finally:
  121. del(tb)
  122. def mark_as_retry(self, task_id, exc, traceback=None, request=None):
  123. """Mark task as being retries. Stores the current
  124. exception (if any)."""
  125. return self.store_result(task_id, exc, status=states.RETRY,
  126. traceback=traceback, request=request)
  127. def mark_as_revoked(self, task_id, reason='', request=None):
  128. return self.store_result(task_id, TaskRevokedError(reason),
  129. status=states.REVOKED, traceback=None,
  130. request=request)
  131. def prepare_exception(self, exc, serializer=None):
  132. """Prepare exception for serialization."""
  133. serializer = self.serializer if serializer is None else serializer
  134. if serializer in EXCEPTION_ABLE_CODECS:
  135. return get_pickleable_exception(exc)
  136. return {'exc_type': type(exc).__name__, 'exc_message': str(exc)}
  137. def exception_to_python(self, exc):
  138. """Convert serialized exception to Python exception."""
  139. if exc:
  140. if not isinstance(exc, BaseException):
  141. exc = create_exception_cls(
  142. from_utf8(exc['exc_type']), __name__)(exc['exc_message'])
  143. if self.serializer in EXCEPTION_ABLE_CODECS:
  144. exc = get_pickled_exception(exc)
  145. return exc
  146. def prepare_value(self, result):
  147. """Prepare value for storage."""
  148. if self.serializer != 'pickle' and isinstance(result, ResultBase):
  149. return result.as_tuple()
  150. return result
  151. def encode(self, data):
  152. _, _, payload = dumps(data, serializer=self.serializer)
  153. return payload
  154. def meta_from_decoded(self, meta):
  155. if meta['status'] in self.EXCEPTION_STATES:
  156. meta['result'] = self.exception_to_python(meta['result'])
  157. return meta
  158. def decode_result(self, payload):
  159. return self.meta_from_decoded(self.decode(payload))
  160. def decode(self, payload):
  161. payload = PY3 and payload or str(payload)
  162. return loads(payload,
  163. content_type=self.content_type,
  164. content_encoding=self.content_encoding,
  165. accept=self.accept)
  166. def wait_for(self, task_id,
  167. timeout=None, interval=0.5, no_ack=True, on_interval=None):
  168. """Wait for task and return its result.
  169. If the task raises an exception, this exception
  170. will be re-raised by :func:`wait_for`.
  171. If `timeout` is not :const:`None`, this raises the
  172. :class:`celery.exceptions.TimeoutError` exception if the operation
  173. takes longer than `timeout` seconds.
  174. """
  175. time_elapsed = 0.0
  176. while 1:
  177. meta = self.get_task_meta(task_id)
  178. if meta['status'] in states.READY_STATES:
  179. return meta
  180. if on_interval:
  181. on_interval()
  182. # avoid hammering the CPU checking status.
  183. time.sleep(interval)
  184. time_elapsed += interval
  185. if timeout and time_elapsed >= timeout:
  186. raise TimeoutError('The operation timed out.')
  187. def prepare_expires(self, value, type=None):
  188. if value is None:
  189. value = self.app.conf.CELERY_TASK_RESULT_EXPIRES
  190. if isinstance(value, timedelta):
  191. value = value.total_seconds()
  192. if value is not None and type:
  193. return type(value)
  194. return value
  195. def prepare_persistent(self, enabled=None):
  196. if enabled is not None:
  197. return enabled
  198. p = self.app.conf.CELERY_RESULT_PERSISTENT
  199. return self.persistent if p is None else p
  200. def encode_result(self, result, status):
  201. if status in self.EXCEPTION_STATES and isinstance(result, Exception):
  202. return self.prepare_exception(result)
  203. else:
  204. return self.prepare_value(result)
  205. def is_cached(self, task_id):
  206. return task_id in self._cache
  207. def store_result(self, task_id, result, status,
  208. traceback=None, request=None, **kwargs):
  209. """Update task state and result."""
  210. result = self.encode_result(result, status)
  211. self._store_result(task_id, result, status, traceback,
  212. request=request, **kwargs)
  213. return result
  214. def forget(self, task_id):
  215. self._cache.pop(task_id, None)
  216. self._forget(task_id)
  217. def _forget(self, task_id):
  218. raise NotImplementedError('backend does not implement forget.')
  219. def get_status(self, task_id):
  220. """Get the status of a task."""
  221. return self.get_task_meta(task_id)['status']
  222. def get_traceback(self, task_id):
  223. """Get the traceback for a failed task."""
  224. return self.get_task_meta(task_id).get('traceback')
  225. def get_result(self, task_id):
  226. """Get the result of a task."""
  227. return self.get_task_meta(task_id).get('result')
  228. def get_children(self, task_id):
  229. """Get the list of subtasks sent by a task."""
  230. try:
  231. return self.get_task_meta(task_id)['children']
  232. except KeyError:
  233. pass
  234. def get_task_meta(self, task_id, cache=True):
  235. if cache:
  236. try:
  237. return self._cache[task_id]
  238. except KeyError:
  239. pass
  240. meta = self._get_task_meta_for(task_id)
  241. if cache and meta.get('status') == states.SUCCESS:
  242. self._cache[task_id] = meta
  243. return meta
  244. def reload_task_result(self, task_id):
  245. """Reload task result, even if it has been previously fetched."""
  246. self._cache[task_id] = self.get_task_meta(task_id, cache=False)
  247. def reload_group_result(self, group_id):
  248. """Reload group result, even if it has been previously fetched."""
  249. self._cache[group_id] = self.get_group_meta(group_id, cache=False)
  250. def get_group_meta(self, group_id, cache=True):
  251. if cache:
  252. try:
  253. return self._cache[group_id]
  254. except KeyError:
  255. pass
  256. meta = self._restore_group(group_id)
  257. if cache and meta is not None:
  258. self._cache[group_id] = meta
  259. return meta
  260. def restore_group(self, group_id, cache=True):
  261. """Get the result for a group."""
  262. meta = self.get_group_meta(group_id, cache=cache)
  263. if meta:
  264. return meta['result']
  265. def save_group(self, group_id, result):
  266. """Store the result of an executed group."""
  267. return self._save_group(group_id, result)
  268. def delete_group(self, group_id):
  269. self._cache.pop(group_id, None)
  270. return self._delete_group(group_id)
  271. def cleanup(self):
  272. """Backend cleanup. Is run by
  273. :class:`celery.task.DeleteExpiredTaskMetaTask`."""
  274. pass
  275. def process_cleanup(self):
  276. """Cleanup actions to do at the end of a task worker process."""
  277. pass
  278. def on_task_call(self, producer, task_id):
  279. return {}
  280. def add_to_chord(self, chord_id, result):
  281. raise NotImplementedError('Backend does not support add_to_chord')
  282. def on_chord_part_return(self, request, state, result, propagate=False):
  283. pass
  284. def fallback_chord_unlock(self, group_id, body, result=None,
  285. countdown=1, **kwargs):
  286. kwargs['result'] = [r.as_tuple() for r in result]
  287. self.app.tasks['celery.chord_unlock'].apply_async(
  288. (group_id, body,), kwargs, countdown=countdown,
  289. )
  290. def apply_chord(self, header, partial_args, group_id, body,
  291. options={}, **kwargs):
  292. fixed_options = {k: v for k, v in items(options) if k != 'task_id'}
  293. result = header(*partial_args, task_id=group_id, **fixed_options or {})
  294. self.fallback_chord_unlock(group_id, body, **kwargs)
  295. return result
  296. def current_task_children(self, request=None):
  297. request = request or getattr(current_task(), 'request', None)
  298. if request:
  299. return [r.as_tuple() for r in getattr(request, 'children', [])]
  300. def __reduce__(self, args=(), kwargs={}):
  301. return (unpickle_backend, (self.__class__, args, kwargs))
  302. BaseDictBackend = BaseBackend # XXX compat
  303. class KeyValueStoreBackend(BaseBackend):
  304. key_t = ensure_bytes
  305. task_keyprefix = 'celery-task-meta-'
  306. group_keyprefix = 'celery-taskset-meta-'
  307. chord_keyprefix = 'chord-unlock-'
  308. implements_incr = False
  309. def __init__(self, *args, **kwargs):
  310. if hasattr(self.key_t, '__func__'):
  311. self.key_t = self.key_t.__func__ # remove binding
  312. self._encode_prefixes()
  313. super(KeyValueStoreBackend, self).__init__(*args, **kwargs)
  314. if self.implements_incr:
  315. self.apply_chord = self._apply_chord_incr
  316. def _encode_prefixes(self):
  317. self.task_keyprefix = self.key_t(self.task_keyprefix)
  318. self.group_keyprefix = self.key_t(self.group_keyprefix)
  319. self.chord_keyprefix = self.key_t(self.chord_keyprefix)
  320. def get(self, key):
  321. raise NotImplementedError('Must implement the get method.')
  322. def mget(self, keys):
  323. raise NotImplementedError('Does not support get_many')
  324. def set(self, key, value):
  325. raise NotImplementedError('Must implement the set method.')
  326. def delete(self, key):
  327. raise NotImplementedError('Must implement the delete method')
  328. def incr(self, key):
  329. raise NotImplementedError('Does not implement incr')
  330. def expire(self, key, value):
  331. pass
  332. def get_key_for_task(self, task_id, key=''):
  333. """Get the cache key for a task by id."""
  334. key_t = self.key_t
  335. return key_t('').join([
  336. self.task_keyprefix, key_t(task_id), key_t(key),
  337. ])
  338. def get_key_for_group(self, group_id, key=''):
  339. """Get the cache key for a group by id."""
  340. key_t = self.key_t
  341. return key_t('').join([
  342. self.group_keyprefix, key_t(group_id), key_t(key),
  343. ])
  344. def get_key_for_chord(self, group_id, key=''):
  345. """Get the cache key for the chord waiting on group with given id."""
  346. key_t = self.key_t
  347. return key_t('').join([
  348. self.chord_keyprefix, key_t(group_id), key_t(key),
  349. ])
  350. def _strip_prefix(self, key):
  351. """Takes bytes, emits string."""
  352. key = self.key_t(key)
  353. for prefix in self.task_keyprefix, self.group_keyprefix:
  354. if key.startswith(prefix):
  355. return bytes_to_str(key[len(prefix):])
  356. return bytes_to_str(key)
  357. def _filter_ready(self, values, READY_STATES=states.READY_STATES):
  358. for k, v in values:
  359. if v is not None:
  360. v = self.decode_result(v)
  361. if v['status'] in READY_STATES:
  362. yield k, v
  363. def _mget_to_results(self, values, keys):
  364. if hasattr(values, 'items'):
  365. # client returns dict so mapping preserved.
  366. return {
  367. self._strip_prefix(k): v
  368. for k, v in self._filter_ready(items(values))
  369. }
  370. else:
  371. # client returns list so need to recreate mapping.
  372. return {
  373. bytes_to_str(keys[i]): v
  374. for i, v in self._filter_ready(enumerate(values))
  375. }
  376. def get_many(self, task_ids, timeout=None, interval=0.5, no_ack=True,
  377. on_message=None,
  378. READY_STATES=states.READY_STATES):
  379. interval = 0.5 if interval is None else interval
  380. ids = task_ids if isinstance(task_ids, set) else set(task_ids)
  381. cached_ids = set()
  382. cache = self._cache
  383. for task_id in ids:
  384. try:
  385. cached = cache[task_id]
  386. except KeyError:
  387. pass
  388. else:
  389. if cached['status'] in READY_STATES:
  390. yield bytes_to_str(task_id), cached
  391. cached_ids.add(task_id)
  392. ids.difference_update(cached_ids)
  393. iterations = 0
  394. while ids:
  395. keys = list(ids)
  396. r = self._mget_to_results(self.mget([self.get_key_for_task(k)
  397. for k in keys]), keys)
  398. cache.update(r)
  399. ids.difference_update({bytes_to_str(v) for v in r})
  400. for key, value in items(r):
  401. if on_message is not None:
  402. on_message(value)
  403. yield bytes_to_str(key), value
  404. if timeout and iterations * interval >= timeout:
  405. raise TimeoutError('Operation timed out ({0})'.format(timeout))
  406. time.sleep(interval) # don't busy loop.
  407. iterations += 1
  408. def _forget(self, task_id):
  409. self.delete(self.get_key_for_task(task_id))
  410. def _store_result(self, task_id, result, status,
  411. traceback=None, request=None, **kwargs):
  412. meta = {'status': status, 'result': result, 'traceback': traceback,
  413. 'children': self.current_task_children(request)}
  414. self.set(self.get_key_for_task(task_id), self.encode(meta))
  415. return result
  416. def _save_group(self, group_id, result):
  417. self.set(self.get_key_for_group(group_id),
  418. self.encode({'result': result.as_tuple()}))
  419. return result
  420. def _delete_group(self, group_id):
  421. self.delete(self.get_key_for_group(group_id))
  422. def _get_task_meta_for(self, task_id):
  423. """Get task metadata for a task by id."""
  424. meta = self.get(self.get_key_for_task(task_id))
  425. if not meta:
  426. return {'status': states.PENDING, 'result': None}
  427. return self.decode_result(meta)
  428. def _restore_group(self, group_id):
  429. """Get task metadata for a task by id."""
  430. meta = self.get(self.get_key_for_group(group_id))
  431. # previously this was always pickled, but later this
  432. # was extended to support other serializers, so the
  433. # structure is kind of weird.
  434. if meta:
  435. meta = self.decode(meta)
  436. result = meta['result']
  437. meta['result'] = result_from_tuple(result, self.app)
  438. return meta
  439. def _apply_chord_incr(self, header, partial_args, group_id, body,
  440. result=None, options={}, **kwargs):
  441. self.save_group(group_id, self.app.GroupResult(group_id, result))
  442. fixed_options = {k: v for k, v in items(options) if k != 'task_id'}
  443. return header(*partial_args, task_id=group_id, **fixed_options or {})
  444. def on_chord_part_return(self, request, state, result, propagate=None):
  445. if not self.implements_incr:
  446. return
  447. app = self.app
  448. if propagate is None:
  449. propagate = app.conf.CELERY_CHORD_PROPAGATES
  450. gid = request.group
  451. if not gid:
  452. return
  453. key = self.get_key_for_chord(gid)
  454. try:
  455. deps = GroupResult.restore(gid, backend=self)
  456. except Exception as exc:
  457. callback = maybe_signature(request.chord, app=app)
  458. logger.error('Chord %r raised: %r', gid, exc, exc_info=1)
  459. return self.chord_error_from_stack(
  460. callback,
  461. ChordError('Cannot restore group: {0!r}'.format(exc)),
  462. )
  463. if deps is None:
  464. try:
  465. raise ValueError(gid)
  466. except ValueError as exc:
  467. callback = maybe_signature(request.chord, app=app)
  468. logger.error('Chord callback %r raised: %r', gid, exc,
  469. exc_info=1)
  470. return self.chord_error_from_stack(
  471. callback,
  472. ChordError('GroupResult {0} no longer exists'.format(gid)),
  473. )
  474. val = self.incr(key)
  475. size = len(deps)
  476. if val > size:
  477. logger.warning('Chord counter incremented too many times for %r',
  478. gid)
  479. elif val == size:
  480. callback = maybe_signature(request.chord, app=app)
  481. j = deps.join_native if deps.supports_native_join else deps.join
  482. try:
  483. with allow_join_result():
  484. ret = j(timeout=3.0, propagate=propagate)
  485. except Exception as exc:
  486. try:
  487. culprit = next(deps._failed_join_report())
  488. reason = 'Dependency {0.id} raised {1!r}'.format(
  489. culprit, exc,
  490. )
  491. except StopIteration:
  492. reason = repr(exc)
  493. logger.error('Chord %r raised: %r', gid, reason, exc_info=1)
  494. self.chord_error_from_stack(callback, ChordError(reason))
  495. else:
  496. try:
  497. callback.delay(ret)
  498. except Exception as exc:
  499. logger.error('Chord %r raised: %r', gid, exc, exc_info=1)
  500. self.chord_error_from_stack(
  501. callback,
  502. ChordError('Callback error: {0!r}'.format(exc)),
  503. )
  504. finally:
  505. deps.delete()
  506. self.client.delete(key)
  507. else:
  508. self.expire(key, 86400)
  509. class DisabledBackend(BaseBackend):
  510. _cache = {} # need this attribute to reset cache in tests.
  511. def store_result(self, *args, **kwargs):
  512. pass
  513. def _is_disabled(self, *args, **kwargs):
  514. raise NotImplementedError(
  515. 'No result backend configured. '
  516. 'Please see the documentation for more information.')
  517. wait_for = get_status = get_result = get_traceback = _is_disabled
  518. get_many = _is_disabled