trace.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612
  1. # -*- coding: utf-8 -*-
  2. """
  3. celery.app.trace
  4. ~~~~~~~~~~~~~~~~
  5. This module defines how the task execution is traced:
  6. errors are recorded, handlers are applied and so on.
  7. """
  8. from __future__ import absolute_import
  9. # ## ---
  10. # This is the heart of the worker, the inner loop so to speak.
  11. # It used to be split up into nice little classes and methods,
  12. # but in the end it only resulted in bad performance and horrible tracebacks,
  13. # so instead we now use one closure per task class.
  14. import logging
  15. import os
  16. import sys
  17. from collections import namedtuple
  18. from warnings import warn
  19. from billiard.einfo import ExceptionInfo
  20. from kombu.exceptions import EncodeError
  21. from kombu.serialization import loads as loads_message, prepare_accept_content
  22. from kombu.utils.encoding import safe_repr, safe_str
  23. from celery import current_app, group
  24. from celery import states, signals
  25. from celery._state import _task_stack
  26. from celery.app import set_default_app
  27. from celery.app.task import Task as BaseTask, Context
  28. from celery.exceptions import Ignore, Reject, Retry, InvalidTaskError
  29. from celery.five import monotonic
  30. from celery.utils import gethostname
  31. from celery.utils.log import get_logger
  32. from celery.utils.objects import mro_lookup
  33. from celery.utils.saferepr import saferepr
  34. from celery.utils.serialization import (
  35. get_pickleable_exception, get_pickled_exception, get_pickleable_etype,
  36. )
  37. from celery.utils.text import truncate
  38. __all__ = ['TraceInfo', 'build_tracer', 'trace_task',
  39. 'setup_worker_optimizations', 'reset_worker_optimizations']
  40. logger = get_logger(__name__)
  41. info = logger.info
  42. #: Format string used to log task success.
  43. LOG_SUCCESS = """\
  44. Task %(name)s[%(id)s] succeeded in %(runtime)ss: %(return_value)s\
  45. """
  46. #: Format string used to log task failure.
  47. LOG_FAILURE = """\
  48. Task %(name)s[%(id)s] %(description)s: %(exc)s\
  49. """
  50. #: Format string used to log task internal error.
  51. LOG_INTERNAL_ERROR = """\
  52. Task %(name)s[%(id)s] %(description)s: %(exc)s\
  53. """
  54. #: Format string used to log task ignored.
  55. LOG_IGNORED = """\
  56. Task %(name)s[%(id)s] %(description)s\
  57. """
  58. #: Format string used to log task rejected.
  59. LOG_REJECTED = """\
  60. Task %(name)s[%(id)s] %(exc)s\
  61. """
  62. #: Format string used to log task retry.
  63. LOG_RETRY = """\
  64. Task %(name)s[%(id)s] retry: %(exc)s\
  65. """
  66. log_policy_t = namedtuple(
  67. 'log_policy_t', ('format', 'description', 'severity', 'traceback', 'mail'),
  68. )
  69. log_policy_reject = log_policy_t(LOG_REJECTED, 'rejected', logging.WARN, 1, 1)
  70. log_policy_ignore = log_policy_t(LOG_IGNORED, 'ignored', logging.INFO, 0, 0)
  71. log_policy_internal = log_policy_t(
  72. LOG_INTERNAL_ERROR, 'INTERNAL ERROR', logging.CRITICAL, 1, 1,
  73. )
  74. log_policy_expected = log_policy_t(
  75. LOG_FAILURE, 'raised expected', logging.INFO, 0, 0,
  76. )
  77. log_policy_unexpected = log_policy_t(
  78. LOG_FAILURE, 'raised unexpected', logging.ERROR, 1, 1,
  79. )
  80. send_prerun = signals.task_prerun.send
  81. send_postrun = signals.task_postrun.send
  82. send_success = signals.task_success.send
  83. STARTED = states.STARTED
  84. SUCCESS = states.SUCCESS
  85. IGNORED = states.IGNORED
  86. REJECTED = states.REJECTED
  87. RETRY = states.RETRY
  88. FAILURE = states.FAILURE
  89. EXCEPTION_STATES = states.EXCEPTION_STATES
  90. IGNORE_STATES = frozenset({IGNORED, RETRY, REJECTED})
  91. #: set by :func:`setup_worker_optimizations`
  92. _localized = []
  93. _patched = {}
  94. trace_ok_t = namedtuple('trace_ok_t', ('retval', 'info', 'runtime', 'retstr'))
  95. def task_has_custom(task, attr):
  96. """Return true if the task or one of its bases
  97. defines ``attr`` (excluding the one in BaseTask)."""
  98. return mro_lookup(task.__class__, attr, stop={BaseTask, object},
  99. monkey_patched=['celery.app.task'])
  100. def get_log_policy(task, einfo, exc):
  101. if isinstance(exc, Reject):
  102. return log_policy_reject
  103. elif isinstance(exc, Ignore):
  104. return log_policy_ignore
  105. elif einfo.internal:
  106. return log_policy_internal
  107. else:
  108. if task.throws and isinstance(exc, task.throws):
  109. return log_policy_expected
  110. return log_policy_unexpected
  111. class TraceInfo(object):
  112. __slots__ = ('state', 'retval')
  113. def __init__(self, state, retval=None):
  114. self.state = state
  115. self.retval = retval
  116. def handle_error_state(self, task, req,
  117. eager=False, call_errbacks=True):
  118. store_errors = not eager
  119. if task.ignore_result:
  120. store_errors = task.store_errors_even_if_ignored
  121. return {
  122. RETRY: self.handle_retry,
  123. FAILURE: self.handle_failure,
  124. }[self.state](task, req,
  125. store_errors=store_errors,
  126. call_errbacks=call_errbacks)
  127. def handle_reject(self, task, req, **kwargs):
  128. self._log_error(task, req, ExceptionInfo())
  129. def handle_ignore(self, task, req, **kwargs):
  130. self._log_error(task, req, ExceptionInfo())
  131. def handle_retry(self, task, req, store_errors=True, **kwargs):
  132. """Handle retry exception."""
  133. # the exception raised is the Retry semi-predicate,
  134. # and it's exc' attribute is the original exception raised (if any).
  135. type_, _, tb = sys.exc_info()
  136. try:
  137. reason = self.retval
  138. einfo = ExceptionInfo((type_, reason, tb))
  139. if store_errors:
  140. task.backend.mark_as_retry(
  141. req.id, reason.exc, einfo.traceback, request=req,
  142. )
  143. task.on_retry(reason.exc, req.id, req.args, req.kwargs, einfo)
  144. signals.task_retry.send(sender=task, request=req,
  145. reason=reason, einfo=einfo)
  146. info(LOG_RETRY, {
  147. 'id': req.id, 'name': task.name,
  148. 'exc': safe_repr(reason.exc),
  149. })
  150. return einfo
  151. finally:
  152. del(tb)
  153. def handle_failure(self, task, req, store_errors=True, call_errbacks=True):
  154. """Handle exception."""
  155. type_, _, tb = sys.exc_info()
  156. try:
  157. exc = self.retval
  158. einfo = ExceptionInfo()
  159. einfo.exception = get_pickleable_exception(einfo.exception)
  160. einfo.type = get_pickleable_etype(einfo.type)
  161. task.backend.mark_as_failure(
  162. req.id, exc, einfo.traceback,
  163. request=req, store_result=store_errors,
  164. call_errbacks=call_errbacks,
  165. )
  166. task.on_failure(exc, req.id, req.args, req.kwargs, einfo)
  167. signals.task_failure.send(sender=task, task_id=req.id,
  168. exception=exc, args=req.args,
  169. kwargs=req.kwargs,
  170. traceback=tb,
  171. einfo=einfo)
  172. self._log_error(task, req, einfo)
  173. return einfo
  174. finally:
  175. del(tb)
  176. def _log_error(self, task, req, einfo):
  177. eobj = einfo.exception = get_pickled_exception(einfo.exception)
  178. exception, traceback, exc_info, sargs, skwargs = (
  179. safe_repr(eobj),
  180. safe_str(einfo.traceback),
  181. einfo.exc_info,
  182. safe_repr(req.args),
  183. safe_repr(req.kwargs),
  184. )
  185. policy = get_log_policy(task, einfo, eobj)
  186. context = {
  187. 'hostname': req.hostname,
  188. 'id': req.id,
  189. 'name': task.name,
  190. 'exc': exception,
  191. 'traceback': traceback,
  192. 'args': sargs,
  193. 'kwargs': skwargs,
  194. 'description': policy.description,
  195. 'internal': einfo.internal,
  196. }
  197. logger.log(policy.severity, policy.format.strip(), context,
  198. exc_info=exc_info if policy.traceback else None,
  199. extra={'data': context})
  200. if policy.mail:
  201. task.send_error_email(context, einfo.exception)
  202. def build_tracer(name, task, loader=None, hostname=None, store_errors=True,
  203. Info=TraceInfo, eager=False, propagate=False, app=None,
  204. monotonic=monotonic, truncate=truncate,
  205. trace_ok_t=trace_ok_t, IGNORE_STATES=IGNORE_STATES):
  206. """Return a function that traces task execution; catches all
  207. exceptions and updates result backend with the state and result
  208. If the call was successful, it saves the result to the task result
  209. backend, and sets the task status to `"SUCCESS"`.
  210. If the call raises :exc:`~@Retry`, it extracts
  211. the original exception, uses that as the result and sets the task state
  212. to `"RETRY"`.
  213. If the call results in an exception, it saves the exception as the task
  214. result, and sets the task state to `"FAILURE"`.
  215. Return a function that takes the following arguments:
  216. :param uuid: The id of the task.
  217. :param args: List of positional args to pass on to the function.
  218. :param kwargs: Keyword arguments mapping to pass on to the function.
  219. :keyword request: Request dict.
  220. """
  221. # If the task doesn't define a custom __call__ method
  222. # we optimize it away by simply calling the run method directly,
  223. # saving the extra method call and a line less in the stack trace.
  224. fun = task if task_has_custom(task, '__call__') else task.run
  225. loader = loader or app.loader
  226. backend = task.backend
  227. ignore_result = task.ignore_result
  228. track_started = task.track_started
  229. track_started = not eager and (task.track_started and not ignore_result)
  230. publish_result = not eager and not ignore_result
  231. hostname = hostname or gethostname()
  232. loader_task_init = loader.on_task_init
  233. loader_cleanup = loader.on_process_cleanup
  234. task_on_success = None
  235. task_after_return = None
  236. if task_has_custom(task, 'on_success'):
  237. task_on_success = task.on_success
  238. if task_has_custom(task, 'after_return'):
  239. task_after_return = task.after_return
  240. store_result = backend.store_result
  241. mark_as_done = backend.mark_as_done
  242. backend_cleanup = backend.process_cleanup
  243. pid = os.getpid()
  244. request_stack = task.request_stack
  245. push_request = request_stack.push
  246. pop_request = request_stack.pop
  247. push_task = _task_stack.push
  248. pop_task = _task_stack.pop
  249. _does_info = logger.isEnabledFor(logging.INFO)
  250. resultrepr_maxsize = task.resultrepr_maxsize
  251. prerun_receivers = signals.task_prerun.receivers
  252. postrun_receivers = signals.task_postrun.receivers
  253. success_receivers = signals.task_success.receivers
  254. from celery import canvas
  255. signature = canvas.maybe_signature # maybe_ does not clone if already
  256. def on_error(request, exc, uuid, state=FAILURE, call_errbacks=True):
  257. if propagate:
  258. raise
  259. I = Info(state, exc)
  260. R = I.handle_error_state(
  261. task, request, eager=eager, call_errbacks=call_errbacks,
  262. )
  263. return I, R, I.state, I.retval
  264. def trace_task(uuid, args, kwargs, request=None):
  265. # R - is the possibly prepared return value.
  266. # I - is the Info object.
  267. # T - runtime
  268. # Rstr - textual representation of return value
  269. # retval - is the always unmodified return value.
  270. # state - is the resulting task state.
  271. # This function is very long because we have unrolled all the calls
  272. # for performance reasons, and because the function is so long
  273. # we want the main variables (I, and R) to stand out visually from the
  274. # the rest of the variables, so breaking PEP8 is worth it ;)
  275. R = I = T = Rstr = retval = state = None
  276. task_request = None
  277. time_start = monotonic()
  278. try:
  279. try:
  280. kwargs.items
  281. except AttributeError:
  282. raise InvalidTaskError(
  283. 'Task keyword arguments is not a mapping')
  284. push_task(task)
  285. task_request = Context(request or {}, args=args,
  286. called_directly=False, kwargs=kwargs)
  287. root_id = task_request.root_id or uuid
  288. push_request(task_request)
  289. try:
  290. # -*- PRE -*-
  291. if prerun_receivers:
  292. send_prerun(sender=task, task_id=uuid, task=task,
  293. args=args, kwargs=kwargs)
  294. loader_task_init(uuid, task)
  295. if track_started:
  296. store_result(
  297. uuid, {'pid': pid, 'hostname': hostname}, STARTED,
  298. request=task_request,
  299. )
  300. # -*- TRACE -*-
  301. try:
  302. R = retval = fun(*args, **kwargs)
  303. state = SUCCESS
  304. except Reject as exc:
  305. I, R = Info(REJECTED, exc), ExceptionInfo(internal=True)
  306. state, retval = I.state, I.retval
  307. I.handle_reject(task, task_request)
  308. except Ignore as exc:
  309. I, R = Info(IGNORED, exc), ExceptionInfo(internal=True)
  310. state, retval = I.state, I.retval
  311. I.handle_ignore(task, task_request)
  312. except Retry as exc:
  313. I, R, state, retval = on_error(
  314. task_request, exc, uuid, RETRY, call_errbacks=False)
  315. except Exception as exc:
  316. I, R, state, retval = on_error(task_request, exc, uuid)
  317. except BaseException as exc:
  318. raise
  319. else:
  320. try:
  321. # callback tasks must be applied before the result is
  322. # stored, so that result.children is populated.
  323. # groups are called inline and will store trail
  324. # separately, so need to call them separately
  325. # so that the trail's not added multiple times :(
  326. # (Issue #1936)
  327. callbacks = task.request.callbacks
  328. if callbacks:
  329. if len(task.request.callbacks) > 1:
  330. sigs, groups = [], []
  331. for sig in callbacks:
  332. sig = signature(sig, app=app)
  333. if isinstance(sig, group):
  334. groups.append(sig)
  335. else:
  336. sigs.append(sig)
  337. for group_ in groups:
  338. group_.apply_async(
  339. (retval,),
  340. parent_id=uuid, root_id=root_id,
  341. )
  342. if sigs:
  343. group(sigs, app=app).apply_async(
  344. (retval,),
  345. parent_id=uuid, root_id=root_id,
  346. )
  347. else:
  348. signature(callbacks[0], app=app).apply_async(
  349. (retval,), parent_id=uuid, root_id=root_id,
  350. )
  351. # execute first task in chain
  352. chain = task_request.chain
  353. if chain:
  354. signature(chain.pop(), app=app).apply_async(
  355. (retval,), chain=chain,
  356. parent_id=uuid, root_id=root_id,
  357. )
  358. mark_as_done(
  359. uuid, retval, task_request, publish_result,
  360. )
  361. except EncodeError as exc:
  362. I, R, state, retval = on_error(task_request, exc, uuid)
  363. else:
  364. if task_on_success:
  365. task_on_success(retval, uuid, args, kwargs)
  366. if success_receivers:
  367. send_success(sender=task, result=retval)
  368. if _does_info:
  369. T = monotonic() - time_start
  370. Rstr = saferepr(R, resultrepr_maxsize)
  371. info(LOG_SUCCESS, {
  372. 'id': uuid, 'name': name,
  373. 'return_value': Rstr, 'runtime': T,
  374. })
  375. # -* POST *-
  376. if state not in IGNORE_STATES:
  377. if task_after_return:
  378. task_after_return(
  379. state, retval, uuid, args, kwargs, None,
  380. )
  381. finally:
  382. try:
  383. if postrun_receivers:
  384. send_postrun(sender=task, task_id=uuid, task=task,
  385. args=args, kwargs=kwargs,
  386. retval=retval, state=state)
  387. finally:
  388. pop_task()
  389. pop_request()
  390. if not eager:
  391. try:
  392. backend_cleanup()
  393. loader_cleanup()
  394. except (KeyboardInterrupt, SystemExit, MemoryError):
  395. raise
  396. except Exception as exc:
  397. logger.error('Process cleanup failed: %r', exc,
  398. exc_info=True)
  399. except MemoryError:
  400. raise
  401. except Exception as exc:
  402. if eager:
  403. raise
  404. R = report_internal_error(task, exc)
  405. if task_request is not None:
  406. I, _, _, _ = on_error(task_request, exc, uuid)
  407. return trace_ok_t(R, I, T, Rstr)
  408. return trace_task
  409. def trace_task(task, uuid, args, kwargs, request={}, **opts):
  410. try:
  411. if task.__trace__ is None:
  412. task.__trace__ = build_tracer(task.name, task, **opts)
  413. return task.__trace__(uuid, args, kwargs, request)
  414. except Exception as exc:
  415. return trace_ok_t(report_internal_error(task, exc), None, 0.0, None)
  416. def _trace_task_ret(name, uuid, request, body, content_type,
  417. content_encoding, loads=loads_message, app=None,
  418. **extra_request):
  419. app = app or current_app._get_current_object()
  420. embed = None
  421. if content_type:
  422. accept = prepare_accept_content(app.conf.accept_content)
  423. args, kwargs, embed = loads(
  424. body, content_type, content_encoding, accept=accept,
  425. )
  426. else:
  427. args, kwargs, embed = body
  428. hostname = gethostname()
  429. request.update({
  430. 'args': args, 'kwargs': kwargs,
  431. 'hostname': hostname, 'is_eager': False,
  432. }, **embed or {})
  433. R, I, T, Rstr = trace_task(app.tasks[name],
  434. uuid, args, kwargs, request, app=app)
  435. return (1, R, T) if I else (0, Rstr, T)
  436. trace_task_ret = _trace_task_ret
  437. def _fast_trace_task(task, uuid, request, body, content_type,
  438. content_encoding, loads=loads_message, _loc=_localized,
  439. hostname=None, **_):
  440. embed = None
  441. tasks, accept, hostname = _loc
  442. if content_type:
  443. args, kwargs, embed = loads(
  444. body, content_type, content_encoding, accept=accept,
  445. )
  446. else:
  447. args, kwargs, embed = body
  448. request.update({
  449. 'args': args, 'kwargs': kwargs,
  450. 'hostname': hostname, 'is_eager': False,
  451. }, **embed or {})
  452. R, I, T, Rstr = tasks[task].__trace__(
  453. uuid, args, kwargs, request,
  454. )
  455. return (1, R, T) if I else (0, Rstr, T)
  456. def report_internal_error(task, exc):
  457. _type, _value, _tb = sys.exc_info()
  458. try:
  459. _value = task.backend.prepare_exception(exc, 'pickle')
  460. exc_info = ExceptionInfo((_type, _value, _tb), internal=True)
  461. warn(RuntimeWarning(
  462. 'Exception raised outside body: {0!r}:\n{1}'.format(
  463. exc, exc_info.traceback)))
  464. return exc_info
  465. finally:
  466. del(_tb)
  467. def setup_worker_optimizations(app, hostname=None):
  468. global trace_task_ret
  469. hostname = hostname or gethostname()
  470. # make sure custom Task.__call__ methods that calls super
  471. # will not mess up the request/task stack.
  472. _install_stack_protection()
  473. # all new threads start without a current app, so if an app is not
  474. # passed on to the thread it will fall back to the "default app",
  475. # which then could be the wrong app. So for the worker
  476. # we set this to always return our app. This is a hack,
  477. # and means that only a single app can be used for workers
  478. # running in the same process.
  479. app.set_current()
  480. set_default_app(app)
  481. # evaluate all task classes by finalizing the app.
  482. app.finalize()
  483. # set fast shortcut to task registry
  484. _localized[:] = [
  485. app._tasks,
  486. prepare_accept_content(app.conf.accept_content),
  487. hostname,
  488. ]
  489. trace_task_ret = _fast_trace_task
  490. from celery.worker import request as request_module
  491. request_module.trace_task_ret = _fast_trace_task
  492. request_module.__optimize__()
  493. def reset_worker_optimizations():
  494. global trace_task_ret
  495. trace_task_ret = _trace_task_ret
  496. try:
  497. delattr(BaseTask, '_stackprotected')
  498. except AttributeError:
  499. pass
  500. try:
  501. BaseTask.__call__ = _patched.pop('BaseTask.__call__')
  502. except KeyError:
  503. pass
  504. from celery.worker import request as request_module
  505. request_module.trace_task_ret = _trace_task_ret
  506. def _install_stack_protection():
  507. # Patches BaseTask.__call__ in the worker to handle the edge case
  508. # where people override it and also call super.
  509. #
  510. # - The worker optimizes away BaseTask.__call__ and instead
  511. # calls task.run directly.
  512. # - so with the addition of current_task and the request stack
  513. # BaseTask.__call__ now pushes to those stacks so that
  514. # they work when tasks are called directly.
  515. #
  516. # The worker only optimizes away __call__ in the case
  517. # where it has not been overridden, so the request/task stack
  518. # will blow if a custom task class defines __call__ and also
  519. # calls super().
  520. if not getattr(BaseTask, '_stackprotected', False):
  521. _patched['BaseTask.__call__'] = orig = BaseTask.__call__
  522. def __protected_call__(self, *args, **kwargs):
  523. stack = self.request_stack
  524. req = stack.top
  525. if req and not req._protected and \
  526. len(stack) == 1 and not req.called_directly:
  527. req._protected = 1
  528. return self.run(*args, **kwargs)
  529. return orig(self, *args, **kwargs)
  530. BaseTask.__call__ = __protected_call__
  531. BaseTask._stackprotected = True