control.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
  1. # -*- coding: utf-8 -*-
  2. """Worker remote control command implementations."""
  3. from __future__ import absolute_import, unicode_literals
  4. import io
  5. import tempfile
  6. from collections import namedtuple
  7. from billiard.common import TERM_SIGNAME
  8. from kombu.utils.encoding import safe_repr
  9. from celery.exceptions import WorkerShutdown
  10. from celery.five import UserDict, items, string_t, text_t
  11. from celery.platforms import signals as _signals
  12. from celery.utils import timeutils
  13. from celery.utils.functional import maybe_list
  14. from celery.utils.log import get_logger
  15. from celery.utils.serialization import jsonify, strtobool
  16. from . import state as worker_state
  17. from .request import Request
  # Names exported by ``from <module> import *``.
  __all__ = ['Panel']

  # Task attributes reported by ``registered`` when the caller names none.
  DEFAULT_TASK_INFO_ITEMS = ('exchange', 'routing_key', 'rate_limit')

  logger = get_logger(__name__)

  # Metadata record stored in ``Panel.meta`` for every registered command.
  controller_info_t = namedtuple(
      'controller_info_t',
      (
          'alias', 'type', 'visible', 'default_timeout',
          'help', 'signature', 'args', 'variadic',
      ),
  )
  def ok(value):
      """Wrap ``value`` in a success reply of the form ``{'ok': value}``."""
      reply = {'ok': value}
      return reply
  def nok(value):
      """Wrap ``value`` in an error reply of the form ``{'error': value}``."""
      reply = {'error': value}
      return reply
  class Panel(UserDict):
      """Global registry of remote control commands."""

      # Shared by all instances: command name (and alias) -> function.
      data = {}
      # Command name -> ``controller_info_t`` metadata record.
      meta = {}
      # Declared here; not populated by the code in this class.
      by_alias = {}

      @classmethod
      def register(cls, *args, **kwargs):
          """Register a command, usable with or without a direct call.

          With positional arguments the decorator is built from ``kwargs``
          and applied to them immediately; otherwise the decorator itself
          is returned.
          """
          decorator = cls._register(**kwargs)
          return decorator(*args) if args else decorator

      @classmethod
      def _register(cls, name=None, alias=None, type='control',
                    visible=True, default_timeout=1.0, help=None,
                    signature=None, args=None, variadic=None):
          """Build the decorator that records a function in the registry.

          The command name defaults to the function's ``__name__`` and the
          help text defaults to the first line of its docstring.
          """

          def _inner(fun):
              command = fun.__name__ if not name else name
              if help:
                  summary = help
              else:
                  # First line of the (stripped) docstring, '' if none.
                  summary = (fun.__doc__ or '').strip().partition('\n')[0]
              cls.data[command] = fun
              cls.meta[command] = controller_info_t(
                  alias, type, visible, default_timeout,
                  summary, signature, args, variadic,
              )
              if alias:
                  # The alias resolves to the same callable.
                  cls.data[alias] = fun
              return fun

          return _inner
  def control_command(**kwargs):
      """Return a ``Panel.register`` decorator with ``type='control'``.

      All keyword arguments are forwarded to ``Panel.register``.
      """
      register = Panel.register
      return register(type='control', **kwargs)
  def inspect_command(**kwargs):
      """Return a ``Panel.register`` decorator with ``type='inspect'``.

      All keyword arguments are forwarded to ``Panel.register``.
      """
      register = Panel.register
      return register(type='inspect', **kwargs)
  58. # -- App
  @inspect_command()
  def report(state):
      """Information about Celery installation for bug reports.

      Returns the result of ``state.app.bugreport()`` wrapped by ``ok``.
      """
      bugreport = state.app.bugreport()
      return ok(bugreport)
  @inspect_command(
      alias='dump_conf',  # XXX < backwards compatible
      signature='[include_defaults=False]',
      args=[('with_defaults', strtobool)],
  )
  def conf(state, with_defaults=False, **kwargs):
      """List configuration.

      Arguments:
          with_defaults (bool): Forwarded to ``state.app.conf.table``.

      The table is passed through ``jsonify``: keys are filtered with
      ``_wanted_config_key`` and values of unknown type go through
      ``safe_repr``.
      """
      table = state.app.conf.table(with_defaults=with_defaults)
      return jsonify(
          table,
          keyfilter=_wanted_config_key,
          unknown_type_filter=safe_repr,
      )
  def _wanted_config_key(key):
      """Return True for string keys that do not start with ``'__'``."""
      if not isinstance(key, string_t):
          return False
      return not key.startswith('__')
  75. # -- Task
  @inspect_command(
      variadic='ids',
      signature='[id1 [id2 [... [idN]]]]',
  )
  def query_task(state, ids, **kwargs):
      """Query for task information by id.

      Returns a dict mapping each found request id to a
      ``(state_name, request.info())`` pair; ids with no matching request
      are left out.
      """
      found = {}
      for req in _find_requests_by_id(maybe_list(ids)):
          found[req.id] = (_state_of_task(req), req.info())
      return found
  def _find_requests_by_id(ids,
                           get_request=worker_state.requests.__getitem__):
      """Yield the request for each id in ``ids`` that ``get_request`` knows.

      Ids for which ``get_request`` raises ``KeyError`` are skipped.
      """
      for wanted_id in ids:
          try:
              yield get_request(wanted_id)
          except KeyError:
              # Unknown id: nothing to yield for it.
              continue
  def _state_of_task(request,
                     is_active=worker_state.active_requests.__contains__,
                     is_reserved=worker_state.reserved_requests.__contains__):
      """Name the state of ``request``: 'active', 'reserved' or 'ready'.

      Active is checked first, so a request in both sets is 'active'.
      """
      if is_active(request):
          return 'active'
      if is_reserved(request):
          return 'reserved'
      return 'ready'
  @control_command(
      variadic='task_id',
      signature='[id1 [id2 [... [idN]]]]',
  )
  def revoke(state, task_id, terminate=False, signal=None, **kwargs):
      """Revoke task by task id (or list of ids).

      Keyword Arguments:
          terminate (bool): Also terminate the process if the task is active.
          signal (str): Name of signal to use for terminate (e.g., ``KILL``).
      """
      # supports list argument since 3.1
      wanted = set(maybe_list(task_id) or [])
      # Ids are flagged as revoked whether or not termination is requested.
      worker_state.revoked.update(wanted)

      if not terminate:
          joined = ', '.join(wanted)
          logger.info('Tasks flagged as revoked: %s', joined)
          return ok('tasks {0} flagged as revoked'.format(joined))

      signum = _signals.signum(signal or TERM_SIGNAME)
      total = len(wanted)
      killed = set()
      for request in _find_requests_by_id(wanted):
          if request.id in killed:
              continue
          killed.add(request.id)
          logger.info('Terminating %s (%s)', request.id, signum)
          request.terminate(state.consumer.pool, signal=signum)
          if len(killed) >= total:
              # Every requested id has been handled.
              break

      if killed:
          return ok('terminate: {0}'.format(', '.join(killed)))
      return ok('terminate: tasks unknown')
  @control_command(
      variadic='task_id',
      args=[('signal', text_t)],
      signature='<signal> [id1 [id2 [... [idN]]]]'
  )
  def terminate(state, signal, task_id, **kwargs):
      """Terminate task by task id (or list of ids).

      Delegates to ``revoke`` with ``terminate=True`` and the given signal.
      """
      reply = revoke(state, task_id, signal=signal, terminate=True)
      return reply
  @control_command(
      args=[('task_name', text_t), ('rate_limit', text_t)],
      signature='<task_name> <rate_limit (e.g., 5/s | 5/m | 5/h)>',
  )
  def rate_limit(state, task_name, rate_limit, **kwargs):
      """Tell worker(s) to modify the rate limit for a task by type.

      See Also:
          :attr:`celery.task.base.Task.rate_limit`.

      Arguments:
          task_name (str): Type of task to set rate limit for.
          rate_limit (int, str): New rate limit.
      """
      # Validate first; the parsed value itself is not used here.
      try:
          timeutils.rate(rate_limit)
      except ValueError as exc:
          return nok('Invalid rate limit string: {0!r}'.format(exc))

      try:
          task = state.app.tasks[task_name]
          task.rate_limit = rate_limit
      except KeyError:
          logger.error('Rate limit attempt for unknown task %s',
                       task_name, exc_info=True)
          return nok('unknown task')

      state.consumer.reset_rate_limits()

      if rate_limit:
          logger.info('New rate limit for tasks of type %s: %s.',
                      task_name, rate_limit)
          return ok('new rate limit set successfully')

      # A falsy limit is reported as disabling rate limiting.
      logger.info('Rate limits disabled for tasks of type %s', task_name)
      return ok('rate limit disabled successfully')
  @control_command(
      args=[('task_name', text_t), ('soft', float), ('hard', float)],
      signature='<task_name> <soft_secs> [hard_secs]',
  )
  def time_limit(state, task_name=None, hard=None, soft=None, **kwargs):
      """Tell worker(s) to modify the time limit for task by type.

      Arguments:
          task_name (str): Name of task to change.
          hard (float): Hard time limit.
          soft (float): Soft time limit.
      """
      registry = state.app.tasks
      try:
          task = registry[task_name]
      except KeyError:
          logger.error('Change time limit attempt for unknown task %s',
                       task_name, exc_info=True)
          return nok('unknown task')

      # Both limits are always overwritten, including with ``None``.
      task.soft_time_limit = soft
      task.time_limit = hard

      logger.info('New time limits for tasks of type %s: soft=%s hard=%s',
                  task_name, soft, hard)
      return ok('time limits set successfully')
  190. # -- Events
  @inspect_command()
  def clock(state, **kwargs):
      """Get current logical clock value.

      Returns ``{'clock': state.app.clock.value}``.
      """
      current = state.app.clock.value
      return {'clock': current}
  @control_command()
  def election(state, id, topic, action=None, **kwargs):
      """Hold election.

      Arguments:
          id (str): Unique election id.
          topic (str): Election topic.
          action (str): Action to take for elected actor.

      Only acts when ``state.consumer.gossip`` is truthy.
      """
      gossip = state.consumer.gossip
      if not gossip:
          return
      gossip.election(id, topic, action)
  @control_command()
  def enable_events(state):
      """Tell worker(s) to send task-related events.

      The 'task' group is added only when the dispatcher's group set is
      non-empty and does not contain it yet; otherwise the reply says the
      events are already enabled.
      """
      groups = state.consumer.event_dispatcher.groups
      if not groups or 'task' in groups:
          return ok('task events already enabled')
      groups.add('task')
      logger.info('Events of group {task} enabled by remote.')
      return ok('task events enabled')
  @control_command()
  def disable_events(state):
      """Tell worker(s) to stop sending task-related events.

      Removes 'task' from the dispatcher's groups when present; otherwise
      the reply says the events are already disabled.
      """
      groups = state.consumer.event_dispatcher.groups
      if 'task' not in groups:
          return ok('task events already disabled')
      groups.discard('task')
      logger.info('Events of group {task} disabled by remote.')
      return ok('task events disabled')
  @control_command()
  def heartbeat(state):
      """Tell worker(s) to send event heartbeat immediately.

      Sends a 'worker-heartbeat' event with ``freq=5`` plus the fields of
      ``worker_state.SOFTWARE_INFO``.
      """
      logger.debug('Heartbeat requested by remote.')
      send = state.consumer.event_dispatcher.send
      send('worker-heartbeat', freq=5, **worker_state.SOFTWARE_INFO)
  229. # -- Worker
  @inspect_command(visible=False)
  def hello(state, from_node, revoked=None, **kwargs):
      """Request mingle sync-data.

      Returns ``None`` when ``from_node`` equals ``state.hostname``.
      Otherwise merges any received ``revoked`` ids into the local revoked
      set and replies with that set's data and the forwarded clock.
      """
      if from_node == state.hostname:
          return None

      logger.info('sync with %s', from_node)
      if revoked:
          worker_state.revoked.update(revoked)

      revoked_data = worker_state.revoked._data
      clock_value = state.app.clock.forward()
      return {'revoked': revoked_data, 'clock': clock_value}
  @inspect_command(default_timeout=0.2)
  def ping(state, **kwargs):
      """Ping worker(s).

      Always replies with ``ok('pong')``.
      """
      reply = ok('pong')
      return reply
  @inspect_command()
  def stats(state, **kwargs):
      """Request worker statistics/information.

      Returns whatever ``state.consumer.controller.stats()`` returns.
      """
      controller = state.consumer.controller
      return controller.stats()
  @inspect_command(alias='dump_schedule')
  def scheduled(state, **kwargs):
      """List of currently scheduled ETA/countdown tasks.

      Collects the entries produced by ``_iter_schedule_requests`` for the
      consumer's timer into a list.
      """
      entries = _iter_schedule_requests(state.consumer.timer)
      return list(entries)
  def _iter_schedule_requests(timer, Request=Request):
      """Yield a description dict for each scheduled ``Request``.

      Walks ``timer.schedule.queue`` and looks at the first positional
      argument of every entry. Entries without one, or whose first argument
      is not a ``Request`` instance, are skipped.
      """
      for item in timer.schedule.queue:
          try:
              head = item.entry.args[0]
          except (IndexError, TypeError):
              # No usable first argument on this entry.
              continue
          if not isinstance(head, Request):
              continue
          eta = head.eta.isoformat() if head.eta else None
          yield {
              'eta': eta,
              'priority': item.priority,
              'request': head.info(),
          }
  @inspect_command(alias='dump_reserved')
  def reserved(state, **kwargs):
      """List of currently reserved tasks, not including scheduled/active.

      Computed as the reserved requests minus the active requests; each
      remaining request contributes its ``info()``.
      """
      all_reserved = state.tset(worker_state.reserved_requests)
      running = state.tset(worker_state.active_requests)
      pending = all_reserved - running
      # An empty difference yields an empty list.
      return [request.info() for request in pending]
  @inspect_command(alias='dump_active')
  def active(state, **kwargs):
      """List of tasks currently being executed.

      Returns ``info()`` for every request in the active request set.
      """
      running = state.tset(worker_state.active_requests)
      return [request.info() for request in running]
  @inspect_command(alias='dump_revoked')
  def revoked(state, **kwargs):
      """List of revoked task-ids.

      Returns the contents of ``worker_state.revoked`` as a list.
      """
      revoked_ids = worker_state.revoked
      return list(revoked_ids)
  @inspect_command(
      alias='dump_tasks',
      variadic='taskinfoitems',
      signature='[attr1 [attr2 [... [attrN]]]]',
  )
  def registered(state, taskinfoitems=None, builtins=False, **kwargs):
      """List of registered tasks.

      Arguments:
          taskinfoitems (Sequence[str]): List of task attributes to include.
              Defaults to ``exchange,routing_key,rate_limit``.
          builtins (bool): Also include built-in tasks.
      """
      registry = state.app.tasks
      wanted = taskinfoitems or DEFAULT_TASK_INFO_ITEMS

      if builtins:
          names = registry
      else:
          # Built-ins are recognized by the ``celery.`` name prefix.
          names = (name for name in registry if not name.startswith('celery.'))

      def _describe(task):
          # Keep only attributes that exist and are not None.
          pairs = {}
          for field in wanted:
              value = getattr(task, field, None)
              if value is not None:
                  pairs[field] = str(value)
          if not pairs:
              return task.name
          rendered = ' '.join('='.join(pair) for pair in items(pairs))
          return '{0} [{1}]'.format(task.name, rendered)

      return [_describe(registry[name]) for name in sorted(names)]
  311. # -- Debugging
  @inspect_command(
      default_timeout=60.0,
      args=[('type', text_t), ('num', int), ('max_depth', int)],
      signature='[object_type=Request] [num=200 [max_depth=10]]',
  )
  def objgraph(state, num=200, max_depth=10, type='Request'):  # pragma: no cover
      """Create graph of uncollected objects (memory-leak debugging).

      Arguments:
          num (int): Max number of objects to graph.
          max_depth (int): Traverse at most n levels deep.
          type (str): Name of object to graph.  Default is ``"Request"``.

      Returns a dict with the path of the generated file under 'filename'.
      The temporary file is created with ``delete=False``.
      """
      try:
          import objgraph as _objgraph
      except ImportError:
          raise ImportError('Requires the objgraph library')

      logger.info('Dumping graph for type %r', type)
      tmp = tempfile.NamedTemporaryFile(
          prefix='cobjg', suffix='.png', delete=False,
      )
      with tmp as fh:
          target = fh.name
          objects = _objgraph.by_type(type)[:num]
          _objgraph.show_backrefs(
              objects,
              max_depth=max_depth,
              highlight=lambda v: v in objects,
              filename=target,
          )
      return {'filename': target}
  @inspect_command()
  def memsample(state, **kwargs):
      """Sample current RSS memory usage.

      Returns the result of ``celery.utils.debug.sample_mem()``.
      """
      # Imported at call time, not at module import.
      from celery.utils import debug
      return debug.sample_mem()
  @inspect_command(
      args=[('samples', int)],
      signature='[n_samples=10]',
  )
  def memdump(state, samples=10, **kwargs):  # pragma: no cover
      """Dump statistics of previous memsample requests.

      Arguments:
          samples (int): Number of samples to include; forwarded to
              ``celery.utils.debug.memdump``.

      Returns the text that ``celery.utils.debug.memdump`` wrote, as a
      string.
      """
      # Import the module rather than the function so this command's own
      # name is not shadowed inside its body.
      from celery.utils import debug
      out = io.StringIO()
      # Forward ``samples``: it was previously accepted but never passed on,
      # so the caller's value had no effect.
      debug.memdump(samples=samples, file=out)
      return out.getvalue()
  353. # -- Pool
  @control_command(
      args=[('n', int)],
      signature='[N=1]',
  )
  def pool_grow(state, n=1, **kwargs):
      """Grow pool by n processes/threads.

      Also raises the consumer's prefetch count by ``n``.
      """
      consumer = state.consumer
      consumer.pool.grow(n)
      consumer._update_prefetch_count(n)
      return ok('pool will grow')
  @control_command(
      args=[('n', int)],
      signature='[N=1]',
  )
  def pool_shrink(state, n=1, **kwargs):
      """Shrink pool by n processes/threads.

      Also lowers the consumer's prefetch count by ``n``.
      """
      consumer = state.consumer
      consumer.pool.shrink(n)
      consumer._update_prefetch_count(-n)
      return ok('pool will shrink')
  @control_command()
  def pool_restart(state, modules=None, reload=False, reloader=None, **kwargs):
      """Restart execution pool.

      Raises:
          ValueError: If ``state.app.conf.worker_pool_restarts`` is falsy.
      """
      if not state.app.conf.worker_pool_restarts:
          raise ValueError('Pool restarts not enabled')
      controller = state.consumer.controller
      controller.reload(modules, reload, reloader=reloader)
      return ok('reload started')
  @control_command()
  def shutdown(state, msg='Got shutdown from remote', **kwargs):
      """Shutdown worker(s).

      Logs ``msg`` as a warning, then raises ``WorkerShutdown(msg)``.
      """
      logger.warning(msg)
      stop = WorkerShutdown(msg)
      raise stop
  385. # -- Queues
  @control_command(
      args=[
          ('queue', text_t),
          ('exchange', text_t),
          ('exchange_type', text_t),
          ('routing_key', text_t),
      ],
      signature='<queue> [exchange [type [routing_key]]]',
  )
  def add_consumer(state, queue, exchange=None, exchange_type=None,
                   routing_key=None, **options):
      """Tell worker(s) to consume from task queue by name.

      The work is handed to ``consumer.call_soon``; a falsy
      ``exchange_type`` is replaced by ``'direct'``.
      """
      consumer = state.consumer
      kind = exchange_type or 'direct'
      consumer.call_soon(
          consumer.add_task_queue,
          queue, exchange, kind, routing_key,
          **options
      )
      return ok('add consumer {0}'.format(queue))
  @control_command(
      args=[('queue', text_t)],
      signature='<queue>',
  )
  def cancel_consumer(state, queue, **_):
      """Tell worker(s) to stop consuming from task queue by name.

      The cancellation is handed to ``consumer.call_soon``.
      """
      consumer = state.consumer
      consumer.call_soon(consumer.cancel_task_queue, queue)
      return ok('no longer consuming from {0}'.format(queue))
  @inspect_command()
  def active_queues(state):
      """List the task queues a worker are currently consuming from.

      Returns one dict per queue (from ``queue.as_dict(recurse=True)``),
      or an empty list when the consumer has no task consumer.
      """
      task_consumer = state.consumer.task_consumer
      if not task_consumer:
          return []
      return [
          dict(queue.as_dict(recurse=True))
          for queue in task_consumer.queues
      ]
  418. return []