
Remove the statistics feature (-s/--statistics, the celerystats management command, celery.statistics, etc.), as it is being replaced by celerymon

Ask Solem committed 15 years ago
commit ddaec795c0

+ 0 - 15
README.rst

@@ -94,10 +94,6 @@ Features
       returns a JSON-serialized data structure containing the task status,
       and the return value if completed, or exception on failure.
 
-    * The worker can collect statistics, like, how many tasks has been
-      executed by type, and the time it took to process them. Very useful
-      for monitoring and profiling.
-
     * Pool workers are supervised, so if for some reason a worker crashes
         it is automatically replaced by a new worker.
 
@@ -285,11 +281,7 @@ finish and get its return value (or exception if the task failed).
 
 So, let's execute the task again, but this time we'll keep track of the task:
 
-<<<<<<< HEAD
-    >>> result = MyTask.delay("hello")
-=======
     >>> result = add.delay(4, 4)
->>>>>>> master
     >>> result.ready() # returns True if the task has finished processing.
     False
     >>> result.result # task is not ready, so no return value yet.
@@ -305,13 +297,6 @@ If the task raises an exception, the return value of ``result.successful()``
 will be ``False``, and ``result.result`` will contain the exception instance
 raised by the task.
 
-<<<<<<< HEAD
-If the task raises an exception, the return value of ``result.successful()``
-will be ``False``, and ``result.result`` will contain the exception instance
-raised by the task.
-
-=======
->>>>>>> master
 Worker auto-discovery of tasks
 ------------------------------
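
The retained snippet above shows how a task is tracked once the stale conflict markers are gone. A minimal sketch of the full round trip, assuming only the ``add`` task from the README and a running broker plus celeryd worker:

    # Hypothetical interactive session; nothing beyond the README's ``add``
    # task and a running broker/worker is assumed.
    from tasks import add                # wherever the example task is defined

    result = add.delay(4, 4)             # returns an AsyncResult handle at once
    result.ready()                       # False while the worker is still busy
    result.result                        # None until the task has finished
    # ... after the worker has processed the task ...
    result.ready()                       # True
    result.result                        # 8
    result.successful()                  # True; False if the task raised, in
                                         # which case .result is the exception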
 

+ 0 - 0
bin/celeryinit


+ 1 - 18
celery/bin/celeryd.py

@@ -26,11 +26,6 @@
     Also run the ``celerybeat`` periodic task scheduler. Please note that
     there must only be one instance of this service.
 
-.. cmdoption:: -s, --statistics
-
-    Turn on reporting of statistics (remember to flush the statistics message
-    queue from time to time).
-
 .. cmdoption:: -d, --detach, --daemon
 
     Run in the background as a daemon.
@@ -80,17 +75,12 @@ from celery.loaders import current_loader, settings
 from celery.loaders import settings
 from celery.messaging import get_connection_info
 
-USE_STATISTICS = getattr(settings, "CELERY_STATISTICS", False)
-# Make sure the setting exists.
-settings.CELERY_STATISTICS = USE_STATISTICS
-
 STARTUP_INFO_FMT = """
 Configuration ->
     * Broker -> %(conninfo)s
     * Exchange -> %(exchange)s (%(exchange_type)s)
     * Consumer -> Queue:%(consumer_queue)s Binding:%(consumer_rkey)s
     * Concurrency -> %(concurrency)s
-    * Statistics -> %(statistics)s
     * Celerybeat -> %(celerybeat)s
 """.strip()
 
@@ -104,9 +94,6 @@ OPTION_LIST = (
             help="Discard all waiting tasks before the server is started. "
                  "WARNING: This is unrecoverable, and the tasks will be "
                  "deleted from the messaging server."),
-    optparse.make_option('-s', '--statistics', default=USE_STATISTICS,
-            action="store_true", dest="statistics",
-            help="Collect statistics."),
     optparse.make_option('-f', '--logfile', default=conf.DAEMON_LOG_FILE,
             action="store", dest="logfile",
             help="Path to log file."),
@@ -145,14 +132,11 @@ def run_worker(concurrency=conf.DAEMON_CONCURRENCY, detach=False,
         loglevel=conf.DAEMON_LOG_LEVEL, logfile=conf.DAEMON_LOG_FILE,
         discard=False, pidfile=conf.DAEMON_PID_FILE, umask=0,
         uid=None, gid=None, working_directory=None,
-        chroot=None, statistics=None, run_clockservice=False, **kwargs):
+        chroot=None, run_clockservice=False, **kwargs):
     """Starts the celery worker server."""
 
     print("Celery %s is starting." % __version__)
 
-    if statistics is not None:
-        settings.CELERY_STATISTICS = statistics
-
     if not concurrency:
         concurrency = multiprocessing.cpu_count()
 
@@ -190,7 +174,6 @@ def run_worker(concurrency=conf.DAEMON_CONCURRENCY, detach=False,
             "concurrency": concurrency,
             "loglevel": loglevel,
             "pidfile": pidfile,
-            "statistics": settings.CELERY_STATISTICS and "ON" or "OFF",
             "celerybeat": run_clockservice and "ON" or "OFF",
     })
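
With the statistics flag gone, the startup banner only reports the remaining fields. A quick sketch of how the ``STARTUP_INFO_FMT`` string above renders; all values below are placeholders, not real defaults:

    # Placeholder configuration values; the real ones come from the broker
    # connection info and the parsed command-line options shown in the diff.
    STARTUP_INFO_FMT = """
    Configuration ->
        * Broker -> %(conninfo)s
        * Exchange -> %(exchange)s (%(exchange_type)s)
        * Consumer -> Queue:%(consumer_queue)s Binding:%(consumer_rkey)s
        * Concurrency -> %(concurrency)s
        * Celerybeat -> %(celerybeat)s
    """.strip()

    print(STARTUP_INFO_FMT % {
        "conninfo": "amqp://guest@localhost:5672/",
        "exchange": "celery",
        "exchange_type": "direct",
        "consumer_queue": "celery",
        "consumer_rkey": "celery",
        "concurrency": 4,
        "celerybeat": "OFF",
    })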
 

+ 0 - 1
celery/conf.py

@@ -15,7 +15,6 @@ DEFAULT_LOG_FMT = '[%(asctime)s: %(levelname)s/%(processName)s] %(message)s'
 DEFAULT_DAEMON_LOG_LEVEL = "WARN"
 DEFAULT_DAEMON_LOG_FILE = "celeryd.log"
 DEFAULT_AMQP_CONNECTION_TIMEOUT = 4
-DEFAULT_STATISTICS = False
 DEFAULT_ALWAYS_EAGER = False
 DEFAULT_TASK_RESULT_EXPIRES = timedelta(days=5)
 DEFAULT_AMQP_CONNECTION_RETRY = True

+ 0 - 23
celery/management/commands/celerystats.py

@@ -1,23 +0,0 @@
-"""
-
-Start the celery daemon from the Django management command.
-
-"""
-from django.core.management.base import BaseCommand
-
-from celery.statistics import StatsCollector
-
-
-class Command(BaseCommand):
-    """Collect/flush and dump a report from the currently available
-    statistics."""
-    option_list = BaseCommand.option_list
-    help = "Collect/flush and dump a report from the currently available " + \
-            "statistics"
-
-    def handle(self, *args, **options):
-        """Handle the management command."""
-        stats = StatsCollector()
-        print("* Gathering statistics...")
-        stats.collect()
-        stats.report()

+ 0 - 14
celery/messaging.py

@@ -72,20 +72,6 @@ class TaskConsumer(Consumer):
     no_ack = False
 
 
-class StatsPublisher(Publisher):
-    exchange = "celerygraph"
-    exchange_type = "direct"
-    routing_key = "stats"
-
-
-class StatsConsumer(Consumer):
-    queue = "celerygraph"
-    exchange = "celerygraph"
-    routing_key = "stats"
-    exchange_type = "direct"
-    no_ack = True
-
-
 class EventPublisher(Publisher):
     exchange = "celeryevent"
     routing_key = "event"
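
The removed ``StatsPublisher``/``StatsConsumer`` pair followed the same declarative carrot pattern as the surviving ``EventPublisher``. A rough sketch of how such a publisher is driven, mirroring the ``publish()`` method in the deleted ``celery/statistics.py`` further down; the message body here is made up for illustration:

    # Mirrors how the deleted statistics module published to its queue, but
    # uses the surviving EventPublisher; the payload shape is hypothetical.
    from carrot.connection import DjangoBrokerConnection
    from celery.messaging import EventPublisher

    connection = DjangoBrokerConnection()
    publisher = EventPublisher(connection=connection)
    publisher.send({"type": "example-event", "data": {}})
    publisher.close()
    connection.close()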

+ 0 - 236
celery/statistics.py

@@ -1,236 +0,0 @@
-"""
-
-    Publishing Statistics and Monitoring Celery.
-
-"""
-from carrot.connection import DjangoBrokerConnection
-from celery.messaging import StatsPublisher, StatsConsumer
-from celery.loaders import settings
-from django.core.cache import cache
-import time
-
-DEFAULT_CACHE_KEY_PREFIX = "celery-statistics"
-
-
-class Statistics(object):
-    """Base class for classes publishing celery statistics.
-
-    .. attribute:: type
-
-        **REQUIRED** The type of statistics this class handles.
-
-    **Required handlers**
-
-        * on_start()
-
-        * on_stop()
-
-    """
-    type = None
-
-    def __init__(self, **kwargs):
-        self.enabled = getattr(settings, "CELERY_STATISTICS", False)
-        if not self.type:
-            raise NotImplementedError(
-                "Statistic classes must define their type.")
-
-    def publish(self, **data):
-        """Publish statistics to be collected later by
-        :class:`StatsCollector`.
-
-        :param data: An arbitrary Python object containing the statistics
-            to be published.
-
-        """
-        if not self.enabled:
-            return
-        connection = DjangoBrokerConnection()
-        publisher = StatsPublisher(connection=connection)
-        publisher.send({"type": self.type, "data": data})
-        publisher.close()
-        connection.close()
-
-    @classmethod
-    def start(cls, *args, **kwargs):
-        """Convenience method instantiating and running :meth:`run` in
-        one swoop."""
-        stat = cls()
-        stat.run(*args, **kwargs)
-        return stat
-
-    def run(self, *args, **kwargs):
-        """Start producing statistics."""
-        if self.enabled:
-            return self.on_start(*args, **kwargs)
-
-    def stop(self, *args, **kwargs):
-        """Stop producing and publish statistics."""
-        if self.enabled:
-            return self.on_finish(*args, **kwargs)
-
-    def on_start(self, *args, **kwargs):
-        """What to do when the :meth:`run` method is called."""
-        raise NotImplementedError(
-                "Statistics classes must define a on_start handler.")
-
-    def on_stop(self, *args, **kwargs):
-        """What to do when the :meth:`stop` method is called."""
-        raise NotImplementedError(
-                "Statistics classes must define a on_stop handler.")
-
-
-class TimerStats(Statistics):
-    """A generic timer producing ``celery`` statistics.
-
-    .. attribute:: time_start
-
-        The time when this class was instantiated (in :func:`time.time`
-        format.)
-
-    """
-    time_start = None
-
-    def on_start(self, task_id, task_name, args, kwargs):
-        """What to do when the timers :meth:`run` method is called."""
-        self.task_id = task_id
-        self.task_name = task_name
-        self.args = args
-        self.kwargs = kwargs
-        self.time_start = time.time()
-
-    def on_finish(self):
-        """What to do when the timers :meth:`stop` method is called.
-
-        :returns: the time in seconds it took between calling :meth:`start` on
-            this class and :meth:`stop`.
-        """
-        nsecs = time.time() - self.time_start
-        self.publish(task_id=self.task_id,
-                     task_name=self.task_name,
-                     args=self.args,
-                     kwargs=self.kwargs,
-                     nsecs=str(nsecs))
-        return nsecs
-
-
-class TaskTimerStats(TimerStats):
-    """Time a running :class:`celery.task.Task`."""
-    type = "task_time_running"
-
-
-class StatsCollector(object):
-    """Collect and report Celery statistics.
-
-    **NOTE**: Please run only one collector at any time, or your stats
-        will be skewed.
-
-    .. attribute:: total_tasks_processed
-
-        The number of tasks executed in total since the first time
-        :meth:`collect` was executed on this class instance.
-
-    .. attribute:: total_tasks_processed_by_type
-
-        A dictionary of task names and how many times they have been
-        executed in total since the first time :meth:`collect` was executed
-        on this class instance.
-
-    .. attribute:: total_task_time_running
-
-        The total time, in seconds, it took to process all the tasks executed
-        since the first time :meth:`collect` was executed on this class
-        instance.
-
-    .. attribute:: total_task_time_running_by_type
-
-        A dictionary of task names and their total running time in seconds,
-        counting all the tasks that has been run since the first time
-        :meth:`collect` was executed on this class instance.
-
-    **NOTE**: You have to run :meth:`collect` for these attributes
-        to be filled.
-
-
-    """
-
-    allowed_types = ["task_time_running"]
-
-    def __init__(self):
-        self.total_tasks_processed = 0
-        self.total_tasks_processed_by_type = {}
-        self.total_task_time_running = 0.0
-        self.total_task_time_running_by_type = {}
-
-    def collect(self):
-        """Collect any new statistics available since the last time
-        :meth:`collect` was executed."""
-        connection = DjangoBrokerConnection()
-        consumer = StatsConsumer(connection=connection)
-        it = consumer.iterqueue(infinite=False)
-        for message in it:
-            stats_entry = message.decode()
-            stat_type = stats_entry["type"]
-            if stat_type in self.allowed_types:
-                # Decode keys to unicode for use as kwargs.
-                data = dict((key.encode("utf-8"), value)
-                                for key, value in stats_entry["data"].items())
-                handler = getattr(self, stat_type)
-                handler(**data)
-
-    def dump_to_cache(self, cache_key_prefix=DEFAULT_CACHE_KEY_PREFIX):
-        """Store collected statistics in the cache."""
-        cache.set("%s-total_tasks_processed" % cache_key_prefix,
-                self.total_tasks_processed)
-        cache.set("%s-total_tasks_processed_by_type" % cache_key_prefix,
-                    self.total_tasks_processed_by_type)
-        cache.set("%s-total_task_time_running" % cache_key_prefix,
-                    self.total_task_time_running)
-        cache.set("%s-total_task_time_running_by_type" % cache_key_prefix,
-                    self.total_task_time_running_by_type)
-
-    def task_time_running(self, task_id, task_name, args, kwargs, nsecs):
-        """Process statistics regarding how long a task has been running
-        (the :class:TaskTimerStats` class is responsible for sending these).
-
-        :param task_id: The UUID of the task.
-        :param task_name: The name of task.
-        :param args: The tasks positional arguments.
-        :param kwargs: The tasks keyword arguments.
-        :param nsecs: The number of seconds (in :func:`time.time` format)
-            it took to execute the task.
-
-        """
-        nsecs = float(nsecs)
-        self.total_tasks_processed += 1
-        self.total_task_time_running += nsecs
-        if task_name not in self.total_task_time_running_by_type:
-            self.total_task_time_running_by_type[task_name] = nsecs
-        else:
-            self.total_task_time_running_by_type[task_name] += nsecs
-        if task_name not in self.total_tasks_processed_by_type:
-            self.total_tasks_processed_by_type[task_name] = 1
-        else:
-            self.total_tasks_processed_by_type[task_name] += 1
-
-    def report(self):
-        """Dump a nice statistics report from the data collected since
-        the first time :meth:`collect` was executed on this instance.
-
-        It outputs the following information:
-
-            * Total processing time by task type and how many times each
-                task has been excuted.
-
-            * Total task processing time.
-
-            * Total number of tasks executed
-
-        """
-        print("Total processing time by task type:")
-        for task_name, nsecs in self.total_task_time_running_by_type.items():
-            print("\t%s: %s secs. (for a total of %d executed.)" % (
-                    task_name, nsecs,
-                    self.total_tasks_processed_by_type.get(task_name)))
-        print("Total task processing time: %s secs." % (
-            self.total_task_time_running))
-        print("Total tasks processed: %d" % self.total_tasks_processed)
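
Stripped of the broker plumbing, the deleted collector boils down to summing per-task timings keyed by task name. A self-contained sketch of that aggregation, with an in-memory list standing in for the ``celerygraph`` queue:

    import time

    published = []    # stand-in for the queue StatsCollector.collect() drained

    def timed_task(task_name):
        """Record a task_time_running entry, like the removed TaskTimerStats."""
        start = time.time()
        # ... the task body would run here ...
        published.append({"type": "task_time_running",
                          "data": {"task_name": task_name,
                                   "nsecs": str(time.time() - start)}})

    timed_task("tasks.add")
    timed_task("tasks.add")

    total_time = {}     # seconds spent per task type
    total_count = {}    # executions per task type
    for entry in published:
        if entry["type"] != "task_time_running":
            continue
        name = entry["data"]["task_name"]
        nsecs = float(entry["data"]["nsecs"])
        total_time[name] = total_time.get(name, 0.0) + nsecs
        total_count[name] = total_count.get(name, 0) + 1

    print(total_time, total_count)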

+ 0 - 96
celery/tests/test_statistics.py

@@ -1,96 +0,0 @@
-from __future__ import with_statement
-import unittest
-import time
-from celery.statistics import TaskTimerStats, Statistics, StatsCollector
-from carrot.connection import DjangoBrokerConnection
-from celery.messaging import StatsConsumer
-from celery.tests.utils import override_stdouts
-
-
-class PartialStatistics(Statistics):
-    type = "c.u.partial"
-
-
-class TestStatisticsInterface(unittest.TestCase):
-
-    def test_must_have_type(self):
-        self.assertRaises(NotImplementedError, Statistics)
-
-    def test_must_have_on_start(self):
-        self.assertRaises(NotImplementedError, PartialStatistics().on_start)
-
-    def test_must_have_on_stop(self):
-        self.assertRaises(NotImplementedError, PartialStatistics().on_stop)
-
-
-class TestTaskTimerStats(unittest.TestCase):
-
-    def test_time(self):
-        self.assertTimeElapsed(0.5, 1, 0, "0.5")
-        self.assertTimeElapsed(0.002, 0.05, 0, "0.0")
-        self.assertTimeElapsed(0.1, 0.5, 0, "0.1")
-
-    def test_not_enabled(self):
-        t = TaskTimerStats()
-        t.enabled = False
-        self.assertFalse(t.publish(isnot="enabled"))
-        self.assertFalse(getattr(t, "time_start", None))
-        t.run("foo", "bar", [], {})
-        t.stop()
-
-    def assertTimeElapsed(self, time_sleep, max_appx, min_appx, appx):
-        t = TaskTimerStats()
-        t.enabled = True
-        t.run("foo", "bar", [], {})
-        self.assertTrue(t.time_start)
-        time.sleep(time_sleep)
-        time_stop = t.stop()
-        self.assertTrue(time_stop)
-        self.assertFalse(time_stop > max_appx)
-        self.assertFalse(time_stop <= min_appx)
-
-        strstop = str(time_stop)[0:3]
-        # Time elapsed is approximately 0.1 seconds.
-        self.assertTrue(strstop == appx)
-
-
-class TestStatsCollector(unittest.TestCase):
-
-    def setUp(self):
-        conn = DjangoBrokerConnection()
-        consumer = StatsConsumer(connection=conn)
-        consumer.discard_all()
-        conn.close()
-        consumer.close()
-        self.s = StatsCollector()
-        self.assertEquals(self.s.total_tasks_processed, 0)
-        self.assertEquals(self.s.total_tasks_processed_by_type, {})
-        self.assertEquals(self.s.total_task_time_running, 0.0)
-        self.assertEquals(self.s.total_task_time_running_by_type, {})
-
-    def test_collect_report_dump(self):
-        timer1 = TaskTimerStats()
-        timer1.enabled = True
-        timer1.run("foo", "bar", [], {})
-        timer2 = TaskTimerStats()
-        timer2.enabled = True
-        timer2.run("foo", "bar", [], {})
-        timer3 = TaskTimerStats()
-        timer3.enabled = True
-        timer3.run("foo", "bar", [], {})
-        for timer in (timer1, timer2, timer3):
-            timer.stop()
-
-        # Collect
-        self.s.collect()
-        self.assertEquals(self.s.total_tasks_processed, 3)
-
-        # Report
-        with override_stdouts() as outs:
-            stdout, stderr = outs
-            self.s.report()
-            self.assertTrue(
-                "Total processing time by task type:" in stdout.getvalue())
-
-        # Dump to cache
-        self.s.dump_to_cache()

+ 1 - 7
celery/worker/job.py

@@ -16,7 +16,6 @@ from celery.loaders import current_loader
 from celery.execute import TaskTrace
 from celery.registry import tasks
 from celery.exceptions import NotRegistered
-from celery.statistics import TaskTimerStats
 from celery.datastructures import ExceptionInfo
 
 # pep8.py borks on a inline signature separator and
@@ -89,12 +88,7 @@ class WorkerTaskTrace(TaskTrace):
         # Backend process cleanup
         self.task.backend.process_cleanup()
 
-        timer_stat = TaskTimerStats.start(self.task_id, self.task_name,
-                                          self.args, self.kwargs)
-        try:
-            return self._trace()
-        finally:
-            timer_stat.stop()
+        return self._trace()
 
     def handle_success(self, retval, *args):
         """Handle successful execution.

+ 0 - 4
docs/introduction.rst

@@ -94,10 +94,6 @@ Features
       returns a JSON-serialized data structure containing the task status,
       and the return value if completed, or exception on failure.
 
-    * The worker can collect statistics, like, how many tasks has been
-      executed by type, and the time it took to process them. Very useful
-      for monitoring and profiling.
-
     * Pool workers are supervised, so if for some reason a worker crashes
         it is automatically replaced by a new worker.