فهرست منبع

celery.beat: Refactored the persistent schedule into PersistentScheduler, also better merging of tasks on disk and in "registry".

Ask Solem 15 سال پیش
والد
کامیت
3580f4c75d
2فایلهای تغییر یافته به همراه119 افزوده شده و 59 حذف شده
  1. 110 51
      celery/beat.py
  2. 9 8
      celery/bin/celerybeat.py

+ 110 - 51
celery/beat.py

@@ -24,6 +24,14 @@ class SchedulingError(Exception):
     """An error occured while scheduling a task."""
     """An error occured while scheduling a task."""
 
 
 
 
+def maybe_schedule(s, relative=False):
+    if isinstance(s, int):
+        s = timedelta(seconds=s)
+    if isinstance(s, timedelta):
+        return schedule(s, relative)
+    return s
+
+
 class ScheduleEntry(object):
 class ScheduleEntry(object):
     """An entry in the scheduler.
     """An entry in the scheduler.
 
 
@@ -61,9 +69,10 @@ class ScheduleEntry(object):
     """
     """
 
 
     def __init__(self, name, schedule, args=(), kwargs={},
     def __init__(self, name, schedule, args=(), kwargs={},
-            options={}, last_run_at=None, total_run_count=None):
+            options={}, last_run_at=None, total_run_count=None,
+            relative=False):
         self.name = name
         self.name = name
-        self.schedule = schedule
+        self.schedule = maybe_schedule(schedule, relative)
         self.args = args
         self.args = args
         self.kwargs = kwargs
         self.kwargs = kwargs
         self.options = options
         self.options = options
@@ -81,6 +90,18 @@ class ScheduleEntry(object):
                               datetime.now(),
                               datetime.now(),
                               self.total_run_count + 1)
                               self.total_run_count + 1)
 
 
+    def update(self, other):
+        """Update values from another entry.
+
+        Does only update "editable" fields (schedule, args,
+        kwargs, options).
+
+        """
+        self.schedule = other.schedule
+        self.args = other.args
+        self.kwargs = other.kwargs
+        self.options = other.options
+
     def is_due(self):
     def is_due(self):
         """See :meth:`celery.task.base.PeriodicTask.is_due`."""
         """See :meth:`celery.task.base.PeriodicTask.is_due`."""
         return self.schedule.is_due(self.last_run_at)
         return self.schedule.is_due(self.last_run_at)
@@ -114,19 +135,19 @@ class Scheduler(UserDict):
     """
     """
     Entry = ScheduleEntry
     Entry = ScheduleEntry
 
 
-    def __init__(self, schedule=None, logger=None,
-            max_interval=None):
+    def __init__(self, schedule=None, logger=None, max_interval=None,
+            **kwargs):
+        UserDict.__init__(self)
+        if schedule is None:
+            schedule = self.dict_to_entries(conf.CELERYBEAT_SCHEDULE)
         self.data = schedule
         self.data = schedule
-        if self.data is None:
-            self.data = {}
-        self.logger = logger or log.get_default_logger()
+        self.logger = logger or log.get_default_logger("celery.beat")
         self.max_interval = max_interval or conf.CELERYBEAT_MAX_LOOP_INTERVAL
         self.max_interval = max_interval or conf.CELERYBEAT_MAX_LOOP_INTERVAL
-
-        self.cleanup()
         self.setup_schedule()
         self.setup_schedule()
 
 
     def maybe_due(self, entry, connection=None):
     def maybe_due(self, entry, connection=None):
         is_due, next_time_to_run = entry.is_due()
         is_due, next_time_to_run = entry.is_due()
+
         if is_due:
         if is_due:
             self.logger.debug("Scheduler: Sending due task %s" % entry.name)
             self.logger.debug("Scheduler: Sending due task %s" % entry.name)
             try:
             try:
@@ -160,7 +181,7 @@ class Scheduler(UserDict):
         return min(remaining_times + [self.max_interval])
         return min(remaining_times + [self.max_interval])
 
 
     def reserve(self, entry):
     def reserve(self, entry):
-        new_entry = self.schedule[entry.name] = entry.next()
+        new_entry = self[entry.name] = entry.next()
         return new_entry
         return new_entry
 
 
     def apply_async(self, entry, **kwargs):
     def apply_async(self, entry, **kwargs):
@@ -169,8 +190,6 @@ class Scheduler(UserDict):
         # forever.)
         # forever.)
         entry = self.reserve(entry)
         entry = self.reserve(entry)
 
 
-        print("APPLYING: %s" % (entry, ))
-
         try:
         try:
             result = send_task(entry.name, entry.args, entry.kwargs,
             result = send_task(entry.name, entry.args, entry.kwargs,
                                **entry.options)
                                **entry.options)
@@ -179,49 +198,88 @@ class Scheduler(UserDict):
                     entry.name, exc))
                     entry.name, exc))
         return result
         return result
 
 
-    def maybe_schedule(self, s, relative=False):
-        if isinstance(s, int):
-            return timedelta(seconds=s)
-        if isinstance(s, timedelta):
-            return schedule(s, relative)
-        return s
-
     def setup_schedule(self):
     def setup_schedule(self):
-        self.data = self.dict_to_entries(conf.CELERYBEAT_SCHEDULE)
+        pass
 
 
-    def dict_to_entries(self, dict_):
-        entries = {}
-        for name, entry in dict_.items():
-            relative = entry.pop("relative", None)
-            entry["schedule"] = self.maybe_schedule(entry["schedule"],
-                                                    relative)
-            entries[name] = self.Entry(**entry)
-        return entries
-
-    def cleanup(self):
+    def sync(self):
         pass
         pass
 
 
-    @property
-    def schedule(self):
+    def close(self):
+        self.sync()
+
+    def dict_to_entries(self, dict_):
+        return dict((name, self.Entry(**entry))
+                        for name, entry in dict_.items())
+
+    def get_schedule(self):
         return self.data
         return self.data
 
 
+    def _set_schedule(self, schedule):
+        self.data = schedule
+
+    def _get_schedule(self):
+        return self.get_schedule()
+
+    schedule = property(_get_schedule, _set_schedule)
+
+
+class PersistentScheduler(Scheduler):
+    persistence = shelve
+
+    _store = None
 
 
-class ClockService(object):
-    scheduler_cls = Scheduler
-    open_schedule = lambda self, filename: shelve.open(filename)
+    def __init__(self, *args, **kwargs):
+        self.schedule_filename = kwargs.get("schedule_filename")
+        Scheduler.__init__(self, *args, **kwargs)
+
+    def setup_schedule(self):
+        self._store = self.persistence.open(self.schedule_filename)
+        self._diskmerge(self._store, conf.CELERYBEAT_SCHEDULE)
+        self.sync()
+        self.schedule = self._store
+
+    def _diskmerge(self, a, b):
+        A, B = set(a), set(b)
+
+        # Remove items from disk not in the schedule anymore.
+        for key in A ^ B:
+            a.pop(key, None)
+
+        # Update and add new items in the schedule
+        for key in B:
+            entry = self.Entry(**b[key])
+            if a.get(key):
+                a[key].update(entry)
+            else:
+                a[key] = entry
+
+    def sync(self):
+        if self._store is not None:
+            self.logger.debug("CeleryBeat: Syncing schedule to disk...")
+            self._store.sync()
+
+    def close(self):
+        self.sync()
+        self._store.close()
+
+
+class Service(object):
+    scheduler_cls = PersistentScheduler
 
 
     def __init__(self, logger=None,
     def __init__(self, logger=None,
             max_interval=conf.CELERYBEAT_MAX_LOOP_INTERVAL,
             max_interval=conf.CELERYBEAT_MAX_LOOP_INTERVAL,
             schedule=conf.CELERYBEAT_SCHEDULE,
             schedule=conf.CELERYBEAT_SCHEDULE,
             schedule_filename=conf.CELERYBEAT_SCHEDULE_FILENAME,
             schedule_filename=conf.CELERYBEAT_SCHEDULE_FILENAME,
             scheduler_cls=None):
             scheduler_cls=None):
-        self.logger = logger or log.get_default_logger()
         self.max_interval = max_interval
         self.max_interval = max_interval
         self.scheduler_cls = scheduler_cls or self.scheduler_cls
         self.scheduler_cls = scheduler_cls or self.scheduler_cls
-        self._shutdown = threading.Event()
-        self._stopped = threading.Event()
+        self.logger = logger or log.get_default_logger(name="celery.beat")
         self.schedule = schedule
         self.schedule = schedule
+        self.schedule_filename = schedule_filename
+
         self._scheduler = None
         self._scheduler = None
+        self._shutdown = threading.Event()
+        self._stopped = threading.Event()
         silence = self.max_interval < 60 and 10 or 1
         silence = self.max_interval < 60 and 10 or 1
         self.debug = log.SilenceRepeated(self.logger.debug,
         self.debug = log.SilenceRepeated(self.logger.debug,
                                          max_iterations=silence)
                                          max_iterations=silence)
@@ -237,19 +295,18 @@ class ClockService(object):
 
 
         try:
         try:
             try:
             try:
-                while True:
-                    if self._shutdown.isSet():
-                        break
+                while not self._shutdown.isSet():
                     interval = self.scheduler.tick()
                     interval = self.scheduler.tick()
                     self.debug("Celerybeat: Waking up %s." % (
                     self.debug("Celerybeat: Waking up %s." % (
                             humanize_seconds(interval, prefix="in ")))
                             humanize_seconds(interval, prefix="in ")))
                     time.sleep(interval)
                     time.sleep(interval)
             except (KeyboardInterrupt, SystemExit):
             except (KeyboardInterrupt, SystemExit):
-                self.sync()
+                self._shutdown.set()
         finally:
         finally:
             self.sync()
             self.sync()
 
 
     def sync(self):
     def sync(self):
+        self.scheduler.close()
         self._stopped.set()
         self._stopped.set()
 
 
     def stop(self, wait=False):
     def stop(self, wait=False):
@@ -260,45 +317,47 @@ class ClockService(object):
     @property
     @property
     def scheduler(self):
     def scheduler(self):
         if self._scheduler is None:
         if self._scheduler is None:
+            filename = self.schedule_filename
             self._scheduler = instantiate(self.scheduler_cls,
             self._scheduler = instantiate(self.scheduler_cls,
                                           schedule=self.schedule,
                                           schedule=self.schedule,
+                                          schedule_filename=filename,
                                           logger=self.logger,
                                           logger=self.logger,
                                           max_interval=self.max_interval)
                                           max_interval=self.max_interval)
         return self._scheduler
         return self._scheduler
 
 
 
 
 class _Threaded(threading.Thread):
 class _Threaded(threading.Thread):
-    """Embedded clock service using threading."""
+    """Embedded task scheduler using threading."""
 
 
     def __init__(self, *args, **kwargs):
     def __init__(self, *args, **kwargs):
         super(_Threaded, self).__init__()
         super(_Threaded, self).__init__()
-        self.clockservice = ClockService(*args, **kwargs)
+        self.service = Service(*args, **kwargs)
         self.setDaemon(True)
         self.setDaemon(True)
 
 
     def run(self):
     def run(self):
-        self.clockservice.start()
+        self.service.start()
 
 
     def stop(self):
     def stop(self):
-        self.clockservice.stop(wait=True)
+        self.service.stop(wait=True)
 
 
 
 
 class _Process(multiprocessing.Process):
 class _Process(multiprocessing.Process):
-    """Embedded clock service using multiprocessing."""
+    """Embedded task scheduler using multiprocessing."""
 
 
     def __init__(self, *args, **kwargs):
     def __init__(self, *args, **kwargs):
         super(_Process, self).__init__()
         super(_Process, self).__init__()
-        self.clockservice = ClockService(*args, **kwargs)
+        self.service = Service(*args, **kwargs)
 
 
     def run(self):
     def run(self):
         platform.reset_signal("SIGTERM")
         platform.reset_signal("SIGTERM")
-        self.clockservice.start(embedded_process=True)
+        self.service.start(embedded_process=True)
 
 
     def stop(self):
     def stop(self):
-        self.clockservice.stop()
+        self.service.stop()
         self.terminate()
         self.terminate()
 
 
 
 
-def EmbeddedClockService(*args, **kwargs):
+def EmbeddedService(*args, **kwargs):
     """Return embedded clock service.
     """Return embedded clock service.
 
 
     :keyword thread: Run threaded instead of as a separate process.
     :keyword thread: Run threaded instead of as a separate process.

+ 9 - 8
celery/bin/celerybeat.py

@@ -10,7 +10,7 @@
 
 
 .. cmdoption:: -S, --scheduler
 .. cmdoption:: -S, --scheduler
 
 
-    Scheduler class to use. Default is celery.beat.Scheduler
+    Scheduler class to use. Default is celery.beat.PersistentScheduler
 
 
 .. cmdoption:: -f, --logfile
 .. cmdoption:: -f, --logfile
 
 
@@ -27,10 +27,10 @@ import optparse
 import traceback
 import traceback
 
 
 import celery
 import celery
+from celery import beat
 from celery import conf
 from celery import conf
 from celery import platform
 from celery import platform
 from celery.log import emergency_error
 from celery.log import emergency_error
-from celery.beat import ClockService
 from celery.utils import info
 from celery.utils import info
 
 
 STARTUP_INFO_FMT = """
 STARTUP_INFO_FMT = """
@@ -53,7 +53,8 @@ OPTION_LIST = (
     optparse.make_option('-S', '--scheduler',
     optparse.make_option('-S', '--scheduler',
             default=None,
             default=None,
             action="store", dest="scheduler_cls",
             action="store", dest="scheduler_cls",
-            help="Scheduler class. Default is celery.beat.Scheduler"),
+            help="Scheduler class. Default is "
+                 "celery.beat.PersistentScheduler"),
     optparse.make_option('-f', '--logfile', default=conf.CELERYBEAT_LOG_FILE,
     optparse.make_option('-f', '--logfile', default=conf.CELERYBEAT_LOG_FILE,
             action="store", dest="logfile",
             action="store", dest="logfile",
             help="Path to log file."),
             help="Path to log file."),
@@ -65,7 +66,7 @@ OPTION_LIST = (
 
 
 
 
 class Beat(object):
 class Beat(object):
-    ClockService = ClockService
+    Service = beat.Service
 
 
     def __init__(self, loglevel=conf.CELERYBEAT_LOG_LEVEL,
     def __init__(self, loglevel=conf.CELERYBEAT_LOG_LEVEL,
             logfile=conf.CELERYBEAT_LOG_FILE,
             logfile=conf.CELERYBEAT_LOG_FILE,
@@ -94,10 +95,10 @@ class Beat(object):
     def start_scheduler(self):
     def start_scheduler(self):
         from celery.log import setup_logger
         from celery.log import setup_logger
         logger = setup_logger(self.loglevel, self.logfile, name="celery.beat")
         logger = setup_logger(self.loglevel, self.logfile, name="celery.beat")
-        beat = self.ClockService(logger=logger,
-                                 max_interval=self.max_interval,
-                                 scheduler_cls=self.scheduler_cls,
-                                 schedule_filename=self.schedule)
+        beat = self.Service(logger=logger,
+                            max_interval=self.max_interval,
+                            scheduler_cls=self.scheduler_cls,
+                            schedule_filename=self.schedule)
 
 
         try:
         try:
             self.install_sync_handler(beat)
             self.install_sync_handler(beat)