[535] | 1 | # Watchdog
|
---|
| 2 |
|
---|
| 3 | #-----------------------------------------------------------------------------
|
---|
| 4 |
|
---|
| 5 | from threading import Thread, Lock
|
---|
| 6 |
|
---|
| 7 | import time
|
---|
| 8 |
|
---|
| 9 | #-----------------------------------------------------------------------------
|
---|
| 10 |
|
---|
| 11 | ## @package mlx.watchdog
|
---|
| 12 | #
|
---|
| 13 | # Watchdog module. It implements a thread which wakes up regularly and checks
|
---|
| 14 | # that none of its clients is left in a state for too long a time. If so, that
|
---|
| 15 | # fact is logged.
|
---|
| 16 |
|
---|
| 17 | #-----------------------------------------------------------------------------
|
---|
| 18 |
|
---|
class _ClientState(object):
    """The state of a single watchdog client.

    A client is either set or cleared. Setting it arms a deadline that lies
    'timeout' seconds in the future. If the watchdog finds the client still
    set at or after that deadline, the timeout is logged once. When the
    client is cleared after a logged timeout, that fact is logged too."""

    def __init__(self, timeout, name):
        """Construct the client state.

        timeout is the number of seconds the client may remain set before it
        is reported, name is the text identifying the client in the log."""
        # Protects all the mutable members below: set() and clear() are
        # called by the clients, _check() by the watchdog thread.
        self._lock = Lock()

        self._timeout = timeout
        self._name = name
        # The time.time() value at which the client times out, or None if
        # the client is cleared.
        self._nextTimeout = None
        # True if a timeout has been logged, but its clearing has not been
        # logged yet.
        self._timedout = False
        # True if the client was cleared after a logged timeout, and the
        # watchdog has not reported that yet.
        self._clearPending = False

    def set(self):
        """Put the client into the set state.

        If the client is already set, the pending deadline is left alone."""
        with self._lock:
            if self._nextTimeout is None:
                self._nextTimeout = time.time() + self._timeout

    def clear(self):
        """Put the client into the cleared state."""
        with self._lock:
            self._nextTimeout = None
            # Remember the clearing here, so that it is not lost if the
            # client gets set again before the next check.
            if self._timedout:
                self._clearPending = True

    def _check(self, t):
        """Check the client state against the time t (as of time.time()).

        If the client has timed out, and that has not been logged yet, it is
        logged. If it has been cleared since a logged timeout, that fact is
        logged as well, even if the client has been set again in the
        meantime."""
        with self._lock:
            logCleared = self._clearPending
            if logCleared:
                self._clearPending = False
                self._timedout = False

            logTimeout = False
            if self._nextTimeout is not None and t >= self._nextTimeout:
                logTimeout = not self._timedout
                self._timedout = True

        # The printing is done outside the lock, so that the clients are not
        # blocked by a slow output. The parenthesised form works with both
        # the print statement and the print function.
        if logCleared:
            print("Watchdog client %s has been cleared." % (self._name,))
        if logTimeout:
            print("Watchdog client %s has timed out!" % (self._name,))
| 65 |
|
---|
| 66 | #-----------------------------------------------------------------------------
|
---|
| 67 |
|
---|
class Watchdog(Thread):
    """The watchdog thread.

    It wakes up every WAKEUP_INTERVAL seconds and checks all of its clients.
    Only one instance may be created, it is available via get()."""

    # The only instance of the watchdog, or None if not created yet.
    _instance = None

    # The number of seconds between two consecutive checks of the clients.
    WAKEUP_INTERVAL = 1.0

    # The number of seconds between two consecutive 'running' log entries.
    LOG_INTERVAL = 60.0

    @staticmethod
    def get():
        """Get the only instance of the watchdog.

        None is returned if the watchdog has not been constructed yet."""
        return Watchdog._instance

    def __init__(self):
        """Construct the watchdog and register it as the only instance.

        The thread is marked as a daemon, but it is not started here."""
        assert self._instance is None, "a Watchdog instance already exists"

        super(Watchdog, self).__init__()
        self.daemon = True

        # Protects the list of clients.
        self._lock = Lock()
        self._clients = []

        Watchdog._instance = self

    def addClient(self, timeout, name):
        """Add a client with the given timeout (in seconds) and name.

        The new client is returned."""
        client = _ClientState(timeout, name)
        with self._lock:
            self._clients.append(client)
        return client

    def run(self):
        """Perform the client checks, then wait for WAKEUP_INTERVAL.

        If LOG_INTERVAL elapses, print a message to confirm that the
        watchdog still works. This function never returns."""
        nextLogTime = time.time()
        nextWakeupTime = nextLogTime + self.WAKEUP_INTERVAL

        while True:
            t = time.time()

            # Step the deadlines over the current time, so that wakeups
            # which were missed are skipped instead of being made up for.
            while t >= nextWakeupTime:
                nextWakeupTime += self.WAKEUP_INTERVAL

            if t >= nextLogTime:
                # The parenthesised form works with both the print statement
                # and the print function.
                print("Watchdog.run: running")
                while t >= nextLogTime:
                    nextLogTime += self.LOG_INTERVAL

            self._checkClients(t)

            # The checks may have taken some time, so the current time is
            # queried again before going to sleep.
            t = time.time()
            if t < nextWakeupTime:
                time.sleep(nextWakeupTime - t)

    def _checkClients(self, t):
        """Check the clients against the time t (as of time.time())."""
        # Work on a copy of the list, so that the lock is not held while the
        # clients are being checked.
        with self._lock:
            clients = self._clients[:]

        for client in clients:
            client._check(t)
| 134 |
|
---|
| 135 | #-----------------------------------------------------------------------------
|
---|
| 136 | #-----------------------------------------------------------------------------
|
---|