@@ -35,17 +35,17 @@ type Watchdog struct {
35
35
// restartTimeout describes the timeout between
36
36
// restarting the Instance.
37
37
restartTimeout time.Duration
38
- // stopped indicates whether Watchdog was stopped.
39
- stopped bool
40
- // stopMutex used to avoid a race condition under stopped field.
41
- stopMutex sync.Mutex
42
38
// done channel used to inform the signal handle goroutine
43
39
// about termination of the Instance.
44
40
done chan bool
45
41
// provider provides Watchdog methods to get objects whose creation
46
42
// and updating may depend on changing external parameters
47
43
// (such as configuration file).
48
44
provider Provider
45
+ // stopMutex is used to avoid a race condition on the shouldStop field.
46
+ stopMutex sync.Mutex
47
+ // shouldStop indicates whether the Watchdog should be stopped.
48
+ shouldStop bool
49
49
}
50
50
51
51
// NewWatchdog creates a new instance of Watchdog.
@@ -55,33 +55,49 @@ func NewWatchdog(restartable bool, restartTimeout time.Duration, logger *ttlog.L
55
55
provider : provider }
56
56
57
57
wd .done = make (chan bool , 1 )
58
- wd .stopped = false
59
58
60
59
return & wd
61
60
}
62
61
63
62
// Start starts the Instance and signal handling.
64
- func (wd * Watchdog ) Start () {
63
+ func (wd * Watchdog ) Start (preStartAction func () error ) error {
64
+ var err error
65
+ // Create Instance.
66
+ if wd .Instance , err = wd .provider .CreateInstance (wd .logger ); err != nil {
67
+ wd .logger .Printf (`Watchdog(ERROR): "%v".` , err )
68
+ return err
69
+ }
70
+ wd .logger = wd .Instance .logger
71
+ // The signal handling loop must be started before the instance
72
+ // is started, in order to avoid a race condition between tt start
73
+ // and tt stop. This way we avoid a situation when we receive
74
+ // a signal before starting a handler for it.
75
+ wd .startSignalHandling ()
76
+
77
+ if err = preStartAction (); err != nil {
78
+ wd .logger .Printf (`Pre-start action error: %v` , err )
79
+ // Finish the signal handling goroutine.
80
+ wd .done <- true
81
+ return err
82
+ }
83
+
65
84
// The Instance must be restarted on completion if the "restartable"
66
85
// parameter is set to "true".
67
86
for {
68
87
var err error
69
- // Create Instance.
70
- if wd .Instance , err = wd .provider .CreateInstance (wd .logger ); err != nil {
88
+
89
+ wd .stopMutex .Lock ()
90
+ if wd .shouldStop {
91
+ wd .logger .Printf (`Watchdog(ERROR): terminated before instance start.` )
92
+ wd .stopMutex .Unlock ()
93
+ return nil
94
+ }
95
+ // Start the Instance.
96
+ if err := wd .Instance .Start (); err != nil {
71
97
wd .logger .Printf (`Watchdog(ERROR): "%v".` , err )
98
+ wd .stopMutex .Unlock ()
72
99
break
73
100
}
74
- wd .logger = wd .Instance .logger
75
- // Start the Instance and forwarding signals (except SIGINT and SIGTERM)
76
- wd .startSignalHandling ()
77
- wd .stopMutex .Lock ()
78
- if ! wd .stopped {
79
- if err := wd .Instance .Start (); err != nil {
80
- wd .logger .Printf (`Watchdog(ERROR): "%v".` , err )
81
- wd .stopMutex .Unlock ()
82
- break
83
- }
84
- }
85
101
wd .stopMutex .Unlock ()
86
102
87
103
// Wait until the Instance is terminated.
@@ -100,7 +116,7 @@ func (wd *Watchdog) Start() {
100
116
wd .logger .Println ("Watchdog(ERROR): can't check if the instance is restartable." )
101
117
break
102
118
}
103
- if wd .stopped || ! restartable {
119
+ if wd .shouldStop || ! restartable {
104
120
wd .logger .Println ("Watchdog(INFO): the Instance has shutdown." )
105
121
break
106
122
}
@@ -112,15 +128,26 @@ func (wd *Watchdog) Start() {
112
128
wd .logger = logger
113
129
}
114
130
time .Sleep (wd .restartTimeout )
131
+
132
+ wd .shouldStop = false
133
+
134
+ // Recreate Instance.
135
+ if wd .Instance , err = wd .provider .CreateInstance (wd .logger ); err != nil {
136
+ wd .logger .Printf (`Watchdog(ERROR): "%v".` , err )
137
+ return err
138
+ }
139
+ wd .logger = wd .Instance .logger
140
+ // Before the restart of an instance start a new signal handling loop.
141
+ wd .startSignalHandling ()
115
142
}
143
+ return nil
116
144
}
117
145
118
146
// startSignalHandling starts signal handling in a separate goroutine.
119
147
func (wd * Watchdog ) startSignalHandling () {
120
148
sigChan := make (chan os.Signal , 1 )
121
- // Reset unregisters all previous handlers for interrupt signals.
122
- signal .Reset (syscall .SIGINT ,
123
- syscall .SIGTERM , syscall .SIGHUP )
149
+ // Reset all signal handlers before starting the new loop.
150
+ signal .Reset ()
124
151
signal .Notify (sigChan )
125
152
126
153
// Set barrier to synchronize with the main loop when the Instance stops.
@@ -137,19 +164,22 @@ func (wd *Watchdog) startSignalHandling() {
137
164
switch sig {
138
165
case syscall .SIGINT , syscall .SIGTERM :
139
166
wd .stopMutex .Lock ()
140
- wd .Instance .Stop (30 * time .Second )
141
167
// If we receive one of the "stop" signals, the
142
168
// program should be terminated.
143
- wd .stopped = true
169
+ wd .shouldStop = true
144
170
wd .stopMutex .Unlock ()
171
+ if wd .Instance .IsAlive () {
172
+ wd .Instance .Stop (30 * time .Second )
173
+ }
145
174
case syscall .SIGHUP :
146
175
// Rotate the log files.
147
176
wd .logger .Rotate ()
148
177
default :
149
- wd .Instance .SendSignal (sig )
178
+ if wd .Instance .IsAlive () {
179
+ wd .Instance .SendSignal (sig )
180
+ }
150
181
}
151
182
case _ = <- wd .done :
152
- signal .Reset ()
153
183
return
154
184
}
155
185
}
0 commit comments