@@ -35,10 +35,12 @@ type Watchdog struct {
35
35
// restartTimeout describes the timeout between
36
36
// restarting the Instance.
37
37
restartTimeout time.Duration
38
- // stopped indicates whether Watchdog was stopped.
39
- stopped bool
40
- // stopMutex used to avoid a race condition under stopped field.
41
- stopMutex sync.Mutex
38
+ // watchdogStateMutex guards the started and shouldStop fields against data races.
39
+ watchdogStateMutex sync.Mutex
40
+ // started indicates whether Watchdog has started an Instance.
41
+ started bool
42
+ // shouldStop indicates whether Watchdog should stop the Instance.
43
+ shouldStop bool
42
44
// done channel used to inform the signal handle goroutine
43
45
// about termination of the Instance.
44
46
done chan bool
@@ -55,7 +57,7 @@ func NewWatchdog(restartable bool, restartTimeout time.Duration, logger *ttlog.L
55
57
provider : provider }
56
58
57
59
wd .done = make (chan bool , 1 )
58
- wd .stopped = false
60
+ wd .shouldStop = false
59
61
60
62
return & wd
61
63
}
@@ -72,23 +74,32 @@ func (wd *Watchdog) Start() {
72
74
break
73
75
}
74
76
wd .logger = wd .Instance .logger
75
- // Start the Instance and forwarding signals (except SIGINT and SIGTERM)
76
- wd .startSignalHandling ()
77
- wd .stopMutex . Lock ()
78
- if ! wd . stopped {
77
+
78
+ wd .watchdogStateMutex . Lock ()
79
+ if ! wd .shouldStop {
80
+ // Start the Instance and forward signals to it (except SIGINT and SIGTERM).
79
81
if err := wd .Instance .Start (); err != nil {
80
82
wd .logger .Printf (`Watchdog(ERROR): "%v".` , err )
81
- wd .stopMutex .Unlock ()
83
+ wd .watchdogStateMutex .Unlock ()
82
84
break
83
85
}
86
+ wd .started = true
87
+ } else {
88
+ wd .logger .Printf (`Watchdog(ERROR): terminated before start.` )
89
+ wd .watchdogStateMutex .Unlock ()
90
+ return
84
91
}
85
- wd .stopMutex .Unlock ()
92
+ wd .watchdogStateMutex .Unlock ()
86
93
87
94
// Wait until the Instance terminates.
88
95
if err := wd .Instance .Wait (); err != nil {
89
96
wd .logger .Printf (`Watchdog(WARN): "%v".` , err )
90
97
}
91
98
99
+ wd .watchdogStateMutex .Lock ()
100
+ wd .started = false
101
+ wd .watchdogStateMutex .Unlock ()
102
+
92
103
// Set Instance process completion indication.
93
104
wd .done <- true
94
105
// Wait for the signal processing goroutine to complete.
@@ -100,7 +111,7 @@ func (wd *Watchdog) Start() {
100
111
wd .logger .Println ("Watchdog(ERROR): can't check if the instance is restartable." )
101
112
break
102
113
}
103
- if wd .stopped || ! restartable {
114
+ if wd .shouldStop || ! restartable {
104
115
wd .logger .Println ("Watchdog(INFO): the Instance has shutdown." )
105
116
break
106
117
}
@@ -112,15 +123,18 @@ func (wd *Watchdog) Start() {
112
123
wd .logger = logger
113
124
}
114
125
time .Sleep (wd .restartTimeout )
126
+
127
+ wd .shouldStop = false
128
+ // Start a new signal handling loop before restarting the Instance.
129
+ wd .StartSignalHandling ()
115
130
}
116
131
}
117
132
118
- // startSignalHandling starts signal handling in a separate goroutine.
119
- func (wd * Watchdog ) startSignalHandling () {
133
+ // StartSignalHandling starts signal handling in a separate goroutine.
134
+ func (wd * Watchdog ) StartSignalHandling () {
120
135
sigChan := make (chan os.Signal , 1 )
121
- // Reset unregisters all previous handlers for interrupt signals.
122
- signal .Reset (syscall .SIGINT ,
123
- syscall .SIGTERM , syscall .SIGHUP )
136
+ // Reset all previously registered signal handlers before starting the new loop.
137
+ signal .Reset ()
124
138
signal .Notify (sigChan )
125
139
126
140
// Set barrier to synchronize with the main loop when the Instance stops.
@@ -136,20 +150,25 @@ func (wd *Watchdog) startSignalHandling() {
136
150
case sig := <- sigChan :
137
151
switch sig {
138
152
case syscall .SIGINT , syscall .SIGTERM :
139
- wd .stopMutex .Lock ()
140
- wd .Instance .Stop (30 * time .Second )
153
+ wd .watchdogStateMutex .Lock ()
154
+ if wd .started {
155
+ wd .Instance .Stop (30 * time .Second )
156
+ }
141
157
// If we receive one of the "stop" signals, the
142
158
// program should be terminated.
143
- wd .stopped = true
144
- wd .stopMutex .Unlock ()
159
+ wd .shouldStop = true
160
+ wd .watchdogStateMutex .Unlock ()
145
161
case syscall .SIGHUP :
146
162
// Rotate the log files.
147
163
wd .logger .Rotate ()
148
164
default :
149
- wd .Instance .SendSignal (sig )
165
+ wd .watchdogStateMutex .Lock ()
166
+ if wd .started {
167
+ wd .Instance .SendSignal (sig )
168
+ }
169
+ wd .watchdogStateMutex .Unlock ()
150
170
}
151
171
case _ = <- wd .done :
152
- signal .Reset ()
153
172
return
154
173
}
155
174
}
0 commit comments