processor_pool.go

package worker

import (
	"fmt"
	"os"
	"sort"
	"sync"
	"sync/atomic"

	gocontext "context"

	"github.com/pborman/uuid"
	"github.com/sirupsen/logrus"
	"github.com/travis-ci/worker/backend"
	"github.com/travis-ci/worker/config"
	"github.com/travis-ci/worker/context"
)

// A ProcessorPool spins up multiple Processors handling build jobs from the
// same queue.
type ProcessorPool struct {
	Context                 gocontext.Context
	Provider                backend.Provider
	Generator               BuildScriptGenerator
	Persister               BuildTracePersister
	CancellationBroadcaster *CancellationBroadcaster
	Hostname                string
	Config                  *config.Config

	queue            JobQueue
	logWriterFactory LogWriterFactory
	poolErrors       []error
	processorsLock   sync.Mutex
	processors       []*Processor
	processorsWG     sync.WaitGroup
	pauseCount       int

	activeProcessorCount int32

	firstProcessorStarted chan struct{}
	startProcessorOnce    sync.Once
}

// ProcessorPoolConfig contains the basic configuration needed to construct a
// ProcessorPool: the hostname, root context, and worker config.
type ProcessorPoolConfig struct {
	Hostname string
	Context  gocontext.Context
	Config   *config.Config
}

// NewProcessorPool creates a new processor pool using the given arguments.
func NewProcessorPool(ppc *ProcessorPoolConfig,
	provider backend.Provider, generator BuildScriptGenerator, persister BuildTracePersister,
	cancellationBroadcaster *CancellationBroadcaster) *ProcessorPool {

	return &ProcessorPool{
		Hostname: ppc.Hostname,
		Context:  ppc.Context,
		Config:   ppc.Config,

		Provider:                provider,
		Generator:               generator,
		Persister:               persister,
		CancellationBroadcaster: cancellationBroadcaster,

		firstProcessorStarted: make(chan struct{}),
	}
}

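// newPoolExample is a hypothetical sketch (not part of the upstream API)
// showing how the constructor is typically wired together; the provider,
// generator, persister, and broadcaster are assumed to be built elsewhere
// during worker setup.
func newPoolExample(ctx gocontext.Context, cfg *config.Config, hostname string,
	provider backend.Provider, generator BuildScriptGenerator, persister BuildTracePersister,
	broadcaster *CancellationBroadcaster) *ProcessorPool {

	return NewProcessorPool(&ProcessorPoolConfig{
		Hostname: hostname,
		Context:  ctx,
		Config:   cfg,
	}, provider, generator, persister, broadcaster)
}
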
// Each loops through all the processors in the pool and calls the given
// function for each of them, passing in the index and the processor. The
// order of the processors is the same for the same set of processors.
func (p *ProcessorPool) Each(f func(int, *Processor)) {
	procIDs := []string{}
	procsByID := map[string]*Processor{}

	for _, proc := range p.processors {
		procIDs = append(procIDs, proc.ID)
		procsByID[proc.ID] = proc
	}

	sort.Strings(procIDs)

	for i, procID := range procIDs {
		f(i, procsByID[procID])
	}
}

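// logProcessorsExample is a hypothetical sketch of a typical Each use:
// walking the processors in their stable (ID-sorted) order to report status.
func logProcessorsExample(pool *ProcessorPool) {
	pool.Each(func(i int, proc *Processor) {
		fmt.Printf("processor %d: id=%s processed=%d\n", i, proc.ID, proc.ProcessedCount)
	})
}
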
// Ready returns true if the processor pool is running as expected.
// Returns false if the processor pool has not been started yet.
func (p *ProcessorPool) Ready() bool {
	return p.queue != nil
}

// Size returns the number of processors that are currently running.
//
// This includes processors that are in the process of gracefully shutting
// down. It's important to track these because they are still running jobs
// and thus still using resources that need to be managed and tracked.
func (p *ProcessorPool) Size() int {
	val := atomic.LoadInt32(&p.activeProcessorCount)
	return int(val)
}

// SetSize adjusts the pool to run the given number of processors.
//
// This operates in an eventually consistent manner. Because some workers may
// be running jobs, we may not be able to immediately adjust the pool size.
// Once jobs finish, the pool size should rest at the given value.
func (p *ProcessorPool) SetSize(newSize int) {
	// It's important to lock over the whole method rather than use the lock
	// for individual Incr and Decr calls. We don't want other calls to
	// SetSize to see the intermediate state where only some processors have
	// been started, or they will do the wrong math and start the wrong
	// number of processors.
	p.processorsLock.Lock()
	defer p.processorsLock.Unlock()

	cur := len(p.processors)

	if newSize > cur {
		diff := newSize - cur
		for i := 0; i < diff; i++ {
			p.incr()
		}
	} else if newSize < cur {
		diff := cur - newSize
		for i := 0; i < diff; i++ {
			p.decr()
		}
	}
}

// ExpectedSize returns the size of the pool once gracefully shutdown
// processors complete.
//
// After calling SetSize, ExpectedSize will soon reflect the requested new
// size, while Size will include processors that are still processing their
// last job before shutting down.
func (p *ProcessorPool) ExpectedSize() int {
	return len(p.processors)
}

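// resizeExample is a hypothetical sketch of the eventually consistent
// contract: ExpectedSize reflects the request right away (barring startup
// errors), while Size catches up once draining processors finish their
// current jobs.
func resizeExample(pool *ProcessorPool) {
	pool.SetSize(4)

	// ExpectedSize is now 4; Size may still be higher while old processors
	// drain, or lower while freshly started ones spin up.
	fmt.Printf("expected=%d running=%d\n", pool.ExpectedSize(), pool.Size())
}
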
// TotalProcessed returns the sum of all processor ProcessedCount values.
func (p *ProcessorPool) TotalProcessed() int {
	total := 0
	p.Each(func(_ int, pr *Processor) {
		total += pr.ProcessedCount
	})
	return total
}

// Run starts up a number of processors and connects them to the given queue.
// This method blocks until all processors have finished.
func (p *ProcessorPool) Run(poolSize int, queue JobQueue, logWriterFactory LogWriterFactory) error {
	p.queue = queue
	p.logWriterFactory = logWriterFactory
	p.poolErrors = []error{}

	for i := 0; i < poolSize; i++ {
		p.Incr()
	}

	p.waitForFirstProcessor()

	if len(p.poolErrors) > 0 {
		context.LoggerFromContext(p.Context).WithFields(logrus.Fields{
			"self":        "processor_pool",
			"pool_errors": p.poolErrors,
		}).Panic("failed to populate pool")
	}

	p.processorsWG.Wait()

	return nil
}

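// runExample is a hypothetical sketch: because Run blocks until every
// processor has finished, it is usually started in a goroutine, with
// shutdown driven from elsewhere (a signal handler, an admin endpoint, etc.).
func runExample(pool *ProcessorPool, queue JobQueue, logWriterFactory LogWriterFactory) {
	done := make(chan error, 1)
	go func() { done <- pool.Run(4, queue, logWriterFactory) }()

	// ... later, from whatever drives shutdown:
	pool.GracefulShutdown(false)
	<-done // Run returns once all processors have finished
}
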
// GracefulShutdown causes each processor in the pool to start its graceful
// shutdown.
func (p *ProcessorPool) GracefulShutdown(togglePause bool) {
	p.processorsLock.Lock()
	defer p.processorsLock.Unlock()

	logger := context.LoggerFromContext(p.Context).WithField("self", "processor_pool")

	if togglePause {
		p.pauseCount++

		if p.pauseCount == 1 {
			logger.Info("incrementing wait group for pause")
			p.processorsWG.Add(1)
		} else if p.pauseCount == 2 {
			logger.Info("finishing wait group to unpause")
			p.processorsWG.Done()
		} else if p.pauseCount > 2 {
			return
		}
	}

	// In case no processors were ever started, we still want a graceful
	// shutdown request to proceed. Without this, we will wait forever until
	// the process is forcefully killed.
	p.startProcessorOnce.Do(func() {
		close(p.firstProcessorStarted)
	})

	ps := len(p.processors)
	for i := 0; i < ps; i++ {
		// Use decr to make sure the processor is removed from the list in the pool
		p.decr()
	}
}

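// pauseOrStopExample is a hypothetical sketch of driving the pool from
// outside: a pause request toggles the pool (the first call pauses, the
// second unpauses), while a stop request shuts it down for good.
func pauseOrStopExample(pool *ProcessorPool, pause, stop <-chan struct{}) {
	for {
		select {
		case <-pause:
			// The first call drains processors but keeps Run blocked via the
			// wait group; a second call releases the pause.
			pool.GracefulShutdown(true)
		case <-stop:
			// Real shutdown: Run returns once in-flight jobs finish.
			pool.GracefulShutdown(false)
			return
		}
	}
}
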
// Incr adds a single running processor to the pool
func (p *ProcessorPool) Incr() {
	p.processorsLock.Lock()
	defer p.processorsLock.Unlock()
	p.incr()
}

// incr assumes the processorsLock has already been locked
func (p *ProcessorPool) incr() {
	proc, err := p.makeProcessor(p.queue, p.logWriterFactory)
	if err != nil {
		context.LoggerFromContext(p.Context).WithFields(logrus.Fields{
			"err":  err,
			"self": "processor_pool",
		}).Error("couldn't create processor")
		p.poolErrors = append(p.poolErrors, err)
		return
	}

	p.processors = append(p.processors, proc)
	p.processorsWG.Add(1)

	go func() {
		defer p.processorsWG.Done()
		atomic.AddInt32(&p.activeProcessorCount, 1)
		proc.Run()
		atomic.AddInt32(&p.activeProcessorCount, -1)
	}()

	p.startProcessorOnce.Do(func() {
		close(p.firstProcessorStarted)
	})
}

// Decr pops a processor out of the pool and issues a graceful shutdown
func (p *ProcessorPool) Decr() {
	p.processorsLock.Lock()
	defer p.processorsLock.Unlock()
	p.decr()
}

// decr assumes the processorsLock has already been locked
func (p *ProcessorPool) decr() {
	if len(p.processors) == 0 {
		return
	}

	var proc *Processor
	proc, p.processors = p.processors[len(p.processors)-1], p.processors[:len(p.processors)-1]
	proc.GracefulShutdown()
}

func (p *ProcessorPool) makeProcessor(queue JobQueue, logWriterFactory LogWriterFactory) (*Processor, error) {
	processorUUID := uuid.NewRandom()
	processorID := fmt.Sprintf("%s@%d.%s", processorUUID.String(), os.Getpid(), p.Hostname)
	ctx := context.FromProcessor(p.Context, processorID)

	return NewProcessor(ctx, p.Hostname,
		queue, logWriterFactory, p.Provider, p.Generator, p.Persister, p.CancellationBroadcaster,
		ProcessorConfig{
			Config: p.Config,
		})
}

func (p *ProcessorPool) waitForFirstProcessor() {
	// wait until this channel is closed. the first processor to start
	// running will close it.
	<-p.firstProcessorStarted
}