Skip to content

Commit 52a42c3

Browse files
geroplroboquat
authored and committed
[bridge] Move MetaInstanceController into Bridge.controlInstances
1 parent 5800e3c commit 52a42c3

File tree

7 files changed

+92
-139
lines changed

7 files changed

+92
-139
lines changed

components/ws-manager-bridge/src/bridge.ts

Lines changed: 84 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,6 @@ export class WorkspaceManagerBridge implements Disposable {
418418
clientProvider: ClientProvider,
419419
controllerIntervalSeconds: number,
420420
controllerMaxDisconnectSeconds: number,
421-
maxTimeToRunningPhaseSeconds = 60 * 60,
422421
) {
423422
let disconnectStarted = Number.MAX_SAFE_INTEGER;
424423
this.disposables.push(
@@ -435,12 +434,7 @@ export class WorkspaceManagerBridge implements Disposable {
435434

436435
// Control running workspace instances against ws-manager
437436
try {
438-
await this.controlRunningInstances(
439-
ctx,
440-
runningInstances,
441-
clientProvider,
442-
maxTimeToRunningPhaseSeconds,
443-
);
437+
await this.controlRunningInstances(ctx, runningInstances, clientProvider);
444438

445439
disconnectStarted = Number.MAX_SAFE_INTEGER; // Reset disconnect period
446440
} catch (err) {
@@ -453,6 +447,9 @@ export class WorkspaceManagerBridge implements Disposable {
453447
}
454448
}
455449

450+
// Control workspace instances against timeouts
451+
await this.controlInstancesTimeouts(ctx, runningInstances);
452+
456453
log.debug("Done controlling instances.", { installation });
457454
} catch (err) {
458455
TraceContext.setError(ctx, err);
@@ -466,11 +463,14 @@ export class WorkspaceManagerBridge implements Disposable {
466463
);
467464
}
468465

466+
/**
467+
* This method controls all instances that we have currently marked as "running" in the DB.
468+
* It checks whether they are still running with their respective ws-manager, and if not, marks them as stopped in the DB.
469+
*/
469470
protected async controlRunningInstances(
470471
parentCtx: TraceContext,
471472
runningInstances: RunningWorkspaceInfo[],
472473
clientProvider: ClientProvider,
473-
maxTimeToRunningPhaseSeconds: number,
474474
) {
475475
const installation = this.config.installation;
476476

@@ -488,12 +488,7 @@ export class WorkspaceManagerBridge implements Disposable {
488488

489489
for (const [instanceId, ri] of runningInstancesIdx.entries()) {
490490
const instance = ri.latestInstance;
491-
if (
492-
!(
493-
instance.status.phase === "running" ||
494-
durationLongerThanSeconds(Date.parse(instance.creationTime), maxTimeToRunningPhaseSeconds)
495-
)
496-
) {
491+
if (instance.status.phase !== "running") {
497492
log.debug({ instanceId }, "Skipping instance", {
498493
phase: instance.status.phase,
499494
creationTime: instance.creationTime,
@@ -517,6 +512,81 @@ export class WorkspaceManagerBridge implements Disposable {
517512
}
518513
}
519514

515+
/**
516+
* This method controls all instances of this installation during periods where ws-manager does not control them, but we have them in our DB.
517+
* These currently are:
518+
* - preparing
519+
* - building
520+
* It also covers these phases, as fallback, when - for whatever reason - we no longer receive updates from ws-manager.
521+
* - stopping (as fallback, in case ws-manager is stopped too early: configure to be >= than ws-manager timeouts!)
522+
* - unknown (fallback)
523+
*/
524+
protected async controlInstancesTimeouts(parentCtx: TraceContext, runningInstances: RunningWorkspaceInfo[]) {
525+
const installation = this.config.installation;
526+
527+
const span = TraceContext.startSpan("controlDBInstances", parentCtx);
528+
const ctx = { span };
529+
try {
530+
log.debug("Controlling DB instances...", { installation });
531+
532+
await Promise.all(runningInstances.map((info) => this.controlInstanceTimeouts(ctx, info)));
533+
534+
log.debug("Done controlling DB instances.", { installation });
535+
} catch (err) {
536+
log.error("Error while running controlDBInstances", err, {
537+
installation: this.cluster.name,
538+
});
539+
TraceContext.setError(ctx, err);
540+
} finally {
541+
span.finish();
542+
}
543+
}
544+
545+
protected async controlInstanceTimeouts(parentCtx: TraceContext, info: RunningWorkspaceInfo) {
546+
const logContext: LogContext = {
547+
userId: info.workspace.ownerId,
548+
workspaceId: info.workspace.id,
549+
instanceId: info.latestInstance.id,
550+
};
551+
const ctx = TraceContext.childContext("controlDBInstance", parentCtx);
552+
try {
553+
const now = Date.now();
554+
const creationTime = new Date(info.latestInstance.creationTime).getTime();
555+
const stoppingTime = new Date(info.latestInstance.stoppingTime ?? now).getTime(); // stoppingTime only set if entered stopping state
556+
const timedOutInPreparing = now >= creationTime + this.config.timeouts.preparingPhaseSeconds * 1000;
557+
const timedOutInBuilding = now >= creationTime + this.config.timeouts.buildingPhaseSeconds * 1000;
558+
const timedOutInStopping = now >= stoppingTime + this.config.timeouts.stoppingPhaseSeconds * 1000;
559+
const timedOutInUnknown = now >= creationTime + this.config.timeouts.unknownPhaseSeconds * 1000;
560+
const currentPhase = info.latestInstance.status.phase;
561+
562+
log.debug(logContext, "Controller: Checking for instances in the DB to mark as stopped", {
563+
creationTime,
564+
stoppingTime,
565+
timedOutInPreparing,
566+
timedOutInStopping,
567+
currentPhase,
568+
});
569+
570+
if (
571+
(currentPhase === "preparing" && timedOutInPreparing) ||
572+
(currentPhase === "building" && timedOutInBuilding) ||
573+
(currentPhase === "stopping" && timedOutInStopping) ||
574+
(currentPhase === "unknown" && timedOutInUnknown)
575+
) {
576+
log.info(logContext, "Controller: Marking workspace instance as stopped", {
577+
creationTime,
578+
currentPhase,
579+
});
580+
await this.markWorkspaceInstanceAsStopped(ctx, info, new Date(now));
581+
}
582+
} catch (err) {
583+
log.warn(logContext, "Controller: Error while marking workspace instance as stopped", err);
584+
TraceContext.setError(ctx, err);
585+
} finally {
586+
ctx.span.finish();
587+
}
588+
}
589+
520590
protected async markWorkspaceInstanceAsStopped(ctx: TraceContext, info: RunningWorkspaceInfo, now: Date) {
521591
const nowISO = now.toISOString();
522592
info.latestInstance.stoppingTime = nowISO;

components/ws-manager-bridge/src/config.ts

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,8 @@ export interface Configuration {
2626
// controllerMaxDisconnect configures how long the controller may be disconnected from ws-manager before it emits a warning
2727
controllerMaxDisconnectSeconds: number;
2828

29-
// maxTimeToRunningPhaseSeconds is the time that we are willing to give a workspace instance in which it has to reach a running state
30-
maxTimeToRunningPhaseSeconds: number;
31-
3229
// timeouts configures the timeout behaviour of pre-workspace cluster workspaces
3330
timeouts: {
34-
metaInstanceCheckIntervalSeconds: number;
3531
preparingPhaseSeconds: number;
3632
buildingPhaseSeconds: number;
3733
stoppingPhaseSeconds: number;

components/ws-manager-bridge/src/container-module.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ import {
2929
import { ClusterService, ClusterServiceServer } from "./cluster-service-server";
3030
import { IAnalyticsWriter } from "@gitpod/gitpod-protocol/lib/analytics";
3131
import { newAnalyticsWriterFromEnv } from "@gitpod/gitpod-protocol/lib/util/analytics";
32-
import { MetaInstanceController } from "./meta-instance-controller";
3332
import { IClientCallMetrics } from "@gitpod/content-service/lib/client-call-metrics";
3433
import { PrometheusClientCallMetrics } from "@gitpod/gitpod-protocol/lib/messaging/client-call-metrics";
3534
import { PreparingUpdateEmulator, PreparingUpdateEmulatorFactory } from "./preparing-update-emulator";
@@ -44,8 +43,6 @@ export const containerModule = new ContainerModule((bind) => {
4443

4544
bind(BridgeController).toSelf().inSingletonScope();
4645

47-
bind(MetaInstanceController).toSelf().inSingletonScope();
48-
4946
bind(PrometheusClientCallMetrics).toSelf().inSingletonScope();
5047
bind(IClientCallMetrics).to(PrometheusClientCallMetrics).inSingletonScope();
5148
bind(IWorkspaceManagerClientCallMetrics).toService(IClientCallMetrics);

components/ws-manager-bridge/src/main.ts

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ import { TypeORM } from "@gitpod/gitpod-db/lib/typeorm/typeorm";
1414
import { TracingManager } from "@gitpod/gitpod-protocol/lib/util/tracing";
1515
import { ClusterServiceServer } from "./cluster-service-server";
1616
import { BridgeController } from "./bridge-controller";
17-
import { MetaInstanceController } from "./meta-instance-controller";
1817

1918
log.enableJSONLogging("ws-manager-bridge", undefined, LogrusLogLevel.getFromEnv());
2019

@@ -49,9 +48,6 @@ export const start = async (container: Container) => {
4948
const clusterServiceServer = container.get<ClusterServiceServer>(ClusterServiceServer);
5049
await clusterServiceServer.start();
5150

52-
const metaInstanceController = container.get<MetaInstanceController>(MetaInstanceController);
53-
metaInstanceController.start();
54-
5551
process.on("SIGTERM", async () => {
5652
log.info("SIGTERM received, stopping");
5753
bridgeController.dispose();

components/ws-manager-bridge/src/meta-instance-controller.ts

Lines changed: 0 additions & 103 deletions
This file was deleted.

install/installer/pkg/components/ws-manager-bridge/configmap.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,10 @@ func configmap(ctx *common.RenderContext) ([]runtime.Object, error) {
2525
Host: "localhost",
2626
},
2727
Timeouts: Timeouts{
28-
MetaInstanceCheckIntervalSeconds: 60,
29-
PreparingPhaseSeconds: 3600,
30-
BuildingPhaseSeconds: 3600,
31-
StoppingPhaseSeconds: 3600,
32-
UnknownPhaseSeconds: 600,
28+
PreparingPhaseSeconds: 3600,
29+
BuildingPhaseSeconds: 3600,
30+
StoppingPhaseSeconds: 3600,
31+
UnknownPhaseSeconds: 600,
3332
},
3433
EmulatePreparingIntervalSeconds: 10,
3534
StaticBridges: WSManagerList(ctx),

install/installer/pkg/components/ws-manager-bridge/types.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ type Configuration struct {
1212
WSClusterDBReconcileIntervalSeconds int32 `json:"wsClusterDBReconcileIntervalSeconds"`
1313
ControllerIntervalSeconds int32 `json:"controllerIntervalSeconds"`
1414
ControllerMaxDisconnectSeconds int32 `json:"controllerMaxDisconnectSeconds"`
15-
MaxTimeToRunningPhaseSeconds int32 `json:"maxTimeToRunningPhaseSeconds"`
1615
EmulatePreparingIntervalSeconds int32 `json:"emulatePreparingIntervalSeconds"`
1716
Timeouts Timeouts `json:"timeouts"`
1817
}
@@ -23,11 +22,10 @@ type ClusterService struct {
2322
}
2423

2524
type Timeouts struct {
26-
MetaInstanceCheckIntervalSeconds int32 `json:"metaInstanceCheckIntervalSeconds"`
27-
PreparingPhaseSeconds int32 `json:"preparingPhaseSeconds"`
28-
StoppingPhaseSeconds int32 `json:"stoppingPhaseSeconds"`
29-
BuildingPhaseSeconds int32 `json:"buildingPhaseSeconds"`
30-
UnknownPhaseSeconds int32 `json:"unknownPhaseSeconds"`
25+
PreparingPhaseSeconds int32 `json:"preparingPhaseSeconds"`
26+
BuildingPhaseSeconds int32 `json:"buildingPhaseSeconds"`
27+
StoppingPhaseSeconds int32 `json:"stoppingPhaseSeconds"`
28+
UnknownPhaseSeconds int32 `json:"unknownPhaseSeconds"`
3129
}
3230

3331
// WorkspaceCluster from components/gitpod-protocol/src/workspace-cluster.ts

0 commit comments

Comments
 (0)