@@ -418,7 +418,6 @@ export class WorkspaceManagerBridge implements Disposable {
418
418
clientProvider : ClientProvider ,
419
419
controllerIntervalSeconds : number ,
420
420
controllerMaxDisconnectSeconds : number ,
421
- maxTimeToRunningPhaseSeconds = 60 * 60 ,
422
421
) {
423
422
let disconnectStarted = Number . MAX_SAFE_INTEGER ;
424
423
this . disposables . push (
@@ -435,12 +434,7 @@ export class WorkspaceManagerBridge implements Disposable {
435
434
436
435
// Control running workspace instances against ws-manager
437
436
try {
438
- await this . controlRunningInstances (
439
- ctx ,
440
- runningInstances ,
441
- clientProvider ,
442
- maxTimeToRunningPhaseSeconds ,
443
- ) ;
437
+ await this . controlRunningInstances ( ctx , runningInstances , clientProvider ) ;
444
438
445
439
disconnectStarted = Number . MAX_SAFE_INTEGER ; // Reset disconnect period
446
440
} catch ( err ) {
@@ -453,6 +447,9 @@ export class WorkspaceManagerBridge implements Disposable {
453
447
}
454
448
}
455
449
450
+ // Control workspace instances against timeouts
451
+ await this . controlInstancesTimeouts ( ctx , runningInstances ) ;
452
+
456
453
log . debug ( "Done controlling instances." , { installation } ) ;
457
454
} catch ( err ) {
458
455
TraceContext . setError ( ctx , err ) ;
@@ -466,11 +463,14 @@ export class WorkspaceManagerBridge implements Disposable {
466
463
) ;
467
464
}
468
465
466
+ /**
467
+ * This method controls all instances that we have currently marked as "running" in the DB.
468
+ * It checks whether they are still running with their respective ws-manager, and if not, marks them as stopped in the DB.
469
+ */
469
470
protected async controlRunningInstances (
470
471
parentCtx : TraceContext ,
471
472
runningInstances : RunningWorkspaceInfo [ ] ,
472
473
clientProvider : ClientProvider ,
473
- maxTimeToRunningPhaseSeconds : number ,
474
474
) {
475
475
const installation = this . config . installation ;
476
476
@@ -488,12 +488,7 @@ export class WorkspaceManagerBridge implements Disposable {
488
488
489
489
for ( const [ instanceId , ri ] of runningInstancesIdx . entries ( ) ) {
490
490
const instance = ri . latestInstance ;
491
- if (
492
- ! (
493
- instance . status . phase === "running" ||
494
- durationLongerThanSeconds ( Date . parse ( instance . creationTime ) , maxTimeToRunningPhaseSeconds )
495
- )
496
- ) {
491
+ if ( instance . status . phase !== "running" ) {
497
492
log . debug ( { instanceId } , "Skipping instance" , {
498
493
phase : instance . status . phase ,
499
494
creationTime : instance . creationTime ,
@@ -517,6 +512,81 @@ export class WorkspaceManagerBridge implements Disposable {
517
512
}
518
513
}
519
514
515
/**
 * This method controls all instances of this installation during periods where ws-manager does not control them,
 * but we still have them in our DB. These phases currently are:
 *  - preparing
 *  - building
 * It also covers the following phases as a fallback, for when - for whatever reason - we no longer receive
 * updates from ws-manager:
 *  - stopping (fallback, in case ws-manager is stopped too early: configure to be >= than the ws-manager timeouts!)
 *  - unknown (fallback)
 *
 * Every entry of `runningInstances` is checked via `controlInstanceTimeouts`; all checks are started
 * before any of them is awaited. Errors are logged and recorded on the trace span, not rethrown.
 *
 * @param parentCtx trace context under which the "controlDBInstances" span is started
 * @param runningInstances the instances to check against their phase timeouts
 */
protected async controlInstancesTimeouts(parentCtx: TraceContext, runningInstances: RunningWorkspaceInfo[]) {
    const installation = this.config.installation;

    const span = TraceContext.startSpan("controlDBInstances", parentCtx);
    const ctx = { span };
    try {
        log.debug("Controlling DB instances...", { installation });

        // Kick off one timeout check per instance, then wait for all of them together.
        const pendingChecks: Promise<void>[] = [];
        for (const info of runningInstances) {
            pendingChecks.push(this.controlInstanceTimeouts(ctx, info));
        }
        await Promise.all(pendingChecks);

        log.debug("Done controlling DB instances.", { installation });
    } catch (err) {
        const errorDetails = { installation: this.cluster.name };
        log.error("Error while running controlDBInstances", err, errorDetails);
        TraceContext.setError(ctx, err);
    } finally {
        // Always close the span, whether or not the checks succeeded.
        span.finish();
    }
}
544
+
545
/**
 * Checks a single instance against the configured timeout of its current phase and marks it as stopped
 * once that timeout has elapsed.
 *
 * Timeouts for "preparing", "building" and "unknown" are measured from the instance's creation time;
 * the "stopping" timeout is measured from its stopping time. Instances in any other phase are left alone.
 * Errors raised while checking or marking are logged as warnings and recorded on the trace span, not rethrown.
 *
 * @param parentCtx trace context under which the "controlDBInstance" child context is created
 * @param info the workspace and its latest instance to check
 */
protected async controlInstanceTimeouts(parentCtx: TraceContext, info: RunningWorkspaceInfo) {
    const logContext: LogContext = {
        userId: info.workspace.ownerId,
        workspaceId: info.workspace.id,
        instanceId: info.latestInstance.id,
    };
    const ctx = TraceContext.childContext("controlDBInstance", parentCtx);
    try {
        const now = Date.now();
        const instance = info.latestInstance;
        const creationTime = new Date(instance.creationTime).getTime();
        // stoppingTime is only set once the instance entered the stopping state; fall back to "now" otherwise
        const stoppingTime = new Date(instance.stoppingTime ?? now).getTime();

        const timeouts = this.config.timeouts;
        // true once `limitSeconds` have passed since `sinceMillis`
        const hasElapsed = (sinceMillis: number, limitSeconds: number): boolean =>
            now >= sinceMillis + limitSeconds * 1000;

        const timedOutInPreparing = hasElapsed(creationTime, timeouts.preparingPhaseSeconds);
        const timedOutInStopping = hasElapsed(stoppingTime, timeouts.stoppingPhaseSeconds);
        const currentPhase = instance.status.phase;

        log.debug(logContext, "Controller: Checking for instances in the DB to mark as stopped", {
            creationTime,
            stoppingTime,
            timedOutInPreparing,
            timedOutInStopping,
            currentPhase,
        });

        // Pick the timeout that applies to the phase the instance is currently in.
        let timedOut: boolean;
        switch (currentPhase) {
            case "preparing":
                timedOut = timedOutInPreparing;
                break;
            case "building":
                timedOut = hasElapsed(creationTime, timeouts.buildingPhaseSeconds);
                break;
            case "stopping":
                timedOut = timedOutInStopping;
                break;
            case "unknown":
                timedOut = hasElapsed(creationTime, timeouts.unknownPhaseSeconds);
                break;
            default:
                // all other phases are not subject to these timeouts
                timedOut = false;
        }

        if (timedOut) {
            log.info(logContext, "Controller: Marking workspace instance as stopped", {
                creationTime,
                currentPhase,
            });
            await this.markWorkspaceInstanceAsStopped(ctx, info, new Date(now));
        }
    } catch (err) {
        log.warn(logContext, "Controller: Error while marking workspace instance as stopped", err);
        TraceContext.setError(ctx, err);
    } finally {
        // Always close the child span, whether or not the check succeeded.
        ctx.span.finish();
    }
}
589
+
520
590
protected async markWorkspaceInstanceAsStopped ( ctx : TraceContext , info : RunningWorkspaceInfo , now : Date ) {
521
591
const nowISO = now . toISOString ( ) ;
522
592
info . latestInstance . stoppingTime = nowISO ;
0 commit comments