Skip to content

Allow restarting workspaces with forceDefaultImage=true even when another instance is already running #3993

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions components/dashboard/src/start/StartWorkspace.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ export default class StartWorkspace extends React.Component<StartWorkspaceProps,
// Preparing means that we haven't actually started the workspace instance just yet, but rather
// are still preparing for launch. This means we're building the Docker image for the workspace.
case "preparing":
return <ImageBuildView workspaceId={this.state.workspaceInstance.workspaceId} onStartWithDefaultImage={() => this.startWorkspace(true, true)} />;
return <ImageBuildView workspaceId={this.state.workspaceInstance.workspaceId} onStartWithDefaultImage={e => { (e.target as HTMLButtonElement).disabled = true; this.startWorkspace(true, true); }} />;

// Pending means the workspace does not yet consume resources in the cluster, but rather is looking for
// some space within the cluster. If for example the cluster needs to scale up to accommodate the
Expand Down Expand Up @@ -268,7 +268,7 @@ export default class StartWorkspace extends React.Component<StartWorkspaceProps,
case "stopped":
phase = StartPhase.Stopped;
if (this.state.hasImageBuildLogs) {
return <ImageBuildView workspaceId={this.state.workspaceInstance.workspaceId} onStartWithDefaultImage={() => this.startWorkspace(true, true)} phase={phase} error={error} />;
return <ImageBuildView workspaceId={this.state.workspaceInstance.workspaceId} onStartWithDefaultImage={e => { (e.target as HTMLButtonElement).disabled = true; this.startWorkspace(true, true); }} phase={phase} error={error} />;
}
if (!isHeadless && this.state.workspaceInstance.status.conditions.timeout) {
title = 'Timed Out';
Expand Down Expand Up @@ -332,7 +332,7 @@ function PendingChangesDropdown(props: { workspaceInstance?: WorkspaceInstance }

interface ImageBuildViewProps {
workspaceId: string;
onStartWithDefaultImage: () => void;
onStartWithDefaultImage: (event: React.MouseEvent) => void;
phase?: StartPhase;
error?: StartWorkspaceError;
}
Expand Down
67 changes: 62 additions & 5 deletions components/server/src/workspace/gitpod-server-impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -396,17 +396,21 @@ export class GitpodServerImpl<Client extends GitpodClient, Server extends Gitpod
const user = this.checkAndBlockUser();
await this.checkTermsAcceptance();

log.info({ userId: user.id, workspaceId }, 'startWorkspace');
const logCtx = { userId: user.id, workspaceId };
log.info(logCtx, 'startWorkspace');

const mayStartPromise = this.mayStartWorkspace({ span }, user, this.workspaceDb.trace({ span }).findRegularRunningInstances(user.id));
const workspace = await this.internalGetWorkspace(workspaceId, this.workspaceDb.trace({ span }));
await this.guardAccess({ kind: "workspace", subject: workspace }, "get");

const runningInstance = await this.workspaceDb.trace({ span }).findRunningInstance(workspace.id);
if (runningInstance) {
// We already have a running workspace.
// Note: ownership doesn't matter here as this is basically a noop. It's not StartWorkspace's concern
// to guard workspace access - just to prevent non-owners from starting workspaces.
if (runningInstance && !options.forceDefaultImage) {
// We already have a running workspace, and we're not forcing the default image.
// Notes:
// * ownership doesn't matter here as this is basically a noop. It's not StartWorkspace's concern
// to guard workspace access - just to prevent non-owners from starting workspaces.
// * forceDefaultImage now always interrupts a previously-running instance, even if the previous
// instance already had options.forceDefaultImage === true.

await this.guardAccess({ kind: "workspaceInstance", subject: runningInstance, workspaceOwnerID: workspace.ownerId, workspaceIsShared: workspace.shareable || false }, "get");
return {
Expand All @@ -433,6 +437,14 @@ export class GitpodServerImpl<Client extends GitpodClient, Server extends Gitpod

await mayStartPromise;

if (runningInstance) {
// We already had a running workspace. This may happen if we're forcing the default image.
// In that case, we first stop the previous workspace, and wait for it to be completely gone.
await this.internalStopWorkspaceAndWaitForInstance({ span }, workspaceId, workspace.ownerId).catch(err => {
log.error(logCtx, "internalStopWorkspaceAndWaitForInstance error: ", err);
});
}

// at this point we're about to actually start a new workspace
return await this.workspaceStarter.startWorkspace({ span }, workspace, user, await envVars, {
forceDefaultImage: !!options.forceDefaultImage
Expand Down Expand Up @@ -469,6 +481,51 @@ export class GitpodServerImpl<Client extends GitpodClient, Server extends Gitpod
}
}

/**
 * Stops the workspace and, if it had a running instance when this method was called,
 * waits until that specific instance reports the `stopped` phase.
 *
 * The instance-update listener is registered before the stop request is issued so that
 * the `stopped` update cannot be missed between the two steps.
 *
 * @param ctx trace context forwarded to the DB lookup and to `internalStopWorkspace`
 * @param workspaceId ID of the workspace to stop
 * @param ownerId forwarded to the instance-update listener and to `internalStopWorkspace`
 * @param policy forwarded unchanged to `internalStopWorkspace`
 * @throws Error if the previously running instance has not reached `stopped` within 5 minutes,
 *         or whatever `internalStopWorkspace` throws
 */
protected async internalStopWorkspaceAndWaitForInstance(ctx: TraceContext, workspaceId: string, ownerId?: string, policy?: StopWorkspacePolicy): Promise<void> {
    const runningInstance = await this.workspaceDb.trace(ctx).findRunningInstance(workspaceId);
    if (!runningInstance) {
        // No running instance was found, so there is nothing to wait for afterwards.
        await this.internalStopWorkspace(ctx, workspaceId, ownerId, policy);
        return;
    }

    // If a previous workspace instance is still running, we'll wait for it to be completely gone.
    let toDispose: Disposable | undefined;
    let timeout: NodeJS.Timeout | undefined;
    const instanceStoppedPromise = new Promise<void>((resolve, reject) => {
        toDispose = this.messageBusIntegration.listenForWorkspaceInstanceUpdates(
            ownerId,
            (_ctx: TraceContext, instance: WorkspaceInstance) => {
                // Only react to updates for exactly the instance we found running above.
                if (instance.workspaceId !== runningInstance.workspaceId || instance.id !== runningInstance.id) {
                    return;
                }
                if (instance.status.phase === 'stopped') {
                    resolve();
                }
            }
        );
        // Time out if the instance still isn't stopped after 5 minutes
        timeout = setTimeout(() => {
            reject(new Error('Timed out while waiting for previous workspace instance to stop'));
        }, 1000 * 60 * 5);
    });
    // Mark the promise as handled: the timeout may fire while we are still busy below (before the
    // final `await` attaches a handler), which would otherwise surface as an unhandled rejection.
    // The rejection is still observed by the `await instanceStoppedPromise` further down.
    instanceStoppedPromise.catch(() => undefined);

    try {
        if (!runningInstance.region) {
            // If there is an instance, but it doesn't have a region, it means that the instance wasn't actually
            // started yet, i.e. it didn't go through `actuallyStartWorkspace` yet.
            // Instead of calling ws-manager, we just wait for some time -- we would not know which workspace
            // manager to talk to after all. There's a chance for a race condition here (some other `server`
            // instance could be starting the instance currently), so in lieu of proper cross-server-instance-
            // locking we wait for 10 seconds to prevent that race.
            await new Promise<void>(resolve => setTimeout(resolve, 10 * 1000));
        }
        await this.internalStopWorkspace(ctx, workspaceId, ownerId, policy);
        await instanceStoppedPromise;
    } finally {
        // Always release the listener and the timer -- including when `internalStopWorkspace` throws,
        // in which case the wait above is never reached and nothing else would clean them up.
        if (toDispose) {
            toDispose.dispose();
        }
        if (timeout) {
            clearTimeout(timeout);
        }
    }
}

protected async internalStopWorkspace(ctx: TraceContext, workspaceId: string, ownerId?: string, policy?: StopWorkspacePolicy): Promise<void> {
const instance = await this.workspaceDb.trace(ctx).findRunningInstance(workspaceId);
if (!instance) {
Expand Down