Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions web/packages/teleterm/src/ui/ClusterLogout/ClusterLogout.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@ import DialogConfirmation, {
} from 'design/DialogConfirmation';
import { Cross } from 'design/Icon';
import { P } from 'design/Text/Text';
import { useAsync } from 'shared/hooks/useAsync';

import { useAppContext } from 'teleterm/ui/appContextProvider';
import { RootClusterUri } from 'teleterm/ui/uri';

import { useClusterLogout } from './useClusterLogout';
import { logoutWithCleanup } from './logoutWithCleanup';

export function ClusterLogout({
clusterUri,
Expand All @@ -41,9 +43,10 @@ export function ClusterLogout({
hidden?: boolean;
onClose(): void;
}) {
const { removeCluster, status, statusText } = useClusterLogout({
clusterUri,
});
const ctx = useAppContext();
const [{ status, statusText }, removeCluster] = useAsync(() =>
logoutWithCleanup(ctx, clusterUri)
);

async function removeClusterAndClose(): Promise<void> {
const [, err] = await removeCluster();
Expand Down
69 changes: 69 additions & 0 deletions web/packages/teleterm/src/ui/ClusterLogout/logoutWithCleanup.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/**
* Teleport
* Copyright (C) 2025 Gravitational, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

import Logger from 'teleterm/logger';
import { IAppContext } from 'teleterm/ui/types';
import { RootClusterUri, routing } from 'teleterm/ui/uri';

/** Cleans up all resources tied to the cluster and then performs the logout. */
export async function logoutWithCleanup(
  ctx: IAppContext,
  clusterUri: RootClusterUri
): Promise<void> {
  const logger = new Logger('logoutWithCleanup');

  // Fire-and-forget: this call checks for app updates and must not block the logout.
  ctx.mainProcessClient
    .maybeRemoveAppUpdatesManagingCluster(clusterUri)
    .catch(err => {
      logger.error('Failed to remove managing cluster', err);
    });

  // When logging out of the currently active workspace, switch away from it first.
  if (ctx.workspacesService.getRootClusterUri() === clusterUri) {
    const nextWorkspace = ctx.workspacesService
      .getConnectedWorkspacesClustersUri()
      .find(c => c !== clusterUri);
    await ctx.workspacesService.setActiveWorkspace(nextWorkspace ?? null);
  }

  // Connections depend on both the cluster and the workspace, so drop them first.
  ctx.connectionTracker.removeItemsBelongingToRootCluster(clusterUri);
  // The workspace depends on the cluster, so it goes next.
  ctx.workspacesService.removeWorkspace(clusterUri);

  // Kill the agent only after the workspace is removed. Removing the workspace
  // closes ssh tabs, which should terminate connections to the cluster from the
  // app; killing an agent that still has active ssh connections would otherwise
  // take a few seconds.
  await ctx.connectMyComputerService.killAgentAndRemoveData(clusterUri);

  await ctx.clustersService.removeClusterGateways(clusterUri);

  const parsed = routing.parseClusterUri(clusterUri);
  await ctx.mainProcessClient.removeKubeConfig({
    relativePath: parsed.params.rootClusterId,
    isDirectory: true,
  });

  // The cluster itself depends on nothing else, so it is removed last.
  await ctx.clustersService.logout(clusterUri);
}
74 changes: 0 additions & 74 deletions web/packages/teleterm/src/ui/ClusterLogout/useClusterLogout.ts

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ test('add cluster does not overwrite the existing cluster', async () => {
);
});

test('remove cluster', async () => {
test('remove gateways', async () => {
const { removeGateway } = getClientMocks();
const service = createService({ removeGateway });
const gatewayFromRootCluster = makeDatabaseGateway({
Expand Down Expand Up @@ -164,14 +164,11 @@ test('remove cluster', async () => {
]);
});

await service.removeClusterAndResources(clusterUri);
await service.removeClusterGateways(clusterUri);

expect(service.findCluster(clusterUri)).toBeUndefined();
expect(service.findCluster(leafClusterMock.uri)).toBeUndefined();
expect(service.state.gateways).toEqual(
new Map([[gatewayFromOtherCluster.uri, gatewayFromOtherCluster]])
);

expect(removeGateway).toHaveBeenCalledWith({
gatewayUri: gatewayFromRootCluster.uri,
});
Expand Down Expand Up @@ -222,8 +219,8 @@ test('logout from cluster', async () => {

expect(logout).toHaveBeenCalledWith({ clusterUri });
expect(removeCluster).toHaveBeenCalledWith({ clusterUri });
expect(service.findCluster(clusterMock.uri).connected).toBe(false);
expect(service.findCluster(leafClusterMock.uri).connected).toBe(false);
expect(service.findCluster(clusterMock.uri)).toBeUndefined();
expect(service.findCluster(leafClusterMock.uri)).toBeUndefined();
});

test('create a gateway', async () => {
Expand Down
44 changes: 10 additions & 34 deletions web/packages/teleterm/src/ui/services/clusters/clustersService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,7 @@ export class ClustersService extends ImmutableStore<ClustersServiceState> {
return cluster;
}

/**
* Logs out of the cluster and removes the profile.
* Does not remove the cluster from the state, but sets the cluster and its leafs as disconnected.
* It needs to be done, because some code can operate on the cluster the intermediate period between logout
* and actually removing it from the state.
* A code that operates on that intermediate state is in `useClusterLogout.tsx`.
* After invoking `logout()`, it looks for the next workspace to switch to. If we hadn't marked the cluster as disconnected,
* the method might have returned us the same cluster we wanted to log out of.
*/
/** Logs out of the cluster. */
async logout(clusterUri: uri.RootClusterUri) {
// TODO(gzdunek): logout and removeCluster should be combined into a single action in tshd
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To preserve the current behavior, I'd need to move both logout and removeCluster methods. That separation feels like a poor API design and made me revisit the original PR #24978 that introduced it.

As far as I remember that PR didn't introduce logout & removeCluster. There are two separate RPCs for that because in the alpha version of Connect it was possible to log out of a cluster without removing it from the list of clusters in the app. Similar to how you can disconnect a gateway and only then remove it from the connections.

Though I think I see what you mean in the context of #24978 splitting the logout sequence into first changing connected to false and actually removing the cluster from the state at the very end.

Even today, if you remove a profile from disk and then call ClustersService.syncRootClustersAndCatchErrors, the app will likely crash because the cluster is suddenly missing.

I understand this becomes a larger concern when ~/.tsh sharing gets implemented, right? Because at the moment I don't think there are many opportunities to trigger ClustersService.syncRootClustersAndCatchErrors beyond the app start, but looking at its callsites what you described is technically possible.

Most usages already perform a null check, so it was more consistent and easier to apply the same pattern to the remaining cases.

That was surprising to me because if I had to bet I wouldn't have said that this is the case. 😅 I think in my head I've always assumed that we've had this sweet little invariant where the existence of a workspace at least implies that a root cluster is available.


I don't know, I'm not entirely opposed to this change, it just feels like a big departure from something I've always assumed was invariant, so I do feel a bit uneasy about it.

Perhaps we should document functions returning Cluster from ClustersService to note that they might return no cluster?

Copy link
Copy Markdown
Contributor Author

@gzdunek gzdunek Sep 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I remember that PR didn't introduce logout & removeCluster. There are two separate RPCs for that because in the alpha version of Connect it was possible to log out of a cluster without removing it from the list of clusters in the app

Yeah, I meant the methods in ClustersService, not the tshd RPCs. Before that PR, the logout sequence started from the logout in tshd and removing the ClustersService state. In the PR, we switched to removing the state at the very end.

Tbh, even the fix that we added could be done easier. In the comment for ClustersService.logout we said:

A code that operates on that intermediate state is in useClusterLogout.tsx.
After invoking logout(), it looks for the next workspace to switch to. If we hadn't marked the cluster as disconnected, the method might have returned us the same cluster we wanted to log out of.

We could as well explicitly filter out that cluster when looking for the next connected workspace in useClusterLogout :)

I understand this becomes a larger concern when ~/.tsh sharing gets implemented, right? Because at the moment I don't think there are many opportunities to trigger ClustersService.syncRootClustersAndCatchErrors beyond the app start, but looking at its callsites what you described is technically possible.

This change isn't strictly necessary for sharing ~/.tsh, but having a single method helps make the logic a bit cleaner.
I assume that in the ideal world, it would work like that:

  1. The profile watcher detects a logout.
  2. It calls logout on the cluster service (in the main process) to update the internal state.
  3. It sends a request to the renderer to clean up its local state (or to multiple renderers in theory).

It still could be four steps (as we have it today), where step 2 only calls tshd and sets .connected = false, and a separate step 4 actually removes the cluster, but that's a tighter coupling between the renderer and main process than seems necessary.

One alternative is switching steps 2 and 3, so the cluster is removed at the very end. That way, it would be removed after the workspace.
However, this still doesn’t address the other issue: ClustersService.syncRootClustersAndCatchError can be triggered beyond just the app start, which could cause a mismatch between workspaces and clusters.
So maybe it’s cleaner to have the null checks, unless we can guarantee that these stores are always in sync (or alternatively, prevent this function from being called beyond app initialization).

But hmm, now that I think of it, maybe it actually has more sense to switch the steps? So the renderer first needs to remove the workspace and other dependencies and then we attempt to logout in tsh and remove the cluster (and we forget about ClustersService.syncRootClustersAndCatchError).

That was surprising to me because if I had to bet I wouldn't have said that this is the case. 😅

Maybe it wasn't the majority, but we did have 17 places with null checks and something around that I added in this PR.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After discussing it through DMs with Rafał, we decided to switch the steps and perform the logout at the end of the logout sequence.
It makes more sense this way, since a cluster can exist without a workspace, but not the other way around.
If the logout in tshd fails, the app will remain usable. The cluster will still appear in the profile selector, allowing the user to retry the logout or open a new workspace for it.

To address the types issues, we should pass a cluster through the workspace context, so that we won't need all these null checks.
When it comes to ClustersService.syncRootClustersAndCatchError, it should be called only once, before creating the workspaces. I left a TODO item to fix the one incorrect usage.

await this.client.logout({ clusterUri });
Expand All @@ -97,7 +89,7 @@ export class ClustersService extends ImmutableStore<ClustersServiceState> {
this.setState(draft => {
draft.clusters.forEach(cluster => {
if (routing.belongsToProfile(clusterUri, cluster.uri)) {
cluster.connected = false;
draft.clusters.delete(cluster.uri);
}
});
});
Expand Down Expand Up @@ -192,6 +184,13 @@ export class ClustersService extends ImmutableStore<ClustersServiceState> {
]);
}

/**
* Synchronizes root clusters.
*
* This should only be called before creating workspaces.
* If called afterward, a cluster might be removed without first removing
* its associated workspace, resulting in an invalid state.
*/
async syncRootClustersAndCatchErrors(abortSignal?: AbortSignal) {
let clusters: Cluster[];

Expand Down Expand Up @@ -315,22 +314,9 @@ export class ClustersService extends ImmutableStore<ClustersServiceState> {
return response;
}

/** Removes cluster, its leafs and other resources. */
async removeClusterAndResources(clusterUri: uri.RootClusterUri) {
this.setState(draft => {
draft.clusters.forEach(cluster => {
if (routing.belongsToProfile(clusterUri, cluster.uri)) {
draft.clusters.delete(cluster.uri);
}
});
});
await this.removeClusterKubeConfigs(clusterUri);
await this.removeClusterGateways(clusterUri);
}

// TODO(ravicious): Create a single RPC for this rather than sending a separate request for each
// gateway.
private async removeClusterGateways(clusterUri: uri.RootClusterUri) {
async removeClusterGateways(clusterUri: uri.RootClusterUri) {
for (const [, gateway] of this.state.gateways) {
if (routing.belongsToProfile(clusterUri, gateway.targetUri)) {
try {
Expand Down Expand Up @@ -512,16 +498,6 @@ export class ClustersService extends ImmutableStore<ClustersServiceState> {
return this.getClusters().filter(c => !c.leaf);
}

async removeClusterKubeConfigs(clusterUri: string): Promise<void> {
const {
params: { rootClusterId },
} = routing.parseClusterUri(clusterUri);
return this.mainProcessClient.removeKubeConfig({
relativePath: rootClusterId,
isDirectory: true,
});
}

useState() {
return useStore(this).state;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ export class WorkspacesService extends ImmutableStore<WorkspacesState> {
}

if (cluster.profileStatusError) {
// TODO(gzdunek): We should only sync the target cluster, not all of them.
await this.clustersService.syncRootClustersAndCatchErrors(abortSignal);
// Update the cluster.
cluster = this.clustersService.findCluster(clusterUri);
Expand Down
Loading