Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 52 additions & 18 deletions sled-agent/src/common/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,26 +116,45 @@ impl ObservedPropolisState {
}
}

/// Newtype to facilitate conversions from Propolis states to external API
/// states.
pub struct InstanceState(pub ApiInstanceState);
/// The set of instance states that sled agent can publish to Nexus. This is
/// a subset of the instance states Nexus knows about: the Creating and
/// Destroyed states are reserved for Nexus to use for instances that are being
/// created for the very first time or have been explicitly deleted.
pub enum PublishedInstanceState {
Starting,
Running,
Stopping,
Stopped,
Rebooting,
Migrating,
Repairing,
Failed,
}

impl From<PropolisInstanceState> for InstanceState {
impl From<PropolisInstanceState> for PublishedInstanceState {
fn from(value: PropolisInstanceState) -> Self {
InstanceState(match value {
match value {
// From an external perspective, the instance has already been
// created. Creating the propolis instance is an internal detail and
// happens every time we start the instance, so we map it to
// "Starting" here.
PropolisInstanceState::Creating
| PropolisInstanceState::Starting => ApiInstanceState::Starting,
PropolisInstanceState::Running => ApiInstanceState::Running,
PropolisInstanceState::Stopping => ApiInstanceState::Stopping,
PropolisInstanceState::Stopped => ApiInstanceState::Stopped,
PropolisInstanceState::Rebooting => ApiInstanceState::Rebooting,
PropolisInstanceState::Migrating => ApiInstanceState::Migrating,
PropolisInstanceState::Repairing => ApiInstanceState::Repairing,
PropolisInstanceState::Failed => ApiInstanceState::Failed,
| PropolisInstanceState::Starting => {
PublishedInstanceState::Starting
}
PropolisInstanceState::Running => PublishedInstanceState::Running,
PropolisInstanceState::Stopping => PublishedInstanceState::Stopping,
PropolisInstanceState::Stopped => PublishedInstanceState::Stopped,
PropolisInstanceState::Rebooting => {
PublishedInstanceState::Rebooting
}
PropolisInstanceState::Migrating => {
PublishedInstanceState::Migrating
}
PropolisInstanceState::Repairing => {
PublishedInstanceState::Repairing
}
PropolisInstanceState::Failed => PublishedInstanceState::Failed,
// NOTE: This is a bit of an odd one - we intentionally do *not*
// translate the "destroyed" propolis state to the destroyed instance
// API state.
Expand All @@ -145,8 +164,23 @@ impl From<PropolisInstanceState> for InstanceState {
// should be torn down. Instead, it implies that the Propolis service
// should be stopped, but the VM could be allocated to a different
// machine.
PropolisInstanceState::Destroyed => ApiInstanceState::Stopped,
})
PropolisInstanceState::Destroyed => PublishedInstanceState::Stopped,
}
}
}

impl From<PublishedInstanceState> for ApiInstanceState {
fn from(value: PublishedInstanceState) -> Self {
match value {
PublishedInstanceState::Starting => ApiInstanceState::Starting,
PublishedInstanceState::Running => ApiInstanceState::Running,
PublishedInstanceState::Stopping => ApiInstanceState::Stopping,
PublishedInstanceState::Stopped => ApiInstanceState::Stopped,
PublishedInstanceState::Rebooting => ApiInstanceState::Rebooting,
PublishedInstanceState::Migrating => ApiInstanceState::Migrating,
PublishedInstanceState::Repairing => ApiInstanceState::Repairing,
PublishedInstanceState::Failed => ApiInstanceState::Failed,
}
}
}

Expand Down Expand Up @@ -244,7 +278,7 @@ impl InstanceStates {
None
};

let next_state = InstanceState::from(observed.instance_state).0;
let next_state = PublishedInstanceState::from(observed.instance_state);
match observed.migration_status {
// Case 3: Update normally if there is no migration in progress or
// the current migration is unrecognized or in flight.
Expand Down Expand Up @@ -294,8 +328,8 @@ impl InstanceStates {

// Transitions to a new InstanceState value, updating the timestamp and
// generation number.
pub(crate) fn transition(&mut self, next: ApiInstanceState) {
self.current.run_state = next;
pub(crate) fn transition(&mut self, next: PublishedInstanceState) {
self.current.run_state = next.into();
self.current.gen = self.current.gen.next();
self.current.time_updated = Utc::now();
}
Expand Down
41 changes: 25 additions & 16 deletions sled-agent/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

use crate::common::instance::{
Action as InstanceAction, InstanceStates, ObservedPropolisState,
PublishedInstanceState,
};
use crate::instance_manager::InstanceTicket;
use crate::nexus::LazyNexusClient;
Expand All @@ -27,7 +28,6 @@ use illumos_utils::zfs::ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT;
use illumos_utils::zone::{AddressRequest, PROPOLIS_ZONE_PREFIX};
use omicron_common::address::NEXUS_INTERNAL_PORT;
use omicron_common::address::PROPOLIS_PORT;
use omicron_common::api::external::InstanceState;
use omicron_common::api::internal::nexus::InstanceRuntimeState;
use omicron_common::api::internal::shared::{
NetworkInterface, SourceNatConfig,
Expand Down Expand Up @@ -334,18 +334,21 @@ impl InstanceInner {
) -> Result<Reaction, Error> {
info!(self.log, "Observing new propolis state: {:?}", state);

// The instance might have been rudely terminated between the time the
// call to Propolis returned and the time the instance lock was
// acquired for this call. If that happened, do not publish the Propolis
// state; simply remain in the Destroyed state.
//
// Returning the `Terminate` action is OK because terminating a
// previously-terminated instance is a no-op.
if self.state.current().run_state == InstanceState::Destroyed {
// This instance may no longer have a Propolis zone if it was rudely
// terminated between the time the call to Propolis returned and the
// time this thread acquired the instance lock and entered this routine.
// If the Propolis zone is gone, do not publish the Propolis state;
// instead, maintain whatever state was published when the zone was
// destroyed.
if self.running_state.is_none() {
info!(
self.log,
"Ignoring new propolis state: instance is already destroyed"
"Ignoring new propolis state: Propolis is already destroyed"
);

// Return the Terminate action so that the caller will cleanly
// cease to monitor this Propolis. Note that terminating an instance
// that's already terminated is a no-op.
return Ok(Reaction::Terminate);
}

Expand Down Expand Up @@ -681,7 +684,7 @@ impl Instance {
// logically running (on the source) while the target Propolis
// is being launched.
if migration_params.is_none() {
inner.state.transition(InstanceState::Starting);
inner.state.transition(PublishedInstanceState::Starting);
if let Err(e) = inner.publish_state_to_nexus().await {
break 'setup Err(e);
}
Expand Down Expand Up @@ -713,7 +716,7 @@ impl Instance {
// start a migration target simply leaves the VM running untouched
// on the source.
if migration_params.is_none() && setup_result.is_err() {
inner.state.transition(InstanceState::Failed);
inner.state.transition(PublishedInstanceState::Failed);
inner.publish_state_to_nexus().await?;
}
setup_result?;
Expand Down Expand Up @@ -755,16 +758,22 @@ impl Instance {
// "Destroyed" state and return it to the caller.
if inner.running_state.is_none() {
inner.terminate().await?;
(None, Some(InstanceState::Destroyed))
(None, Some(PublishedInstanceState::Stopped))
} else {
(Some(PropolisRequest::Stop), Some(InstanceState::Stopping))
(
Some(PropolisRequest::Stop),
Some(PublishedInstanceState::Stopping),
)
}
}
InstanceStateRequested::Reboot => {
if inner.running_state.is_none() {
return Err(Error::InstanceNotRunning(*inner.id()));
}
(Some(PropolisRequest::Reboot), Some(InstanceState::Rebooting))
(
Some(PropolisRequest::Reboot),
Some(PublishedInstanceState::Rebooting),
)
}
};

Expand Down Expand Up @@ -995,7 +1004,7 @@ impl Instance {
pub async fn terminate(&self) -> Result<InstanceRuntimeState, Error> {
let mut inner = self.inner.lock().await;
inner.terminate().await?;
inner.state.transition(InstanceState::Destroyed);
inner.state.transition(PublishedInstanceState::Stopped);
Ok(inner.state.current().clone())
}

Expand Down
6 changes: 3 additions & 3 deletions sled-agent/src/sim/collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -473,8 +473,8 @@ mod test {
assert_eq!(r1.gen, instance.object.current().gen);
assert!(rx.try_next().is_err());

// Stopping an instance that was never started synchronously destroys
// it.
// Stopping an instance that was never started synchronously marks it
// stopped.
let rprev = r1;
assert!(rprev.run_state.is_stopped());
let dropped =
Expand All @@ -484,7 +484,7 @@ mod test {
let rnext = instance.object.current();
assert!(rnext.gen > rprev.gen);
assert!(rnext.time_updated >= rprev.time_updated);
assert_eq!(rnext.run_state, InstanceState::Destroyed);
assert_eq!(rnext.run_state, InstanceState::Stopped);
assert!(rx.try_next().is_err());

logctx.cleanup_successful();
Expand Down
18 changes: 11 additions & 7 deletions sled-agent/src/sim/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

use super::simulatable::Simulatable;

use crate::common::instance::ObservedPropolisState;
use crate::common::instance::{ObservedPropolisState, PublishedInstanceState};
use crate::nexus::NexusClient;
use crate::params::{InstanceMigrationSourceParams, InstanceStateRequested};
use async_trait::async_trait;
Expand Down Expand Up @@ -137,7 +137,7 @@ impl SimInstanceInner {
// cold-booting a new VM (so that the VM appears to be
// starting while its Propolis process is being
// launched).
self.state.transition(ApiInstanceState::Starting);
self.state.transition(PublishedInstanceState::Starting);
self.queue_propolis_state(
PropolisInstanceState::Running,
);
Expand Down Expand Up @@ -166,10 +166,10 @@ impl SimInstanceInner {
InstanceStateRequested::Stopped => {
match self.next_resting_state() {
ApiInstanceState::Creating => {
self.state.transition(ApiInstanceState::Destroyed);
self.state.transition(PublishedInstanceState::Stopped);
}
ApiInstanceState::Running => {
self.state.transition(ApiInstanceState::Stopping);
self.state.transition(PublishedInstanceState::Stopping);
self.queue_propolis_state(
PropolisInstanceState::Stopping,
);
Expand Down Expand Up @@ -202,7 +202,8 @@ impl SimInstanceInner {
!= ApiInstanceState::Rebooting
&& !self.reboot_pending()
{
self.state.transition(ApiInstanceState::Rebooting);
self.state
.transition(PublishedInstanceState::Rebooting);
self.queue_propolis_state(
PropolisInstanceState::Rebooting,
);
Expand Down Expand Up @@ -293,7 +294,10 @@ impl SimInstanceInner {
self.state.current().run_state
} else {
if let Some(last_state) = self.last_queued_instance_state() {
crate::common::instance::InstanceState::from(last_state).0
crate::common::instance::PublishedInstanceState::from(
last_state,
)
.into()
} else {
self.state.current().run_state
}
Expand All @@ -314,7 +318,7 @@ impl SimInstanceInner {
/// Simulates rude termination by moving the instance to the Destroyed state
/// immediately and clearing the queue of pending state transitions.
fn terminate(&mut self) -> InstanceRuntimeState {
self.state.transition(ApiInstanceState::Destroyed);
self.state.transition(PublishedInstanceState::Stopped);
self.queue.clear();
self.state.current().clone()
}
Expand Down