Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
eb3c09a
feat(deploy-queue): add heartbeat to deployments table
Shugenya Dec 22, 2025
ca41c99
feat: add heartbeat command and heartbeat_timestamp field
Shugenya Dec 22, 2025
300a0cc
feat: auto-cancel deployments with stale heartbeats
Shugenya Dec 22, 2025
c83e255
feat(deploy-queue): Use HEARTBEAT_TIMEOUT constant in stale heartbeat…
Shugenya Dec 22, 2025
616f8d5
feat(deploy-queue): When cancelling stale deployments, print componen…
Shugenya Dec 22, 2025
e973cc4
feat(deploy-queue): Moved staleheartbeat query in a separate function
Shugenya Dec 30, 2025
c6198ad
feat(deploy-queue): Increase heartbeat timeout to 15 minutes
Shugenya Jan 5, 2026
a80fa85
feat(deploy-queue): Add retry to heartbeat update
Shugenya Jan 6, 2026
62ff26b
feat(deploy-queue): StaleHearbeatDeployment timestamp is mandatory
Shugenya Jan 12, 2026
12b77b7
feat(deploy-queue): Added deployment_id that triggers stale heartbeat…
Shugenya Jan 12, 2026
dc6e06a
feat(deploy-queue): Changed MissedTickBehavior for heartbeat interval
Shugenya Jan 12, 2026
f48d116
feat(deploy-queue): Increased number of repetition for retrying heart…
Shugenya Jan 12, 2026
dc5f093
Update deploy-queue/src/handler/mod.rs
Shugenya Jan 12, 2026
846177d
Update deploy-queue/src/lib.rs
Shugenya Jan 12, 2026
25043dd
feat(deploy-queue): Removed manual abort
Shugenya Jan 12, 2026
ffd1ec2
feat(deploy-queue): Cancel deployment manually if heartbeat loop exit…
Shugenya Jan 13, 2026
668ea34
feat(deploy-queue): Fixed clippy errors
Shugenya Jan 13, 2026
f806a1c
feat(deploy-queue): Updated readme with heartbeat
Shugenya Jan 14, 2026
5707cfd
feat(deploy-queue): Updated readme heartbeat for break-glass
Shugenya Jan 14, 2026
b8f4ac3
feat(deploy-queue): Updated readme heartbeat example
Shugenya Jan 14, 2026
6e057f8
feat(deploy-queue): Integration tests for heartbeat
Shugenya Jan 14, 2026
9fff2b3
feat(deploy-queue): Fixed compatibility test
Shugenya Jan 20, 2026
ea24669
chore(deploy-queue): version bump
Shugenya Jan 20, 2026
e6ca959
chore(deploy-queue): fix compatibility test
Shugenya Jan 20, 2026
b841721
chore(deploy-queue): updated readme - removed unclear part for heartbeat
Shugenya Jan 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions .github/workflows/deploy-queue.yml
Original file line number Diff line number Diff line change
Expand Up @@ -760,22 +760,22 @@ jobs:
mode: finish
deployment-id: ${{ steps.start-v0-5-2.outputs.deployment-id }}

# Test v0.6.0 with the updated schema
- name: Start deployment (v0.6.0)
id: start-v0-6-0
uses: neondatabase/dev-actions/deploy-queue@deploy-queue-v0.6.0
# Test v0.7.0 with the updated schema
- name: Start deployment (v0.7.0)
id: start-v0-7-0
uses: neondatabase/dev-actions/deploy-queue@deploy-queue-v0.7.0
with:
mode: start
environment: dev
cloud-provider: aws
region: compat-v0-6-0
region: compat-v0-7-0
cell-index: 1
component: compat-test
version: v0.6.0
note: Compatibility test - v0.6.0
version: v0.7.0
note: Compatibility test - v0.7.0

- name: Finish deployment (v0.6.0)
uses: neondatabase/dev-actions/deploy-queue@deploy-queue-v0.6.0
- name: Finish deployment (v0.7.0)
uses: neondatabase/dev-actions/deploy-queue@deploy-queue-v0.7.0
with:
mode: finish
deployment-id: ${{ steps.start-v0-6-0.outputs.deployment-id }}
deployment-id: ${{ steps.start-v0-7-0.outputs.deployment-id }}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion deploy-queue/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion deploy-queue/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "deploy-queue"
version = "0.7.2"
version = "0.8.0"
edition = "2024"

[dependencies]
Expand Down
11 changes: 11 additions & 0 deletions deploy-queue/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ deploy-queue start --environment prod --provider aws --region us-west-2 --cell-i
- Writes `deployment-id=<ID>` to `$GITHUB_OUTPUT` if running in GitHub Actions
- Blocks until all conflicting deployments complete
- Starts the deployment automatically when ready
- Runs a background heartbeat loop that:
- Sends periodic heartbeats during the wait
- Cancels deployments with stale heartbeats (if they block the current deployment)

### 2. Finish a Deployment

Expand Down Expand Up @@ -256,6 +259,14 @@ Buffer times are configured in the database:

These values are set in the initial migration and can be adjusted in the `environments` table.

### Heartbeats

The system supports heartbeats to detect stuck deployments:

- **Background heartbeats during `start`**: While waiting for blocking deployments, a background task updates the deployment's `heartbeat_timestamp`. If it fails to send heartbeats repeatedly, it cancels the deployment with a note. It also cancels other deployments with stale heartbeats that are blocking your deployment.
- **Manual heartbeats**: `deploy-queue heartbeat deployment <ID>` (or `heartbeat url <URL>`) runs a foreground loop that sends heartbeats until stopped.
- **Stale heartbeat detection**: deployments with a heartbeat older than the configured timeout (currently set to 15 minutes) are cancelled automatically when the heartbeat loop runs.

## Database Schema

The system uses two main tables:
Expand Down
9 changes: 9 additions & 0 deletions deploy-queue/migrations/0008_add_heartbeat_timestamp.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Add heartbeat_timestamp column to deployments table
-- This field tracks the last heartbeat timestamp for deployment liveness monitoring
ALTER TABLE
deployments
ADD
COLUMN heartbeat_timestamp TIMESTAMPTZ;

-- Add column comment
COMMENT ON COLUMN deployments.heartbeat_timestamp IS 'Last heartbeat timestamp for tracking deployment liveness';
19 changes: 19 additions & 0 deletions deploy-queue/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ pub enum Mode {
#[command(subcommand)]
entity: ListEntity,
},
/// Send periodic heartbeats for a deployment (runs until terminated)
Heartbeat {
#[command(subcommand)]
target: HeartbeatTarget,
},
}

#[derive(Parser, Clone)]
Expand Down Expand Up @@ -136,3 +141,17 @@ pub enum ListEntity {
environment: Environment,
},
}

// Subcommand of the `Heartbeat` mode: picks the deployment to send heartbeats
// for, either directly by its numeric ID or by looking it up from a URL.
// NOTE(review): neither field carries `#[arg(long)]`, so clap presumably parses
// both as positional arguments — confirm this matches the documented usage.
#[derive(Subcommand, Clone)]
pub enum HeartbeatTarget {
/// Send heartbeat for a deployment by ID
Deployment {
/// Deployment ID to send heartbeat for
deployment_id: i64,
},
/// Send heartbeat for a deployment by URL
Url {
/// GitHub Actions URL to find deployment
url: String,
},
}
3 changes: 3 additions & 0 deletions deploy-queue/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@ pub const CONNECTION_TIMEOUT: Duration = Duration::from_secs(10);
pub const ACQUIRE_TIMEOUT: Duration = Duration::from_secs(10);
pub const IDLE_TIMEOUT: Duration = Duration::from_secs(10);
pub const BUSY_RETRY: Duration = Duration::from_secs(5);
// 30 seconds. NOTE(review): presumably the period between two heartbeats sent
// by the heartbeat loop — the use site is not visible here; confirm.
pub const HEARTBEAT_INTERVAL: Duration = Duration::from_secs(30);
// NOTE(review): presumably the age beyond which a deployment's heartbeat is
// treated as stale — confirm at the call site.
pub const HEARTBEAT_TIMEOUT: Duration = Duration::from_secs(15 * 60); // 15 minutes
// 20 seconds. NOTE(review): looks like the time budget for a single heartbeat
// update attempt — the use site is not visible here; confirm.
pub const HEARTBEAT_UPDATE_TIMEOUT: Duration = Duration::from_secs(20);
72 changes: 71 additions & 1 deletion deploy-queue/src/handler/fetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use time::Duration;

use crate::{
cli::Environment,
model::{BlockingDeployment, Cell, Deployment, OutlierDeployment},
model::{BlockingDeployment, Cell, Deployment, OutlierDeployment, StaleHeartbeatDeployment},
util::duration::DurationExt,
};

Expand Down Expand Up @@ -52,6 +52,27 @@ pub async fn deployment(client: &Pool<Postgres>, deployment_id: i64) -> Result<O
}
}

/// Looks up the ID of the deployment associated with `url`.
///
/// If several deployments share the same URL, the one with the highest `id`
/// is returned (`ORDER BY id DESC LIMIT 1`).
///
/// Returns `Ok(None)` when no deployment has this URL.
///
/// # Errors
///
/// Returns an error if the database query fails.
pub async fn deployment_id_by_url(client: &Pool<Postgres>, url: &str) -> Result<Option<i64>> {
    // The SQL text is kept exactly as it was: `sqlx::query!` checks the
    // literal at compile time, so it is not reformatted here.
    let row = sqlx::query!(
        r#"
SELECT id
FROM deployments
WHERE url = $1
ORDER BY id DESC
LIMIT 1
"#,
        url
    )
    .fetch_optional(client)
    .await?;

    Ok(row.map(|row| row.id))
}

pub async fn blocking_deployments(
client: &Pool<Postgres>,
deployment_id: i64,
Expand Down Expand Up @@ -111,6 +132,55 @@ pub async fn blocking_deployments(
Ok(blocking_deployments)
}

/// Fetches every deployment whose last heartbeat is older than `timeout`.
///
/// Only deployments that have a heartbeat recorded and are neither finished
/// nor cancelled are considered. The query has no `ORDER BY`, so the order of
/// the returned deployments is whatever the database yields.
///
/// # Errors
///
/// Returns an error if `timeout` cannot be converted to a Postgres interval,
/// if the query fails, or if a returned row is missing its heartbeat
/// timestamp or elapsed time (or the elapsed time cannot be converted).
pub async fn stale_heartbeat_deployments(
    client: &Pool<Postgres>,
    timeout: std::time::Duration,
) -> Result<Vec<StaleHeartbeatDeployment>> {
    let max_age = timeout.to_pg_interval()?;
    let records = sqlx::query!(
        r#"
SELECT
id,
component,
version,
heartbeat_timestamp,
NOW() - heartbeat_timestamp AS time_since_heartbeat
FROM deployments
WHERE heartbeat_timestamp IS NOT NULL
AND finish_timestamp IS NULL
AND cancellation_timestamp IS NULL
AND heartbeat_timestamp < NOW() - $1::interval
"#,
        max_age
    )
    .fetch_all(client)
    .await?;

    let mut stale = Vec::with_capacity(records.len());
    for record in records {
        // Both columns arrive as `Option`s; the WHERE clause filters out NULL
        // heartbeats, so a `None` here is reported as an error.
        let heartbeat_timestamp = record
            .heartbeat_timestamp
            .context("heartbeat_timestamp should not be NULL")?;
        let elapsed = record
            .time_since_heartbeat
            .context("time_since_heartbeat should not be NULL")?;
        let time_since_heartbeat = elapsed
            .to_duration()
            .context("Failed to convert time_since_heartbeat")?;

        stale.push(StaleHeartbeatDeployment {
            id: record.id,
            component: record.component,
            version: record.version,
            heartbeat_timestamp,
            time_since_heartbeat,
        });
    }

    Ok(stale)
}

pub async fn outlier_deployments(client: &Pool<Postgres>) -> Result<Vec<OutlierDeployment>> {
let rows = sqlx::query_file!("queries/active_outliers.sql")
.fetch_all(client)
Expand Down
Loading
Loading