From e66837a789a8f7f9c585789cf18b82ec4c168a59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=86=AC=E7=A5=89?= Date: Mon, 9 Mar 2026 17:10:00 +0800 Subject: [PATCH 1/6] feat(sdks): add pool domain models and config (OSEP-0005) --- .../domain/exceptions/SandboxException.kt | 48 +++++ .../sandbox/domain/pool/AcquirePolicy.kt | 31 ++++ .../sandbox/domain/pool/EmptyBehavior.kt | 31 ++++ .../sandbox/domain/pool/PoolConfig.kt | 173 ++++++++++++++++++ .../sandbox/domain/pool/PoolCreationSpec.kt | 134 ++++++++++++++ .../sandbox/domain/pool/PoolSnapshot.kt | 30 +++ .../sandbox/domain/pool/PoolState.kt | 40 ++++ .../sandbox/domain/pool/PoolStateStore.kt | 79 ++++++++ .../sandbox/domain/pool/StoreCounters.kt | 26 +++ 9 files changed, 592 insertions(+) create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/AcquirePolicy.kt create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/EmptyBehavior.kt create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolConfig.kt create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolCreationSpec.kt create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolSnapshot.kt create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolState.kt create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolStateStore.kt create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/StoreCounters.kt diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt index da033beb..38c711b7 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt @@ -88,6 +88,45 @@ class InvalidArgumentException( error = SandboxError(SandboxError.INVALID_ARGUMENT, message), ) +/** + * Defines standardized common error codes and messages for the Sandbox SDK. + */ +/** + * Thrown when acquire is called with FAIL_FAST policy and no idle sandbox is available. + */ +class PoolEmptyException( + message: String? = "No idle sandbox available and policy is FAIL_FAST", + cause: Throwable? = null, +) : SandboxException( + message = message, + cause = cause, + error = SandboxError(SandboxError.POOL_EMPTY, message), + ) + +/** + * Thrown when the pool state store is unavailable during idle take/put/lock operations. + */ +class PoolStateStoreUnavailableException( + message: String? = null, + cause: Throwable? = null, +) : SandboxException( + message = message, + cause = cause, + error = SandboxError(SandboxError.POOL_STATE_STORE_UNAVAILABLE, message), + ) + +/** + * Thrown when atomic take or lock-update conflicts occur in the state store. + */ +class PoolStateStoreContentionException( + message: String? = null, + cause: Throwable? = null, +) : SandboxException( + message = message, + cause = cause, + error = SandboxError(SandboxError.POOL_STATE_STORE_CONTENTION, message), + ) + /** * Defines standardized common error codes and messages for the Sandbox SDK. */ @@ -101,5 +140,14 @@ data class SandboxError( const val UNHEALTHY = "UNHEALTHY" const val INVALID_ARGUMENT = "INVALID_ARGUMENT" const val UNEXPECTED_RESPONSE = "UNEXPECTED_RESPONSE" + + /** Pool-specific: no idle sandbox and policy is FAIL_FAST. */ + const val POOL_EMPTY = "POOL_EMPTY" + + /** Pool state store unavailable during operations. */ + const val POOL_STATE_STORE_UNAVAILABLE = "POOL_STATE_STORE_UNAVAILABLE" + + /** Pool state store contention (atomic take or lock conflicts). */ + const val POOL_STATE_STORE_CONTENTION = "POOL_STATE_STORE_CONTENTION" } } diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/AcquirePolicy.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/AcquirePolicy.kt new file mode 100644 index 00000000..0f9d7906 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/AcquirePolicy.kt @@ -0,0 +1,31 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +/** + * Policy for acquire when the idle buffer is empty. + * + * - FAIL_FAST: throw [com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException] (POOL_EMPTY). + * - DIRECT_CREATE: attempt direct create via lifecycle API, then connect and return. + */ +enum class AcquirePolicy { + /** When no idle sandbox is available, fail immediately with POOL_EMPTY. */ + FAIL_FAST, + + /** When no idle sandbox is available, create a new sandbox via lifecycle API. */ + DIRECT_CREATE, +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/EmptyBehavior.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/EmptyBehavior.kt new file mode 100644 index 00000000..c4b9bde3 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/EmptyBehavior.kt @@ -0,0 +1,31 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +/** + * Behavior when the idle buffer is empty at acquire time. + * + * - FAIL_FAST: throw POOL_EMPTY. + * - DIRECT_CREATE: attempt direct create (default). + */ +enum class EmptyBehavior { + /** Throw POOL_EMPTY when no idle sandbox is available. */ + FAIL_FAST, + + /** Create a new sandbox via lifecycle API when no idle is available. */ + DIRECT_CREATE, +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolConfig.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolConfig.kt new file mode 100644 index 00000000..9ecde0bd --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolConfig.kt @@ -0,0 +1,173 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +import com.alibaba.opensandbox.sandbox.config.ConnectionConfig +import kotlin.math.ceil + +/** + * Configuration for a client-side sandbox pool. + * + * @property poolName User-defined name and namespace for this logical pool (required). + * @property ownerId Unique process identity for primary lock ownership (required in distributed mode). + * @property maxIdle Standby idle target/cap (required). + * @property warmupConcurrency Max concurrent creation workers during replenish (default: max(1, ceil(maxIdle * 0.2))). + * @property primaryLockTtl Lock TTL for distributed primary ownership (default: 60s). + * @property emptyBehavior Behavior when idle buffer is empty (default: DIRECT_CREATE). + * @property stateStore Injected [PoolStateStore] implementation (required). + * @property connectionConfig Connection config for lifecycle API (required). + * @property creationSpec Template for creating sandboxes (replenish and direct-create) (required). + * @property reconcileInterval Interval between reconcile ticks (default: 30s). + * @property degradedThreshold Consecutive create failures before transitioning to DEGRADED (default: 3). + * @property drainTimeout Max wait during graceful shutdown for in-flight ops (default: 30s). + */ +data class PoolConfig( + val poolName: String, + val ownerId: String, + val maxIdle: Int, + val warmupConcurrency: Int, + val primaryLockTtl: java.time.Duration, + val emptyBehavior: EmptyBehavior, + val stateStore: PoolStateStore, + val connectionConfig: ConnectionConfig, + val creationSpec: PoolCreationSpec, + val reconcileInterval: java.time.Duration, + val degradedThreshold: Int, + val drainTimeout: java.time.Duration, +) { + init { + require(poolName.isNotBlank()) { "poolName must not be blank" } + require(ownerId.isNotBlank()) { "ownerId must not be blank" } + require(maxIdle >= 0) { "maxIdle must be >= 0" } + require(warmupConcurrency > 0) { "warmupConcurrency must be positive" } + require(degradedThreshold > 0) { "degradedThreshold must be positive" } + require(!reconcileInterval.isNegative && !reconcileInterval.isZero) { "reconcileInterval must be positive" } + require(!primaryLockTtl.isNegative && !primaryLockTtl.isZero) { "primaryLockTtl must be positive" } + require(!drainTimeout.isNegative) { "drainTimeout must be non-negative" } + } + + companion object { + private val DEFAULT_RECONCILE_INTERVAL = java.time.Duration.ofSeconds(30) + private val DEFAULT_PRIMARY_LOCK_TTL = java.time.Duration.ofSeconds(60) + private const val DEFAULT_DEGRADED_THRESHOLD = 3 + private val DEFAULT_DRAIN_TIMEOUT = java.time.Duration.ofSeconds(30) + + @JvmStatic + fun builder(): Builder = Builder() + } + + class Builder { + private var poolName: String? = null + private var ownerId: String? = null + private var maxIdle: Int? = null + private var warmupConcurrency: Int? = null + private var primaryLockTtl: java.time.Duration = DEFAULT_PRIMARY_LOCK_TTL + private var emptyBehavior: EmptyBehavior = EmptyBehavior.DIRECT_CREATE + private var stateStore: PoolStateStore? = null + private var connectionConfig: ConnectionConfig? = null + private var creationSpec: PoolCreationSpec? = null + private var reconcileInterval: java.time.Duration = DEFAULT_RECONCILE_INTERVAL + private var degradedThreshold: Int = DEFAULT_DEGRADED_THRESHOLD + private var drainTimeout: java.time.Duration = DEFAULT_DRAIN_TIMEOUT + + fun poolName(poolName: String): Builder { + this.poolName = poolName + return this + } + + fun ownerId(ownerId: String): Builder { + this.ownerId = ownerId + return this + } + + fun maxIdle(maxIdle: Int): Builder { + this.maxIdle = maxIdle + return this + } + + fun warmupConcurrency(warmupConcurrency: Int): Builder { + this.warmupConcurrency = warmupConcurrency + return this + } + + fun primaryLockTtl(primaryLockTtl: java.time.Duration): Builder { + this.primaryLockTtl = primaryLockTtl + return this + } + + fun emptyBehavior(emptyBehavior: EmptyBehavior): Builder { + this.emptyBehavior = emptyBehavior + return this + } + + fun stateStore(stateStore: PoolStateStore): Builder { + this.stateStore = stateStore + return this + } + + fun connectionConfig(connectionConfig: ConnectionConfig): Builder { + this.connectionConfig = connectionConfig + return this + } + + fun creationSpec(creationSpec: PoolCreationSpec): Builder { + this.creationSpec = creationSpec + return this + } + + fun reconcileInterval(reconcileInterval: java.time.Duration): Builder { + this.reconcileInterval = reconcileInterval + return this + } + + fun degradedThreshold(degradedThreshold: Int): Builder { + this.degradedThreshold = degradedThreshold + return this + } + + fun drainTimeout(drainTimeout: java.time.Duration): Builder { + this.drainTimeout = drainTimeout + return this + } + + fun build(): PoolConfig { + val name = poolName ?: throw IllegalArgumentException("poolName is required") + val owner = ownerId ?: throw IllegalArgumentException("ownerId is required") + val max = maxIdle ?: throw IllegalArgumentException("maxIdle is required") + val store = stateStore ?: throw IllegalArgumentException("stateStore is required") + val conn = connectionConfig ?: throw IllegalArgumentException("connectionConfig is required") + val spec = creationSpec ?: throw IllegalArgumentException("creationSpec is required") + + val warmup = warmupConcurrency ?: ceil(max * 0.2).toInt().coerceAtLeast(1) + + return PoolConfig( + poolName = name, + ownerId = owner, + maxIdle = max, + warmupConcurrency = warmup, + primaryLockTtl = primaryLockTtl, + emptyBehavior = emptyBehavior, + stateStore = store, + connectionConfig = conn, + creationSpec = spec, + reconcileInterval = reconcileInterval, + degradedThreshold = degradedThreshold, + drainTimeout = drainTimeout, + ) + } + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolCreationSpec.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolCreationSpec.kt new file mode 100644 index 00000000..91d4f5dd --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolCreationSpec.kt @@ -0,0 +1,134 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy +import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxImageSpec +import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.Volume + +/** + * Template for creating sandboxes in the pool (replenish and direct-create). + * + * Pool always uses a fixed 24h timeout for created sandboxes; other parameters + * are taken from this spec. Defaults align with [com.alibaba.opensandbox.sandbox.Sandbox.Builder]. + * + * @property imageSpec Container image specification (required). + * @property entrypoint Entrypoint command (default: tail -f /dev/null). + * @property resource Resource limits (default: cpu=1, memory=2Gi). + * @property env Environment variables. + * @property metadata User-defined metadata. + * @property networkPolicy Optional outbound network policy. + * @property volumes Optional volume mounts. + */ +data class PoolCreationSpec( + val imageSpec: SandboxImageSpec, + val entrypoint: List = DEFAULT_ENTRYPOINT, + val resource: Map = DEFAULT_RESOURCE, + val env: Map = emptyMap(), + val metadata: Map = emptyMap(), + val networkPolicy: NetworkPolicy? = null, + val volumes: List? = null, +) { + companion object { + /** Default entrypoint: keep container running. */ + val DEFAULT_ENTRYPOINT: List = listOf("tail", "-f", "/dev/null") + + /** Default resource limits. */ + val DEFAULT_RESOURCE: Map = mapOf( + "cpu" to "1", + "memory" to "2Gi", + ) + + @JvmStatic + fun builder(): Builder = Builder() + } + + class Builder { + private var imageSpec: SandboxImageSpec? = null + private var entrypoint: List = DEFAULT_ENTRYPOINT + private var resource: Map = DEFAULT_RESOURCE + private var env: Map = emptyMap() + private var metadata: Map = emptyMap() + private var networkPolicy: NetworkPolicy? = null + private var volumes: List? = null + + fun imageSpec(imageSpec: SandboxImageSpec): Builder { + this.imageSpec = imageSpec + return this + } + + fun image(image: String): Builder { + this.imageSpec = SandboxImageSpec.builder().image(image).build() + return this + } + + fun entrypoint(entrypoint: List): Builder { + this.entrypoint = entrypoint + return this + } + + fun entrypoint(vararg entrypoint: String): Builder { + this.entrypoint = entrypoint.toList() + return this + } + + fun resource(resource: Map): Builder { + this.resource = resource + return this + } + + fun resource(configure: MutableMap.() -> Unit): Builder { + val map = DEFAULT_RESOURCE.toMutableMap() + map.configure() + this.resource = map + return this + } + + fun env(env: Map): Builder { + this.env = env + return this + } + + fun metadata(metadata: Map): Builder { + this.metadata = metadata + return this + } + + fun networkPolicy(networkPolicy: NetworkPolicy?): Builder { + this.networkPolicy = networkPolicy + return this + } + + fun volumes(volumes: List?): Builder { + this.volumes = volumes + return this + } + + fun build(): PoolCreationSpec { + val spec = imageSpec ?: throw IllegalArgumentException("PoolCreationSpec imageSpec (or image) must be specified") + return PoolCreationSpec( + imageSpec = spec, + entrypoint = entrypoint, + resource = resource, + env = env, + metadata = metadata, + networkPolicy = networkPolicy, + volumes = volumes, + ) + } + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolSnapshot.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolSnapshot.kt new file mode 100644 index 00000000..0bcccaf8 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolSnapshot.kt @@ -0,0 +1,30 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +/** + * Point-in-time snapshot of pool state for observability. + * + * @property state Current pool state (HEALTHY, DEGRADED, DRAINING, STOPPED). + * @property idleCount Number of idle sandboxes in the store. + * @property lastError Last error message if pool is DEGRADED or after failure; null otherwise. + */ +data class PoolSnapshot( + val state: PoolState, + val idleCount: Int, + val lastError: String? = null, +) diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolState.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolState.kt new file mode 100644 index 00000000..e23d5d1a --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolState.kt @@ -0,0 +1,40 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +/** + * High-level state of the sandbox pool. + * + * Transitions: + * - HEALTHY -> DEGRADED: consecutive create failures exceed threshold + * - DEGRADED -> HEALTHY: probe or create succeeds, failure counter resets + * - HEALTHY/DEGRADED -> DRAINING: shutdown(graceful=true) called + * - any -> STOPPED: shutdown(graceful=false) or drain completes + */ +enum class PoolState { + /** Pool is operating normally. */ + HEALTHY, + + /** Replenish is failing; backoff applied; acquire still served from existing idle. */ + DEGRADED, + + /** Graceful shutdown in progress; no new replenish, waiting for in-flight ops. */ + DRAINING, + + /** Pool is stopped; no acquire or replenish. */ + STOPPED, +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolStateStore.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolStateStore.kt new file mode 100644 index 00000000..7cf08082 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolStateStore.kt @@ -0,0 +1,79 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +import java.time.Duration +import java.time.Instant + +/** + * Abstraction for storing pool coordination state and idle sandbox membership. + * + * All operations are namespaced by [poolName]. Implementations must ensure: + * - Atomic take: one idle sandbox can only be taken by one acquire. + * - Idempotent put/remove for idle membership. + * - tryTakeIdle should prefer FIFO (oldest idle first) as best-effort. + * + * In distributed mode, only the current primary lock holder may execute + * reconcile maintenance writes (putIdle, reapExpiredIdle). Foreground + * acquire-path writes (tryTakeIdle, removeIdle) are allowed on all nodes. + */ +interface PoolStateStore { + /** + * Atomically removes and returns one idle sandbox ID for the pool, or null if none. + * Best-effort FIFO (oldest first). + */ + fun tryTakeIdle(poolName: String): String? + + /** + * Adds a sandbox ID to the idle set for the pool. + * Idempotent: duplicate put for same sandboxId leaves membership single-copy. + */ + fun putIdle(poolName: String, sandboxId: String) + + /** + * Removes a sandbox ID from the idle set. + * Idempotent: duplicate remove is no-op. + */ + fun removeIdle(poolName: String, sandboxId: String) + + /** + * Tries to acquire the primary (leader) lock for this pool. + * Best-effort mutually exclusive by poolName. Returns true if this node is now primary. + */ + fun tryAcquirePrimaryLock(poolName: String, ownerId: String, ttl: Duration): Boolean + + /** + * Renews the primary lock for the current owner. Non-owner renew is rejected. + */ + fun renewPrimaryLock(poolName: String, ownerId: String, ttl: Duration): Boolean + + /** + * Releases the primary lock for the given owner. + */ + fun releasePrimaryLock(poolName: String, ownerId: String) + + /** + * Removes expired idle entries. In-memory store performs sweep; TTL-backed stores may no-op. + */ + fun reapExpiredIdle(poolName: String, now: Instant) + + /** + * Returns a snapshot of counters for the pool (at least idle count). + * Eventually consistent for distributed stores. + */ + fun snapshotCounters(poolName: String): StoreCounters +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/StoreCounters.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/StoreCounters.kt new file mode 100644 index 00000000..d3e3c338 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/StoreCounters.kt @@ -0,0 +1,26 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.pool + +/** + * Snapshot of pool store counters. + * + * @property idleCount Number of sandbox IDs currently in the idle set. + */ +data class StoreCounters( + val idleCount: Int, +) From a1bec16da2ec8f2ee7c14ed3d17ef5acfa76218f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=86=AC=E7=A5=89?= Date: Mon, 9 Mar 2026 17:10:18 +0800 Subject: [PATCH 2/6] feat(sdks): add InMemoryPoolStateStore (OSEP-0005) --- .../pool/InMemoryPoolStateStore.kt | 138 ++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStore.kt diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStore.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStore.kt new file mode 100644 index 00000000..ff5c4b8d --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStore.kt @@ -0,0 +1,138 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.infrastructure.pool + +import com.alibaba.opensandbox.sandbox.domain.pool.PoolStateStore +import com.alibaba.opensandbox.sandbox.domain.pool.StoreCounters +import java.time.Duration +import java.time.Instant +import java.util.ArrayDeque +import java.util.concurrent.ConcurrentHashMap + +/** + * In-memory implementation of [PoolStateStore] for single-node use. + * + * - Idle entries use a fixed 24h TTL; expired entries are removed on take, put, reap, or snapshot. + * - tryTakeIdle returns oldest (FIFO) non-expired idle sandbox ID. + * - Primary lock is process-local and time-bounded; no distributed coordination. + */ +class InMemoryPoolStateStore : PoolStateStore { + + /** Fixed idle TTL per OSEP (24h). */ + private val idleTtl: Duration = Duration.ofHours(24) + + private val lock = Any() + + /** Per pool: queue of (sandboxId, expiresAt) in insertion order for FIFO take. */ + private val idleQueues = ConcurrentHashMap>() + + /** Per pool: primary lock (ownerId, expiresAt). */ + private val primaryLocks = ConcurrentHashMap() + + override fun tryTakeIdle(poolName: String): String? { + synchronized(lock) { + val queue = idleQueues[poolName] ?: return null + evictExpired(poolName, queue, Instant.now()) + val entry = queue.pollFirst() ?: return null + if (queue.isEmpty()) idleQueues.remove(poolName) + return entry.sandboxId + } + } + + override fun putIdle(poolName: String, sandboxId: String) { + val expiresAt = Instant.now().plus(idleTtl) + synchronized(lock) { + val queue = idleQueues.getOrPut(poolName) { ArrayDeque() } + evictExpired(poolName, queue, Instant.now()) + if (queue.any { it.sandboxId == sandboxId }) return + queue.addLast(IdleEntry(sandboxId, expiresAt)) + } + } + + override fun removeIdle(poolName: String, sandboxId: String) { + synchronized(lock) { + val queue = idleQueues[poolName] ?: return + queue.removeIf { it.sandboxId == sandboxId } + if (queue.isEmpty()) idleQueues.remove(poolName) + } + } + + override fun tryAcquirePrimaryLock(poolName: String, ownerId: String, ttl: Duration): Boolean { + val now = Instant.now() + val expiresAt = now.plus(ttl) + synchronized(lock) { + val current = primaryLocks[poolName] + if (current == null || current.expiresAt.isBefore(now)) { + primaryLocks[poolName] = PrimaryLockHolder(ownerId, expiresAt) + return true + } + if (current.ownerId == ownerId) { + primaryLocks[poolName] = PrimaryLockHolder(ownerId, expiresAt) + return true + } + return false + } + } + + override fun renewPrimaryLock(poolName: String, ownerId: String, ttl: Duration): Boolean { + val now = Instant.now() + val expiresAt = now.plus(ttl) + synchronized(lock) { + val current = primaryLocks[poolName] ?: return false + if (current.ownerId != ownerId || current.expiresAt.isBefore(now)) return false + primaryLocks[poolName] = PrimaryLockHolder(ownerId, expiresAt) + return true + } + } + + override fun releasePrimaryLock(poolName: String, ownerId: String) { + synchronized(lock) { + val current = primaryLocks[poolName] ?: return + if (current.ownerId == ownerId) primaryLocks.remove(poolName) + } + } + + override fun reapExpiredIdle(poolName: String, now: Instant) { + synchronized(lock) { + val queue = idleQueues[poolName] ?: return + evictExpired(poolName, queue, now) + if (queue.isEmpty()) idleQueues.remove(poolName) + } + } + + override fun snapshotCounters(poolName: String): StoreCounters { + synchronized(lock) { + val queue = idleQueues[poolName] ?: return StoreCounters(idleCount = 0) + evictExpired(poolName, queue, Instant.now()) + return StoreCounters(idleCount = queue.size) + } + } + + private fun evictExpired(poolName: String, queue: ArrayDeque, now: Instant) { + while (queue.isNotEmpty()) { + val head = queue.peekFirst() ?: break + if (head.expiresAt.isAfter(now)) break + queue.pollFirst() + } + // Do not remove pool from map here when queue is empty: putIdle may have just + // getOrPut an empty queue and will add to it after this call. + } + + private data class IdleEntry(val sandboxId: String, val expiresAt: Instant) + + private data class PrimaryLockHolder(val ownerId: String, val expiresAt: Instant) +} From f7cd892a3837d7d9ed3fcbe87b257253b5618cfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=86=AC=E7=A5=89?= Date: Mon, 9 Mar 2026 17:10:32 +0800 Subject: [PATCH 3/6] feat(sdks): add PoolReconciler and ReconcileState (OSEP-0005) --- .../infrastructure/pool/PoolReconciler.kt | 117 ++++++++++++++++++ .../infrastructure/pool/ReconcileState.kt | 81 ++++++++++++ 2 files changed, 198 insertions(+) create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/PoolReconciler.kt create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/ReconcileState.kt diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/PoolReconciler.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/PoolReconciler.kt new file mode 100644 index 00000000..ca14d163 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/PoolReconciler.kt @@ -0,0 +1,117 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.infrastructure.pool + +import com.alibaba.opensandbox.sandbox.domain.pool.PoolConfig +import com.alibaba.opensandbox.sandbox.domain.pool.PoolStateStore +import org.slf4j.LoggerFactory +import java.time.Instant + +/** + * Runs one reconcile tick: leader-gated replenish and TTL reap. + * + * Only the current primary lock holder performs create + putIdle. + * Call from a periodic scheduler; [createOne] should call lifecycle create and return the new sandbox ID or null on failure. + */ +internal object PoolReconciler { + private val logger = LoggerFactory.getLogger(PoolReconciler::class.java) + + /** + * Runs a single reconcile tick. If this node does not hold the primary lock, returns immediately. + * Otherwise: reaps expired idle, snapshots counters, then creates up to min(deficit, warmupConcurrency) + * sandboxes via [createOne], putting each success into the store and updating [reconcileState] on failure. + */ + fun runReconcileTick( + config: PoolConfig, + stateStore: PoolStateStore, + createOne: () -> String?, + reconcileState: ReconcileState, + ) { + val poolName = config.poolName + val ownerId = config.ownerId + val ttl = config.primaryLockTtl + + if (!stateStore.tryAcquirePrimaryLock(poolName, ownerId, ttl)) { + logger.trace("Reconcile skip (not primary): pool_name={}", poolName) + return + } + try { + runPrimaryReplenishOnce(config, stateStore, createOne, reconcileState) + } finally { + stateStore.releasePrimaryLock(poolName, ownerId) + } + } + + private fun runPrimaryReplenishOnce( + config: PoolConfig, + stateStore: PoolStateStore, + createOne: () -> String?, + reconcileState: ReconcileState, + ) { + val poolName = config.poolName + val ownerId = config.ownerId + val ttl = config.primaryLockTtl + val now = Instant.now() + + stateStore.reapExpiredIdle(poolName, now) + val counters = stateStore.snapshotCounters(poolName) + val deficit = (config.maxIdle - counters.idleCount).coerceAtLeast(0) + val toCreate = minOf(deficit, config.warmupConcurrency) + + if (toCreate == 0 || reconcileState.isBackoffActive(now)) { + stateStore.renewPrimaryLock(poolName, ownerId, ttl) + logger.debug( + "Reconcile tick: pool_name={} idle={} deficit={} toCreate=0 (backoff={})", + poolName, + counters.idleCount, + deficit, + reconcileState.isBackoffActive(now), + ) + return + } + + logger.debug( + "Reconcile tick: pool_name={} idle={} deficit={} toCreate={}", + poolName, + counters.idleCount, + deficit, + toCreate, + ) + var created = 0 + repeat(toCreate) { + if (!stateStore.renewPrimaryLock(poolName, ownerId, ttl)) { + return + } + val newId = try { + createOne() + } catch (e: Exception) { + reconcileState.recordFailure(e.message) + null + } + if (newId != null) { + stateStore.putIdle(poolName, newId) + created++ + reconcileState.recordSuccess() + } else { + reconcileState.recordFailure(null) + } + } + if (created > 0) { + logger.debug("Reconcile created {} sandboxes: pool_name={}", created, poolName) + } + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/ReconcileState.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/ReconcileState.kt new file mode 100644 index 00000000..27aa9664 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/ReconcileState.kt @@ -0,0 +1,81 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.infrastructure.pool + +import com.alibaba.opensandbox.sandbox.domain.pool.PoolState +import java.time.Duration +import java.time.Instant + +/** + * Mutable state for reconcile loop: failure count, pool state, and exponential backoff. + * + * Thread-safe for use from reconcile worker and from pool snapshot. + */ +internal class ReconcileState( + private val degradedThreshold: Int, + private val backoffBase: Duration = Duration.ofSeconds(1), + private val backoffMax: Duration = Duration.ofSeconds(60), +) { + @Volatile + var failureCount: Int = 0 + private set + + @Volatile + var state: PoolState = PoolState.HEALTHY + private set + + @Volatile + var lastError: String? = null + private set + + @Volatile + private var backoffUntil: Instant? = null + + private var backoffAttempts: Int = 0 + + @Synchronized + fun recordSuccess() { + failureCount = 0 + if (state == PoolState.DEGRADED) state = PoolState.HEALTHY + backoffUntil = null + backoffAttempts = 0 + lastError = null + } + + @Synchronized + fun recordFailure(errorMessage: String?) { + failureCount++ + lastError = errorMessage + if (failureCount > degradedThreshold) { + state = PoolState.DEGRADED + backoffAttempts++ + val exponent = backoffAttempts.coerceAtMost(10) + val delaySeconds = backoffBase.seconds * (1L shl exponent) + val delayMs = minOf( + Duration.ofSeconds(delaySeconds).toMillis(), + backoffMax.toMillis(), + ) + backoffUntil = Instant.now().plusMillis(delayMs) + } + } + + /** True if reconciler should skip create attempts this tick (in backoff window). */ + fun isBackoffActive(now: Instant = Instant.now()): Boolean { + val until = backoffUntil ?: return false + return state == PoolState.DEGRADED && now.isBefore(until) + } +} From 366e5392928593c799094a16262a851e573baa8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=86=AC=E7=A5=89?= Date: Mon, 9 Mar 2026 17:10:45 +0800 Subject: [PATCH 4/6] feat(sdks): add SandboxPool API and DefaultSandboxPool (OSEP-0005) --- .../sandbox/pool/DefaultSandboxPool.kt | 345 ++++++++++++++++++ .../opensandbox/sandbox/pool/SandboxPool.kt | 103 ++++++ 2 files changed, 448 insertions(+) create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPool.kt create mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPool.kt diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPool.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPool.kt new file mode 100644 index 00000000..8cc0bece --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPool.kt @@ -0,0 +1,345 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.pool + +import com.alibaba.opensandbox.sandbox.HttpClientProvider +import com.alibaba.opensandbox.sandbox.Sandbox +import com.alibaba.opensandbox.sandbox.config.ConnectionConfig +import com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException +import com.alibaba.opensandbox.sandbox.domain.pool.AcquirePolicy +import com.alibaba.opensandbox.sandbox.domain.pool.PoolConfig +import com.alibaba.opensandbox.sandbox.domain.pool.PoolCreationSpec +import com.alibaba.opensandbox.sandbox.domain.pool.PoolSnapshot +import com.alibaba.opensandbox.sandbox.domain.pool.PoolState +import com.alibaba.opensandbox.sandbox.domain.pool.PoolStateStore +import com.alibaba.opensandbox.sandbox.domain.services.Sandboxes +import com.alibaba.opensandbox.sandbox.infrastructure.factory.AdapterFactory +import com.alibaba.opensandbox.sandbox.infrastructure.pool.PoolReconciler +import com.alibaba.opensandbox.sandbox.infrastructure.pool.ReconcileState +import org.slf4j.LoggerFactory +import java.time.Duration +import java.util.concurrent.Executors +import java.util.concurrent.ScheduledExecutorService +import java.util.concurrent.ScheduledFuture +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicReference + +/** Fixed idle TTL per OSEP (24h). */ +private val IDLE_TTL = Duration.ofHours(24) + +/** + * Default implementation of [SandboxPool]. + * + * Uses [PoolStateStore] for idle membership and primary lock; runs a background reconcile loop + * when started. Replenish is leader-gated; acquire is allowed on all nodes. + */ +class DefaultSandboxPool internal constructor( + config: PoolConfig, +) : SandboxPool { + + private val logger = LoggerFactory.getLogger(DefaultSandboxPool::class.java) + + private val config: PoolConfig = config + private val stateStore: PoolStateStore = config.stateStore + private val connectionConfig: ConnectionConfig = config.connectionConfig + private val creationSpec: PoolCreationSpec = config.creationSpec + private val reconcileState = ReconcileState(config.degradedThreshold) + + @Volatile + private var currentMaxIdle: Int = config.maxIdle + + private val lifecycleState = AtomicReference(LifecycleState.NOT_STARTED) + private var httpClientProvider: HttpClientProvider? = null + private var sandboxService: Sandboxes? = null + private var scheduler: ScheduledExecutorService? = null + private var reconcileTask: ScheduledFuture<*>? = null + + override fun start() { + if (lifecycleState.get() == LifecycleState.RUNNING || lifecycleState.get() == LifecycleState.STARTING) { + return + } + lifecycleState.set(LifecycleState.STARTING) + val provider = HttpClientProvider(connectionConfig) + httpClientProvider = provider + val factory = AdapterFactory(provider) + sandboxService = factory.createSandboxes() + val exec = Executors.newSingleThreadScheduledExecutor { r -> + Thread(r, "sandbox-pool-reconcile-${config.poolName}").apply { isDaemon = true } + } + scheduler = exec + val reconcileIntervalMs = config.reconcileInterval.toMillis() + reconcileTask = exec.scheduleAtFixedRate( + { runReconcileTick() }, + if (config.maxIdle > 0) 0 else reconcileIntervalMs, + reconcileIntervalMs, + TimeUnit.MILLISECONDS, + ) + lifecycleState.set(LifecycleState.RUNNING) + logger.info( + "Pool started: pool_name={} state={} maxIdle={}", + config.poolName, + LifecycleState.RUNNING, + currentMaxIdle, + ) + } + + override fun acquire( + sandboxTimeout: Duration?, + policy: AcquirePolicy, + ): Sandbox { + if (lifecycleState.get() != LifecycleState.RUNNING) { + throw IllegalStateException("Pool is not running: ${lifecycleState.get()}") + } + val poolName = config.poolName + val sandboxId = stateStore.tryTakeIdle(poolName) + if (sandboxId != null) { + try { + val sandbox = Sandbox.connector() + .sandboxId(sandboxId) + .connectionConfig(connectionConfig) + .connect() + sandboxTimeout?.let { sandbox.renew(it) } + logger.debug( + "Acquire from idle: pool_name={} sandbox_id={} policy={}", + poolName, + sandboxId, + policy, + ) + return sandbox + } catch (e: Exception) { + logger.debug( + "Idle connect failed, falling back to direct create: pool_name={} sandbox_id={}", + poolName, + sandboxId, + ) + stateStore.removeIdle(poolName, sandboxId) + try { + sandboxService?.killSandbox(sandboxId) + } catch (_: Exception) { + // best-effort kill; do not replace original error + } + // fall through to direct create + } + } + if (policy == AcquirePolicy.FAIL_FAST) { + logger.debug("Acquire FAIL_FAST with empty idle: pool_name={}", poolName) + throw PoolEmptyException("No idle sandbox available and policy is FAIL_FAST") + } + logger.debug("Acquire direct create: pool_name={} policy={}", poolName, policy) + return directCreate(sandboxTimeout) + } + + override fun resize(maxIdle: Int) { + require(maxIdle >= 0) { "maxIdle must be >= 0" } + currentMaxIdle = maxIdle + scheduler?.execute { runReconcileTick() } + } + + override fun snapshot(): PoolSnapshot { + val state = when (lifecycleState.get()) { + LifecycleState.NOT_STARTED, + LifecycleState.STOPPED -> PoolState.STOPPED + LifecycleState.DRAINING -> PoolState.DRAINING + else -> reconcileState.state + } + val counters = stateStore.snapshotCounters(config.poolName) + return PoolSnapshot( + state = state, + idleCount = counters.idleCount, + lastError = reconcileState.lastError, + ) + } + + override fun shutdown(graceful: Boolean) { + if (lifecycleState.get() == LifecycleState.STOPPED) return + if (!graceful) { + stopReconcile() + lifecycleState.set(LifecycleState.STOPPED) + closeProvider() + logger.info("Pool stopped (non-graceful): pool_name={} state={}", config.poolName, LifecycleState.STOPPED) + return + } + lifecycleState.set(LifecycleState.DRAINING) + stopReconcile() + val drainMs = config.drainTimeout.toMillis() + if (drainMs > 0) { + Thread.sleep(drainMs) + } + lifecycleState.set(LifecycleState.STOPPED) + closeProvider() + logger.info("Pool stopped (graceful): pool_name={} state={}", config.poolName, LifecycleState.STOPPED) + } + + private fun runReconcileTick() { + if (lifecycleState.get() != LifecycleState.RUNNING) return + val service = sandboxService ?: return + val reconcileConfig = config.copy(maxIdle = currentMaxIdle) + PoolReconciler.runReconcileTick( + config = reconcileConfig, + stateStore = stateStore, + createOne = { createOneSandbox(service) }, + reconcileState = reconcileState, + ) + } + + private fun createOneSandbox(service: Sandboxes): String? { + return try { + val response = service.createSandbox( + spec = creationSpec.imageSpec, + entrypoint = creationSpec.entrypoint, + env = creationSpec.env, + metadata = creationSpec.metadata, + timeout = IDLE_TTL, + resource = creationSpec.resource, + networkPolicy = creationSpec.networkPolicy, + extensions = emptyMap(), + volumes = creationSpec.volumes, + ) + response.id + } catch (e: Exception) { + logger.warn("Pool create sandbox failed: poolName={}", config.poolName, e) + throw e + } + } + + private fun directCreate(sandboxTimeout: Duration?): Sandbox { + val service = sandboxService ?: throw IllegalStateException("Pool not started") + val response = service.createSandbox( + spec = creationSpec.imageSpec, + entrypoint = creationSpec.entrypoint, + env = creationSpec.env, + metadata = creationSpec.metadata, + timeout = IDLE_TTL, + resource = creationSpec.resource, + networkPolicy = creationSpec.networkPolicy, + extensions = emptyMap(), + volumes = creationSpec.volumes, + ) + val sandbox = Sandbox.connector() + .sandboxId(response.id) + .connectionConfig(connectionConfig) + .connect() + sandboxTimeout?.let { sandbox.renew(it) } + return sandbox + } + + private fun stopReconcile() { + reconcileTask?.cancel(false) + reconcileTask = null + scheduler?.shutdown() + try { + scheduler?.awaitTermination(5, TimeUnit.SECONDS) + } catch (_: InterruptedException) { + Thread.currentThread().interrupt() + } + scheduler = null + } + + private fun closeProvider() { + try { + httpClientProvider?.close() + } catch (e: Exception) { + logger.warn("Error closing pool HTTP client", e) + } + httpClientProvider = null + sandboxService = null + } + + private enum class LifecycleState { + NOT_STARTED, + STARTING, + RUNNING, + DRAINING, + STOPPED, + } + + class Builder internal constructor() { + private var config: PoolConfig? = null + + fun config(config: PoolConfig): Builder { + this.config = config + return this + } + + fun poolName(poolName: String): Builder { + _configBuilder.poolName(poolName) + return this + } + + fun ownerId(ownerId: String): Builder { + _configBuilder.ownerId(ownerId) + return this + } + + fun maxIdle(maxIdle: Int): Builder { + _configBuilder.maxIdle(maxIdle) + return this + } + + fun stateStore(stateStore: PoolStateStore): Builder { + _configBuilder.stateStore(stateStore) + return this + } + + fun connectionConfig(connectionConfig: ConnectionConfig): Builder { + _configBuilder.connectionConfig(connectionConfig) + return this + } + + fun creationSpec(creationSpec: PoolCreationSpec): Builder { + _configBuilder.creationSpec(creationSpec) + return this + } + + fun emptyBehavior(emptyBehavior: com.alibaba.opensandbox.sandbox.domain.pool.EmptyBehavior): Builder { + _configBuilder.emptyBehavior(emptyBehavior) + return this + } + + fun warmupConcurrency(warmupConcurrency: Int): Builder { + _configBuilder.warmupConcurrency(warmupConcurrency) + return this + } + + fun primaryLockTtl(primaryLockTtl: Duration): Builder { + _configBuilder.primaryLockTtl(primaryLockTtl) + return this + } + + fun reconcileInterval(reconcileInterval: Duration): Builder { + _configBuilder.reconcileInterval(reconcileInterval) + return this + } + + fun degradedThreshold(degradedThreshold: Int): Builder { + _configBuilder.degradedThreshold(degradedThreshold) + return this + } + + fun drainTimeout(drainTimeout: Duration): Builder { + _configBuilder.drainTimeout(drainTimeout) + return this + } + + private val _configBuilder = PoolConfig.builder() + + fun build(): DefaultSandboxPool { + val cfg = config ?: _configBuilder.build() + return DefaultSandboxPool(cfg) + } + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPool.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPool.kt new file mode 100644 index 00000000..8e35506d --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPool.kt @@ -0,0 +1,103 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.pool + +import com.alibaba.opensandbox.sandbox.Sandbox +import com.alibaba.opensandbox.sandbox.domain.pool.AcquirePolicy +import com.alibaba.opensandbox.sandbox.domain.pool.PoolSnapshot +import java.time.Duration + +/** + * Client-side sandbox pool for acquiring ready sandboxes with predictable latency. + * + * The pool maintains an idle buffer of clean, borrowable sandboxes. Callers [acquire] a sandbox, + * use it, and terminate it via [Sandbox.kill] when done. No return/finalize API; sandboxes are ephemeral. + * + * ## Usage + * + * ```kotlin + * val pool = SandboxPool.builder() + * .poolName("my-pool") + * .ownerId("worker-1") + * .maxIdle(5) + * .stateStore(InMemoryPoolStateStore()) + * .connectionConfig(connectionConfig) + * .creationSpec(PoolCreationSpec.builder().image("ubuntu:22.04").build()) + * .build() + * pool.start() + * + * val sandbox = pool.acquire(sandboxTimeout = Duration.ofMinutes(30), policy = AcquirePolicy.DIRECT_CREATE) + * try { + * // use sandbox + * } finally { + * sandbox.kill() + * } + * + * pool.shutdown(graceful = true) + * ``` + * + * @see DefaultSandboxPool + * @see com.alibaba.opensandbox.sandbox.domain.pool.PoolConfig + */ +interface SandboxPool { + + /** + * Starts the pool: begins the background reconcile loop and, if [PoolConfig.maxIdle] > 0, + * triggers an immediate warmup tick. + */ + fun start() + + /** + * Acquires a sandbox from the pool or creates one directly per policy. + * + * 1. Tries to take an idle sandbox ID from the store and connect. + * 2. If connect fails (stale ID), removes the ID, best-effort kill, then falls back to direct create. + * 3. If no idle and [policy] is [AcquirePolicy.FAIL_FAST], throws [com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException]. + * 4. If no idle and [policy] is [AcquirePolicy.DIRECT_CREATE], creates a new sandbox via lifecycle API and returns it. + * + * @param sandboxTimeout Optional duration to set on the acquired sandbox (applied via renew after connect). + * @param policy Behavior when idle buffer is empty (default: [AcquirePolicy.DIRECT_CREATE]). + * @return A connected [Sandbox] instance. Caller must call [Sandbox.kill] when done. + * @throws com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException when policy is FAIL_FAST and no idle is available. + * @throws com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException for lifecycle create/connect/renew errors. + */ + fun acquire( + sandboxTimeout: Duration? = null, + policy: AcquirePolicy = AcquirePolicy.DIRECT_CREATE, + ): Sandbox + + /** + * Updates the maximum idle target. Triggers a reconcile tick without blocking on convergence. + */ + fun resize(maxIdle: Int) + + /** + * Returns a point-in-time snapshot of pool state for observability. + */ + fun snapshot(): PoolSnapshot + + /** + * Stops the pool. If [graceful] is true, stops accepting new acquires, stops the reconcile worker, + * and waits up to drainTimeout for in-flight operations. Otherwise stops immediately. + */ + fun shutdown(graceful: Boolean = true) + + companion object { + @JvmStatic + fun builder(): DefaultSandboxPool.Builder = DefaultSandboxPool.Builder() + } +} From 7842d20839fe0c316f4294d820a56c6a1918d794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=86=AC=E7=A5=89?= Date: Mon, 9 Mar 2026 17:11:12 +0800 Subject: [PATCH 5/6] test(sdks): add pool contract and unit tests; doc Module.md (OSEP-0005) --- sdks/sandbox/kotlin/sandbox/Module.md | 29 ++++ .../pool/InMemoryPoolStateStoreTest.kt | 151 ++++++++++++++++++ .../sandbox/pool/DefaultSandboxPoolTest.kt | 118 ++++++++++++++ 3 files changed, 298 insertions(+) create mode 100644 sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStoreTest.kt create mode 100644 sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPoolTest.kt diff --git a/sdks/sandbox/kotlin/sandbox/Module.md b/sdks/sandbox/kotlin/sandbox/Module.md index 14e36358..07fdbd35 100644 --- a/sdks/sandbox/kotlin/sandbox/Module.md +++ b/sdks/sandbox/kotlin/sandbox/Module.md @@ -11,6 +11,7 @@ The Open Sandbox SDK provides a comprehensive interface for creating and managin - **🔄 Lifecycle Management**: Create, pause, resume, terminate operations - **💚 Health Monitoring**: Automatic readiness detection and status tracking - **🏗️ Fluent API**: Type-safe builder pattern with DSL support +- **📦 Client-Side Sandbox Pool**: Idle-buffer pool for predictable acquire latency (opt-in, see [Sandbox Pool](#sandbox-pool)) ## Quick Start @@ -100,6 +101,34 @@ sandbox.commands.executeStreaming("long-running-task").collect { event -> } ``` +## Sandbox Pool + +Optional client-side pool for acquiring ready sandboxes with lower latency. The pool maintains an idle buffer; use `SandboxPool.builder()` with `stateStore`, `connectionConfig`, and `creationSpec`, then `start()`, `acquire()`, and `shutdown()`. + +```kotlin +import com.alibaba.opensandbox.sandbox.pool.SandboxPool +import com.alibaba.opensandbox.sandbox.domain.pool.PoolCreationSpec +import com.alibaba.opensandbox.sandbox.infrastructure.pool.InMemoryPoolStateStore + +val pool = SandboxPool.builder() + .poolName("my-pool") + .ownerId("worker-1") + .maxIdle(5) + .stateStore(InMemoryPoolStateStore()) + .connectionConfig(connectionConfig) + .creationSpec(PoolCreationSpec.builder().image("ubuntu:22.04").build()) + .build() +pool.start() + +val sandbox = pool.acquire(sandboxTimeout = Duration.ofMinutes(30)) +try { + // use sandbox +} finally { + sandbox.kill() +} +pool.shutdown(graceful = true) +``` + ## Key Components ### Sandbox diff --git a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStoreTest.kt b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStoreTest.kt new file mode 100644 index 00000000..36c9bb1d --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStoreTest.kt @@ -0,0 +1,151 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.infrastructure.pool + +import com.alibaba.opensandbox.sandbox.domain.pool.PoolStateStore +import com.alibaba.opensandbox.sandbox.domain.pool.StoreCounters +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertFalse +import org.junit.jupiter.api.Assertions.assertNull +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.Test +import java.time.Duration + +/** + * Contract and behavior tests for [InMemoryPoolStateStore]. + */ +class InMemoryPoolStateStoreTest { + + private lateinit var store: PoolStateStore + private val poolName = "test-pool" + + @BeforeEach + fun setUp() { + store = InMemoryPoolStateStore() + } + + @Test + fun `tryTakeIdle returns null when empty`() { + assertNull(store.tryTakeIdle(poolName)) + } + + @Test + fun `putIdle and tryTakeIdle round-trip`() { + store.putIdle(poolName, "id-1") + assertEquals("id-1", store.tryTakeIdle(poolName)) + assertNull(store.tryTakeIdle(poolName)) + } + + @Test + fun `tryTakeIdle prefers FIFO`() { + store.putIdle(poolName, "id-1") + store.putIdle(poolName, "id-2") + store.putIdle(poolName, "id-3") + assertEquals("id-1", store.tryTakeIdle(poolName)) + assertEquals("id-2", store.tryTakeIdle(poolName)) + assertEquals("id-3", store.tryTakeIdle(poolName)) + assertNull(store.tryTakeIdle(poolName)) + } + + @Test + fun `removeIdle removes entry`() { + store.putIdle(poolName, "id-1") + store.removeIdle(poolName, "id-1") + assertNull(store.tryTakeIdle(poolName)) + } + + @Test + fun `removeIdle is idempotent`() { + store.putIdle(poolName, "id-1") + store.removeIdle(poolName, "id-1") + store.removeIdle(poolName, "id-1") + assertNull(store.tryTakeIdle(poolName)) + } + + @Test + fun `putIdle is idempotent - single copy`() { + store.putIdle(poolName, "id-1") + store.putIdle(poolName, "id-1") + assertEquals("id-1", store.tryTakeIdle(poolName)) + assertNull(store.tryTakeIdle(poolName)) + } + + @Test + fun `snapshotCounters returns idle count`() { + assertEquals(0, store.snapshotCounters(poolName).idleCount) + store.putIdle(poolName, "id-1") + store.putIdle(poolName, "id-2") + assertEquals(2, store.snapshotCounters(poolName).idleCount) + store.tryTakeIdle(poolName) + assertEquals(1, store.snapshotCounters(poolName).idleCount) + } + + @Test + fun `tryAcquirePrimaryLock acquires when free`() { + val ok = store.tryAcquirePrimaryLock(poolName, "owner-1", Duration.ofSeconds(60)) + assertTrue(ok) + } + + @Test + fun `tryAcquirePrimaryLock fails for different owner when held`() { + store.tryAcquirePrimaryLock(poolName, "owner-1", Duration.ofSeconds(60)) + val ok = store.tryAcquirePrimaryLock(poolName, "owner-2", Duration.ofSeconds(60)) + assertFalse(ok) + } + + @Test + fun `renewPrimaryLock succeeds for owner`() { + store.tryAcquirePrimaryLock(poolName, "owner-1", Duration.ofSeconds(60)) + assertTrue(store.renewPrimaryLock(poolName, "owner-1", Duration.ofSeconds(60))) + } + + @Test + fun `renewPrimaryLock fails for non-owner`() { + store.tryAcquirePrimaryLock(poolName, "owner-1", Duration.ofSeconds(60)) + assertFalse(store.renewPrimaryLock(poolName, "owner-2", Duration.ofSeconds(60))) + } + + @Test + fun `releasePrimaryLock allows new owner`() { + store.tryAcquirePrimaryLock(poolName, "owner-1", Duration.ofSeconds(60)) + store.releasePrimaryLock(poolName, "owner-1") + assertTrue(store.tryAcquirePrimaryLock(poolName, "owner-2", Duration.ofSeconds(60))) + } + + @Test + fun `pool isolation - different pools do not share idle`() { + store.putIdle("pool-a", "id-a") + store.putIdle("pool-b", "id-b") + assertEquals("id-a", store.tryTakeIdle("pool-a")) + assertEquals("id-b", store.tryTakeIdle("pool-b")) + assertNull(store.tryTakeIdle("pool-a")) + } + + @Test + fun `pool isolation - different pools do not share lock`() { + store.tryAcquirePrimaryLock("pool-a", "owner-a", Duration.ofSeconds(60)) + assertTrue(store.tryAcquirePrimaryLock("pool-b", "owner-b", Duration.ofSeconds(60))) + } + + @Test + fun `reapExpiredIdle removes expired entries`() { + store.putIdle(poolName, "id-1") + store.reapExpiredIdle(poolName, java.time.Instant.now().plus(java.time.Duration.ofHours(25))) + assertEquals(StoreCounters(0), store.snapshotCounters(poolName)) + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPoolTest.kt b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPoolTest.kt new file mode 100644 index 00000000..87609410 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPoolTest.kt @@ -0,0 +1,118 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.pool + +import com.alibaba.opensandbox.sandbox.config.ConnectionConfig +import com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException +import com.alibaba.opensandbox.sandbox.domain.pool.AcquirePolicy +import com.alibaba.opensandbox.sandbox.domain.pool.PoolState +import com.alibaba.opensandbox.sandbox.domain.pool.PoolCreationSpec +import com.alibaba.opensandbox.sandbox.infrastructure.pool.InMemoryPoolStateStore +import java.time.Duration +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertThrows +import org.junit.jupiter.api.Test + +class DefaultSandboxPoolTest { + + @Test + fun `snapshot before start returns STOPPED and zero idle`() { + val pool = buildPool() + val snap = pool.snapshot() + assertEquals(PoolState.STOPPED, snap.state) + assertEquals(0, snap.idleCount) + } + + @Test + fun `start then snapshot returns RUNNING`() { + val pool = buildPool() + pool.start() + try { + val snap = pool.snapshot() + assertEquals(PoolState.HEALTHY, snap.state) + } finally { + pool.shutdown(graceful = false) + } + } + + @Test + fun `resize updates maxIdle`() { + val pool = buildPool() + pool.start() + try { + pool.resize(10) + val snap = pool.snapshot() + assertEquals(PoolState.HEALTHY, snap.state) + } finally { + pool.shutdown(graceful = false) + } + } + + @Test + fun `shutdown graceful then snapshot returns STOPPED`() { + val pool = buildPool() + pool.start() + pool.shutdown(graceful = true) + val snap = pool.snapshot() + assertEquals(PoolState.STOPPED, snap.state) + } + + @Test + fun `shutdown non-graceful then snapshot returns STOPPED`() { + val pool = buildPool() + pool.start() + pool.shutdown(graceful = false) + val snap = pool.snapshot() + assertEquals(PoolState.STOPPED, snap.state) + } + + @Test + fun `acquire with FAIL_FAST and empty idle throws PoolEmptyException`() { + val pool = buildPool() + pool.start() + try { + assertThrows(PoolEmptyException::class.java) { + pool.acquire(policy = AcquirePolicy.FAIL_FAST) + } + } finally { + pool.shutdown(graceful = false) + } + } + + @Test + fun `acquire when not running throws IllegalStateException`() { + val pool = buildPool() + assertThrows(IllegalStateException::class.java) { + pool.acquire(policy = AcquirePolicy.DIRECT_CREATE) + } + } + + private fun buildPool(): SandboxPool { + val config = ConnectionConfig.builder().build() + val spec = PoolCreationSpec.builder().image("ubuntu:22.04").build() + return SandboxPool.builder() + .poolName("test-pool") + .ownerId("test-owner") + .maxIdle(2) + .stateStore(InMemoryPoolStateStore()) + .connectionConfig(config) + .creationSpec(spec) + .drainTimeout(Duration.ofMillis(50)) + .reconcileInterval(Duration.ofSeconds(30)) + .build() + } +} From 86db7f1a4b59e7ec47131446f432cbdab2e289e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=86=AC=E7=A5=89?= Date: Mon, 9 Mar 2026 18:18:19 +0800 Subject: [PATCH 6/6] refactor(sdks): refactor pool --- .../domain/exceptions/SandboxException.kt | 3 - .../sandbox/domain/pool/AcquirePolicy.kt | 4 +- .../sandbox/domain/pool/PoolCreationSpec.kt | 12 +- .../sandbox/domain/pool/PoolStateStore.kt | 32 +- .../pool/InMemoryPoolStateStore.kt | 39 +- .../infrastructure/pool/PoolReconciler.kt | 13 +- .../infrastructure/pool/ReconcileState.kt | 9 +- .../sandbox/pool/DefaultSandboxPool.kt | 345 ------------------ .../opensandbox/sandbox/pool/SandboxPool.kt | 337 ++++++++++++++++- .../pool/InMemoryPoolStateStoreTest.kt | 1 - ...tSandboxPoolTest.kt => SandboxPoolTest.kt} | 7 +- 11 files changed, 407 insertions(+), 395 deletions(-) delete mode 100644 sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPool.kt rename sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/{DefaultSandboxPoolTest.kt => SandboxPoolTest.kt} (99%) diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt index 38c711b7..d836752e 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt @@ -88,9 +88,6 @@ class InvalidArgumentException( error = SandboxError(SandboxError.INVALID_ARGUMENT, message), ) -/** - * Defines standardized common error codes and messages for the Sandbox SDK. - */ /** * Thrown when acquire is called with FAIL_FAST policy and no idle sandbox is available. */ diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/AcquirePolicy.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/AcquirePolicy.kt index 0f9d7906..3491610d 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/AcquirePolicy.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/AcquirePolicy.kt @@ -16,10 +16,12 @@ package com.alibaba.opensandbox.sandbox.domain.pool +import com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException + /** * Policy for acquire when the idle buffer is empty. * - * - FAIL_FAST: throw [com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException] (POOL_EMPTY). + * - FAIL_FAST: throw [PoolEmptyException] (POOL_EMPTY). * - DIRECT_CREATE: attempt direct create via lifecycle API, then connect and return. */ enum class AcquirePolicy { diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolCreationSpec.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolCreationSpec.kt index 91d4f5dd..7fc2b604 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolCreationSpec.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolCreationSpec.kt @@ -16,6 +16,7 @@ package com.alibaba.opensandbox.sandbox.domain.pool +import com.alibaba.opensandbox.sandbox.Sandbox import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.NetworkPolicy import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxImageSpec import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.Volume @@ -24,7 +25,7 @@ import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.Volume * Template for creating sandboxes in the pool (replenish and direct-create). * * Pool always uses a fixed 24h timeout for created sandboxes; other parameters - * are taken from this spec. Defaults align with [com.alibaba.opensandbox.sandbox.Sandbox.Builder]. + * are taken from this spec. Defaults align with [Sandbox.Builder]. * * @property imageSpec Container image specification (required). * @property entrypoint Entrypoint command (default: tail -f /dev/null). @@ -48,10 +49,11 @@ data class PoolCreationSpec( val DEFAULT_ENTRYPOINT: List = listOf("tail", "-f", "/dev/null") /** Default resource limits. */ - val DEFAULT_RESOURCE: Map = mapOf( - "cpu" to "1", - "memory" to "2Gi", - ) + val DEFAULT_RESOURCE: Map = + mapOf( + "cpu" to "1", + "memory" to "2Gi", + ) @JvmStatic fun builder(): Builder = Builder() diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolStateStore.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolStateStore.kt index 7cf08082..803a462d 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolStateStore.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/pool/PoolStateStore.kt @@ -42,34 +42,54 @@ interface PoolStateStore { * Adds a sandbox ID to the idle set for the pool. * Idempotent: duplicate put for same sandboxId leaves membership single-copy. */ - fun putIdle(poolName: String, sandboxId: String) + fun putIdle( + poolName: String, + sandboxId: String, + ) /** * Removes a sandbox ID from the idle set. * Idempotent: duplicate remove is no-op. */ - fun removeIdle(poolName: String, sandboxId: String) + fun removeIdle( + poolName: String, + sandboxId: String, + ) /** * Tries to acquire the primary (leader) lock for this pool. * Best-effort mutually exclusive by poolName. Returns true if this node is now primary. */ - fun tryAcquirePrimaryLock(poolName: String, ownerId: String, ttl: Duration): Boolean + fun tryAcquirePrimaryLock( + poolName: String, + ownerId: String, + ttl: Duration, + ): Boolean /** * Renews the primary lock for the current owner. Non-owner renew is rejected. */ - fun renewPrimaryLock(poolName: String, ownerId: String, ttl: Duration): Boolean + fun renewPrimaryLock( + poolName: String, + ownerId: String, + ttl: Duration, + ): Boolean /** * Releases the primary lock for the given owner. */ - fun releasePrimaryLock(poolName: String, ownerId: String) + fun releasePrimaryLock( + poolName: String, + ownerId: String, + ) /** * Removes expired idle entries. In-memory store performs sweep; TTL-backed stores may no-op. */ - fun reapExpiredIdle(poolName: String, now: Instant) + fun reapExpiredIdle( + poolName: String, + now: Instant, + ) /** * Returns a snapshot of counters for the pool (at least idle count). diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStore.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStore.kt index ff5c4b8d..a88611d7 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStore.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStore.kt @@ -31,7 +31,6 @@ import java.util.concurrent.ConcurrentHashMap * - Primary lock is process-local and time-bounded; no distributed coordination. */ class InMemoryPoolStateStore : PoolStateStore { - /** Fixed idle TTL per OSEP (24h). */ private val idleTtl: Duration = Duration.ofHours(24) @@ -53,7 +52,10 @@ class InMemoryPoolStateStore : PoolStateStore { } } - override fun putIdle(poolName: String, sandboxId: String) { + override fun putIdle( + poolName: String, + sandboxId: String, + ) { val expiresAt = Instant.now().plus(idleTtl) synchronized(lock) { val queue = idleQueues.getOrPut(poolName) { ArrayDeque() } @@ -63,7 +65,10 @@ class InMemoryPoolStateStore : PoolStateStore { } } - override fun removeIdle(poolName: String, sandboxId: String) { + override fun removeIdle( + poolName: String, + sandboxId: String, + ) { synchronized(lock) { val queue = idleQueues[poolName] ?: return queue.removeIf { it.sandboxId == sandboxId } @@ -71,7 +76,11 @@ class InMemoryPoolStateStore : PoolStateStore { } } - override fun tryAcquirePrimaryLock(poolName: String, ownerId: String, ttl: Duration): Boolean { + override fun tryAcquirePrimaryLock( + poolName: String, + ownerId: String, + ttl: Duration, + ): Boolean { val now = Instant.now() val expiresAt = now.plus(ttl) synchronized(lock) { @@ -88,7 +97,11 @@ class InMemoryPoolStateStore : PoolStateStore { } } - override fun renewPrimaryLock(poolName: String, ownerId: String, ttl: Duration): Boolean { + override fun renewPrimaryLock( + poolName: String, + ownerId: String, + ttl: Duration, + ): Boolean { val now = Instant.now() val expiresAt = now.plus(ttl) synchronized(lock) { @@ -99,14 +112,20 @@ class InMemoryPoolStateStore : PoolStateStore { } } - override fun releasePrimaryLock(poolName: String, ownerId: String) { + override fun releasePrimaryLock( + poolName: String, + ownerId: String, + ) { synchronized(lock) { val current = primaryLocks[poolName] ?: return if (current.ownerId == ownerId) primaryLocks.remove(poolName) } } - override fun reapExpiredIdle(poolName: String, now: Instant) { + override fun reapExpiredIdle( + poolName: String, + now: Instant, + ) { synchronized(lock) { val queue = idleQueues[poolName] ?: return evictExpired(poolName, queue, now) @@ -122,7 +141,11 @@ class InMemoryPoolStateStore : PoolStateStore { } } - private fun evictExpired(poolName: String, queue: ArrayDeque, now: Instant) { + private fun evictExpired( + poolName: String, + queue: ArrayDeque, + now: Instant, + ) { while (queue.isNotEmpty()) { val head = queue.peekFirst() ?: break if (head.expiresAt.isAfter(now)) break diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/PoolReconciler.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/PoolReconciler.kt index ca14d163..1b6f8eb0 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/PoolReconciler.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/PoolReconciler.kt @@ -96,12 +96,13 @@ internal object PoolReconciler { if (!stateStore.renewPrimaryLock(poolName, ownerId, ttl)) { return } - val newId = try { - createOne() - } catch (e: Exception) { - reconcileState.recordFailure(e.message) - null - } + val newId = + try { + createOne() + } catch (e: Exception) { + reconcileState.recordFailure(e.message) + null + } if (newId != null) { stateStore.putIdle(poolName, newId) created++ diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/ReconcileState.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/ReconcileState.kt index 27aa9664..82b66f0b 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/ReconcileState.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/ReconcileState.kt @@ -65,10 +65,11 @@ internal class ReconcileState( backoffAttempts++ val exponent = backoffAttempts.coerceAtMost(10) val delaySeconds = backoffBase.seconds * (1L shl exponent) - val delayMs = minOf( - Duration.ofSeconds(delaySeconds).toMillis(), - backoffMax.toMillis(), - ) + val delayMs = + minOf( + Duration.ofSeconds(delaySeconds).toMillis(), + backoffMax.toMillis(), + ) backoffUntil = Instant.now().plusMillis(delayMs) } } diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPool.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPool.kt deleted file mode 100644 index 8cc0bece..00000000 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPool.kt +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright 2025 Alibaba Group Holding Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.alibaba.opensandbox.sandbox.pool - -import com.alibaba.opensandbox.sandbox.HttpClientProvider -import com.alibaba.opensandbox.sandbox.Sandbox -import com.alibaba.opensandbox.sandbox.config.ConnectionConfig -import com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException -import com.alibaba.opensandbox.sandbox.domain.pool.AcquirePolicy -import com.alibaba.opensandbox.sandbox.domain.pool.PoolConfig -import com.alibaba.opensandbox.sandbox.domain.pool.PoolCreationSpec -import com.alibaba.opensandbox.sandbox.domain.pool.PoolSnapshot -import com.alibaba.opensandbox.sandbox.domain.pool.PoolState -import com.alibaba.opensandbox.sandbox.domain.pool.PoolStateStore -import com.alibaba.opensandbox.sandbox.domain.services.Sandboxes -import com.alibaba.opensandbox.sandbox.infrastructure.factory.AdapterFactory -import com.alibaba.opensandbox.sandbox.infrastructure.pool.PoolReconciler -import com.alibaba.opensandbox.sandbox.infrastructure.pool.ReconcileState -import org.slf4j.LoggerFactory -import java.time.Duration -import java.util.concurrent.Executors -import java.util.concurrent.ScheduledExecutorService -import java.util.concurrent.ScheduledFuture -import java.util.concurrent.TimeUnit -import java.util.concurrent.atomic.AtomicReference - -/** Fixed idle TTL per OSEP (24h). */ -private val IDLE_TTL = Duration.ofHours(24) - -/** - * Default implementation of [SandboxPool]. - * - * Uses [PoolStateStore] for idle membership and primary lock; runs a background reconcile loop - * when started. Replenish is leader-gated; acquire is allowed on all nodes. - */ -class DefaultSandboxPool internal constructor( - config: PoolConfig, -) : SandboxPool { - - private val logger = LoggerFactory.getLogger(DefaultSandboxPool::class.java) - - private val config: PoolConfig = config - private val stateStore: PoolStateStore = config.stateStore - private val connectionConfig: ConnectionConfig = config.connectionConfig - private val creationSpec: PoolCreationSpec = config.creationSpec - private val reconcileState = ReconcileState(config.degradedThreshold) - - @Volatile - private var currentMaxIdle: Int = config.maxIdle - - private val lifecycleState = AtomicReference(LifecycleState.NOT_STARTED) - private var httpClientProvider: HttpClientProvider? = null - private var sandboxService: Sandboxes? = null - private var scheduler: ScheduledExecutorService? = null - private var reconcileTask: ScheduledFuture<*>? = null - - override fun start() { - if (lifecycleState.get() == LifecycleState.RUNNING || lifecycleState.get() == LifecycleState.STARTING) { - return - } - lifecycleState.set(LifecycleState.STARTING) - val provider = HttpClientProvider(connectionConfig) - httpClientProvider = provider - val factory = AdapterFactory(provider) - sandboxService = factory.createSandboxes() - val exec = Executors.newSingleThreadScheduledExecutor { r -> - Thread(r, "sandbox-pool-reconcile-${config.poolName}").apply { isDaemon = true } - } - scheduler = exec - val reconcileIntervalMs = config.reconcileInterval.toMillis() - reconcileTask = exec.scheduleAtFixedRate( - { runReconcileTick() }, - if (config.maxIdle > 0) 0 else reconcileIntervalMs, - reconcileIntervalMs, - TimeUnit.MILLISECONDS, - ) - lifecycleState.set(LifecycleState.RUNNING) - logger.info( - "Pool started: pool_name={} state={} maxIdle={}", - config.poolName, - LifecycleState.RUNNING, - currentMaxIdle, - ) - } - - override fun acquire( - sandboxTimeout: Duration?, - policy: AcquirePolicy, - ): Sandbox { - if (lifecycleState.get() != LifecycleState.RUNNING) { - throw IllegalStateException("Pool is not running: ${lifecycleState.get()}") - } - val poolName = config.poolName - val sandboxId = stateStore.tryTakeIdle(poolName) - if (sandboxId != null) { - try { - val sandbox = Sandbox.connector() - .sandboxId(sandboxId) - .connectionConfig(connectionConfig) - .connect() - sandboxTimeout?.let { sandbox.renew(it) } - logger.debug( - "Acquire from idle: pool_name={} sandbox_id={} policy={}", - poolName, - sandboxId, - policy, - ) - return sandbox - } catch (e: Exception) { - logger.debug( - "Idle connect failed, falling back to direct create: pool_name={} sandbox_id={}", - poolName, - sandboxId, - ) - stateStore.removeIdle(poolName, sandboxId) - try { - sandboxService?.killSandbox(sandboxId) - } catch (_: Exception) { - // best-effort kill; do not replace original error - } - // fall through to direct create - } - } - if (policy == AcquirePolicy.FAIL_FAST) { - logger.debug("Acquire FAIL_FAST with empty idle: pool_name={}", poolName) - throw PoolEmptyException("No idle sandbox available and policy is FAIL_FAST") - } - logger.debug("Acquire direct create: pool_name={} policy={}", poolName, policy) - return directCreate(sandboxTimeout) - } - - override fun resize(maxIdle: Int) { - require(maxIdle >= 0) { "maxIdle must be >= 0" } - currentMaxIdle = maxIdle - scheduler?.execute { runReconcileTick() } - } - - override fun snapshot(): PoolSnapshot { - val state = when (lifecycleState.get()) { - LifecycleState.NOT_STARTED, - LifecycleState.STOPPED -> PoolState.STOPPED - LifecycleState.DRAINING -> PoolState.DRAINING - else -> reconcileState.state - } - val counters = stateStore.snapshotCounters(config.poolName) - return PoolSnapshot( - state = state, - idleCount = counters.idleCount, - lastError = reconcileState.lastError, - ) - } - - override fun shutdown(graceful: Boolean) { - if (lifecycleState.get() == LifecycleState.STOPPED) return - if (!graceful) { - stopReconcile() - lifecycleState.set(LifecycleState.STOPPED) - closeProvider() - logger.info("Pool stopped (non-graceful): pool_name={} state={}", config.poolName, LifecycleState.STOPPED) - return - } - lifecycleState.set(LifecycleState.DRAINING) - stopReconcile() - val drainMs = config.drainTimeout.toMillis() - if (drainMs > 0) { - Thread.sleep(drainMs) - } - lifecycleState.set(LifecycleState.STOPPED) - closeProvider() - logger.info("Pool stopped (graceful): pool_name={} state={}", config.poolName, LifecycleState.STOPPED) - } - - private fun runReconcileTick() { - if (lifecycleState.get() != LifecycleState.RUNNING) return - val service = sandboxService ?: return - val reconcileConfig = config.copy(maxIdle = currentMaxIdle) - PoolReconciler.runReconcileTick( - config = reconcileConfig, - stateStore = stateStore, - createOne = { createOneSandbox(service) }, - reconcileState = reconcileState, - ) - } - - private fun createOneSandbox(service: Sandboxes): String? { - return try { - val response = service.createSandbox( - spec = creationSpec.imageSpec, - entrypoint = creationSpec.entrypoint, - env = creationSpec.env, - metadata = creationSpec.metadata, - timeout = IDLE_TTL, - resource = creationSpec.resource, - networkPolicy = creationSpec.networkPolicy, - extensions = emptyMap(), - volumes = creationSpec.volumes, - ) - response.id - } catch (e: Exception) { - logger.warn("Pool create sandbox failed: poolName={}", config.poolName, e) - throw e - } - } - - private fun directCreate(sandboxTimeout: Duration?): Sandbox { - val service = sandboxService ?: throw IllegalStateException("Pool not started") - val response = service.createSandbox( - spec = creationSpec.imageSpec, - entrypoint = creationSpec.entrypoint, - env = creationSpec.env, - metadata = creationSpec.metadata, - timeout = IDLE_TTL, - resource = creationSpec.resource, - networkPolicy = creationSpec.networkPolicy, - extensions = emptyMap(), - volumes = creationSpec.volumes, - ) - val sandbox = Sandbox.connector() - .sandboxId(response.id) - .connectionConfig(connectionConfig) - .connect() - sandboxTimeout?.let { sandbox.renew(it) } - return sandbox - } - - private fun stopReconcile() { - reconcileTask?.cancel(false) - reconcileTask = null - scheduler?.shutdown() - try { - scheduler?.awaitTermination(5, TimeUnit.SECONDS) - } catch (_: InterruptedException) { - Thread.currentThread().interrupt() - } - scheduler = null - } - - private fun closeProvider() { - try { - httpClientProvider?.close() - } catch (e: Exception) { - logger.warn("Error closing pool HTTP client", e) - } - httpClientProvider = null - sandboxService = null - } - - private enum class LifecycleState { - NOT_STARTED, - STARTING, - RUNNING, - DRAINING, - STOPPED, - } - - class Builder internal constructor() { - private var config: PoolConfig? = null - - fun config(config: PoolConfig): Builder { - this.config = config - return this - } - - fun poolName(poolName: String): Builder { - _configBuilder.poolName(poolName) - return this - } - - fun ownerId(ownerId: String): Builder { - _configBuilder.ownerId(ownerId) - return this - } - - fun maxIdle(maxIdle: Int): Builder { - _configBuilder.maxIdle(maxIdle) - return this - } - - fun stateStore(stateStore: PoolStateStore): Builder { - _configBuilder.stateStore(stateStore) - return this - } - - fun connectionConfig(connectionConfig: ConnectionConfig): Builder { - _configBuilder.connectionConfig(connectionConfig) - return this - } - - fun creationSpec(creationSpec: PoolCreationSpec): Builder { - _configBuilder.creationSpec(creationSpec) - return this - } - - fun emptyBehavior(emptyBehavior: com.alibaba.opensandbox.sandbox.domain.pool.EmptyBehavior): Builder { - _configBuilder.emptyBehavior(emptyBehavior) - return this - } - - fun warmupConcurrency(warmupConcurrency: Int): Builder { - _configBuilder.warmupConcurrency(warmupConcurrency) - return this - } - - fun primaryLockTtl(primaryLockTtl: Duration): Builder { - _configBuilder.primaryLockTtl(primaryLockTtl) - return this - } - - fun reconcileInterval(reconcileInterval: Duration): Builder { - _configBuilder.reconcileInterval(reconcileInterval) - return this - } - - fun degradedThreshold(degradedThreshold: Int): Builder { - _configBuilder.degradedThreshold(degradedThreshold) - return this - } - - fun drainTimeout(drainTimeout: Duration): Builder { - _configBuilder.drainTimeout(drainTimeout) - return this - } - - private val _configBuilder = PoolConfig.builder() - - fun build(): DefaultSandboxPool { - val cfg = config ?: _configBuilder.build() - return DefaultSandboxPool(cfg) - } - } -} diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPool.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPool.kt index 8e35506d..def33fa1 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPool.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPool.kt @@ -16,10 +16,29 @@ package com.alibaba.opensandbox.sandbox.pool +import com.alibaba.opensandbox.sandbox.HttpClientProvider import com.alibaba.opensandbox.sandbox.Sandbox +import com.alibaba.opensandbox.sandbox.config.ConnectionConfig +import com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException +import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException import com.alibaba.opensandbox.sandbox.domain.pool.AcquirePolicy +import com.alibaba.opensandbox.sandbox.domain.pool.EmptyBehavior +import com.alibaba.opensandbox.sandbox.domain.pool.PoolConfig +import com.alibaba.opensandbox.sandbox.domain.pool.PoolCreationSpec import com.alibaba.opensandbox.sandbox.domain.pool.PoolSnapshot +import com.alibaba.opensandbox.sandbox.domain.pool.PoolState +import com.alibaba.opensandbox.sandbox.domain.pool.PoolStateStore +import com.alibaba.opensandbox.sandbox.domain.services.Sandboxes +import com.alibaba.opensandbox.sandbox.infrastructure.factory.AdapterFactory +import com.alibaba.opensandbox.sandbox.infrastructure.pool.PoolReconciler +import com.alibaba.opensandbox.sandbox.infrastructure.pool.ReconcileState +import org.slf4j.LoggerFactory import java.time.Duration +import java.util.concurrent.Executors +import java.util.concurrent.ScheduledExecutorService +import java.util.concurrent.ScheduledFuture +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicReference /** * Client-side sandbox pool for acquiring ready sandboxes with predictable latency. @@ -27,6 +46,9 @@ import java.time.Duration * The pool maintains an idle buffer of clean, borrowable sandboxes. Callers [acquire] a sandbox, * use it, and terminate it via [Sandbox.kill] when done. No return/finalize API; sandboxes are ephemeral. * + * Uses [PoolStateStore] for idle membership and primary lock; runs a background reconcile loop + * when started. Replenish is leader-gated; acquire is allowed on all nodes. + * * ## Usage * * ```kotlin @@ -50,54 +72,345 @@ import java.time.Duration * pool.shutdown(graceful = true) * ``` * - * @see DefaultSandboxPool - * @see com.alibaba.opensandbox.sandbox.domain.pool.PoolConfig + * @see PoolConfig */ -interface SandboxPool { +class SandboxPool internal constructor( + config: PoolConfig, +) { + private val logger = LoggerFactory.getLogger(SandboxPool::class.java) + + /** Fixed idle TTL per OSEP (24h). */ + private val idleTtl = Duration.ofHours(24) + + private val config: PoolConfig = config + private val stateStore: PoolStateStore = config.stateStore + private val connectionConfig: ConnectionConfig = config.connectionConfig + private val creationSpec: PoolCreationSpec = config.creationSpec + private val reconcileState = ReconcileState(config.degradedThreshold) + + @Volatile + private var currentMaxIdle: Int = config.maxIdle + + private val lifecycleState = AtomicReference(LifecycleState.NOT_STARTED) + private var httpClientProvider: HttpClientProvider? = null + private var sandboxService: Sandboxes? = null + private var scheduler: ScheduledExecutorService? = null + private var reconcileTask: ScheduledFuture<*>? = null /** * Starts the pool: begins the background reconcile loop and, if [PoolConfig.maxIdle] > 0, * triggers an immediate warmup tick. */ - fun start() + fun start() { + if (lifecycleState.get() == LifecycleState.RUNNING || lifecycleState.get() == LifecycleState.STARTING) { + return + } + lifecycleState.set(LifecycleState.STARTING) + val provider = HttpClientProvider(connectionConfig) + httpClientProvider = provider + val factory = AdapterFactory(provider) + sandboxService = factory.createSandboxes() + val exec = + Executors.newSingleThreadScheduledExecutor { r -> + Thread(r, "sandbox-pool-reconcile-${config.poolName}").apply { isDaemon = true } + } + scheduler = exec + val reconcileIntervalMs = config.reconcileInterval.toMillis() + reconcileTask = + exec.scheduleAtFixedRate( + { runReconcileTick() }, + if (config.maxIdle > 0) 0 else reconcileIntervalMs, + reconcileIntervalMs, + TimeUnit.MILLISECONDS, + ) + lifecycleState.set(LifecycleState.RUNNING) + logger.info( + "Pool started: pool_name={} state={} maxIdle={}", + config.poolName, + LifecycleState.RUNNING, + currentMaxIdle, + ) + } /** * Acquires a sandbox from the pool or creates one directly per policy. * * 1. Tries to take an idle sandbox ID from the store and connect. * 2. If connect fails (stale ID), removes the ID, best-effort kill, then falls back to direct create. - * 3. If no idle and [policy] is [AcquirePolicy.FAIL_FAST], throws [com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException]. + * 3. If no idle and [policy] is [AcquirePolicy.FAIL_FAST], throws [PoolEmptyException]. * 4. If no idle and [policy] is [AcquirePolicy.DIRECT_CREATE], creates a new sandbox via lifecycle API and returns it. * * @param sandboxTimeout Optional duration to set on the acquired sandbox (applied via renew after connect). * @param policy Behavior when idle buffer is empty (default: [AcquirePolicy.DIRECT_CREATE]). * @return A connected [Sandbox] instance. Caller must call [Sandbox.kill] when done. - * @throws com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException when policy is FAIL_FAST and no idle is available. - * @throws com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException for lifecycle create/connect/renew errors. + * @throws PoolEmptyException when policy is FAIL_FAST and no idle is available. + * @throws SandboxException for lifecycle create/connect/renew errors. */ fun acquire( sandboxTimeout: Duration? = null, policy: AcquirePolicy = AcquirePolicy.DIRECT_CREATE, - ): Sandbox + ): Sandbox { + if (lifecycleState.get() != LifecycleState.RUNNING) { + throw IllegalStateException("Pool is not running: ${lifecycleState.get()}") + } + val poolName = config.poolName + val sandboxId = stateStore.tryTakeIdle(poolName) + if (sandboxId != null) { + try { + val sandbox = + Sandbox.connector() + .sandboxId(sandboxId) + .connectionConfig(connectionConfig) + .connect() + sandboxTimeout?.let { sandbox.renew(it) } + logger.debug( + "Acquire from idle: pool_name={} sandbox_id={} policy={}", + poolName, + sandboxId, + policy, + ) + return sandbox + } catch (e: Exception) { + logger.debug( + "Idle connect failed, falling back to direct create: pool_name={} sandbox_id={}", + poolName, + sandboxId, + ) + stateStore.removeIdle(poolName, sandboxId) + try { + sandboxService?.killSandbox(sandboxId) + } catch (_: Exception) { + // best-effort kill; do not replace original error + } + // fall through to direct create + } + } + if (policy == AcquirePolicy.FAIL_FAST) { + logger.debug("Acquire FAIL_FAST with empty idle: pool_name={}", poolName) + throw PoolEmptyException("No idle sandbox available and policy is FAIL_FAST") + } + logger.debug("Acquire direct create: pool_name={} policy={}", poolName, policy) + return directCreate(sandboxTimeout) + } /** * Updates the maximum idle target. Triggers a reconcile tick without blocking on convergence. */ - fun resize(maxIdle: Int) + fun resize(maxIdle: Int) { + require(maxIdle >= 0) { "maxIdle must be >= 0" } + currentMaxIdle = maxIdle + scheduler?.execute { runReconcileTick() } + } /** * Returns a point-in-time snapshot of pool state for observability. */ - fun snapshot(): PoolSnapshot + fun snapshot(): PoolSnapshot { + val state = + when (lifecycleState.get()) { + LifecycleState.NOT_STARTED, + LifecycleState.STOPPED, + -> PoolState.STOPPED + LifecycleState.DRAINING -> PoolState.DRAINING + else -> reconcileState.state + } + val counters = stateStore.snapshotCounters(config.poolName) + return PoolSnapshot( + state = state, + idleCount = counters.idleCount, + lastError = reconcileState.lastError, + ) + } /** * Stops the pool. If [graceful] is true, stops accepting new acquires, stops the reconcile worker, * and waits up to drainTimeout for in-flight operations. Otherwise stops immediately. */ - fun shutdown(graceful: Boolean = true) + fun shutdown(graceful: Boolean = true) { + if (lifecycleState.get() == LifecycleState.STOPPED) return + if (!graceful) { + stopReconcile() + lifecycleState.set(LifecycleState.STOPPED) + closeProvider() + logger.info("Pool stopped (non-graceful): pool_name={} state={}", config.poolName, LifecycleState.STOPPED) + return + } + lifecycleState.set(LifecycleState.DRAINING) + stopReconcile() + val drainMs = config.drainTimeout.toMillis() + if (drainMs > 0) { + Thread.sleep(drainMs) + } + lifecycleState.set(LifecycleState.STOPPED) + closeProvider() + logger.info("Pool stopped (graceful): pool_name={} state={}", config.poolName, LifecycleState.STOPPED) + } + + private fun runReconcileTick() { + if (lifecycleState.get() != LifecycleState.RUNNING) return + val service = sandboxService ?: return + val reconcileConfig = config.copy(maxIdle = currentMaxIdle) + PoolReconciler.runReconcileTick( + config = reconcileConfig, + stateStore = stateStore, + createOne = { createOneSandbox(service) }, + reconcileState = reconcileState, + ) + } + + private fun createOneSandbox(service: Sandboxes): String? { + return try { + val response = + service.createSandbox( + spec = creationSpec.imageSpec, + entrypoint = creationSpec.entrypoint, + env = creationSpec.env, + metadata = creationSpec.metadata, + timeout = idleTtl, + resource = creationSpec.resource, + networkPolicy = creationSpec.networkPolicy, + extensions = emptyMap(), + volumes = creationSpec.volumes, + ) + response.id + } catch (e: Exception) { + logger.warn("Pool create sandbox failed: poolName={}", config.poolName, e) + throw e + } + } + + private fun directCreate(sandboxTimeout: Duration?): Sandbox { + val service = sandboxService ?: throw IllegalStateException("Pool not started") + val response = + service.createSandbox( + spec = creationSpec.imageSpec, + entrypoint = creationSpec.entrypoint, + env = creationSpec.env, + metadata = creationSpec.metadata, + timeout = idleTtl, + resource = creationSpec.resource, + networkPolicy = creationSpec.networkPolicy, + extensions = emptyMap(), + volumes = creationSpec.volumes, + ) + val sandbox = + Sandbox.connector() + .sandboxId(response.id) + .connectionConfig(connectionConfig) + .connect() + sandboxTimeout?.let { sandbox.renew(it) } + return sandbox + } + + private fun stopReconcile() { + reconcileTask?.cancel(false) + reconcileTask = null + scheduler?.shutdown() + try { + scheduler?.awaitTermination(5, TimeUnit.SECONDS) + } catch (_: InterruptedException) { + Thread.currentThread().interrupt() + } + scheduler = null + } + + private fun closeProvider() { + try { + httpClientProvider?.close() + } catch (e: Exception) { + logger.warn("Error closing pool HTTP client", e) + } + httpClientProvider = null + sandboxService = null + } + + @Suppress("ktlint:standard:property-naming") + private enum class LifecycleState { + NOT_STARTED, + STARTING, + RUNNING, + DRAINING, + STOPPED, + } companion object { @JvmStatic - fun builder(): DefaultSandboxPool.Builder = DefaultSandboxPool.Builder() + fun builder(): Builder = Builder() + } + + class Builder internal constructor() { + private var config: PoolConfig? = null + + fun config(config: PoolConfig): Builder { + this.config = config + return this + } + + fun poolName(poolName: String): Builder { + configBuilder.poolName(poolName) + return this + } + + fun ownerId(ownerId: String): Builder { + configBuilder.ownerId(ownerId) + return this + } + + fun maxIdle(maxIdle: Int): Builder { + configBuilder.maxIdle(maxIdle) + return this + } + + fun stateStore(stateStore: PoolStateStore): Builder { + configBuilder.stateStore(stateStore) + return this + } + + fun connectionConfig(connectionConfig: ConnectionConfig): Builder { + configBuilder.connectionConfig(connectionConfig) + return this + } + + fun creationSpec(creationSpec: PoolCreationSpec): Builder { + configBuilder.creationSpec(creationSpec) + return this + } + + fun emptyBehavior(emptyBehavior: EmptyBehavior): Builder { + configBuilder.emptyBehavior(emptyBehavior) + return this + } + + fun warmupConcurrency(warmupConcurrency: Int): Builder { + configBuilder.warmupConcurrency(warmupConcurrency) + return this + } + + fun primaryLockTtl(primaryLockTtl: Duration): Builder { + configBuilder.primaryLockTtl(primaryLockTtl) + return this + } + + fun reconcileInterval(reconcileInterval: Duration): Builder { + configBuilder.reconcileInterval(reconcileInterval) + return this + } + + fun degradedThreshold(degradedThreshold: Int): Builder { + configBuilder.degradedThreshold(degradedThreshold) + return this + } + + fun drainTimeout(drainTimeout: Duration): Builder { + configBuilder.drainTimeout(drainTimeout) + return this + } + + private val configBuilder = PoolConfig.builder() + + fun build(): SandboxPool { + val cfg = config ?: configBuilder.build() + return SandboxPool(cfg) + } } } diff --git a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStoreTest.kt b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStoreTest.kt index 36c9bb1d..81d92623 100644 --- a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStoreTest.kt +++ b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/pool/InMemoryPoolStateStoreTest.kt @@ -30,7 +30,6 @@ import java.time.Duration * Contract and behavior tests for [InMemoryPoolStateStore]. */ class InMemoryPoolStateStoreTest { - private lateinit var store: PoolStateStore private val poolName = "test-pool" diff --git a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPoolTest.kt b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPoolTest.kt similarity index 99% rename from sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPoolTest.kt rename to sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPoolTest.kt index 87609410..e98c68d4 100644 --- a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/DefaultSandboxPoolTest.kt +++ b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/pool/SandboxPoolTest.kt @@ -19,16 +19,15 @@ package com.alibaba.opensandbox.sandbox.pool import com.alibaba.opensandbox.sandbox.config.ConnectionConfig import com.alibaba.opensandbox.sandbox.domain.exceptions.PoolEmptyException import com.alibaba.opensandbox.sandbox.domain.pool.AcquirePolicy -import com.alibaba.opensandbox.sandbox.domain.pool.PoolState import com.alibaba.opensandbox.sandbox.domain.pool.PoolCreationSpec +import com.alibaba.opensandbox.sandbox.domain.pool.PoolState import com.alibaba.opensandbox.sandbox.infrastructure.pool.InMemoryPoolStateStore -import java.time.Duration import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Assertions.assertThrows import org.junit.jupiter.api.Test +import java.time.Duration -class DefaultSandboxPoolTest { - +class SandboxPoolTest { @Test fun `snapshot before start returns STOPPED and zero idle`() { val pool = buildPool()