Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/solana-build-anchor-programs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ jobs:
uses: actions/cache@v4
with:
path: validator-image.tar.gz
# FIXME: this key can produce a stale cache hit: it only hashes the listed files, so changes
# to other nix files that affect the image will not invalidate the cache.
# We should really build this image with nix instead of a manual hashFiles / cache step here.
key: validator-image-${{ runner.os }}-${{ hashFiles('shared/coordinator/src/coordinator.rs', 'architectures/decentralized/solana-coordinator/**/*.rs', 'architectures/decentralized/solana-coordinator/**/*.toml', 'architectures/decentralized/solana-coordinator/Cargo.lock', 'architectures/decentralized/solana-authorizer/**/*.rs', 'architectures/decentralized/solana-authorizer/**/*.toml', 'architectures/decentralized/solana-authorizer/Cargo.lock', 'docker/test/psyche_solana_validator_entrypoint.sh', 'nix/docker.nix', 'flake.lock') }}
lookup-only: true

Expand Down
28 changes: 9 additions & 19 deletions .github/workflows/solana-integration-test-base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,6 @@ jobs:
substituters = https://cache.nixos.org/ https://cache.garnix.io/ https://nix-community.cachix.org
trusted-public-keys = cache.garnix.io:CTFPyKSLcx5RMJKfLo5EEPUObbA78b0YQ2DTCJXqr9g= cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY= nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs=

- name: Install just
run: |
nix profile install nixpkgs#just

# Step 1: Get Validator Image from cache
- name: Get Validator Image from cache
id: cache-validator
Expand Down Expand Up @@ -79,30 +75,24 @@ jobs:
echo "Disk space before client build"
df -h

# Calculate the derivation hash
echo "Calculating derivation path"
DRV_PATH=$(nix eval --raw .#docker-psyche-solana-test-client-no-python.drvPath)
echo "Derivation path: $DRV_PATH"

OUT_PATH=$(nix derivation show $DRV_PATH | jq -r '.[].outputs.out.path')
echo "Output path: $OUT_PATH"

# download from Garnix cache first
# download from Garnix cache
echo "Attempting to fetch from Garnix cache"
nix-store --realise $OUT_PATH --option substitute true
OUT_PATH=$(nix build .#docker-psyche-solana-test-client-no-python --no-link --print-out-paths)

# Load the image into Docker
$OUT_PATH | docker load

echo "Disk space after client build"
df -h

# Clean Nix store after client build
- name: Clean after client build
- name: Build run-manager binary
run: |
# Clean up the path file
rm -f client-image-path.txt
nix build --out-link run-manager .#run-manager
nix build --out-link _keep_tests_binary .#test-psyche-decentralized-testing-integration_tests

# Clean Nix store
- name: Clean after client build
run: |
# Clean nix store garbage
nix-collect-garbage -d
nix store optimise
Expand All @@ -129,4 +119,4 @@ jobs:
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
nix develop --command cargo test --release -p psyche-decentralized-testing --test integration_tests -- --nocapture "${{ inputs.test-name }}"
cd architectures/decentralized/testing/ && nix run .#test-psyche-decentralized-testing-integration_tests -- --nocapture "${{ inputs.test-name }}"
52 changes: 20 additions & 32 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ indicatif = "0.17.5"
tokenizers = { version = "0.20.0", default-features = false, features = [
"onig",
] }
tch = { git = "https://github.com/jquesnelle/tch-rs.git", rev = "11d1ca2ef6dbd3f1e5b0986fab0a90fbb6734496" }
torch-sys = { git = "https://github.com/jquesnelle/tch-rs.git", rev = "11d1ca2ef6dbd3f1e5b0986fab0a90fbb6734496" }
pyo3-tch = { git = "https://github.com/jquesnelle/tch-rs.git", rev = "11d1ca2ef6dbd3f1e5b0986fab0a90fbb6734496" }
tch = { git = "https://github.com/jquesnelle/tch-rs.git", rev = "dda507e05a776547a112b6854d1e611684f8c729" }
torch-sys = { git = "https://github.com/jquesnelle/tch-rs.git", rev = "dda507e05a776547a112b6854d1e611684f8c729" }
pyo3-tch = { git = "https://github.com/jquesnelle/tch-rs.git", rev = "dda507e05a776547a112b6854d1e611684f8c729" }
#tch = { path = "../tch-rs" }
#torch-sys = { path = "../tch-rs/torch-sys" }
#pyo3-tch = { path = "../tch-rs/pyo3-tch" }
Expand Down
18 changes: 18 additions & 0 deletions architectures/decentralized/testing/packages.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{ psycheLib, pkgs, inputs, ... }:

psycheLib.buildRustPackage {
  cratePath = ./.;
  # The integration tests shell out to the Solana CLI and to `just`,
  # so both are added to the test-phase build inputs.
  buildInputs.test = [
    inputs.solana-pkgs.packages.${pkgs.stdenv.hostPlatform.system}.solana
    pkgs.just
  ];
}
3 changes: 2 additions & 1 deletion architectures/inference-only/inference-node/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ anyhow.workspace = true
tracing.workspace = true
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

clap = { version = "4", features = ["derive"] }
clap.workspace = true
clap-markdown.workspace = true

iroh.workspace = true
iroh-blobs.workspace = true
Expand Down
12 changes: 12 additions & 0 deletions architectures/inference-only/inference-node/packages.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Nix package definition for the inference-node crate, built via the
# project's shared Rust build helper.
{ psycheLib, ... }:

psycheLib.buildRustPackage {
  # Flags interpreted by psycheLib.buildRustPackage (defined elsewhere in
  # the repo) — presumably enabling the Python toolchain and GPU-dependent
  # features for this crate; confirm against the helper's implementation.
  needsPython = true;
  needsGpu = true;
  cratePath = ./.;
  # vllm doesn't build on macos
  supportedSystems = [
    "x86_64-linux"
    "aarch64-linux"
  ];
}
73 changes: 56 additions & 17 deletions architectures/inference-only/inference-node/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
//! - Supports dynamic checkpoint reloading
use anyhow::{Context, Result};
use clap::Parser;
use clap::{Args as ClapArgs, Parser, Subcommand};
use psyche_inference::{InferenceGossipMessage, InferenceNode};
use psyche_metrics::ClientMetrics;
use psyche_network::{DiscoveryMode, NetworkConnection, NetworkEvent, RelayKind, allowlist};
Expand All @@ -20,9 +20,31 @@ use tracing::{debug, error, info, warn};

#[derive(Parser, Debug)]
#[command(name = "psyche-inference-node")]
struct Args {
struct Cli {
#[command(subcommand)]
command: Option<Commands>,

#[command(flatten)]
run_args: RunArgs,
}

/// Top-level subcommands of the inference-node CLI.
#[derive(Subcommand, Debug)]
enum Commands {
    /// Run the inference node (default)
    Run(RunArgs),

    // Prints the help, optionally as markdown. Used for docs generation.
    // Hidden from user-facing `--help` output via `hide = true`.
    #[clap(hide = true)]
    PrintAllHelp {
        #[arg(long, required = true)]
        markdown: bool,
    },
}

#[derive(ClapArgs, Debug, Clone)]
struct RunArgs {
#[arg(long)]
model_name: String,
model_name: Option<String>,

#[arg(long, default_value = "1")]
tensor_parallel_size: usize,
Expand Down Expand Up @@ -56,27 +78,44 @@ async fn main() -> Result<()> {
)
.init();

let args = Args::parse();
let cli = Cli::parse();

// If no subcommand is provided, default to run with the flattened args
let run_args = match cli.command {
Some(Commands::PrintAllHelp { markdown }) => {
assert!(markdown);
clap_markdown::print_help_markdown::<Cli>();
return Ok(());
}
Some(Commands::Run(args)) => args,
None => cli.run_args,
};

let model_name = run_args.model_name.context("--model-name is required")?;

info!("Starting Psyche Inference Node");
info!("Model: {}", args.model_name);
info!("Tensor Parallel Size: {}", args.tensor_parallel_size);
info!("GPU Memory Utilization: {}", args.gpu_memory_utilization);
info!("Model: {}", model_name);
info!("Tensor Parallel Size: {}", run_args.tensor_parallel_size);
info!(
"GPU Memory Utilization: {}",
run_args.gpu_memory_utilization
);

let discovery_mode: DiscoveryMode = args
let discovery_mode: DiscoveryMode = run_args
.discovery_mode
.parse()
.map_err(|e| anyhow::anyhow!("Invalid discovery mode: {}", e))?;

let relay_kind: RelayKind = args
let relay_kind: RelayKind = run_args
.relay_kind
.parse()
.map_err(|e| anyhow::anyhow!("Invalid relay kind: {}", e))?;

let capabilities: Vec<String> = if args.capabilities.is_empty() {
let capabilities: Vec<String> = if run_args.capabilities.is_empty() {
vec![]
} else {
args.capabilities
run_args
.capabilities
.split(',')
.map(|s| s.trim().to_string())
.collect()
Expand All @@ -90,15 +129,15 @@ async fn main() -> Result<()> {

info!("Initializing vLLM engine...");
let mut inference_node = InferenceNode::new(
args.model_name.clone(),
Some(args.tensor_parallel_size),
Some(args.gpu_memory_utilization),
model_name.clone(),
Some(run_args.tensor_parallel_size),
Some(run_args.gpu_memory_utilization),
);

inference_node
.initialize(
Some(args.tensor_parallel_size),
Some(args.gpu_memory_utilization),
Some(run_args.tensor_parallel_size),
Some(run_args.gpu_memory_utilization),
)
.context("Failed to initialize vLLM engine")?;

Expand Down Expand Up @@ -131,7 +170,7 @@ async fn main() -> Result<()> {

// Announce availability via gossip
let availability_msg = InferenceGossipMessage::NodeAvailable {
model_name: args.model_name.clone(),
model_name: model_name.clone(),
checkpoint_id: None, // TODO: Track actual checkpoint when reloading - do we need this?
capabilities: capabilities.clone(),
};
Expand Down
Loading