class GPUDevice(BaseModel):
    """A single GPU device detected on the node.

    Only ``vendor`` is required; every other field carries a default, so a
    device can be described with partial information.
    """
    # Vendor identifier; expected to be "nvidia" or "amd". The annotation is a
    # plain str, so the type itself does not restrict the value.
    vendor: str  # "nvidia" or "amd"
    # Defaults to 0. Presumably the device's position among the node's GPUs --
    # NOTE(review): confirm against whatever populates this model.
    index: int = 0
    # Model name; defaults to an empty string when not supplied.
    model: str = ""
    # Per the field name this is VRAM in megabytes; None when not supplied.
    vram_mb: Optional[int] = None
    # Toolkit versions, both optional. Presumably cuda_version applies to
    # NVIDIA devices and rocm_version to AMD devices -- TODO confirm.
    cuda_version: Optional[str] = None
    rocm_version: Optional[str] = None
/**
 * Format a Kubernetes memory quantity for display in GiB.
 *
 * - `undefined` / empty string -> `'?'`
 * - `<n>Ki`, `<n>Mi`, or a bare integer (bytes) -> `'<x.x> Gi'`
 * - anything else (e.g. `16Gi`, `2Ti`, `129M`) -> returned unchanged
 *
 * The previous implementation called `parseInt` on the raw string. `parseInt`
 * accepts a leading numeric prefix, so `"16Gi"` parsed as 16, was treated as
 * KiB and rendered as `"0.0 Gi"`; the `Gi` passthrough was never reached.
 * The unit suffix is now matched explicitly before any conversion happens.
 */
function formatMemory(memStr?: string): string {
  if (!memStr) return '?'

  // Anchored match: digits followed by an optional Ki/Mi suffix and nothing
  // else. Quantities in other units fall through to the passthrough below.
  const match = /^(\d+)(Ki|Mi)?$/.exec(memStr)
  if (!match) return memStr

  // How many of the given unit make up one GiB. A bare integer is a byte
  // count in Kubernetes quantity notation.
  const unitsPerGi: Record<string, number> = {
    '': 1024 * 1024 * 1024,
    Ki: 1024 * 1024,
    Mi: 1024,
  }

  const amount = parseInt(match[1], 10)
  const divisor = unitsPerGi[match[2] ?? '']
  return `${(amount / divisor).toFixed(1)} Gi`
}
{error}
+ )} + {!error && nodes.map((node) => ( +No nodes found
+ )} +