diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go
index 9ce4ea5b4d..133ae81de8 100644
--- a/cli/cmd/cluster.go
+++ b/cli/cmd/cluster.go
@@ -887,7 +887,7 @@ func printInfoNodes(infoResponse *schema.InfoResponse) {
 	numAPIInstances := len(infoResponse.NodeInfos)
 
 	var totalReplicas int
-	var doesClusterHaveGPUs, doesClusterHaveInfs bool
+	var doesClusterHaveGPUs, doesClusterHaveInfs, doesClusterHaveAsyncAPIs bool
 	for _, nodeInfo := range infoResponse.NodeInfos {
 		totalReplicas += nodeInfo.NumReplicas
 		if nodeInfo.ComputeUserCapacity.GPU > 0 {
@@ -896,6 +896,9 @@ func printInfoNodes(infoResponse *schema.InfoResponse) {
 		if nodeInfo.ComputeUserCapacity.Inf > 0 {
 			doesClusterHaveInfs = true
 		}
+		if nodeInfo.NumAsyncGatewayReplicas > 0 {
+			doesClusterHaveAsyncAPIs = true
+		}
 	}
 
 	var pendingReplicasStr string
@@ -913,6 +916,7 @@ func printInfoNodes(infoResponse *schema.InfoResponse) {
 		{Title: "instance type"},
 		{Title: "lifecycle"},
 		{Title: "replicas"},
+		{Title: "async gateway replicas", Hidden: !doesClusterHaveAsyncAPIs},
 		{Title: "CPU (requested / total allocatable)"},
 		{Title: "memory (requested / total allocatable)"},
 		{Title: "GPU (requested / total allocatable)", Hidden: !doesClusterHaveGPUs},
@@ -930,7 +934,7 @@ func printInfoNodes(infoResponse *schema.InfoResponse) {
 		memStr := nodeInfo.ComputeUserRequested.Mem.String() + " / " + nodeInfo.ComputeUserCapacity.Mem.String()
 		gpuStr := s.Int64(nodeInfo.ComputeUserRequested.GPU) + " / " + s.Int64(nodeInfo.ComputeUserCapacity.GPU)
 		infStr := s.Int64(nodeInfo.ComputeUserRequested.Inf) + " / " + s.Int64(nodeInfo.ComputeUserCapacity.Inf)
-		rows = append(rows, []interface{}{nodeInfo.InstanceType, lifecycle, nodeInfo.NumReplicas, cpuStr, memStr, gpuStr, infStr})
+		rows = append(rows, []interface{}{nodeInfo.InstanceType, lifecycle, nodeInfo.NumReplicas, nodeInfo.NumAsyncGatewayReplicas, cpuStr, memStr, gpuStr, infStr})
 	}
 
 	t := table.Table{
diff --git a/pkg/operator/endpoints/info.go b/pkg/operator/endpoints/info.go
index 25835acab3..0b723e8f1f 100644
--- a/pkg/operator/endpoints/info.go
+++ b/pkg/operator/endpoints/info.go
@@ -106,6 +106,7 @@ func getNodeInfos() ([]schema.NodeInfo, int, error) {
 		pod := pods[i]
 
 		_, isAPIPod := pod.Labels["apiName"]
+		asyncDeploymentType, isAsyncPod := pod.Labels["cortex.dev/async"]
 
 		if pod.Spec.NodeName == "" && isAPIPod {
 			numPendingReplicas++
@@ -118,7 +119,14 @@ func getNodeInfos() ([]schema.NodeInfo, int, error) {
 		}
 
 		if isAPIPod {
-			node.NumReplicas++
+			// Pods without the "cortex.dev/async" label are regular API replicas. Async pods are
+			// split by the label value: "api" pods are replicas, "gateway" pods are gateway replicas.
+			if !isAsyncPod || asyncDeploymentType == "api" {
+				node.NumReplicas++
+			}
+			if isAsyncPod && asyncDeploymentType == "gateway" {
+				node.NumAsyncGatewayReplicas++
+			}
 		}
 
 		cpu, mem, gpu, inf := k8s.TotalPodCompute(&pod.Spec)
diff --git a/pkg/operator/schema/schema.go b/pkg/operator/schema/schema.go
index f820f6710b..9486e1f225 100644
--- a/pkg/operator/schema/schema.go
+++ b/pkg/operator/schema/schema.go
@@ -31,15 +31,16 @@ type InfoResponse struct {
 }
 
 type NodeInfo struct {
-	Name                 string             `json:"name"`
-	NodeGroupName        string             `json:"nodegroup_name"`
-	InstanceType         string             `json:"instance_type"`
-	IsSpot               bool               `json:"is_spot"`
-	Price                float64            `json:"price"`
-	NumReplicas          int                `json:"num_replicas"`
-	ComputeUserCapacity  userconfig.Compute `json:"compute_user_capacity"`  // the total resources available to the user on a node
-	ComputeAvailable     userconfig.Compute `json:"compute_available"`      // unused resources on a node
-	ComputeUserRequested userconfig.Compute `json:"compute_user_requested"` // total resources requested by user on a node
+	Name                    string             `json:"name"`
+	NodeGroupName           string             `json:"nodegroup_name"`
+	InstanceType            string             `json:"instance_type"`
+	IsSpot                  bool               `json:"is_spot"`
+	Price                   float64            `json:"price"`
+	NumReplicas             int                `json:"num_replicas"`
+	NumAsyncGatewayReplicas int                `json:"num_async_gateway_replicas"`
+	ComputeUserCapacity     userconfig.Compute `json:"compute_user_capacity"`  // the total resources available to the user on a node
+	ComputeAvailable        userconfig.Compute `json:"compute_available"`      // unused resources on a node
+	ComputeUserRequested    userconfig.Compute `json:"compute_user_requested"` // total resources requested by user on a node
 }
 
 type DeployResult struct {