From fff7f8791cfed3c6ad73f1e77acdcec2b07caa78 Mon Sep 17 00:00:00 2001 From: David Eliahu Date: Fri, 21 Jun 2019 12:09:22 -0700 Subject: [PATCH 1/2] Remove status command, fold into get and logs --- cli/cmd/get.go | 126 ++++++++++++++++++++++++++++ cli/cmd/logs.go | 3 +- cli/cmd/root.go | 11 ++- cli/cmd/status.go | 208 ---------------------------------------------- 4 files changed, 138 insertions(+), 210 deletions(-) delete mode 100644 cli/cmd/status.go diff --git a/cli/cmd/get.go b/cli/cmd/get.go index 67ddf1090b..436c43eb3c 100644 --- a/cli/cmd/get.go +++ b/cli/cmd/get.go @@ -41,6 +41,7 @@ func init() { addAppNameFlag(getCmd) addEnvFlag(getCmd) addWatchFlag(getCmd) + addSummaryFlag(getCmd) addResourceTypesToHelp(getCmd) } @@ -64,6 +65,9 @@ func runGet(cmd *cobra.Command, args []string) (string, error) { switch len(args) { case 0: + if flagSummary { + return resourceStatusesStr(resourcesRes), nil + } return allResourcesStr(resourcesRes), nil case 1: @@ -515,3 +519,125 @@ func titleStr(title string) string { bottom := strings.Repeat("-", titleLength) return "\n" + top + "\n" + title + "\n" + bottom + "\n\n" } + +func resourceStatusesStr(resourcesRes *schema.GetResourcesResponse) string { + out := "\n" + out += pythonPackageStatusesStr(resourcesRes.DataStatuses, resourcesRes.Context) + "\n" + out += rawColumnStatusesStr(resourcesRes.DataStatuses, resourcesRes.Context) + "\n" + out += aggregateStatusesStr(resourcesRes.DataStatuses, resourcesRes.Context) + "\n" + out += transformedColumnStatusesStr(resourcesRes.DataStatuses, resourcesRes.Context) + "\n" + out += trainingDatasetStatusesStr(resourcesRes.DataStatuses, resourcesRes.Context) + "\n" + out += modelStatusesStr(resourcesRes.DataStatuses, resourcesRes.Context) + "\n" + out += apiStatusesStr(resourcesRes.APIGroupStatuses) + return out +} + +func pythonPackageStatusesStr(dataStatuses map[string]*resource.DataStatus, ctx *context.Context) string { + var statuses = make([]resource.Status, len(ctx.PythonPackages)) + i := 0 + for _, pythonPackage := range ctx.PythonPackages { + statuses[i] = dataStatuses[pythonPackage.GetID()] + i++ + } + return "Python Packages: " + StatusStr(statuses) +} + +func rawColumnStatusesStr(dataStatuses map[string]*resource.DataStatus, ctx *context.Context) string { + var statuses = make([]resource.Status, len(ctx.RawColumns)) + i := 0 + for _, rawColumn := range ctx.RawColumns { + statuses[i] = dataStatuses[rawColumn.GetID()] + i++ + } + return "Raw Columns: " + StatusStr(statuses) +} + +func aggregateStatusesStr(dataStatuses map[string]*resource.DataStatus, ctx *context.Context) string { + var statuses = make([]resource.Status, len(ctx.Aggregates)) + i := 0 + for _, aggregate := range ctx.Aggregates { + statuses[i] = dataStatuses[aggregate.GetID()] + i++ + } + return "Aggregates: " + StatusStr(statuses) +} + +func transformedColumnStatusesStr(dataStatuses map[string]*resource.DataStatus, ctx *context.Context) string { + var statuses = make([]resource.Status, len(ctx.TransformedColumns)) + i := 0 + for _, transformedColumn := range ctx.TransformedColumns { + statuses[i] = dataStatuses[transformedColumn.GetID()] + i++ + } + return "Transformed Columns: " + StatusStr(statuses) +} + +func trainingDatasetStatusesStr(dataStatuses map[string]*resource.DataStatus, ctx *context.Context) string { + var statuses = make([]resource.Status, len(ctx.Models)) + i := 0 + for _, model := range ctx.Models { + statuses[i] = dataStatuses[model.Dataset.GetID()] + i++ + } + return "Training Datasets: " + StatusStr(statuses) +} + +func modelStatusesStr(dataStatuses map[string]*resource.DataStatus, ctx *context.Context) string { + var statuses = make([]resource.Status, len(ctx.Models)) + i := 0 + for _, model := range ctx.Models { + statuses[i] = dataStatuses[model.GetID()] + i++ + } + return "Models: " + StatusStr(statuses) +} + +func apiStatusesStr(apiGroupStatuses map[string]*resource.APIGroupStatus) string { + var statuses = make([]resource.Status, len(apiGroupStatuses)) + i := 0 + for _, apiGroupStatus := range apiGroupStatuses { + statuses[i] = apiGroupStatus + i++ + } + return "APIs: " + StatusStr(statuses) +} + +func StatusStr(statuses []resource.Status) string { + if len(statuses) == 0 { + return "none" + } + + messageBuckets := make(map[int][]string) + for _, status := range statuses { + bucketKey := status.GetCode().SortBucket() + messageBuckets[bucketKey] = append(messageBuckets[bucketKey], status.Message()) + } + + var bucketKeys []int + for bucketKey := range messageBuckets { + bucketKeys = append(bucketKeys, bucketKey) + } + sort.Ints(bucketKeys) + + var messageItems []string + + for _, bucketKey := range bucketKeys { + messageCounts := make(map[string]int) + for _, message := range messageBuckets[bucketKey] { + messageCounts[message]++ + } + + var messages []string + for message := range messageCounts { + messages = append(messages, message) + } + sort.Strings(messages) + + for _, message := range messages { + messageItem := fmt.Sprintf("%d %s", messageCounts[message], message) + messageItems = append(messageItems, messageItem) + } + } + + return strings.Join(messageItems, " | ") +} diff --git a/cli/cmd/logs.go b/cli/cmd/logs.go index 5da8a4d368..4b340fb342 100644 --- a/cli/cmd/logs.go +++ b/cli/cmd/logs.go @@ -26,6 +26,7 @@ import ( func init() { addAppNameFlag(logsCmd) addEnvFlag(logsCmd) + addVerboseFlag(logsCmd) addResourceTypesToHelp(logsCmd) } @@ -58,7 +59,7 @@ var logsCmd = &cobra.Command{ errors.Exit(err) } - err = StreamLogs(appName, resourceName, resourceTypeStr, true) + err = StreamLogs(appName, resourceName, resourceTypeStr, flagVerbose) if err != nil { errors.Exit(err) } diff --git a/cli/cmd/root.go b/cli/cmd/root.go index 9750fb5136..95870ce23c 100644 --- a/cli/cmd/root.go +++ b/cli/cmd/root.go @@ -38,6 +38,8 @@ var cmdStr string var flagEnv string var flagWatch bool var flagAppName string +var flagVerbose bool +var flagSummary bool var configFileExts = []string{"yaml", "yml"} @@ -73,7 +75,6 @@ func Execute() { rootCmd.AddCommand(deleteCmd) rootCmd.AddCommand(getCmd) - rootCmd.AddCommand(statusCmd) rootCmd.AddCommand(logsCmd) rootCmd.AddCommand(configureCmd) @@ -99,6 +100,14 @@ func addAppNameFlag(cmd *cobra.Command) { cmd.PersistentFlags().StringVarP(&flagAppName, "app", "a", "", "app name") } +func addVerboseFlag(cmd *cobra.Command) { + cmd.PersistentFlags().BoolVarP(&flagVerbose, "verbose", "v", false, "show verbose output") +} + +func addSummaryFlag(cmd *cobra.Command) { + cmd.PersistentFlags().BoolVarP(&flagSummary, "summary", "s", false, "show summarized output") +} + var resourceTypesHelp = fmt.Sprintf("\nResource Types:\n %s\n", strings.Join(resource.VisibleTypes.StringList(), "\n ")) func addResourceTypesToHelp(cmd *cobra.Command) { diff --git a/cli/cmd/status.go b/cli/cmd/status.go deleted file mode 100644 index 144bb37449..0000000000 --- a/cli/cmd/status.go +++ /dev/null @@ -1,208 +0,0 @@ -/* -Copyright 2019 Cortex Labs, Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package cmd - -import ( - "fmt" - "sort" - "strings" - - "github.com/spf13/cobra" - - "github.com/cortexlabs/cortex/pkg/operator/api/context" - "github.com/cortexlabs/cortex/pkg/operator/api/resource" - "github.com/cortexlabs/cortex/pkg/operator/api/schema" -) - -func init() { - addAppNameFlag(statusCmd) - addEnvFlag(statusCmd) - addWatchFlag(statusCmd) - addResourceTypesToHelp(statusCmd) -} - -var statusCmd = &cobra.Command{ - Use: "status [RESOURCE_TYPE] [RESOURCE_NAME]", - Short: "get resource statuses", - Long: "Get resource statuses.", - Args: cobra.RangeArgs(0, 2), - Run: func(cmd *cobra.Command, args []string) { - rerun(func() (string, error) { - return runStatus(cmd, args) - }) - }, -} - -func runStatus(cmd *cobra.Command, args []string) (string, error) { - resourceName, resourceTypeStr := "", "" - switch len(args) { - case 0: - resourcesRes, err := getResourcesResponse() - if err != nil { - return "", err - } - return resourceStatusesStr(resourcesRes), nil - case 1: - resourceName = args[0] - case 2: - userResourceType := args[0] - resourceName = args[1] - - if userResourceType != "" { - resourceType, err := resource.VisibleResourceTypeFromPrefix(userResourceType) - if err != nil { - return "", err - } - - resourceTypeStr = resourceType.String() - } - } - - appName, err := AppNameFromFlagOrConfig() - if err != nil { - return "", err - } - - err = StreamLogs(appName, resourceName, resourceTypeStr, false) - if err != nil { - return "", err - } - - return "", nil -} - -func resourceStatusesStr(resourcesRes *schema.GetResourcesResponse) string { - out := "\n" - out += pythonPackageStatusesStr(resourcesRes.DataStatuses, resourcesRes.Context) + "\n" - out += rawColumnStatusesStr(resourcesRes.DataStatuses, resourcesRes.Context) + "\n" - out += aggregateStatusesStr(resourcesRes.DataStatuses, resourcesRes.Context) + "\n" - out += transformedColumnStatusesStr(resourcesRes.DataStatuses, resourcesRes.Context) + "\n" - out += trainingDatasetStatusesStr(resourcesRes.DataStatuses, resourcesRes.Context) + "\n" - out += modelStatusesStr(resourcesRes.DataStatuses, resourcesRes.Context) + "\n" - out += apiStatusesStr(resourcesRes.APIGroupStatuses) - return out -} - -func pythonPackageStatusesStr(dataStatuses map[string]*resource.DataStatus, ctx *context.Context) string { - var statuses = make([]resource.Status, len(ctx.PythonPackages)) - i := 0 - for _, pythonPackage := range ctx.PythonPackages { - statuses[i] = dataStatuses[pythonPackage.GetID()] - i++ - } - return "Python Packages: " + StatusStr(statuses) -} - -func rawColumnStatusesStr(dataStatuses map[string]*resource.DataStatus, ctx *context.Context) string { - var statuses = make([]resource.Status, len(ctx.RawColumns)) - i := 0 - for _, rawColumn := range ctx.RawColumns { - statuses[i] = dataStatuses[rawColumn.GetID()] - i++ - } - return "Raw Columns: " + StatusStr(statuses) -} - -func aggregateStatusesStr(dataStatuses map[string]*resource.DataStatus, ctx *context.Context) string { - var statuses = make([]resource.Status, len(ctx.Aggregates)) - i := 0 - for _, aggregate := range ctx.Aggregates { - statuses[i] = dataStatuses[aggregate.GetID()] - i++ - } - return "Aggregates: " + StatusStr(statuses) -} - -func transformedColumnStatusesStr(dataStatuses map[string]*resource.DataStatus, ctx *context.Context) string { - var statuses = make([]resource.Status, len(ctx.TransformedColumns)) - i := 0 - for _, transformedColumn := range ctx.TransformedColumns { - statuses[i] = dataStatuses[transformedColumn.GetID()] - i++ - } - return "Transformed Columns: " + StatusStr(statuses) -} - -func trainingDatasetStatusesStr(dataStatuses map[string]*resource.DataStatus, ctx *context.Context) string { - var statuses = make([]resource.Status, len(ctx.Models)) - i := 0 - for _, model := range ctx.Models { - statuses[i] = dataStatuses[model.Dataset.GetID()] - i++ - } - return "Training Datasets: " + StatusStr(statuses) -} - -func modelStatusesStr(dataStatuses map[string]*resource.DataStatus, ctx *context.Context) string { - var statuses = make([]resource.Status, len(ctx.Models)) - i := 0 - for _, model := range ctx.Models { - statuses[i] = dataStatuses[model.GetID()] - i++ - } - return "Models: " + StatusStr(statuses) -} - -func apiStatusesStr(apiGroupStatuses map[string]*resource.APIGroupStatus) string { - var statuses = make([]resource.Status, len(apiGroupStatuses)) - i := 0 - for _, apiGroupStatus := range apiGroupStatuses { - statuses[i] = apiGroupStatus - i++ - } - return "APIs: " + StatusStr(statuses) -} - -func StatusStr(statuses []resource.Status) string { - if len(statuses) == 0 { - return "none" - } - - messageBuckets := make(map[int][]string) - for _, status := range statuses { - bucketKey := status.GetCode().SortBucket() - messageBuckets[bucketKey] = append(messageBuckets[bucketKey], status.Message()) - } - - var bucketKeys []int - for bucketKey := range messageBuckets { - bucketKeys = append(bucketKeys, bucketKey) - } - sort.Ints(bucketKeys) - - var messageItems []string - - for _, bucketKey := range bucketKeys { - messageCounts := make(map[string]int) - for _, message := range messageBuckets[bucketKey] { - messageCounts[message]++ - } - - var messages []string - for message := range messageCounts { - messages = append(messages, message) - } - sort.Strings(messages) - - for _, message := range messages { - messageItem := fmt.Sprintf("%d %s", messageCounts[message], message) - messageItems = append(messageItems, messageItem) - } - } - - return strings.Join(messageItems, " | ") -} From 416041272314f0d44a48a516d830db925878e667 Mon Sep 17 00:00:00 2001 From: David Eliahu Date: Fri, 21 Jun 2019 17:17:33 -0700 Subject: [PATCH 2/2] Update documentation and error messages --- .github/ISSUE_TEMPLATE/bug-report.md | 2 +- docs/applications/resources/statuses.md | 4 ++-- docs/operator/cli.md | 31 ++++--------------------- pkg/workloads/spark_job/spark_job.py | 4 ++-- pkg/workloads/tf_api/api.py | 12 +++++++--- pkg/workloads/tf_train/train.py | 8 +++++-- 6 files changed, 24 insertions(+), 37 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index 0b3a64135b..ba99ba9155 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -37,7 +37,7 @@ assignees: '' ### Stack Trace -[If applicable, the stack trace which shows the error. Find it with `cortex logs `, or use `kubectl get pods -n cortex` and use the name of the failed pod in `kubectl logs -n cortex`] +[If applicable, the stack trace which shows the error. Find it with `cortex logs -v `, or use `kubectl get pods -n cortex` and use the name of the failed pod in `kubectl logs -n cortex`] ``` diff --git a/docs/applications/resources/statuses.md b/docs/applications/resources/statuses.md index 7d9b80169b..e1a0f00752 100644 --- a/docs/applications/resources/statuses.md +++ b/docs/applications/resources/statuses.md @@ -7,7 +7,7 @@ | ready | Resource is ready | | pending | Resource is waiting for another resource to be ready, or its workload is initializing | | running, ingesting,
aggregating, transforming,
generating, training | Resource is being created | -| error | Resource was not created due to an error; run `cortex logs ` to view the logs | +| error | Resource was not created due to an error; run `cortex logs -v ` to view the logs | | skipped | Resource was not created due to an error in another resource in the same workload | | terminated | Resource was terminated | | terminated (out of mem) | Resource was terminated due to insufficient memory | @@ -25,7 +25,7 @@ | update pending | API will be updated when the new model is ready; a previous version of this API is ready | | stopping | API is stopping | | stopped | API is stopped | -| error | API was not created due to an error; run `cortex logs ` to view the logs | +| error | API was not created due to an error; run `cortex logs -v ` to view the logs | | skipped | API was not created due to an error in another resource | | update skipped | API was not updated due to an error in another resource; a previous version of this API is ready | | upstream error | API was not created due to an error in one of its dependencies; a previous version of this API may be ready | diff --git a/docs/operator/cli.md b/docs/operator/cli.md index 59344ce007..b65704e00b 100644 --- a/docs/operator/cli.md +++ b/docs/operator/cli.md @@ -99,35 +99,11 @@ Flags: -a, --app string app name -e, --env string environment (default "dev") -h, --help help for get + -s, --summary show summarized output -w, --watch re-run the command every 2 seconds ``` -The `get` command outputs the current state of all resources on the cluster. Specifying a resource name provides a more detailed view of the configuration and state of that particular resource. - -## status - -``` -Get resource statuses. - -Usage: - cortex status [RESOURCE_TYPE] [RESOURCE_NAME] [flags] - -Resource Types: - raw_column - aggregate - transformed_column - training_dataset - model - api - -Flags: - -a, --app string app name - -e, --env string environment (default "dev") - -h, --help help for status - -w, --watch re-run the command every 2 seconds -``` - -The `status` command outputs a condensed summary of all resources on the cluster. Specifying a resource name provides detailed real-time view of the status of that particular resource. +The `get` command outputs the current state of all resources on the cluster. Specifying a resource name provides a more detailed view of the configuration and state of that particular resource. Using the `-s` or `--summary` flag will show a summarized view of all resource statuses. ## logs @@ -149,9 +125,10 @@ Flags: -a, --app string app name -e, --env string environment (default "dev") -h, --help help for logs + -v, --verbose show verbose output ``` -The `logs` command streams logs from the workload corresponding to the specified resource. For example, `cortex logs models dnn` will get the logs from the most recent training workload for `dnn`. +The `logs` command streams logs from the workload corresponding to the specified resource. For example, `cortex logs models dnn` will get the Cortex logs from the most recent training workload for `dnn`. Using the `-v` or `--verbose` flag will show all of the logs for the workload (not just Cortex's logs). ## configure diff --git a/pkg/workloads/spark_job/spark_job.py b/pkg/workloads/spark_job/spark_job.py index 508e0bc514..ea0ed60345 100644 --- a/pkg/workloads/spark_job/spark_job.py +++ b/pkg/workloads/spark_job/spark_job.py @@ -320,7 +320,7 @@ def run_job(args): e.wrap("error") logger.error(str(e)) logger.exception( - "An error occurred, see `cx logs {} {}` for more details.".format( + "An error occurred, see `cortex logs -v {} {}` for more details.".format( ctx.id_map[resource_id_list[0]]["resource_type"], ctx.id_map[resource_id_list[0]]["name"], ) @@ -328,7 +328,7 @@ def run_job(args): sys.exit(1) except Exception as e: logger.exception( - "An error occurred, see `cx logs {} {}` for more details.".format( + "An error occurred, see `cortex logs -v {} {}` for more details.".format( ctx.id_map[resource_id_list[0]]["resource_type"], ctx.id_map[resource_id_list[0]]["name"], ) diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index 496e7de8ec..ecdf38079b 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -346,12 +346,16 @@ def predict(app_name, api_name): e.wrap("error", "sample {}".format(i + 1)) logger.error(str(e)) logger.exception( - "An error occurred, see `cx logs api {}` for more details.".format(api["name"]) + "An error occurred, see `cortex logs -v api {}` for more details.".format( + api["name"] + ) ) return prediction_failed(sample, str(e)) except Exception as e: logger.exception( - "An error occurred, see `cx logs api {}` for more details.".format(api["name"]) + "An error occurred, see `cortex logs -v api {}` for more details.".format( + api["name"] + ) ) return prediction_failed(sample, str(e)) @@ -422,7 +426,9 @@ def start(args): except Exception as e: if i == limit - 1: logger.exception( - "An error occurred, see `cx logs api {}` for more details.".format(api["name"]) + "An error occurred, see `cortex logs -v api {}` for more details.".format( + api["name"] + ) ) sys.exit(1) diff --git a/pkg/workloads/tf_train/train.py b/pkg/workloads/tf_train/train.py index 5e9f03c52c..f812a2fe9f 100644 --- a/pkg/workloads/tf_train/train.py +++ b/pkg/workloads/tf_train/train.py @@ -59,13 +59,17 @@ def train(args): e.wrap("error") logger.error(str(e)) logger.exception( - "An error occurred, see `cx logs model {}` for more details.".format(model["name"]) + "An error occurred, see `cortex logs -v model {}` for more details.".format( + model["name"] + ) ) sys.exit(1) except Exception as e: ctx.upload_resource_status_failed(model) logger.exception( - "An error occurred, see `cx logs model {}` for more details.".format(model["name"]) + "An error occurred, see `cortex logs -v model {}` for more details.".format( + model["name"] + ) ) sys.exit(1)