Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions pkg/ccl/changefeedccl/mocks/tenant_status_server_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions pkg/kv/kvclient/kvtenant/connector.go
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,17 @@ func (c *connector) TenantRanges(
return
}

// NetworkConnectivity implements the serverpb.TenantStatusServer interface.
// The request is issued through withClient; the response captured from the
// client's NetworkConnectivity call is returned together with whatever error
// withClient reports.
func (c *connector) NetworkConnectivity(
	ctx context.Context, req *serverpb.NetworkConnectivityRequest,
) (*serverpb.NetworkConnectivityResponse, error) {
	var resp *serverpb.NetworkConnectivityResponse
	// forward performs the RPC and stores the response in resp.
	forward := func(ctx context.Context, client *client) error {
		var callErr error
		resp, callErr = client.NetworkConnectivity(ctx, req)
		return callErr
	}
	err := c.withClient(ctx, forward)
	return resp, err
}

// NewIterator implements the rangedesc.IteratorFactory interface.
func (c *connector) NewIterator(
ctx context.Context, span roachpb.Span,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,14 +192,6 @@ func newTenantDoesNotHaveCapabilityError(cap tenantcapabilities.ID, req kvpb.Req
return errors.Newf("client tenant does not have capability %q (%T)", cap, req)
}

// Sentinel errors returned by the Authorizer's Has*Capability checks when a
// non-system tenant is denied the corresponding operation.
var (
errCannotQueryMetadata = errors.New("client tenant does not have capability to query cluster node metadata")
errCannotQueryTSDB = errors.New("client tenant does not have capability to query timeseries data")
errCannotQueryAllMetrics = errors.New("client tenant does not have capability to query non-tenant metrics")
errCannotUseNodelocal = errors.New("client tenant does not have capability to use nodelocal storage")
errCannotDebugProcess = errors.New("client tenant does not have capability to debug the process")
)

// methodCapability associates a KV method with a capability. The capability can
// either be static for all instances of the method, or it can be determined
// dynamically by a function based on the request's contents.
Expand Down Expand Up @@ -316,33 +308,25 @@ func (a *Authorizer) BindReader(reader tenantcapabilities.Reader) {
a.capabilitiesReader = reader
}

func (a *Authorizer) HasNodeStatusCapability(ctx context.Context, tenID roachpb.TenantID) error {
if tenID.IsSystem() {
return nil
}
entry, mode := a.getMode(ctx, tenID)
switch mode {
case authorizerModeOn:
break
case authorizerModeAllowAll:
return nil
case authorizerModeV222:
return errCannotQueryMetadata
default:
err := errors.AssertionFailedf("unknown authorizer mode: %d", mode)
logcrash.ReportOrPanic(ctx, &a.settings.SV, "%v", err)
return err
}
// Sentinel errors returned by the Authorizer's Has*Capability checks when a
// non-system tenant is denied the corresponding operation.
var (
errCannotQueryMetadata = errors.New("client tenant does not have capability to query cluster node metadata")
errCannotQueryTSDB = errors.New("client tenant does not have capability to query timeseries data")
errCannotQueryAllMetrics = errors.New("client tenant does not have capability to query non-tenant metrics")
errCannotUseNodelocal = errors.New("client tenant does not have capability to use nodelocal storage")
errCannotDebugProcess = errors.New("client tenant does not have capability to debug the process")
)

if !tenantcapabilities.MustGetBoolByID(
entry.TenantCapabilities, tenantcapabilities.CanViewNodeInfo,
) {
return errCannotQueryMetadata
}
return nil
// insufficientCapErrMap associates a tenant capability ID with the sentinel
// error that is returned when a tenant is denied that capability.
//
// NOTE(review): indexing this map with an ID that has no entry yields a nil
// error. hasCapability returns the lookup result directly, so a capability
// that is missing from this map would be reported as granted. Keep the map in
// sync with every ID that is passed to hasCapability.
var insufficientCapErrMap = map[tenantcapabilities.ID]error{
tenantcapabilities.CanViewNodeInfo: errCannotQueryMetadata,
tenantcapabilities.CanViewTSDBMetrics: errCannotQueryTSDB,
tenantcapabilities.CanUseNodelocalStorage: errCannotUseNodelocal,
tenantcapabilities.CanDebugProcess: errCannotDebugProcess,
tenantcapabilities.CanViewAllMetrics: errCannotQueryAllMetrics,
}

func (a *Authorizer) HasTSDBQueryCapability(ctx context.Context, tenID roachpb.TenantID) error {
func (a *Authorizer) hasCapability(
ctx context.Context, tenID roachpb.TenantID, cap tenantcapabilities.ID,
) error {
if tenID.IsSystem() {
return nil
}
Expand All @@ -354,47 +338,41 @@ func (a *Authorizer) HasTSDBQueryCapability(ctx context.Context, tenID roachpb.T
case authorizerModeAllowAll:
return nil
case authorizerModeV222:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should do a cleanup and remove this setting option in a separate PR

return errCannotQueryTSDB
return insufficientCapErrMap[cap]
default:
err := errors.AssertionFailedf("unknown authorizer mode: %d", mode)
logcrash.ReportOrPanic(ctx, &a.settings.SV, "%v", err)
return err
}

if !tenantcapabilities.MustGetBoolByID(
entry.TenantCapabilities, tenantcapabilities.CanViewTSDBMetrics,
) {
return errCannotQueryTSDB
if !tenantcapabilities.MustGetBoolByID(entry.TenantCapabilities, cap) {
return insufficientCapErrMap[cap]
}
return nil
}

// HasNodeStatusCapability returns nil if the tenant identified by tenID passes
// the tenantcapabilities.CanViewNodeInfo check performed by hasCapability, and
// the error produced by that check otherwise.
func (a *Authorizer) HasNodeStatusCapability(ctx context.Context, tenID roachpb.TenantID) error {
return a.hasCapability(ctx, tenID, tenantcapabilities.CanViewNodeInfo)
}

// HasTSDBQueryCapability returns nil if the tenant identified by tenID passes
// the tenantcapabilities.CanViewTSDBMetrics check performed by hasCapability,
// and the error produced by that check otherwise.
func (a *Authorizer) HasTSDBQueryCapability(ctx context.Context, tenID roachpb.TenantID) error {
return a.hasCapability(ctx, tenID, tenantcapabilities.CanViewTSDBMetrics)
}

func (a *Authorizer) HasNodelocalStorageCapability(
ctx context.Context, tenID roachpb.TenantID,
) error {
if tenID.IsSystem() {
return nil
}
entry, mode := a.getMode(ctx, tenID)
switch mode {
case authorizerModeOn:
break
case authorizerModeAllowAll:
return nil
case authorizerModeV222:
return errCannotUseNodelocal
default:
err := errors.AssertionFailedf("unknown authorizer mode: %d", mode)
logcrash.ReportOrPanic(ctx, &a.settings.SV, "%v", err)
return err
}
return a.hasCapability(ctx, tenID, tenantcapabilities.CanUseNodelocalStorage)
}

if !tenantcapabilities.MustGetBoolByID(
entry.TenantCapabilities, tenantcapabilities.CanUseNodelocalStorage,
) {
return errCannotUseNodelocal
}
return nil
// HasProcessDebugCapability returns nil if the tenant identified by tenID
// passes the tenantcapabilities.CanDebugProcess check performed by
// hasCapability, and the error produced by that check otherwise.
func (a *Authorizer) HasProcessDebugCapability(ctx context.Context, tenID roachpb.TenantID) error {
return a.hasCapability(ctx, tenID, tenantcapabilities.CanDebugProcess)
}

// HasTSDBAllMetricsCapability returns nil if the tenant identified by tenID
// passes the tenantcapabilities.CanViewAllMetrics check performed by
// hasCapability, and the error produced by that check otherwise.
func (a *Authorizer) HasTSDBAllMetricsCapability(
ctx context.Context, tenID roachpb.TenantID,
) error {
return a.hasCapability(ctx, tenID, tenantcapabilities.CanViewAllMetrics)
}

// IsExemptFromRateLimiting returns true if the tenant is not subject to rate limiting.
Expand All @@ -419,61 +397,6 @@ func (a *Authorizer) IsExemptFromRateLimiting(ctx context.Context, tenID roachpb
return tenantcapabilities.MustGetBoolByID(entry.TenantCapabilities, tenantcapabilities.ExemptFromRateLimiting)
}

// HasProcessDebugCapability reports whether the tenant identified by tenID may
// debug the process. It returns nil when access is allowed and
// errCannotDebugProcess when it is denied. An unrecognized authorizer mode is
// reported through logcrash.ReportOrPanic and returned as an assertion error.
func (a *Authorizer) HasProcessDebugCapability(ctx context.Context, tenID roachpb.TenantID) error {
	// The system tenant always passes the check.
	if tenID.IsSystem() {
		return nil
	}

	entry, mode := a.getMode(ctx, tenID)
	switch mode {
	case authorizerModeAllowAll:
		return nil

	case authorizerModeV222:
		return errCannotDebugProcess

	case authorizerModeOn:
		// Only in this mode is the tenant's capability entry consulted.
		canDebug := tenantcapabilities.MustGetBoolByID(
			entry.TenantCapabilities, tenantcapabilities.CanDebugProcess,
		)
		if canDebug {
			return nil
		}
		return errCannotDebugProcess

	default:
		unknownModeErr := errors.AssertionFailedf("unknown authorizer mode: %d", mode)
		logcrash.ReportOrPanic(ctx, &a.settings.SV, "%v", unknownModeErr)
		return unknownModeErr
	}
}

// HasTSDBAllMetricsCapability reports whether the tenant identified by tenID
// may query non-tenant metrics. It returns nil when access is allowed and
// errCannotQueryAllMetrics when it is denied. An unrecognized authorizer mode
// is reported through logcrash.ReportOrPanic and returned as an assertion
// error.
func (a *Authorizer) HasTSDBAllMetricsCapability(
	ctx context.Context, tenID roachpb.TenantID,
) error {
	// The system tenant always passes the check.
	if tenID.IsSystem() {
		return nil
	}

	entry, mode := a.getMode(ctx, tenID)
	switch mode {
	case authorizerModeOn:
		// Fall out of the switch to the capability lookup below.
	case authorizerModeAllowAll:
		return nil
	case authorizerModeV222:
		// Return the all-metrics error, matching the denial path below, rather
		// than the unrelated timeseries-query error.
		return errCannotQueryAllMetrics
	default:
		err := errors.AssertionFailedf("unknown authorizer mode: %d", mode)
		logcrash.ReportOrPanic(ctx, &a.settings.SV, "%v", err)
		return err
	}

	if !tenantcapabilities.MustGetBoolByID(
		entry.TenantCapabilities, tenantcapabilities.CanViewAllMetrics,
	) {
		return errCannotQueryAllMetrics
	}
	return nil
}

// getMode retrieves the authorization mode.
func (a *Authorizer) getMode(
ctx context.Context, tid roachpb.TenantID,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ client tenant does not have capability to query timeseries data

has-tsdb-all-capability ten=10
----
client tenant does not have capability to query timeseries data
client tenant does not have capability to query non-tenant metrics

# Disable the capability checks by allowing all requests.
set-authorizer-mode value=allow-all
Expand Down
3 changes: 3 additions & 0 deletions pkg/rpc/auth_tenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ func (a tenantAuthorizer) authorize(
case "/cockroach.server.serverpb.Status/Ranges":
return a.authRanges(tenID)

case "/cockroach.server.serverpb.Status/NetworkConnectivity":
return a.capabilitiesAuthorizer.HasProcessDebugCapability(ctx, tenID)

case "/cockroach.server.serverpb.Status/TransactionContentionEvents":
return a.authTenant(tenID)

Expand Down
1 change: 1 addition & 0 deletions pkg/server/serverpb/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ type TenantStatusServer interface {
// download remote files. A method that mutates state should not be on the
// status server and so in the long run we should move it.
DownloadSpan(ctx context.Context, request *DownloadSpanRequest) (*DownloadSpanResponse, error)
NetworkConnectivity(context.Context, *NetworkConnectivityRequest) (*NetworkConnectivityResponse, error)
}

// OptionalNodesStatusServer returns the wrapped NodesStatusServer, if it is
Expand Down
25 changes: 25 additions & 0 deletions pkg/server/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -2119,6 +2119,31 @@ func (s *statusServer) NodeUI(
return &resp, nil
}

// NetworkConnectivity collects info about connection statuses across all nodes.
// This isn't tenant-specific information; it's about networking activity across
// all tenants between nodes. It's accessible via the system tenant, and is here
// made available to secondary tenants with the `can_debug_process` capability.
//
// This works well for shared-process mode, but in external-process mode this
// endpoint won't give a complete picture of network connectivity since the SQL
// server might run entirely outside the KV node. We might need to extend this
// endpoint or create a new one for SQL-SQL servers and SQL server to KV nodes;
// that work is for the future. Currently, this endpoint only shows KV-KV node
// network connectivity, so it's not ready for external-process mode and should
// only be enabled for shared-process mode. There's nothing enforcing this, but
// it shouldn't be a problem. See issue #138156.
//
// The call fails with the privilege checker's error if
// RequireViewClusterMetadataPermission rejects the caller; otherwise the
// request is forwarded through sqlServer.tenantConnect and its result is
// returned unchanged.
func (s *statusServer) NetworkConnectivity(
	ctx context.Context, req *serverpb.NetworkConnectivityRequest,
) (*serverpb.NetworkConnectivityResponse, error) {
	ctx = s.AnnotateCtx(ctx)

	if err := s.privilegeChecker.RequireViewClusterMetadataPermission(ctx); err != nil {
		return nil, err
	}

	return s.sqlServer.tenantConnect.NetworkConnectivity(ctx, req)
}

// NetworkConnectivity collects information about connections statuses across all nodes.
func (s *systemStatusServer) NetworkConnectivity(
ctx context.Context, req *serverpb.NetworkConnectivityRequest,
Expand Down
44 changes: 36 additions & 8 deletions pkg/server/storage_api/network_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/multitenant/tenantcapabilities"
"github.com/cockroachdb/cockroach/pkg/server/serverpb"
"github.com/cockroachdb/cockroach/pkg/server/srvtestutils"
"github.com/cockroachdb/cockroach/pkg/testutils"
Expand All @@ -26,20 +27,25 @@ func TestNetworkConnectivity(t *testing.T) {
defer log.Scope(t).Close(t)
numNodes := 3
testCluster := serverutils.StartCluster(t, numNodes, base.TestClusterArgs{
ServerArgs: base.TestServerArgs{
DefaultTestTenant: base.TestIsForStuffThatShouldWorkWithSecondaryTenantsButDoesntYet(110024),
},

ReplicationMode: base.ReplicationManual,
})
ctx := context.Background()
defer testCluster.Stopper().Stop(ctx)

// TODO(#110024): grant the appropriate capability to the test
// tenant before the connectivity endpoint can be accessed. See
// example in `TestNodeStatusResponse`.

s0 := testCluster.Server(0)

if s0.TenantController().StartedDefaultTestTenant() {
_, err := s0.SystemLayer().SQLConn(t).Exec(
`ALTER TENANT [$1] GRANT CAPABILITY can_debug_process=true`,
serverutils.TestTenantID().ToUint64(),
)
require.NoError(t, err)

serverutils.WaitForTenantCapabilities(t, s0, serverutils.TestTenantID(), map[tenantcapabilities.ID]string{
tenantcapabilities.CanDebugProcess: "true",
}, "")
}

ts := s0.ApplicationLayer()

var resp serverpb.NetworkConnectivityResponse
Expand Down Expand Up @@ -75,3 +81,25 @@ func TestNetworkConnectivity(t *testing.T) {
return nil
})
}

// TestNetworkConnectivityTenantCapability checks that the "connectivity"
// status endpoint rejects an external-process test tenant to which this test
// has granted no capability, with the process-debug capability error.
func TestNetworkConnectivityTenantCapability(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	const nodeCount = 3
	ctx := context.Background()

	clusterArgs := base.TestClusterArgs{
		ReplicationMode: base.ReplicationManual,
		ServerArgs: base.TestServerArgs{
			// Note: We're only testing external-process mode because shared
			// service mode tenants have all capabilities. See PR #119211 for
			// more info.
			DefaultTestTenant: base.ExternalTestTenantAlwaysEnabled,
		},
	}
	tc := serverutils.StartCluster(t, nodeCount, clusterArgs)
	defer tc.Stopper().Stop(ctx)

	// Query the endpoint through the tenant's application layer.
	tenantLayer := tc.Server(0).ApplicationLayer()
	var connectivity serverpb.NetworkConnectivityResponse
	err := srvtestutils.GetStatusJSONProto(tenantLayer, "connectivity", &connectivity)
	require.ErrorContains(t, err,
		"client tenant does not have capability to debug the process")
}
Loading