-
Notifications
You must be signed in to change notification settings - Fork 155
fix(schema): Use sample instead of find for schema sampling #580
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
39aa3ac
4219bd5
8206acc
6b81bd3
e0324c2
8f31898
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,19 +1,35 @@ | ||
| import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; | ||
| import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js"; | ||
| import type { ToolArgs, OperationType } from "../../tool.js"; | ||
| import type { ToolArgs, OperationType, ToolExecutionContext } from "../../tool.js"; | ||
| import { formatUntrustedData } from "../../tool.js"; | ||
| import { getSimplifiedSchema } from "mongodb-schema"; | ||
| import z from "zod"; | ||
| import { ONE_MB } from "../../../helpers/constants.js"; | ||
| import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorUntilMaxBytes.js"; | ||
|
|
||
| export class CollectionSchemaTool extends MongoDBToolBase { | ||
| public name = "collection-schema"; | ||
| protected description = "Describe the schema for a collection"; | ||
| protected argsShape = DbOperationArgs; | ||
| protected argsShape = { | ||
| ...DbOperationArgs, | ||
| sampleSize: z.number().optional().default(50).describe("Number of documents to sample for schema inference"), | ||
| responseBytesLimit: z.number().optional().default(ONE_MB).describe(`The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.`), | ||
| }; | ||
|
|
||
| public operationType: OperationType = "metadata"; | ||
|
|
||
| protected async execute({ database, collection }: ToolArgs<typeof DbOperationArgs>): Promise<CallToolResult> { | ||
| protected async execute( | ||
| { database, collection, sampleSize, responseBytesLimit }: ToolArgs<typeof DbOperationArgs>, | ||
| { signal }: ToolExecutionContext | ||
| ): Promise<CallToolResult> { | ||
| const provider = await this.ensureConnected(); | ||
| const documents = await provider.find(database, collection, {}, { limit: 5 }).toArray(); | ||
| const cursor = provider.aggregate(database, collection, [{ $sample: { size: Math.min(sampleSize, this.config.maxDocumentsPerQuery) } }]); | ||
|
Check failure on line 26 in src/tools/mongodb/metadata/collectionSchema.ts
|
||
|
||
| const { cappedBy, documents } = await collectCursorUntilMaxBytesLimit({ | ||
| cursor, | ||
| configuredMaxBytesPerQuery: this.config.maxBytesPerQuery, | ||
| toolResponseBytesLimit: responseBytesLimit, | ||
| abortSignal: signal, | ||
| }); | ||
| const schema = await getSimplifiedSchema(documents); | ||
|
|
||
| const fieldsCount = Object.entries(schema).length; | ||
|
|
| @@ -28,9 +44,12 @@ | ||
| }; | ||
| } | ||
|
|
||
| const header = `Found ${fieldsCount} fields in the schema for "${database}.${collection}"`; | ||
| const cappedWarning = cappedBy !== undefined ? `\nThe schema was inferred from a subset of documents due to the response size limit. (${cappedBy})` : ""; | ||
|
|
||
| return { | ||
| content: formatUntrustedData( | ||
| `Found ${fieldsCount} fields in the schema for "${database}.${collection}"`, | ||
| `${header}${cappedWarning}`, | ||
| JSON.stringify(schema) | ||
| ), | ||
| }; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.