Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ node_modules

.cursor
src/tools/test.ts

assets/
7 changes: 7 additions & 0 deletions .sfdevrc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
"@types/node"
],
"wireit": {
"build": {
"dependencies": [
"compile",
"lint",
"build:tool-index"
]
},
"test": {
"dependencies": [
"test:only",
Expand Down
7 changes: 5 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"type": "module",
"scripts": {
"build": "wireit",
"build:tool-index": "node --loader ts-node/esm --no-warnings=ExperimentalWarning ./scripts/build-tool-index.ts",
"build:watch": "yarn build --watch",
"clean": "sf-clean",
"clean-all": "sf-clean all",
Expand All @@ -36,7 +37,8 @@
"bin",
"lib",
"!lib/**/*.map",
"messages"
"messages",
"assets/"
],
"dependencies": {
"@huggingface/transformers": "^3.7.0",
Expand Down Expand Up @@ -82,7 +84,8 @@
"build": {
"dependencies": [
"compile",
"lint"
"lint",
"build:tool-index"
]
},
"compile": {
Expand Down
25 changes: 25 additions & 0 deletions scripts/.eslintrc.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright 2025, Salesforce, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// ESLint configuration for this directory: it builds on the config one directory up
// ('../.eslintrc.cjs') and overrides only the parser projects and one import rule.
module.exports = {
extends: '../.eslintrc.cjs',
parserOptions: {
// tsconfig files handed to the parser; './scripts/tsconfig.json' is listed alongside the main and test projects.
// NOTE(review): these are relative paths — confirm which directory ESLint resolves them against.
project: ['./tsconfig.json', './test/tsconfig.json', './scripts/tsconfig.json'],
},
rules: {
// Reported as an error, with the `devDependencies: true` option set.
// Presumably this lets files here import packages declared only under devDependencies — verify against the parent config.
'import/no-extraneous-dependencies': ['error', { devDependencies: true }],
},
};
216 changes: 216 additions & 0 deletions scripts/build-tool-index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/*
* Copyright 2025, Salesforce, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import fs from 'node:fs';
import path from 'node:path';
import { spawn } from 'node:child_process';
import { Args, Parser, ux } from '@oclif/core';
import { pipeline } from '@huggingface/transformers';
import faiss from 'faiss-node';

import { Tool } from '@modelcontextprotocol/sdk/types.js';

/** The callable part of a tool: its name, description, input schema, and annotations. */
type InvocableToolFunction = {
  name: string;
  description: string | undefined;
  /** Input schema, typed as the MCP SDK `Tool['inputSchema']`. */
  parameters: Tool['inputSchema'];
  /** Annotations, typed as the MCP SDK `Tool['annotations']`. */
  annotations: Tool['annotations'];
};

/** A tool entry as returned by `getToolsList`: the tool name plus its function descriptor. */
export type InvocableTool = {
  name: string;
  function: InvocableToolFunction;
};

/**
 * Lists the tools exposed by the local MCP server.
 *
 * Runs the MCP inspector CLI through `npx` against `bin/run.js` with `--method tools/list`,
 * collects what it prints to stdout, and parses that output as JSON.
 *
 * @returns One entry per tool, carrying its name, description, input schema, and annotations.
 *   An output without a `tools` property yields an empty array.
 * @throws Error when the process cannot be spawned, exits with a non-zero code, writes anything
 *   to stderr, or prints output that is not valid JSON.
 */
export const getToolsList = async (): Promise<InvocableTool[]> => {
  const toolsList: string = await new Promise<string>((resolve, reject) => {
    const isWindows = process.platform === 'win32';
    const command = isWindows ? 'npx.cmd' : 'npx';
    const binPath = path.join('bin', 'run.js');

    const child = spawn(
      command,
      [
        '@modelcontextprotocol/inspector',
        '--cli',
        'node',
        binPath,
        '--orgs',
        'DEFAULT_TARGET_ORG',
        '--method',
        'tools/list',
      ],
      {
        stdio: ['pipe', 'pipe', 'pipe'],
        shell: isWindows,
      }
    );

    // Decode at the stream level: a multi-byte UTF-8 character split across two chunks is
    // reassembled by the stream's decoder, whereas calling toString() on each chunk
    // separately would turn both halves into replacement characters.
    child.stdout?.setEncoding('utf8');
    child.stderr?.setEncoding('utf8');

    let stdout = '';
    let stderr = '';

    child.stdout?.on('data', (chunk: string) => {
      stdout += chunk;
    });

    child.stderr?.on('data', (chunk: string) => {
      stderr += chunk;
    });

    child.on('close', (code) => {
      if (code !== 0) {
        reject(new Error(`Command failed with code ${code}: ${stderr}`));
        return;
      }
      // Any stderr output is treated as a failure, even when the exit code is 0.
      if (stderr) {
        reject(new Error(stderr));
        return;
      }
      resolve(stdout);
    });

    child.on('error', (error) => {
      reject(error);
    });
  });

  let parsedToolsList: { tools?: Tool[] };
  try {
    parsedToolsList = JSON.parse(toolsList) as { tools?: Tool[] };
  } catch (error) {
    // Add context: a bare SyntaxError does not say which output failed to parse.
    throw new Error(
      `Failed to parse tools/list output as JSON: ${error instanceof Error ? error.message : String(error)}`
    );
  }

  return (parsedToolsList.tools ?? []).map((tool) => ({
    name: tool.name,
    function: {
      name: tool.name,
      description: tool.description,
      parameters: tool.inputSchema,
      annotations: tool.annotations,
    },
  }));
};

/**
 * Pulls the pieces used for embedding text out of a tool description.
 *
 * - `summary` is the first line of the description, trimmed.
 * - `exampleUsage` is the text following `EXAMPLE USAGE:` up to the first blank line,
 *   the `AGENT INSTRUCTIONS:` marker, or the end of the description, trimmed.
 *
 * @param description The raw tool description; may be undefined or empty.
 * @returns Both fields as strings; each is `''` when the corresponding part is absent.
 */
const extractFromDescription = (description: string | undefined): { exampleUsage: string; summary: string } => {
  if (!description) {
    return { exampleUsage: '', summary: '' };
  }

  // First line = everything before the first newline (or the whole text when there is none).
  const firstBreak = description.indexOf('\n');
  const firstLine = firstBreak === -1 ? description : description.slice(0, firstBreak);

  const usage = /EXAMPLE USAGE:\s*(.*?)(?=\n\n|AGENT INSTRUCTIONS:|$)/s.exec(description);
  const usageText = usage?.[1]?.trim() ?? '';

  return { summary: firstLine.trim(), exampleUsage: usageText };
};

/**
 * Builds the offline tool-search assets.
 *
 * Steps: parse the optional `outputDir` argument (default `./assets`), fetch the tool list via
 * `getToolsList`, derive an embedding text per tool, embed every tool with the
 * `Xenova/all-MiniLM-L6-v2` feature-extraction pipeline, build a FAISS `IndexFlatL2` from the
 * embeddings, and write `faiss-tools-index.bin` and `sf-mcp-tools.json` into the output directory.
 *
 * Progress is reported on stderr. When no tools are returned, nothing is written.
 *
 * @returns A promise that resolves once both files are written (or there was nothing to process).
 */
const main = async (): Promise<void> => {
  const {
    args: { outputDir },
  } = await Parser.parse(process.argv.slice(2), {
    args: {
      outputDir: Args.string({
        description: 'Directory to save the output files',
        default: './assets',
      }),
    },
  });

  if (!outputDir) {
    ux.stderr('Output directory not specified. Please provide a path as the first argument.');
    process.exit(1);
  }

  // Define the output file paths
  const mcpToolsPath = path.join(outputDir, 'sf-mcp-tools.json');
  const faissIndexPath = path.join(outputDir, 'faiss-tools-index.bin');

  ux.stderr('Starting offline data preparation...');

  // 1. Ensure output directory exists.
  // `recursive: true` creates missing parent directories for a nested outputDir and is a
  // no-op when the directory is already there, so no separate existence check is needed.
  fs.mkdirSync(outputDir, { recursive: true });

  // 2. Get the tool data from the MCP server
  ux.stderr('Fetching commands from sf mcp server...');
  const rawTools = await getToolsList();

  // 3. Process and Clean the Data
  ux.stderr('Processing and cleaning command data...');

  const toolsData = rawTools.map((tool, index: number) => {
    const { exampleUsage, summary } = extractFromDescription(tool.function.description);
    return {
      id: index, // Use our own sequential ID for FAISS
      name: tool.name,
      description: tool.function.description,
      parameters: tool.function.parameters,
      annotations: tool.function.annotations,
      // Create a more descriptive text for better embedding quality
      // This will be stripped from the final output sent to the LLM to save token count
      embeddingText: `SUMMARY: ${summary}
EXAMPLE USAGE: ${exampleUsage}
PARAMETERS:
${Object.keys(tool.function.parameters.properties ?? {}).join('\n')}`,
    };
  });

  if (toolsData.length === 0) {
    ux.stderr('No tool data could be processed.');
    return;
  }

  ux.stderr(`Processed ${toolsData.length} tools.`);

  // 4. Generate Embeddings
  ux.stderr('Loading embedding model... (This may take a moment)');
  const embedder = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
    dtype: 'fp32',
  });

  ux.stderr('Generating embeddings for all tools...');
  const embeddings = await Promise.all(
    toolsData.map((cmd) => embedder(cmd.embeddingText, { pooling: 'mean', normalize: true }))
  );

  // The output tensors need to be packed into one flat Float32Array for FAISS:
  // tool i occupies the slice starting at i * embeddingDimension.
  const embeddingDimension = embeddings[0].dims[1];
  const flattenedEmbeddings = new Float32Array(toolsData.length * embeddingDimension);
  embeddings.forEach((tensor, i) => {
    flattenedEmbeddings.set(tensor.data as Float32Array, i * embeddingDimension);
  });
  ux.stderr(`Generated embeddings with dimension: ${embeddingDimension}`);

  // 5. Build and Save the FAISS Index
  ux.stderr('Building FAISS index...');
  const index = new faiss.IndexFlatL2(embeddingDimension);

  // Convert Float32Array to regular array for faiss-node
  const embeddingsArray = Array.from(flattenedEmbeddings);
  index.add(embeddingsArray);

  const vectorCount = index.ntotal();

  ux.stderr(`FAISS index built with ${String(vectorCount)} vectors.`);
  // Use the correct method name for faiss-node
  index.write(faissIndexPath);
  ux.stderr(`FAISS index saved to: ${faissIndexPath}`);

  // 6. Save the Processed Command Data
  fs.writeFileSync(mcpToolsPath, JSON.stringify(toolsData, null, 2));

  ux.stderr(`Command data saved to: ${mcpToolsPath}`);
  ux.stderr('Offline preparation complete!');
};

// Run the script. A failure is logged and also reflected in the exit code; without that
// the process would exit 0 and whatever invoked the script would treat a failed index
// build as a success.
main().catch((error: unknown) => {
  // eslint-disable-next-line no-console
  console.error(error);
  process.exitCode = 1;
});
9 changes: 9 additions & 0 deletions scripts/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"extends": "../tsconfig.json",
"compilerOptions": {
"outDir": "./lib",
"rootDir": "./",
"skipLibCheck": true
},
"include": ["./**/*.ts"]
}
51 changes: 49 additions & 2 deletions src/assets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { spawn } from 'node:child_process';
import faiss from 'faiss-node';
import { pipeline, FeatureExtractionPipeline } from '@huggingface/transformers';
import { ux } from '@oclif/core';
import { Tool } from '@modelcontextprotocol/sdk/types.js';

type CommandData = {
id: number;
Expand All @@ -40,13 +41,27 @@ type CommandData = {
embeddingText: string;
};

type Assets = {
type CommandSearchAssets = {
commands: CommandData[];
commandNames: string[];
faissIndex: faiss.IndexFlatL2;
embedder: FeatureExtractionPipeline;
};

/** One tool record as stored in the tools JSON asset. */
type ToolSearchEntry = {
  id: number;
  name: string;
  description: string | undefined;
  parameters: Tool['inputSchema'];
  annotations: Tool['annotations'];
  embeddingText: string;
};

/** Everything returned by `getToolSearchAssets`: tool records, their names, the FAISS index, and the embedder. */
type ToolSearchAssets = {
  tools: ToolSearchEntry[];
  toolNames: string[];
  faissIndex: faiss.IndexFlatL2;
  embedder: FeatureExtractionPipeline;
};

let CACHED_DATA_DIR: string | null = null;

/**
Expand Down Expand Up @@ -118,7 +133,7 @@ function spawnBuildScript(outputDir: string, detached: boolean): Promise<void> {
}
}

export async function getAssets(): Promise<Assets> {
export async function getCommandSearchAssets(): Promise<CommandSearchAssets> {
if (!CACHED_DATA_DIR) {
throw new Error('Data directory not set. Please call maybeBuildIndex first.');
}
Expand Down Expand Up @@ -159,3 +174,35 @@ export async function getAssets(): Promise<Assets> {
throw new Error(`Failed to load assets: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}

/**
 * Loads the prebuilt tool-search assets from the `assets` directory that sits next to this
 * module's parent directory: the tool records (`sf-mcp-tools.json`), the FAISS index
 * (`faiss-tools-index.bin`), and a `Xenova/all-MiniLM-L6-v2` feature-extraction pipeline.
 *
 * @returns The tool records, their names in the same order, the FAISS index, and the embedder.
 * @throws Error prefixed with `Assets not found:` when either asset file is not accessible.
 */
export async function getToolSearchAssets(): Promise<ToolSearchAssets> {
  const assetsDir = resolve(import.meta.dirname, '..', 'assets');
  const mcpToolsPath = resolve(assetsDir, 'sf-mcp-tools.json');
  const faissIndexPath = resolve(assetsDir, 'faiss-tools-index.bin');

  // Check both files up front, JSON first, so a missing asset is reported before any loading starts.
  try {
    await fs.promises.access(mcpToolsPath);
    await fs.promises.access(faissIndexPath);
  } catch (error) {
    const reason = error instanceof Error ? error.message : 'Unknown error';
    throw new Error(`Assets not found: ${reason}`);
  }

  const rawTools = await fs.promises.readFile(mcpToolsPath, 'utf-8');
  const tools = JSON.parse(rawTools) as ToolSearchAssets['tools'];
  const faissIndex = faiss.IndexFlatL2.read(faissIndexPath);
  const embedder = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
    dtype: 'fp32',
  });
  const toolNames = tools.map((entry) => entry.name);

  return { tools, toolNames, faissIndex, embedder };
}
Loading
Loading