450 changes: 450 additions & 0 deletions MODEL-CONFIGS.md

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions eval-server/nodejs/.env.example
@@ -0,0 +1,45 @@
# Evaluation Server Configuration
# Copy this file to .env and configure your settings

# Server Configuration
PORT=8080
HOST=127.0.0.1

# LLM Provider API Keys
# Configure one or more providers for evaluation

# OpenAI Configuration
OPENAI_API_KEY=sk-your-openai-api-key-here

# LiteLLM Configuration (if using a LiteLLM server)
LITELLM_ENDPOINT=http://localhost:4000
LITELLM_API_KEY=your-litellm-api-key-here

# Groq Configuration
GROQ_API_KEY=gsk_your-groq-api-key-here

# OpenRouter Configuration
OPENROUTER_API_KEY=sk-or-v1-your-openrouter-api-key-here

# Default LLM Configuration for Evaluations
# These will be used as fallbacks when not specified in evaluation requests
DEFAULT_PROVIDER=openai
DEFAULT_MAIN_MODEL=gpt-4
DEFAULT_MINI_MODEL=gpt-4-mini
DEFAULT_NANO_MODEL=gpt-3.5-turbo

# Logging Configuration
LOG_LEVEL=info
LOG_DIR=./logs

# Client Configuration
CLIENTS_DIR=./clients
EVALS_DIR=./evals

# RPC Configuration
RPC_TIMEOUT=30000

# Security
# Set this to enable authentication for client connections
# Leave empty to disable authentication
AUTH_SECRET_KEY=
73 changes: 71 additions & 2 deletions eval-server/nodejs/CLAUDE.md
@@ -22,6 +22,16 @@ bo-eval-server is a WebSocket-based evaluation server for LLM agents that implem
- `OPENAI_API_KEY` - OpenAI API key for LLM judge functionality
- `PORT` - WebSocket server port (default: 8080)

### LLM Provider Configuration (Optional)
- `GROQ_API_KEY` - Groq API key for Groq provider support
- `OPENROUTER_API_KEY` - OpenRouter API key for OpenRouter provider support
- `LITELLM_ENDPOINT` - LiteLLM server endpoint URL
- `LITELLM_API_KEY` - LiteLLM API key for LiteLLM provider support
- `DEFAULT_PROVIDER` - Default LLM provider (openai, groq, openrouter, litellm)
- `DEFAULT_MAIN_MODEL` - Default main model name
- `DEFAULT_MINI_MODEL` - Default mini model name
- `DEFAULT_NANO_MODEL` - Default nano model name

## Architecture

### Core Components
@@ -33,10 +43,11 @@ bo-eval-server is a WebSocket-based evaluation server for LLM agents that implem
- Handles bidirectional RPC communication

**RPC Client** (`src/rpc-client.js`)
- Implements JSON-RPC 2.0 protocol for server-to-client calls
- Implements JSON-RPC 2.0 protocol for bidirectional communication
- Manages request/response correlation with unique IDs (see the sketch below)
- Handles timeouts and error conditions
- Calls `Evaluate(request: String) -> String` method on connected agents
- Supports `configure_llm` method for dynamic LLM provider configuration
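
Conceptually, the ID-based correlation works as in the following sketch (illustrative only, not the actual `src/rpc-client.js` implementation; the 30 s timeout mirrors the `RPC_TIMEOUT` default):

```javascript
import { randomUUID } from 'node:crypto';

// Pending server-to-client calls, keyed by JSON-RPC id.
const pending = new Map();

function callClient(ws, method, params, timeoutMs = 30000) {
  const id = randomUUID();
  return new Promise((resolve, reject) => {
    const timer = setTimeout(() => {
      pending.delete(id);
      reject(new Error(`RPC ${method} timed out`));
    }, timeoutMs);
    pending.set(id, { resolve, reject, timer });
    ws.send(JSON.stringify({ jsonrpc: '2.0', id, method, params }));
  });
}

// Called for every incoming message that carries a known id.
function handleResponse(message) {
  const entry = pending.get(message.id);
  if (!entry) return;
  clearTimeout(entry.timer);
  pending.delete(message.id);
  if (message.error) {
    entry.reject(new Error(message.error.message));
  } else {
    entry.resolve(message.result);
  }
}
```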

**LLM Evaluator** (`src/evaluator.js`)
- Integrates with OpenAI API for LLM-as-a-judge functionality
@@ -78,7 +89,10 @@ logs/ # Log files (created automatically)
### Key Features

- **Bidirectional RPC**: Server can call methods on connected clients
- **LLM-as-a-Judge**: Automated evaluation of agent responses using GPT-4
- **Multi-Provider LLM Support**: Support for OpenAI, Groq, OpenRouter, and LiteLLM providers
- **Dynamic LLM Configuration**: Runtime configuration via `configure_llm` JSON-RPC method
- **Per-Client Configuration**: Each connected client can have different LLM settings
- **LLM-as-a-Judge**: Automated evaluation of agent responses using configurable LLM providers
- **Concurrent Evaluations**: Support for multiple agents and parallel evaluations
- **Structured Logging**: All interactions logged as JSON for analysis
- **Interactive CLI**: Built-in CLI for testing and server management
@@ -93,6 +107,61 @@ Agents must implement:
- `Evaluate(task: string) -> string` method (see the minimal agent sketch below)
- "ready" message to signal availability for evaluations

### LLM Configuration Protocol

The server supports dynamic LLM configuration via the `configure_llm` JSON-RPC method:

```json
{
"jsonrpc": "2.0",
"method": "configure_llm",
"params": {
"provider": "openai|groq|openrouter|litellm",
"apiKey": "your-api-key",
"endpoint": "endpoint-url-for-litellm",
"models": {
"main": "main-model-name",
"mini": "mini-model-name",
"nano": "nano-model-name"
},
"partial": false
},
"id": "config-request-id"
}
```
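
On the agent side, a handler for this call might merge the incoming settings into its local configuration, as in the sketch below. The merge-versus-replace reading of `partial` and the shape of the local `llmConfig` object are assumptions for illustration, not part of the documented protocol:

```javascript
// Hypothetical configure_llm handler (sketch only).
function handleConfigureLlm(params, llmConfig) {
  // When partial is true, keep existing settings and overlay the new ones;
  // otherwise start from an empty configuration (assumed semantics).
  const next = params.partial ? { ...llmConfig } : {};
  if (params.provider) next.provider = params.provider;
  if (params.apiKey) next.apiKey = params.apiKey;
  if (params.endpoint) next.endpoint = params.endpoint;
  if (params.models) next.models = { ...(next.models ?? {}), ...params.models };
  return next; // the agent then acknowledges with a JSON-RPC result
}
```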

### Evaluation Model Configuration

Evaluations support nested model configuration for flexible per-tier settings:

```json
{
"jsonrpc": "2.0",
"method": "evaluate",
"params": {
"tool": "chat",
"input": {"message": "Hello"},
"model": {
"main_model": {
"provider": "openai",
"model": "gpt-4",
"api_key": "sk-main-key"
},
"mini_model": {
"provider": "openai",
"model": "gpt-4-mini",
"api_key": "sk-mini-key"
},
"nano_model": {
"provider": "groq",
"model": "llama-3.1-8b-instant",
"api_key": "gsk-nano-key"
}
}
}
}
```
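
Each tier can point at a different provider and API key (the example above routes the nano tier to Groq). When a tier is not specified in the evaluation request, the `DEFAULT_PROVIDER` and `DEFAULT_*_MODEL` values from the environment are used as fallbacks.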

### Configuration

All configuration is managed through environment variables and `src/config.js`. Key settings:
194 changes: 188 additions & 6 deletions eval-server/nodejs/examples/library-usage.js
@@ -7,6 +7,7 @@
// Simple example demonstrating the programmatic API usage

import { EvalServer } from '../src/lib/EvalServer.js';
import { CONFIG } from '../src/config.js';

console.log('🔧 Creating server...');
const server = new EvalServer({
@@ -31,20 +32,57 @@ server.onConnect(async client => {
console.log(' - Client tabId:', client.tabId);
console.log(' - Client info:', client.getInfo());

// Check available LLM providers
console.log('\n🔑 Available LLM Providers:');
const availableProviders = [];
if (CONFIG.providers.openai.apiKey) {
availableProviders.push('openai');
console.log(' ✅ OpenAI configured');
}
if (CONFIG.providers.groq.apiKey) {
availableProviders.push('groq');
console.log(' ✅ Groq configured');
}
if (CONFIG.providers.openrouter.apiKey) {
availableProviders.push('openrouter');
console.log(' ✅ OpenRouter configured');
}
if (CONFIG.providers.litellm.apiKey && CONFIG.providers.litellm.endpoint) {
availableProviders.push('litellm');
console.log(' ✅ LiteLLM configured');
}

if (availableProviders.length === 0) {
console.log(' ❌ No providers configured. Add API keys to .env file.');
console.log(' ℹ️ Example: OPENAI_API_KEY=sk-your-key-here');
}

try {
console.log('🔄 Starting evaluation...');
// Demonstrate basic evaluation first
console.log('\n🔄 Starting basic evaluation...');
let response = await client.evaluate({
id: "test_eval",
name: "Capital of France",
description: "Simple test evaluation",
id: "basic_eval",
name: "Capital of France",
description: "Basic test evaluation",
tool: "chat",
input: {
message: "What is the capital of France?"
}
});
console.log('✅ Evaluation completed!');

console.log('✅ Basic evaluation completed!');
console.log('📊 Response:', JSON.stringify(response, null, 2));

// Demonstrate explicit model selection if OpenAI is available
if (CONFIG.providers.openai.apiKey) {
await demonstrateModelSelection(client);
}

// Demonstrate LLM configuration if providers are available
if (availableProviders.length > 0) {
await demonstrateLLMConfiguration(client, availableProviders);
}

} catch (error) {
console.log('❌ Evaluation failed:', error.message);
}
@@ -54,6 +92,150 @@ server.onDisconnect(clientInfo => {
console.log('👋 CLIENT DISCONNECTED:', clientInfo);
});

// Function to demonstrate explicit model selection within OpenAI
async function demonstrateModelSelection(client) {
console.log('\n🤖 Demonstrating Model Selection (OpenAI)...');

const modelTests = [
{
model: 'gpt-4',
task: 'Complex reasoning',
message: 'Solve this step by step: If a train travels 60 mph for 2.5 hours, how far does it go?'
},
{
model: 'gpt-4-mini',
task: 'Simple question',
message: 'What is 2 + 2?'
},
{
model: 'gpt-3.5-turbo',
task: 'Creative writing',
message: 'Write a one-sentence story about a cat.'
}
];

for (const test of modelTests) {
console.log(`\n🔧 Testing ${test.model} for ${test.task}...`);

try {
const response = await client.evaluate({
id: `model_test_${test.model.replace(/[^a-z0-9]/g, '_')}`,
name: `${test.model} ${test.task}`,
tool: "chat",
input: {
message: test.message
},
model: {
main_model: {
provider: "openai",
model: test.model,
api_key: CONFIG.providers.openai.apiKey
}
}
});

console.log(` ✅ ${test.model} completed successfully`);
console.log(` 📊 Response: ${JSON.stringify(response.output).substring(0, 100)}...`);

// Wait between tests
await new Promise(resolve => setTimeout(resolve, 1500));

} catch (error) {
console.log(` ❌ ${test.model} failed: ${error.message}`);
}
}

console.log('\n✨ Model selection demonstration completed!');
}

// Function to demonstrate LLM configuration
async function demonstrateLLMConfiguration(client, availableProviders) {
console.log('\n🧪 Demonstrating LLM Configuration...');

for (const provider of availableProviders.slice(0, 2)) { // Test up to 2 providers
console.log(`\n🔧 Configuring ${provider.toUpperCase()} provider...`);

try {
// Configure different models based on provider
let models;
switch (provider) {
case 'openai':
models = {
main: 'gpt-4',
mini: 'gpt-4-mini',
nano: 'gpt-3.5-turbo'
};
break;
case 'groq':
models = {
main: 'llama-3.1-8b-instant',
mini: 'llama-3.1-8b-instant',
nano: 'llama-3.1-8b-instant'
};
break;
case 'openrouter':
models = {
main: 'anthropic/claude-3-sonnet',
mini: 'anthropic/claude-3-haiku',
nano: 'anthropic/claude-3-haiku'
};
break;
case 'litellm':
models = {
main: 'claude-3-sonnet-20240229',
mini: 'claude-3-haiku-20240307',
nano: 'claude-3-haiku-20240307'
};
break;
}

console.log(` 📦 Models: main=${models.main}, mini=${models.mini}, nano=${models.nano}`);

// Run evaluation with specific provider configuration
const response = await client.evaluate({
id: `${provider}_config_eval`,
name: `${provider.toUpperCase()} Configuration Test`,
description: `Test evaluation using ${provider} provider`,
tool: "chat",
input: {
message: `Hello! This is a test using the ${provider} provider. Please respond with a brief confirmation.`
},
model: {
main_model: {
provider: provider,
model: models.main,
api_key: CONFIG.providers[provider].apiKey,
endpoint: CONFIG.providers[provider].endpoint
},
mini_model: {
provider: provider,
model: models.mini,
api_key: CONFIG.providers[provider].apiKey,
endpoint: CONFIG.providers[provider].endpoint
},
nano_model: {
provider: provider,
model: models.nano,
api_key: CONFIG.providers[provider].apiKey,
endpoint: CONFIG.providers[provider].endpoint
}
}
});

console.log(` ✅ ${provider.toUpperCase()} evaluation completed successfully`);
console.log(` 📊 Response preview: ${JSON.stringify(response.output).substring(0, 100)}...`);

// Wait between provider tests
await new Promise(resolve => setTimeout(resolve, 2000));

} catch (error) {
console.log(` ❌ ${provider.toUpperCase()} configuration test failed:`, error.message);
}
}

console.log('\n✨ LLM configuration demonstration completed!');
}

console.log('🔧 Starting server...');
await server.start();
console.log('✅ Server started successfully on ws://127.0.0.1:8080');