diff --git a/litellm-thinking/app.py b/litellm-thinking/app.py
new file mode 100644
index 00000000..3d729a45
--- /dev/null
+++ b/litellm-thinking/app.py
@@ -0,0 +1,88 @@
+from functools import partial
+import chainlit as cl
+from litellm import acompletion
+
+
+LLM_MODEL_NAME: str = "ollama/qwen3:latest"
+API_BASE_URL: str = "http://localhost:11434"
+
+
+# Partial application keeps the model parameters consistent across calls;
+# think=True asks the provider to emit reasoning tokens alongside content.
+client = partial(
+    acompletion,
+    model=LLM_MODEL_NAME,
+    api_base=API_BASE_URL,
+    stream=True,
+    top_p=0.9,
+    temperature=0.2,
+    think=True,
+)
+
+
+@cl.on_chat_start
+async def start_chat():
+    cl.user_session.set(
+        "message_history",
+        [
+            {
+                "role": "system",
+                "content": "Act like a professional communication strategist",
+            }
+        ],
+    )
+
+
+@cl.step(name="Reasoning", show_input=False)
+async def reasoning_step(stream, first_token: str):
+    """Stream reasoning tokens into a dedicated step.
+
+    Returns the first non-reasoning chunk so the caller can finish
+    streaming the answer without losing it.
+    """
+    current_step = cl.context.current_step
+    await current_step.stream_token(first_token)
+
+    async for part in stream:
+        chunk = part.choices[0].delta
+        if not getattr(chunk, "reasoning_content", None):
+            # The model has finished thinking; hand this chunk back.
+            return chunk
+        await current_step.stream_token(chunk.reasoning_content)
+    return None
+
+
+@cl.on_message
+async def on_message(message: cl.Message):
+    message_history = cl.user_session.get("message_history", [])
+    message_history.append({"role": "user", "content": message.content})
+    final_message = cl.Message(content="", author="Assistant")
+
+    response = await client(messages=message_history)
+
+    ai_response = ""
+    async for part in response:
+        chunk = part.choices[0].delta
+        reasoning = getattr(chunk, "reasoning_content", None)
+
+        if reasoning:
+            # Drain the thinking tokens into the step, then resume with
+            # the first answer chunk it hands back.
+            leftover = await reasoning_step(response, reasoning)
+            if leftover is not None and leftover.content:
+                ai_response += leftover.content
+                await final_message.stream_token(leftover.content)
+        elif chunk.content:
+            ai_response += chunk.content
+            await final_message.stream_token(chunk.content)
+
+    await final_message.update()
+
+    if ai_response:
+        message_history.append({"role": "assistant", "content": ai_response})
+        cl.user_session.set("message_history", message_history)
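+
+
+# Run locally (assumes an Ollama server at the API_BASE_URL above with the
+# qwen3 model already pulled):
+#   chainlit run app.py -w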
diff --git a/litellm-thinking/chainlit.md b/litellm-thinking/chainlit.md
new file mode 100644
index 00000000..0f673dc0
--- /dev/null
+++ b/litellm-thinking/chainlit.md
@@ -0,0 +1,14 @@
+# Welcome to Chainlit! 🚀🤖
+
+Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
+
+## Useful Links 🔗
+
+- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
+- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/ZThrUxbAYw) to ask questions, share your projects, and connect with other developers! 💬
+
+We can't wait to see what you create with Chainlit! Happy coding! 💻😊
+
+## Welcome screen
+
+To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
diff --git a/litellm-thinking/config.toml b/litellm-thinking/config.toml
new file mode 100644
index 00000000..5e0b29f6
--- /dev/null
+++ b/litellm-thinking/config.toml
@@ -0,0 +1,147 @@
+[project]
+# List of environment variables to be provided by each user to use the app.
+user_env = []
+
+# Duration (in seconds) during which the session is saved when the connection is lost
+session_timeout = 3600
+
+# Duration (in seconds) of the user session expiry
+user_session_timeout = 1296000  # 15 days
+
+# Enable third parties caching (e.g., LangChain cache)
+cache = false
+
+# Whether to persist user environment variables (API keys) to the database
+# Set to true to store user env vars in DB, false to exclude them for security
+persist_user_env = false
+
+# Whether to mask user environment variables (API keys) in the UI with password type
+# Set to true to show API keys as ***, false to show them as plain text
+mask_user_env = false
+
+# Authorized origins
+allow_origins = ["*"]
+
+[features]
+# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+unsafe_allow_html = false
+
+# Process and display mathematical expressions. This can clash with "$" characters in messages.
+latex = false
+
+# Autoscroll new user messages at the top of the window
+user_message_autoscroll = true
+
+# Automatically tag threads with the current chat profile (if a chat profile is used)
+auto_tag_thread = true
+
+# Allow users to edit their own messages
+edit_message = true
+
+[features.slack]
+# Add emoji reaction when message is received (requires reactions:write OAuth scope)
+reaction_on_message_received = false
+
+# Authorize users to spontaneously upload files with messages
+[features.spontaneous_file_upload]
+    enabled = true
+    # Define accepted file types using MIME types
+    # Examples:
+    # 1. For specific file types:
+    #    accept = ["image/jpeg", "image/png", "application/pdf"]
+    # 2. For all files of certain type:
+    #    accept = ["image/*", "audio/*", "video/*"]
+    # 3. For specific file extensions:
+    #    accept = { "application/octet-stream" = [".xyz", ".pdb"] }
+    # Note: Using "*/*" is not recommended as it may cause browser warnings
+    accept = ["*/*"]
+    max_files = 20
+    max_size_mb = 500
+
+[features.audio]
+    # Enable audio features
+    enabled = false
+    # Sample rate of the audio
+    sample_rate = 24000
+
+[features.mcp]
+    # Enable Model Context Protocol (MCP) features
+    enabled = false
+
+[features.mcp.sse]
+    enabled = true
+
+[features.mcp.streamable-http]
+    enabled = true
+
+[features.mcp.stdio]
+    enabled = true
+    # Only the executables in the allow list can be used for MCP stdio server.
+    # Only need the base name of the executable, e.g. "npx", not "/usr/bin/npx".
+    # Please don't comment this line for now, we need it to parse the executable name.
+    allowed_executables = [ "npx", "uvx" ]
+
+[UI]
+# Name of the assistant.
+name = "Assistant"
+
+# default_theme = "dark"
+
+# layout = "wide"
+
+# default_sidebar_state = "open"
+
+# Description of the assistant. This is used for HTML tags.
+# description = ""
+
+# Chain of Thought (CoT) display mode. Can be "hidden", "tool_call" or "full".
+cot = "full"
+
+# Specify a CSS file that can be used to customize the user interface.
+# The CSS file can be served from the public directory or via an external link.
+custom_css = 'public/stylesheet.css'
+
+# Specify additional attributes for a custom CSS file
+# custom_css_attributes = "media=\"print\""
+
+# Specify a JavaScript file that can be used to customize the user interface.
+# The JavaScript file can be served from the public directory.
+custom_js = 'public/script.js'
+
+# The style of alert boxes. Can be "classic" or "modern".
+alert_style = "classic"
+
+# Specify additional attributes for custom JS file
+# custom_js_attributes = "async type = \"module\""
+
+# Custom login page image, relative to public directory or external URL
+# login_page_image = "/public/custom-background.jpg"
+
+# Custom login page image filter (Tailwind internal filters, no dark/light variants)
+# login_page_image_filter = "brightness-50 grayscale"
+# login_page_image_dark_filter = "contrast-200 blur-sm"
+
+
+# Specify a custom meta image url.
+# custom_meta_image_url = "https://chainlit-cloud.s3.eu-west-3.amazonaws.com/logo/chainlit_banner.png"
+
+# Load assistant logo directly from URL.
+logo_file_url = ""
+
+# Load assistant avatar image directly from URL.
+default_avatar_file_url = ""
+
+# Specify a custom build directory for the frontend.
+# This can be used to customize the frontend code.
+# Be careful: If this is a relative path, it should not start with a slash.
+# custom_build = "./public/build"
+
+# Specify optional one or more custom links in the header.
+# [[UI.header_links]]
+#     name = "Issues"
+#     display_name = "Report Issue"
+#     icon_url = "https://avatars.githubusercontent.com/u/128686189?s=200&v=4"
+#     url = "https://github.com/Chainlit/chainlit/issues"
+
+[meta]
+generated_by = "2.6.7"
diff --git a/litellm-thinking/public/favicon.svg b/litellm-thinking/public/favicon.svg
new file mode 100644
index 00000000..b6fc81c8
--- /dev/null
+++ b/litellm-thinking/public/favicon.svg
@@ -0,0 +1,3 @@
+
\ No newline at end of file
diff --git a/litellm-thinking/public/logo_dark.png b/litellm-thinking/public/logo_dark.png
new file mode 100644
index 00000000..208b415e
Binary files /dev/null and b/litellm-thinking/public/logo_dark.png differ
diff --git a/litellm-thinking/public/logo_light.png b/litellm-thinking/public/logo_light.png
new file mode 100644
index 00000000..208b415e
Binary files /dev/null and b/litellm-thinking/public/logo_light.png differ
diff --git a/litellm-thinking/public/script.js b/litellm-thinking/public/script.js
new file mode 100644
index 00000000..982fa947
--- /dev/null
+++ b/litellm-thinking/public/script.js
@@ -0,0 +1,63 @@
+const alreadyCollapsed = new WeakSet();
+
+function autoCollapseSteps(element) {
+  if (element.matches?.('button[id^="step-"]')) {
+    tryCollapse(element);
+  }
+  element.querySelectorAll?.('button[id^="step-"]').forEach((btn) => {
+    tryCollapse(btn);
+  });
+}
+
+function tryCollapse(btn) {
+  const isOpen = btn.getAttribute('data-state') === 'open';
+  if (
+    isOpen &&
+    !alreadyCollapsed.has(btn) &&
+    btn.querySelector('svg.lucide-chevron-up') // icon for expanded state
+  ) {
+    btn.click(); // close it
+    alreadyCollapsed.add(btn);
+  }
+}
+
+function removeCopyButtons() {
+  document.querySelectorAll('button').forEach((button) => {
+    if (button.querySelector('.lucide-copy')) {
+      button.remove();
+    }
+  });
+}
+
+removeCopyButtons();
+
+const mutationObserver = new MutationObserver((mutationList) => {
+  for (const mutation of mutationList) {
+    if (mutation.type === 'childList') {
+      for (const node of mutation.addedNodes) {
+        if (node.nodeType === Node.ELEMENT_NODE) {
+          autoCollapseSteps(node);
+        }
+      }
+    }
+  }
+});
+
+mutationObserver.observe(document.body, {
+  childList: true,
+  subtree: true,
+});
+
+const copyButtonObserver = new MutationObserver(() => {
+  removeCopyButtons();
+});
+
+copyButtonObserver.observe(document.body, {
+  childList: true,
+  subtree: true,
+});
+
+document.querySelectorAll('button[id^="step-"]').forEach(autoCollapseSteps);
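+
+// Note: the button[id^="step-"] and .lucide-copy selectors depend on
+// Chainlit's generated DOM and may need updating across Chainlit versions.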
diff --git a/litellm-thinking/public/stylesheet.css b/litellm-thinking/public/stylesheet.css
new file mode 100644
index 00000000..416b7630
--- /dev/null
+++ b/litellm-thinking/public/stylesheet.css
@@ -0,0 +1,18 @@
+#theme-toggle,
+#new-chat-button {
+  display: none !important;
+}
+
+.watermark {
+  display: none !important;
+  visibility: hidden !important;
+  opacity: 0 !important;
+  pointer-events: none !important;
+  height: 0px !important;
+  width: 0px !important;
+  overflow: hidden !important;
+}
+
+#chat-input:empty::before {
+  content: 'Hi, 👋🏾 How can I help ...';
+}
diff --git a/litellm-thinking/readme.md b/litellm-thinking/readme.md
new file mode 100644
index 00000000..61242c31
--- /dev/null
+++ b/litellm-thinking/readme.md
@@ -0,0 +1,65 @@
+---
+title: 'LiteLLM Thinking Models Integration'
+tags: ['litellm', 'thinking', 'streaming']
+---
+
+# LiteLLM Thinking Models Integration
+
+This repository demonstrates how to integrate thinking-enabled LLMs (such as Qwen3 or DeepSeek-R1) with Chainlit using LiteLLM.
+
+## Description
+
+The integration shows how to connect to any thinking-capable model through LiteLLM's unified API. The application captures and streams the model's internal reasoning in real time, so users can watch the model work through a problem step by step before it gives its final answer.
+
+### Function Definitions
+
+#### Thinking LLM Integration (`app.py`)
+
+- `start_chat`: Initializes the chat session with a system prompt and message history
+- `reasoning_step`: Chainlit step that streams reasoning content, then hands the first answer chunk back to the caller
+- `on_message`: Handles user messages and coordinates the reasoning display and final response streaming
+- `client`: LiteLLM client configured via partial application for consistent model parameters
+
+### Configuration
+
+> [!IMPORTANT]
+> Replace the generated `.chainlit/config.toml` with the `config.toml` from this repository to apply the custom CSS and JS.
+
+```python
+# Example uses Ollama, but any thinking-capable provider supported by LiteLLM works
+LLM_MODEL_NAME = "ollama/qwen3:latest"
+API_BASE_URL = "http://localhost:11434"  # Provider endpoint
+```
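+
+### Streaming the reasoning outside Chainlit
+
+A minimal sketch of the same stream without the UI (assumes a local Ollama
+server with `qwen3` pulled; `reasoning_content` is the field LiteLLM sets on
+streamed deltas when thinking output is enabled):
+
+```python
+import asyncio
+
+from litellm import acompletion
+
+
+async def main():
+    response = await acompletion(
+        model="ollama/qwen3:latest",
+        api_base="http://localhost:11434",
+        messages=[{"role": "user", "content": "Why is the sky blue?"}],
+        stream=True,
+        think=True,
+    )
+    async for part in response:
+        delta = part.choices[0].delta
+        if getattr(delta, "reasoning_content", None):
+            print(delta.reasoning_content, end="")  # thinking tokens
+        elif delta.content:
+            print(delta.content, end="")  # answer tokens
+
+
+asyncio.run(main())
+```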
+
+The application separates the thinking process into a dedicated Chainlit step, allowing users to see the model's reasoning before the final response.
diff --git a/litellm-thinking/requirements.txt b/litellm-thinking/requirements.txt
new file mode 100644
index 00000000..0dfb000e
--- /dev/null
+++ b/litellm-thinking/requirements.txt
@@ -0,0 +1,2 @@
+chainlit
+litellm