import gradio as gr
from transformers_js_py import pipeline

# Load a small instruction-tuned model via Transformers.js (4-bit weights, WebGPU).
generator = await pipeline(
    "text-generation",
    "onnx-community/Qwen2.5-0.5B-Instruct",
    {"dtype": "q4", "device": "webgpu"},
)


async def chat_response(message, history):
    messages = [
        {"role": "system", "content": "You are a great assistant."},
        {"role": "user", "content": message},
    ]
    output = await generator(messages, {
        "max_new_tokens": 256,
        "do_sample": True,
        "temperature": 0.3,
    })
    # The last entry of the generated conversation is the assistant's reply.
    response = output[0]["generated_text"][-1]["content"]
    return response


demo = gr.ChatInterface(chat_response, type="messages", autofocus=False)

demo.launch()

# requirements
transformers-js-py