import gradio as gr
from transformers_js_py import pipeline
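
# Load the text-generation pipeline once at startup; the model runs fully
# in the browser via Transformers.js (q4-quantized weights, WebGPU backend).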
generator = await pipeline(
    "text-generation",
    "onnx-community/Qwen2.5-0.5B-Instruct",
    { "dtype": "q4", "device": "webgpu" },
)

async def chat_response(message, history):
    messages = [
        { "role": "system", "content": "You are a great assistant." },
        { "role": "user", "content": message },
    ]
    output = await generator(messages, {
        "max_new_tokens": 256,
        "do_sample": True,
        "temperature": 0.3,
    })
    # generated_text holds the full chat transcript; the last entry is the assistant's reply
    response = output[0]["generated_text"][-1]["content"]
    return response
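
# Chat UI using the OpenAI-style "messages" history format.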
demo = gr.ChatInterface(chat_response, type="messages", autofocus=False)
demo.launch()
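
# requirements: the transformers-js-py package provides the transformers_js_py module imported above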
transformers-js-py