import gradio as gr
from huggingface_hub import InferenceClient
# ============================================
# KTH ID2223 Lab 2 - Llama 3.2 ChatBot
# ============================================
# Use your fine-tuned model (safetensors format)
MODEL_ID = "Marcus719/Llama-3.2-3B-Instruct-Lab2"
client = InferenceClient(model=MODEL_ID)
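# The client above calls the serverless Hugging Face Inference API for MODEL_ID.
# If the endpoint ever requires authentication, a token can be passed explicitly,
# e.g. (sketch, assuming an HF_TOKEN secret is configured for this Space):
#   client = InferenceClient(model=MODEL_ID, token=os.environ["HF_TOKEN"])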
def chat(message, history, system_message, max_tokens, temperature, top_p):
"""Generate response using HuggingFace Inference API"""
messages = [{"role": "system", "content": system_message}]
# Add conversation history
for user_msg, assistant_msg in history:
if user_msg:
messages.append({"role": "user", "content": user_msg})
if assistant_msg:
messages.append({"role": "assistant", "content": assistant_msg})
# Add current message
messages.append({"role": "user", "content": message})
# Stream response
response = ""
for chunk in client.chat_completion(
messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
if chunk.choices and chunk.choices[0].delta.content:
token = chunk.choices[0].delta.content
response += token
yield response
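# For reference, `history` arrives from gr.Chatbot's default tuple format as a list of
# [user_message, assistant_message] pairs, and the payload built above follows the
# OpenAI-style chat format expected by InferenceClient.chat_completion, e.g.
# (illustrative values):
#   [
#       {"role": "system", "content": "You are a helpful assistant."},
#       {"role": "user", "content": "Hi!"},
#       {"role": "assistant", "content": "Hello! How can I help?"},
#       {"role": "user", "content": "Explain LoRA in one sentence."},
#   ]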
# ============================================
# Gradio interface
# ============================================
DEFAULT_SYSTEM_PROMPT = "You are a helpful, respectful and honest assistant."
with gr.Blocks(theme=gr.themes.Soft(), title="🦙 Llama 3.2 ChatBot") as demo:
gr.Markdown(
"""
# 🦙 Llama 3.2 3B Instruct - Fine-tuned on FineTome
**KTH ID2223 Scalable Machine Learning - Lab 2**
This chatbot uses my fine-tuned Llama 3.2 3B model trained on the FineTome-100k dataset.
📦 Model: [Marcus719/Llama-3.2-3B-Instruct-Lab2](https://huggingface.co/Marcus719/Llama-3.2-3B-Instruct-Lab2)
"""
)
chatbot = gr.Chatbot(label="Chat", height=450, show_copy_button=True)
with gr.Row():
msg = gr.Textbox(
placeholder="Type your message here...",
scale=4,
container=False,
autofocus=True
)
submit_btn = gr.Button("Send 🚀", scale=1, variant="primary")
with gr.Accordion("⚙️ Settings", open=False):
system_prompt = gr.Textbox(
label="System Prompt",
value=DEFAULT_SYSTEM_PROMPT,
lines=2
)
with gr.Row():
max_tokens = gr.Slider(64, 1024, value=512, step=32, label="Max Tokens")
temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
with gr.Row():
clear_btn = gr.Button("🗑️ Clear Chat")
retry_btn = gr.Button("🔄 Regenerate")
gr.Examples(
examples=[
"Hello! Can you introduce yourself?",
"Explain machine learning in simple terms.",
"What is the difference between fine-tuning and pre-training?",
"Write a short poem about AI.",
],
inputs=msg,
label="💡 Try these examples"
)
# Event handlers
def user_input(message, history):
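        """Append the user's message to the chat history and clear the textbox."""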
return "", history + [[message, None]]
    def bot_response(history, system_prompt, max_tokens, temperature, top_p):
        """Stream the assistant's reply into the last chat history entry."""
        if not history:
            # In a generator, `return history` would be discarded; yield it instead.
            yield history
            return
message = history[-1][0]
history_for_model = history[:-1]
for response in chat(message, history_for_model, system_prompt, max_tokens, temperature, top_p):
history[-1][1] = response
yield history
def retry_last(history, system_prompt, max_tokens, temperature, top_p):
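        """Clear and regenerate the assistant's answer for the last user message."""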
if history:
history[-1][1] = None
message = history[-1][0]
history_for_model = history[:-1]
for response in chat(message, history_for_model, system_prompt, max_tokens, temperature, top_p):
history[-1][1] = response
yield history
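    # Wire the textbox and Send button: first append the user turn (queue=False, so it
    # is instant), then stream the assistant's reply into the chat.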
msg.submit(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
bot_response, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot
)
submit_btn.click(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
bot_response, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot
)
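    # Utility buttons: clear the conversation, or regenerate the last answer.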
clear_btn.click(lambda: [], None, chatbot, queue=False)
retry_btn.click(retry_last, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot)
gr.Markdown(
"""
---
### 📝 About This Project
**Fine-tuning Details:**
- Base Model: `meta-llama/Llama-3.2-3B-Instruct`
- Dataset: [FineTome-100k](https://huggingface.co/datasets/mlabonne/FineTome-100k)
- Method: QLoRA (4-bit quantization + LoRA)
- Framework: [Unsloth](https://github.com/unslothai/unsloth)
Built with ❤️ for KTH ID2223 Lab 2
"""
)
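    # For context, the fine-tuning described above was done offline, not in this Space.
    # A minimal Unsloth QLoRA setup looks roughly like the sketch below; the
    # hyperparameters are illustrative assumptions, not the exact Lab 2 configuration:
    #
    #   from unsloth import FastLanguageModel
    #   model, tokenizer = FastLanguageModel.from_pretrained(
    #       model_name="meta-llama/Llama-3.2-3B-Instruct",
    #       max_seq_length=2048,
    #       load_in_4bit=True,          # QLoRA: 4-bit quantized base weights
    #   )
    #   model = FastLanguageModel.get_peft_model(
    #       model,
    #       r=16, lora_alpha=16,        # LoRA rank and scaling
    #       target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
    #                       "gate_proj", "up_proj", "down_proj"],
    #   )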
if __name__ == "__main__":
demo.launch()