Update app.py
app.py
CHANGED
@@ -1,113 +1,70 @@
-import huggingface_hub as hf_hub
-import time
-import openvino_genai as ov_genai
-import numpy as np
 import gradio as gr
-import
-import
-
-#
-
-
-
-
-
-
-
-
-model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}  # Create Dictionary
-
-def download_model(model_id):
-    model_path = model_id.split("/")[-1]  # Extract model name
-    try:
         hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
-
-
-
-
-
-
-
-
-#
-
-
-
-
-
-
-
-#
-
-
-
-
-
-
-#
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        tokenizer = pipe.get_tokenizer()
-        tokenizer.set_chat_template(tokenizer.chat_template)  # make sure the chat template is set
-        print(f"Model {model_name} loaded successfully.")
-        return True
-    except Exception as e:
-        print(f"Error loading model {model_name}: {e}")
-        return False
-
-# Function that generates a response
-def generate_response(prompt, model_name):
-    global pipe, tokenizer, accumulated_text
-
-    # Load the model if it has not been loaded yet or needs to be switched
-    if pipe is None or pipe.model_name != model_name:
-        if not load_model(model_name):
-            return "Model loading failed", "Model loading failed", "Model loading failed"
-
-    accumulated_text = ""  # reset the accumulated text
-
-    try:
-        generated = pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
-        tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
-        return tokenpersec, accumulated_text
-    except Exception as e:
-        error_message = f"Error while generating the response: {e}"
-        print(error_message)
-        return "An error occurred", "An error occurred", error_message
 
 with gr.Blocks() as demo:
-
-
-
-
-
-
-
-    def process_input(prompt, model_name):
-        tokens_sec, response = generate_response(prompt, model_name)
-        return tokens_sec, response
-
-    prompt_textbox.submit(
-        fn=process_input,
-        inputs=[prompt_textbox, model_dropdown],
-        outputs=[token_per_sec_textbox, markdown_component]
     )
 
-
 import gradio as gr
+import openvino_genai as ov_genai
+import huggingface_hub as hf_hub
+import queue      # hands generated tokens from the streamer callback to the UI loop
+import threading  # generation runs in a worker thread so the UI can stream updates
+
+# OpenVINO Setup
+model_id = "OpenVINO/Qwen3-0.6B-int4-ov"  # Or your chosen model
+model_path = "Qwen3-0.6B-int4-ov"  # Local directory for the model
+
+# Download the model if it doesn't exist locally
+try:
+    # Check if the model directory exists. A quick and dirty check. Adjust as needed.
+    import os
+    if not os.path.exists(model_path):
         hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
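+        # (local_dir_use_symlinks is deprecated in recent huggingface_hub
+        # releases and can be dropped; it is kept here as in the original.)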
+except Exception as e:
+    print(f"Error downloading model: {e}")
+    print("Please ensure you have huggingface_hub installed and are authenticated if required.")
+    exit()  # Or handle the error more gracefully
+
+pipe = ov_genai.LLMPipeline(model_path, "CPU")
+tokenizer = pipe.get_tokenizer()
+tokenizer.set_chat_template(tokenizer.chat_template)
+pipe.start_chat()  # moved pipe.start_chat() here to run after pipeline initialization
+
+
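+# Note: start_chat() keeps the conversation context inside the pipeline
+# across generate() calls, so each chat turn sees the previous turns.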
+# Gradio Chatbot UI
+def user(user_message, history: list):
+    return "", history + [{"role": "user", "content": user_message}]
+
+
+def bot(history: list):
+    # Get the user's last message from the history
+    user_message = history[-1]["content"]
+
+    # Use OpenVINO to generate a response. The streamer callback runs inside
+    # pipe.generate() and cannot yield to Gradio itself, so it hands each
+    # subword to this generator through a queue instead.
+    token_queue = queue.Queue()
+
+    def streamer(subword):  # Called by the pipeline for every new subword
+        token_queue.put(subword)
+        return ov_genai.StreamingStatus.RUNNING  # Keep generation running
+
+    def generate():  # Worker thread; None marks the end of the stream
+        pipe.generate(user_message, streamer=streamer, max_new_tokens=100)
+        token_queue.put(None)
+
+    # Initialize the bot message in history
+    history.append({"role": "assistant", "content": ""})
+
+    # Generate in the background and stream subwords into the chatbot
+    threading.Thread(target=generate, daemon=True).start()
+    while (subword := token_queue.get()) is not None:
+        history[-1]["content"] += subword  # Accumulate the subword
+        yield history
+
+    # Alternatively, without the step-by-step updates, you can just do this:
+    # full_response = pipe.generate(user_message, max_new_tokens=100)  # but this will skip the streaming
+    # history[-1]["content"] = full_response
+    # yield history
+
 
 with gr.Blocks() as demo:
+    chatbot = gr.Chatbot(type="messages")
+    msg = gr.Textbox()
+    clear = gr.Button("Clear")
+
+    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+        bot, chatbot, chatbot
     )
+    clear.click(lambda: None, None, chatbot, queue=False)
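+    # Note: this clears only the UI history; the pipeline's chat session from
+    # start_chat() still remembers earlier turns until pipe.finish_chat() is called.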
 
+if __name__ == "__main__":
+    demo.queue().launch()
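
For reference, a minimal sketch of the OpenVINO GenAI streaming contract that the new bot() relies on, runnable outside Gradio once the model directory above exists locally (the prompt string is only an example):

    import openvino_genai as ov_genai

    pipe = ov_genai.LLMPipeline("Qwen3-0.6B-int4-ov", "CPU")

    def streamer(subword):
        # Print each subword as it arrives; returning RUNNING lets generation continue
        print(subword, end="", flush=True)
        return ov_genai.StreamingStatus.RUNNING

    pipe.generate("What is OpenVINO?", streamer=streamer, max_new_tokens=50)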