Spaces:

hsuwill000
/

qwen3_test

Paused

App Files Files Community

hsuwill000 commited on Jun 16

Commit

50398e9

verified ·

1 Parent(s): 635ee63

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -50

app.py CHANGED Viewed

@@ -10,85 +10,104 @@ import threading
 model_ids = [
     "OpenVINO/Qwen3-0.6B-int4-ov",
     "OpenVINO/Qwen3-1.7B-int4-ov",
-    #"OpenVINO/Qwen3-4B-int4-ov",#不可用
     "OpenVINO/Qwen3-8B-int4-ov",
     "OpenVINO/Qwen3-14B-int4-ov",
 ]
-model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}  #Create Dictionary
-for model_id in model_ids:
     model_path = model_id.split("/")[-1]  # Extract model name
     try:
-      hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
-      print(f"Successfully downloaded {model_id} to {model_path}") # Optional: Print confirmation
     except Exception as e:
-      print(f"Error downloading {model_id}: {e}") # Handle download errors gracefully
 # 建立推理管線 (Initialize with a default model first)
 device = "CPU"
 default_model_name = "Qwen3-0.6B-int4-ov"  # Choose a default model
 # 全局变量，用于存储推理管线、分词器、Markdown 组件和累计文本
 pipe = None
 tokenizer = None
-markdown_component = None  # 初始化
 accumulated_text = ""
-#  定义同步更新 Markdown 组件的函数
-def update_markdown(text):
-    global markdown_component
-    if markdown_component:
-        markdown_component.update(value=text)
 # 创建 streamer 函数 (保持原有架构)
 def streamer(subword):
     global accumulated_text
     accumulated_text += subword
     print(subword, end='', flush=True)  # 保留打印到控制台
-    #  使用线程来异步更新 Markdown 组件
-    threading.Thread(target=update_markdown, args=(accumulated_text,)).start() # 异步更新 UI
-    return ov_genai.StreamingStatus.RUNNING
 def generate_response(prompt, model_name):
-    global pipe, tokenizer  # Access the global variables
-    model_path = model_name
-    print(f"Switching to model: {model_name}")
-    pipe = ov_genai.LLMPipeline(model_path, device)
-    tokenizer = pipe.get_tokenizer()
-    tokenizer.set_chat_template(tokenizer.chat_template)
     try:
-        #generated = pipe.generate([prompt], max_length=1024)
         generated = pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
-        tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
-        return tokenpersec, generated
     except Exception as e:
-        return "發生錯誤", "發生錯誤", f"生成回應時發生錯誤：{e}"
-# 建立 Gradio 介面
-model_choices = list(model_name_to_full_id.keys())
-demo = gr.Interface(
-    fn=generate_response,
-    inputs=[
-        gr.Textbox(lines=5, label="輸入提示 (Prompt)"),
-        gr.Dropdown(choices=model_choices, value=default_model_name, label="選擇模型") # Added dropdown
-    ],
-    outputs=[
-        gr.Textbox(label="tokens/sec"),
-        #gr.Textbox(label="回應"),
-        markdown_component = gr.Markdown(label="回应")
-    ],
-    title="Qwen3 Model Inference",
-    description="基於 Qwen3 推理應用，支援思考過程分離與 GUI。"
-)
-if __name__ == "__main__":
-    demo.launch()

 model_ids = [
     "OpenVINO/Qwen3-0.6B-int4-ov",
     "OpenVINO/Qwen3-1.7B-int4-ov",
+    #"OpenVINO/Qwen3-4B-int4-ov",  #不可用
     "OpenVINO/Qwen3-8B-int4-ov",
     "OpenVINO/Qwen3-14B-int4-ov",
 ]
+model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}  # Create Dictionary
+def download_model(model_id):
     model_path = model_id.split("/")[-1]  # Extract model name
     try:
+        hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
+        print(f"Successfully downloaded {model_id} to {model_path}")
+        # 檢查模型檔案是否完整 (可以加入具體的檔案檢查)
+        # 例如，檢查必須存在的檔案是否存在，或驗證檔案大小
+        return True
     except Exception as e:
+        print(f"Error downloading {model_id}: {e}")
+        return False
+# 下載所有模型
+for model_id in model_ids:
+    if not download_model(model_id):
+        print(f"Failed to download {model_id}, skipping.")
 # 建立推理管線 (Initialize with a default model first)
 device = "CPU"
 default_model_name = "Qwen3-0.6B-int4-ov"  # Choose a default model
 # 全局变量，用于存储推理管线、分词器、Markdown 组件和累计文本
 pipe = None
 tokenizer = None
 accumulated_text = ""
+# 初始化 Markdown 组件
+markdown_component = None  # 在全局範圍初始化
+# 建立 Gradio 介面
+model_choices = list(model_name_to_full_id.keys())
 # 创建 streamer 函数 (保持原有架构)
 def streamer(subword):
     global accumulated_text
     accumulated_text += subword
     print(subword, end='', flush=True)  # 保留打印到控制台
+    return accumulated_text # 返回更新後的文字，Gradio會自動更新Markdown元件
+# 模型載入函數
+def load_model(model_name):
+    global pipe, tokenizer
+    model_path = model_name
+    print(f"Loading model: {model_name}")
+    try:
+        pipe = ov_genai.LLMPipeline(model_path, device)
+        tokenizer = pipe.get_tokenizer()
+        tokenizer.set_chat_template(tokenizer.chat_template)  # 確保 chat template 已設定
+        print(f"Model {model_name} loaded successfully.")
+        return True
+    except Exception as e:
+        print(f"Error loading model {model_name}: {e}")
+        return False
+# 產生回應的函數
 def generate_response(prompt, model_name):
+    global pipe, tokenizer, accumulated_text
+    # 如果模型尚未載入，或需要切換模型，則載入模型
+    if pipe is None or pipe.model_name != model_name:
+        if not load_model(model_name):
+            return "模型載入失敗", "模型載入失敗", "模型載入失敗"
+    accumulated_text = "" #重置累積文字
     try:
         generated = pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
+        tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
+        return tokenpersec, accumulated_text
     except Exception as e:
+        error_message = f"生成回應時發生錯誤：{e}"
+        print(error_message)
+        return "發生錯誤", "發生錯誤", error_message
+with gr.Blocks() as demo:
+    markdown_component = gr.Markdown(label="回应")  # 在Blocks內部初始化
+    with gr.Row():
+        prompt_textbox = gr.Textbox(lines=5, label="輸入提示 (Prompt)")
+        model_dropdown = gr.Dropdown(choices=model_choices, value=default_model_name, label="選擇模型")
+    with gr.Row():
+        token_per_sec_textbox = gr.Textbox(label="tokens/sec")
+    def process_input(prompt, model_name):
+        tokens_sec, response = generate_response(prompt, model_name)
+        return tokens_sec, response
+    prompt_textbox.submit(
+        fn=process_input,
+        inputs=[prompt_textbox, model_dropdown],
+        outputs=[token_per_sec_textbox, markdown_component]
+    )
+demo.launch()