| import os |
| import gc |
| import time |
| import shutil |
| import subprocess |
| import streamlit as st |
|
|
| from huggingface_hub import snapshot_download, HfApi |
# Set in this process's environment so that child processes started later
# inherit it; for Python children this variable disables output buffering.
os.environ.update(PYTHONUNBUFFERED="1")

# Root of the llama.cpp checkout; the tool paths below are derived from it.
LLAMA_CPP_PATH = "/app/llama.cpp"

# HF -> GGUF converter script and the quantizer binary inside that checkout.
CONVERT_SCRIPT = LLAMA_CPP_PATH + "/convert_hf_to_gguf.py"
QUANTIZE_BIN = LLAMA_CPP_PATH + "/build/bin/llama-quantize"

# Scratch directories: the downloaded model snapshot and the generated GGUFs.
MODEL_DIR = "/tmp/hf_model"
GGUF_DIR = "/tmp/gguf"
|
|
| |
| |
| |
|
|
# One-time scratch reset per Streamlit session: the "cleaned" flag stored in
# session_state keeps script reruns from wiping files of a run in progress.
if "cleaned" not in st.session_state:
    for p in [MODEL_DIR, GGUF_DIR]:
        # Best-effort wipe (the directory may not exist yet). ignore_errors
        # keeps this silent without a bare `except`, which would also have
        # swallowed KeyboardInterrupt / SystemExit.
        shutil.rmtree(p, ignore_errors=True)
        os.makedirs(p, exist_ok=True)

    gc.collect()
    st.session_state.cleaned = True
|
|
| |
| |
| |
|
|
# Quantization presets offered in the UI, listed in ascending Qn order.
# Kept as a list; the selectbox default refers to an entry by position.
QUANT_TYPES = "Q2_K Q3_K_S Q3_K_M Q4_K_S Q4_K_M Q5_K_S Q5_K_M".split()
|
|
| |
| |
| |
|
|
def log(msg):
    """Write *msg* to the Streamlit page, prefixed with a receipt emoji."""
    entry = f"🧾 {msg}"
    st.write(entry)
|
|
| |
| |
| |
def run_cmd(cmd):
    """Run *cmd* as a subprocess and stream its output into the page.

    stdout and stderr are merged and read line by line. Each line is echoed
    to this process's stdout and appended to a rolling window (the most
    recent 1000 lines) that is re-rendered in a single Streamlit placeholder.

    Args:
        cmd: Argument list handed to ``subprocess.Popen`` (no shell involved).

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    max_lines = 1000  # size of the on-screen rolling window

    log(f"RUNNING:\n{' '.join(cmd)}")

    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        # Tool output is not guaranteed to be valid in the locale encoding;
        # replace undecodable bytes instead of aborting the whole run with a
        # UnicodeDecodeError.
        errors="replace",
        bufsize=1,
    )

    log_lines = []
    log_area = st.empty()

    try:
        # Iterating the pipe blocks until a line arrives and stops at EOF, so
        # there is no spinning if the child closes stdout before exiting.
        for line in process.stdout:
            print(line.strip())

            log_lines.append(line)
            # Trim in place; a no-op until the window exceeds max_lines.
            del log_lines[:-max_lines]

            log_area.code("".join(log_lines))
    except BaseException:
        # Streaming was interrupted (error, or the script run being unwound):
        # do not leave the child running in the background.
        process.kill()
        raise
    finally:
        process.stdout.close()
        # Always reap the child so it does not linger as a zombie.
        return_code = process.wait()

    # Final render so the placeholder exists even when there was no output.
    log_area.code("".join(log_lines))

    if return_code != 0:
        raise RuntimeError(
            f"Command failed with exit code {return_code}"
        )
| |
| |
| |
|
|
def get_api():
    """Build a Hugging Face API client from the ``HF_TOKEN`` env variable.

    Returns:
        An ``HfApi`` created with the token, or ``None`` (after showing an
        error on the page) when ``HF_TOKEN`` is unset or empty.
    """
    hf_token = os.environ.get("HF_TOKEN")

    if hf_token:
        log("🔐 HF token loaded")
        return HfApi(token=hf_token)

    st.error("❌ HF_TOKEN missing")
    return None
|
|
| |
| |
| |
|
|
def download_model(repo):
    """Download a snapshot of the Hub repository *repo* into ``MODEL_DIR``.

    Args:
        repo: Repository id forwarded to ``snapshot_download`` as ``repo_id``.

    Returns:
        The path returned by ``snapshot_download``.
    """
    log("📥 Downloading model")

    # NOTE(review): newer huggingface_hub releases appear to deprecate
    # local_dir_use_symlinks — confirm against the pinned version before
    # dropping it.
    download_kwargs = {
        "repo_id": repo,
        "local_dir": MODEL_DIR,
        "local_dir_use_symlinks": False,
    }
    local_path = snapshot_download(**download_kwargs)

    st.success("✅ Downloaded")
    gc.collect()

    return local_path
|
|
| |
| |
| |
|
|
def make_temp_gguf(model_path):
    """Convert the model at *model_path* into an intermediate f16 GGUF file.

    Runs ``CONVERT_SCRIPT`` through ``python3`` with ``--outtype f16`` and
    ``--no-lazy``, writing to ``temp.gguf`` inside ``GGUF_DIR``.

    Args:
        model_path: Model location passed to the converter script.

    Returns:
        Path of the generated ``temp.gguf``.

    Raises:
        RuntimeError: If the converter command fails (raised by ``run_cmd``)
            or the output file is missing afterwards.
    """
    target = os.path.join(GGUF_DIR, "temp.gguf")

    log("🔄 Generating temporary GGUF")

    run_cmd(
        ["python3", CONVERT_SCRIPT, model_path]
        + ["--outfile", target]
        + ["--outtype", "f16"]
        + ["--no-lazy"]
    )

    if os.path.exists(target):
        size_gb = os.path.getsize(target) / (1024**3)
        st.success(f"✅ Temp GGUF ready ({size_gb:.2f} GB)")
        return target

    raise RuntimeError("Temp GGUF failed")
|
|
| |
| |
| |
|
|
def quantize(temp_gguf, q):
    """Quantize *temp_gguf* to type *q* using the ``QUANTIZE_BIN`` binary.

    Args:
        temp_gguf: Path of the input GGUF file.
        q: Quantization type name; it names the output file (``<q>.gguf`` in
            ``GGUF_DIR``) and is passed to the binary in lower case.

    Returns:
        Path of the quantized GGUF file.

    Raises:
        RuntimeError: If the quantize command fails (raised by ``run_cmd``)
            or the output file is missing afterwards.
    """
    target = os.path.join(GGUF_DIR, f"{q}.gguf")

    log(f"⚡ Quantizing {q}")

    run_cmd([QUANTIZE_BIN, temp_gguf, target, q.lower()])

    if os.path.exists(target):
        size_gb = os.path.getsize(target) / (1024**3)
        st.success(f"✅ Quant ready ({size_gb:.2f} GB)")
        return target

    raise RuntimeError("Quant failed")
|
|
| |
| |
| |
|
|
def upload(api, file, repo, *, attempts=5, retry_delay=5):
    """Upload *file* to the Hub repository *repo*, retrying on failure.

    Args:
        api: Client object exposing ``upload_file`` (an ``HfApi`` in this app).
        file: Local path of the file to upload; its basename becomes the path
            inside the repository.
        repo: Destination repository id.
        attempts: Maximum number of upload attempts.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        ``True`` once an attempt succeeds; ``False`` if the file does not
        exist or every attempt failed.
    """
    if not os.path.exists(file):
        st.error("❌ Upload file missing")
        return False

    filename = os.path.basename(file)
    size = os.path.getsize(file) / (1024**3)

    for attempt in range(1, attempts + 1):
        log(f"🚀 Upload attempt {attempt}/{attempts} ({size:.2f} GB)")

        try:
            # Only the network call is retried; UI calls stay outside the try
            # so a rendering problem cannot trigger a second upload.
            api.upload_file(
                path_or_fileobj=file,
                path_in_repo=filename,
                repo_id=repo,
            )
        except Exception as e:  # retry boundary: any upload failure is retried
            st.error(str(e))
            # Waiting only makes sense when another attempt follows.
            if attempt < attempts:
                time.sleep(retry_delay)
        else:
            st.success(f"✅ Uploaded {filename}")
            return True

    return False
|
|
| |
| |
| |
|
|
st.title("🔥 GGUF Quant Uploader")

# Hub repository of the model to convert.
model_id = st.text_input(
    label="HF Model",
    value="FINAL-Bench/Darwin-4B-Genesis",
)

# index=4 preselects the fifth entry of QUANT_TYPES.
selected_quant = st.selectbox(
    label="Quant Type",
    options=QUANT_TYPES,
    index=4,
)

# Hub repository that receives the quantized file.
repo = st.text_input(
    label="Upload Repo",
    value="rahul7star/darwin-gguf",
)
|
|
| |
| |
| |
|
|
# Pipeline: authenticate -> download -> convert to f16 GGUF -> quantize ->
# upload. Runs on the script pass in which the Start button was clicked.
if st.button("🚀 Start"):

    # Intermediates that are currently on disk. They are reset to None once
    # deleted so the `finally` block only removes what a failed run left behind.
    model_path = None
    temp_gguf = None

    try:
        # --- Hub client and destination repository -------------------------
        api = get_api()

        if not api:
            st.stop()

        api.create_repo(
            repo,
            exist_ok=True
        )

        st.success("✅ HF repo ready")

        # --- Download ------------------------------------------------------
        model_path = download_model(model_id)

        log(f"Model path: {model_path}")

        # --- Convert to the intermediate GGUF ------------------------------
        temp_gguf = make_temp_gguf(
            model_path
        )

        # The original weights are not used after conversion; delete them
        # before quantizing so both copies are not on disk at the same time.
        shutil.rmtree(
            model_path,
            ignore_errors=True
        )
        model_path = None

        gc.collect()

        log("🧹 Deleted HF weights")

        # --- Quantize ------------------------------------------------------
        out = quantize(
            temp_gguf,
            selected_quant
        )

        # The intermediate file is not used after quantization.
        try:
            os.remove(temp_gguf)
        except OSError as e:
            # Best effort: report the problem and carry on with the upload.
            st.error(str(e))
        else:
            log("🧹 Deleted temp GGUF")
        temp_gguf = None

        gc.collect()

        if not os.path.exists(out):
            st.error("❌ Final quant missing")
            st.stop()

        # --- Upload --------------------------------------------------------
        ok = upload(
            api,
            out,
            repo
        )

        if not ok:
            st.error("❌ Upload failed")
            st.stop()

        st.success("🎉 DONE")

    except Exception as e:
        # Top-level boundary: surface any pipeline failure on the page.
        st.error(str(e))

    finally:
        # The per-session scratch reset runs only once, so a failed convert or
        # quantize step would otherwise leave the downloaded weights and the
        # intermediate GGUF on disk for the rest of the session.
        if model_path is not None:
            shutil.rmtree(model_path, ignore_errors=True)

        if temp_gguf is not None and os.path.exists(temp_gguf):
            try:
                os.remove(temp_gguf)
            except OSError as e:
                st.error(str(e))