# Page header residue: rahul7star - "Update app.py" - 458b789 (verified)
import os
import gc
import time
import shutil
import subprocess
import streamlit as st
from huggingface_hub import snapshot_download, HfApi
# Exported into this process's environment, which child processes started
# without an explicit env inherit.
# NOTE(review): presumably intended to make the python3 conversion
# subprocess emit its output unbuffered so it can be streamed - confirm.
os.environ["PYTHONUNBUFFERED"] = "1"
# ============================================================
# CONFIG
# ============================================================
# Root of the llama.cpp checkout; the tool paths below are derived from it.
LLAMA_CPP_PATH = "/app/llama.cpp"
# Script used to convert a Hugging Face model directory to GGUF.
CONVERT_SCRIPT = LLAMA_CPP_PATH + "/convert_hf_to_gguf.py"
# Binary used to quantize a GGUF file.
QUANTIZE_BIN = LLAMA_CPP_PATH + "/build/bin/llama-quantize"
# Working directories: downloaded model files and generated GGUF files.
MODEL_DIR = "/tmp/hf_model"
GGUF_DIR = "/tmp/gguf"
# ============================================================
# CLEAN START
# ============================================================
# Runs once per Streamlit session (guarded by the "cleaned" session-state
# key): wipe both working directories and recreate them empty.
if "cleaned" not in st.session_state:
    for p in [MODEL_DIR, GGUF_DIR]:
        # ignore_errors=True covers the "directory does not exist yet" case
        # without a bare `except:` that would also swallow KeyboardInterrupt
        # and SystemExit.
        shutil.rmtree(p, ignore_errors=True)
        os.makedirs(p, exist_ok=True)
    gc.collect()
    st.session_state.cleaned = True
# ============================================================
# QUANTS
# ============================================================
# Quantization type names offered in the UI, in display order.
# The order matters: the quant selectbox picks its default by position.
QUANT_TYPES = [
    "Q2_K",
    "Q3_K_S", "Q3_K_M",
    "Q4_K_S", "Q4_K_M",
    "Q5_K_S", "Q5_K_M",
]
# ============================================================
# LOGGING
# ============================================================
def log(msg):
    """Write ``msg`` to the Streamlit page, prefixed with a receipt emoji."""
    text = f"🧾 {msg}"
    st.write(text)
# ============================================================
# STREAM COMMAND
# ============================================================
def run_cmd(cmd):
    """Run ``cmd`` and stream its combined stdout/stderr to the page.

    Each output line is echoed to this process's stdout and appended to a
    Streamlit code area that shows the most recent 1000 lines.

    Args:
        cmd: Argument list passed to ``subprocess.Popen``.

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    log(f"RUNNING:\n{' '.join(cmd)}")
    log_area = st.empty()
    log_lines = []
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr into the same stream
        text=True,
        bufsize=1,  # line-buffered in text mode
    )
    try:
        # Iterating the pipe blocks until a line arrives and ends at EOF, so
        # there is no polling loop that can spin if the child closes stdout
        # before it exits.
        for line in process.stdout:
            # print in terminal too
            print(line.strip())
            log_lines.append(line)
            # keep lots of lines visible; trim in place instead of copying
            del log_lines[:-1000]
            log_area.code("".join(log_lines))
        return_code = process.wait()
    finally:
        # If streaming was interrupted (for example a rendering call raised),
        # do not leave the child running or the pipe open.
        if process.poll() is None:
            process.kill()
            process.wait()
        process.stdout.close()
    # final render so logs stay visible
    log_area.code("".join(log_lines))
    if return_code != 0:
        raise RuntimeError(
            f"Command failed with exit code {return_code}"
        )
# ============================================================
# HF API
# ============================================================
def get_api():
    """Build an ``HfApi`` client from the ``HF_TOKEN`` environment variable.

    Returns:
        An ``HfApi`` constructed with the token, or ``None`` (after showing
        an error on the page) when the variable is unset or empty.
    """
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        log("🔐 HF token loaded")
        return HfApi(token=hf_token)
    st.error("❌ HF_TOKEN missing")
    return None
# ============================================================
# DOWNLOAD
# ============================================================
def download_model(repo):
    """Download the Hub repository ``repo`` into ``MODEL_DIR``.

    Args:
        repo: Repository id passed to ``snapshot_download``.

    Returns:
        The path returned by ``snapshot_download``.
    """
    log("📥 Downloading model")
    download_kwargs = {
        "repo_id": repo,
        "local_dir": MODEL_DIR,
        "local_dir_use_symlinks": False,
    }
    local_path = snapshot_download(**download_kwargs)
    st.success("✅ Downloaded")
    gc.collect()
    return local_path
# ============================================================
# TEMP GGUF
# ============================================================
def make_temp_gguf(model_path):
    """Convert the model at ``model_path`` to ``temp.gguf`` in ``GGUF_DIR``.

    Runs ``CONVERT_SCRIPT`` under ``python3`` with ``--outtype f16`` and
    ``--no-lazy``, then reports the resulting file size on the page.

    Args:
        model_path: Model directory handed to the conversion script.

    Returns:
        Path of the generated GGUF file.

    Raises:
        RuntimeError: If the command fails or the output file is missing.
    """
    gguf_path = os.path.join(GGUF_DIR, "temp.gguf")
    log("🔄 Generating temporary GGUF")
    run_cmd([
        "python3", CONVERT_SCRIPT, model_path,
        "--outfile", gguf_path,
        "--outtype", "f16",
        "--no-lazy",
    ])
    if os.path.exists(gguf_path):
        size_gb = os.path.getsize(gguf_path) / (1024**3)
        st.success(f"✅ Temp GGUF ready ({size_gb:.2f} GB)")
        return gguf_path
    raise RuntimeError("Temp GGUF failed")
# ============================================================
# QUANTIZE
# ============================================================
def quantize(temp_gguf, q):
    """Quantize ``temp_gguf`` to type ``q`` and write ``<q>.gguf`` in ``GGUF_DIR``.

    Invokes ``QUANTIZE_BIN`` with the input path, the output path and the
    lower-cased quant name, then reports the resulting file size.

    Args:
        temp_gguf: Path of the GGUF file to quantize.
        q: Quantization type name; also used as the output file stem.

    Returns:
        Path of the quantized GGUF file.

    Raises:
        RuntimeError: If the command fails or the output file is missing.
    """
    quant_path = os.path.join(GGUF_DIR, f"{q}.gguf")
    log(f"⚡ Quantizing {q}")
    run_cmd([QUANTIZE_BIN, temp_gguf, quant_path, q.lower()])
    if os.path.exists(quant_path):
        size_gb = os.path.getsize(quant_path) / (1024**3)
        st.success(f"✅ Quant ready ({size_gb:.2f} GB)")
        return quant_path
    raise RuntimeError("Quant failed")
# ============================================================
# UPLOAD
# ============================================================
def upload(api, file, repo, attempts=5, delay=5):
    """Upload ``file`` to the root of ``repo``, retrying on failure.

    Args:
        api: Object exposing ``upload_file`` (an ``HfApi`` in this script).
        file: Local path of the file to upload; its basename is used as the
            path inside the repository.
        repo: Target repository id.
        attempts: Maximum number of upload attempts (default 5).
        delay: Seconds to wait between failed attempts (default 5).

    Returns:
        ``True`` once an attempt succeeds; ``False`` if the file does not
        exist or every attempt raised.
    """
    if not os.path.exists(file):
        st.error("❌ Upload file missing")
        return False
    name = os.path.basename(file)
    size = os.path.getsize(file) / (1024**3)
    for attempt in range(1, attempts + 1):
        try:
            log(
                f"🚀 Upload attempt {attempt}/{attempts} "
                f"({size:.2f} GB)"
            )
            api.upload_file(
                path_or_fileobj=file,
                path_in_repo=name,
                repo_id=repo
            )
        except Exception as e:
            # Best-effort retry: surface the error and try again.
            st.error(str(e))
            # No point in waiting after the last attempt has failed.
            if attempt < attempts:
                time.sleep(delay)
        else:
            st.success(f"✅ Uploaded {name}")
            return True
    return False
# ============================================================
# UI
# ============================================================
# Page title followed by the three inputs that drive the pipeline.
st.title("🔥 GGUF Quant Uploader")
# Repository id of the model to download and convert.
model_id = st.text_input(label="HF Model", value="FINAL-Bench/Darwin-4B-Genesis")
# Quantization type; the default is chosen by position in QUANT_TYPES.
selected_quant = st.selectbox(label="Quant Type", options=QUANT_TYPES, index=4)
# Repository id that receives the quantized file.
repo = st.text_input(label="Upload Repo", value="rahul7star/darwin-gguf")
# ============================================================
# RUN
# ============================================================
# Full pipeline, run when the Start button is pressed:
# create repo -> download -> convert to f16 GGUF -> quantize -> upload.
# Large intermediates are deleted as soon as the next stage no longer
# needs them.
if st.button("🚀 Start"):
    # Tracks the temporary GGUF so the `finally` below can remove it if a
    # later stage fails; reset to None once its deletion has been handled.
    temp_gguf = None
    try:
        # ====================================================
        # HF API
        # ====================================================
        api = get_api()
        if not api:
            st.stop()
        api.create_repo(
            repo,
            exist_ok=True
        )
        st.success("✅ HF repo ready")
        # ====================================================
        # DOWNLOAD
        # ====================================================
        model_path = download_model(model_id)
        log(f"Model path: {model_path}")
        # ====================================================
        # TEMP GGUF
        # ====================================================
        temp_gguf = make_temp_gguf(
            model_path
        )
        # ====================================================
        # DELETE HF WEIGHTS ASAP
        # ====================================================
        try:
            shutil.rmtree(
                model_path,
                ignore_errors=True
            )
            gc.collect()
            log("🧹 Deleted HF weights")
        except Exception as e:
            st.error(str(e))
        # ====================================================
        # QUANTIZE
        # ====================================================
        out = quantize(
            temp_gguf,
            selected_quant
        )
        # ====================================================
        # DELETE TEMP GGUF ASAP
        # ====================================================
        try:
            os.remove(temp_gguf)
            gc.collect()
            log("🧹 Deleted temp GGUF")
        except Exception as e:
            st.error(str(e))
        # Deletion was attempted (and any error reported) above, so the
        # `finally` clause must not retry it.
        temp_gguf = None
        # ====================================================
        # VERIFY
        # ====================================================
        if not os.path.exists(out):
            st.error("❌ Final quant missing")
            st.stop()
        # ====================================================
        # UPLOAD
        # ====================================================
        ok = upload(
            api,
            out,
            repo
        )
        if not ok:
            st.error("❌ Upload failed")
            st.stop()
        st.success("🎉 DONE")
    except Exception as e:
        st.error(str(e))
    finally:
        # If quantization failed, the temporary GGUF is still on disk and
        # is of no further use; remove it.
        if temp_gguf and os.path.exists(temp_gguf):
            try:
                os.remove(temp_gguf)
            except OSError as e:
                st.error(str(e))