"""Streamlit app: download a Hugging Face model, convert it to GGUF,
quantize it with llama.cpp and upload the result to a Hugging Face repo.

Intermediate artifacts (HF weights, the f16 GGUF) are deleted as soon as
they are no longer needed to keep disk usage low.
"""

import os
import gc
import time
import shutil
import subprocess
from collections import deque

import streamlit as st
from huggingface_hub import snapshot_download, HfApi

os.environ["PYTHONUNBUFFERED"] = "1"

# ============================================================
# CONFIG
# ============================================================

LLAMA_CPP_PATH = "/app/llama.cpp"

CONVERT_SCRIPT = f"{LLAMA_CPP_PATH}/convert_hf_to_gguf.py"
QUANTIZE_BIN = f"{LLAMA_CPP_PATH}/build/bin/llama-quantize"

MODEL_DIR = "/tmp/hf_model"
GGUF_DIR = "/tmp/gguf"

# Number of subprocess output lines kept visible in the UI.
MAX_LOG_LINES = 1000

# ============================================================
# CLEAN START
# ============================================================

# Wipe and recreate the working directories once per Streamlit session.
if "cleaned" not in st.session_state:
    for p in [MODEL_DIR, GGUF_DIR]:
        # Best-effort removal: a missing directory is not an error.
        shutil.rmtree(p, ignore_errors=True)
        os.makedirs(p, exist_ok=True)

    gc.collect()
    st.session_state.cleaned = True

# ============================================================
# QUANTS
# ============================================================

QUANT_TYPES = [
    "Q2_K",
    "Q3_K_S",
    "Q3_K_M",
    "Q4_K_S",
    "Q4_K_M",
    "Q5_K_S",
    "Q5_K_M",
]

# ============================================================
# LOGGING
# ============================================================


def log(msg):
    """Write a prefixed log message to the Streamlit page."""
    st.write(f"๐Ÿงพ {msg}")


# ============================================================
# STREAM COMMAND
# ============================================================


def run_cmd(cmd):
    """Run *cmd* and stream its combined stdout/stderr into the page.

    Each output line is also printed to the terminal. Only the last
    ``MAX_LOG_LINES`` lines are kept on screen.

    Args:
        cmd: Argument list passed to ``subprocess.Popen`` (no shell).

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    log(f"RUNNING:\n{' '.join(cmd)}")

    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    ) as process:
        # Bounded buffer: old lines fall off automatically.
        log_lines = deque(maxlen=MAX_LOG_LINES)
        log_area = st.empty()

        try:
            for line in process.stdout:
                # print in terminal too
                print(line.strip())
                log_lines.append(line)
                log_area.code("".join(log_lines))
        except BaseException:
            # The script is being interrupted (error, stop or rerun raised
            # from a Streamlit call): do not leave the child running with
            # nobody reading its pipe.
            process.kill()
            raise

    # Leaving the ``with`` block closed stdout and waited for the child.
    return_code = process.returncode

    # final render so logs stay visible
    log_area.code("".join(log_lines))

    if return_code != 0:
        raise RuntimeError(
            f"Command failed with exit code {return_code}"
        )


# ============================================================
# HF API
# ============================================================


def get_api():
    """Return an ``HfApi`` built from the ``HF_TOKEN`` environment variable.

    Returns:
        The API client, or ``None`` (after showing an error) when the
        token is not set.
    """
    token = os.getenv("HF_TOKEN")

    if not token:
        st.error("โŒ HF_TOKEN missing")
        return None

    log("๐Ÿ” HF token loaded")
    return HfApi(token=token)


# ============================================================
# DOWNLOAD
# ============================================================


def download_model(repo):
    """Download the snapshot of *repo* into ``MODEL_DIR`` and return its path."""
    log("๐Ÿ“ฅ Downloading model")

    # NOTE(review): ``local_dir_use_symlinks`` appears to be deprecated in
    # recent huggingface_hub releases — confirm against the pinned version
    # before removing it.
    path = snapshot_download(
        repo_id=repo,
        local_dir=MODEL_DIR,
        local_dir_use_symlinks=False,
    )

    st.success("โœ… Downloaded")
    gc.collect()

    return path


# ============================================================
# TEMP GGUF
# ============================================================


def make_temp_gguf(model_path):
    """Convert the HF model at *model_path* to an f16 GGUF file.

    Returns:
        Path of the generated ``temp.gguf`` inside ``GGUF_DIR``.

    Raises:
        RuntimeError: If the converter fails or produces no output file.
    """
    out = os.path.join(GGUF_DIR, "temp.gguf")

    log("๐Ÿ”„ Generating temporary GGUF")

    cmd = [
        "python3",
        CONVERT_SCRIPT,
        model_path,
        "--outfile",
        out,
        "--outtype",
        "f16",
        "--no-lazy",
    ]
    run_cmd(cmd)

    if not os.path.exists(out):
        raise RuntimeError("Temp GGUF failed")

    size = os.path.getsize(out) / (1024**3)
    st.success(
        f"โœ… Temp GGUF ready ({size:.2f} GB)"
    )

    return out


# ============================================================
# QUANTIZE
# ============================================================


def quantize(temp_gguf, q):
    """Quantize *temp_gguf* to quant type *q* with ``llama-quantize``.

    Returns:
        Path of the quantized file, ``<GGUF_DIR>/<q>.gguf``.

    Raises:
        RuntimeError: If the tool fails or produces no output file.
    """
    out = os.path.join(GGUF_DIR, f"{q}.gguf")

    log(f"โšก Quantizing {q}")

    cmd = [
        QUANTIZE_BIN,
        temp_gguf,
        out,
        q.lower(),
    ]
    run_cmd(cmd)

    if not os.path.exists(out):
        raise RuntimeError("Quant failed")

    size = os.path.getsize(out) / (1024**3)
    st.success(
        f"โœ… Quant ready ({size:.2f} GB)"
    )

    return out


# ============================================================
# UPLOAD
# ============================================================


def upload(api, file, repo, retries=5, delay=5):
    """Upload *file* to *repo*, retrying on failure.

    Args:
        api: Client exposing ``upload_file`` (an ``HfApi``).
        file: Local path of the file to upload.
        repo: Target repository id.
        retries: Maximum number of attempts.
        delay: Seconds to wait between two attempts.

    Returns:
        ``True`` on success; ``False`` if the file does not exist or every
        attempt failed.
    """
    if not os.path.exists(file):
        st.error("โŒ Upload file missing")
        return False

    size = os.path.getsize(file) / (1024**3)
    name = os.path.basename(file)

    for attempt in range(1, retries + 1):
        log(
            f"๐Ÿš€ Upload attempt {attempt}/{retries} "
            f"({size:.2f} GB)"
        )

        try:
            api.upload_file(
                path_or_fileobj=file,
                path_in_repo=name,
                repo_id=repo,
            )
        except Exception as e:
            # Retry boundary: show the error and try again.
            st.error(str(e))
            # No point in waiting once the last attempt has failed.
            if attempt < retries:
                time.sleep(delay)
        else:
            st.success(
                f"โœ… Uploaded {name}"
            )
            return True

    return False


# ============================================================
# UI
# ============================================================

st.title("๐Ÿ”ฅ GGUF Quant Uploader")

model_id = st.text_input(
    "HF Model",
    "FINAL-Bench/Darwin-4B-Genesis",
)

selected_quant = st.selectbox(
    "Quant Type",
    QUANT_TYPES,
    index=4,
)

repo = st.text_input(
    "Upload Repo",
    "rahul7star/darwin-gguf",
)

# ============================================================
# RUN
# ============================================================

if st.button("๐Ÿš€ Start"):
    try:
        # ====================================================
        # HF API
        # ====================================================

        api = get_api()

        if not api:
            st.stop()

        api.create_repo(
            repo,
            exist_ok=True,
        )

        st.success("โœ… HF repo ready")

        # ====================================================
        # DOWNLOAD
        # ====================================================

        model_path = download_model(model_id)

        log(f"Model path: {model_path}")

        # ====================================================
        # TEMP GGUF
        # ====================================================

        temp_gguf = make_temp_gguf(model_path)

        # ====================================================
        # DELETE HF WEIGHTS ASAP
        # ====================================================

        shutil.rmtree(model_path, ignore_errors=True)
        gc.collect()
        log("๐Ÿงน Deleted HF weights")

        # ====================================================
        # QUANTIZE
        # ====================================================

        try:
            out = quantize(
                temp_gguf,
                selected_quant,
            )
        finally:
            # ================================================
            # DELETE TEMP GGUF ASAP
            # ================================================
            # Runs on failure too, so a failed quantization does not
            # leave the large f16 file on disk.
            try:
                os.remove(temp_gguf)
            except OSError as e:
                st.error(str(e))
            else:
                gc.collect()
                log("๐Ÿงน Deleted temp GGUF")

        # ====================================================
        # VERIFY
        # ====================================================

        if not os.path.exists(out):
            st.error("โŒ Final quant missing")
            st.stop()

        # ====================================================
        # UPLOAD
        # ====================================================

        ok = upload(
            api,
            out,
            repo,
        )

        if not ok:
            st.error("โŒ Upload failed")
            st.stop()

        st.success("๐ŸŽ‰ DONE")

    except Exception as e:
        st.error(str(e))