Remove make-gguf.sh

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2025-02-22 12:54:46 +00:00
parent 3849223340
commit 46feaf6296
No known key found for this signature in database
6 changed files with 38 additions and 59 deletions

1
Cargo.lock generated
View File

@ -4754,6 +4754,7 @@ dependencies = [
"async-trait",
"bindgen 0.71.1",
"clap 4.5.30",
"hf-hub",
"num_cpus",
"pkg-config",
"text-generation-router",

View File

@ -79,9 +79,6 @@ COPY --from=builder /usr/lib/libllama.so /usr/lib/
COPY --from=builder /usr/lib/libggml*.so /usr/lib/
COPY --from=builder /app/target/release/text-generation-router-llamacpp /usr/bin/
COPY backends/llamacpp/make-gguf.sh make-gguf.sh
ENV MAKE_GGUF=./make-gguf.sh
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENTRYPOINT ["text-generation-router-llamacpp"]

View File

@ -12,6 +12,7 @@ pkg-config = "0.3.31"
[dependencies]
async-trait = "0.1.85"
clap = "4.5.27"
hf-hub.workspace = true
num_cpus = "1.16.0"
text-generation-router = { path = "../../router" }
thiserror = "2.0.11"

View File

@ -1,30 +0,0 @@
#!/bin/sh
# make-gguf.sh — download a model repository from the Hugging Face Hub and
# convert it to a single GGUF file.
#
# Usage: make-gguf.sh <GGUF> <MODEL_ID> [<REV>]
#   GGUF      output path; must end in ".gguf" (with at least one char before it)
#   MODEL_ID  Hub repository id (e.g. "org/model")
#   REV       optional git revision; defaults to "main"
#
# The repository is downloaded into a temporary "model.src/" tree which is
# always removed afterwards; the script exits with the status of the
# download/convert pipeline.

[ "$#" -ge 2 ] || {
	echo "Usage: $0 <GGUF> <MODEL_ID> [<REV>]" >&2
	# 'return' is only valid inside a function or a sourced script; in an
	# executed script it fails and the shell keeps running — use 'exit'.
	exit 1
}

case "$1" in (*?.gguf) ;; (*)
	# Diagnostics belong on stderr, like the usage message above.
	echo "Not a valid GGUF file: $1" >&2
	exit 1
esac

GGUF="$1"
GGUF_DIR=$(dirname -- "$GGUF")
MODEL_ID="$2"
# Use the named variable rather than repeating the positional parameter.
MODEL_DIR="model.src/$MODEL_ID"
REV="${3-main}"

mkdir -p model.src "$GGUF_DIR"

huggingface-cli download \
	--revision "$REV" \
	--local-dir "$MODEL_DIR" \
	"$MODEL_ID" &&
convert_hf_to_gguf.py \
	--outfile "$GGUF" \
	"$MODEL_DIR"
status=$?

# Always clean up the downloaded sources, but do not let the cleanup's
# (successful) exit status mask a failed download or conversion.
rm -rf -- model.src
exit "$status"

View File

@ -9,10 +9,12 @@ use backend::{
LlamacppSplitMode,
};
use clap::Parser;
use hf_hub::api::tokio::ApiBuilder;
use hf_hub::{Repo, RepoType};
use std::path::Path;
use text_generation_router::{logging, server, usage_stats};
use thiserror::Error;
use tokenizers::{FromPretrainedParameters, Tokenizer};
use tokenizers::Tokenizer;
use tokio::process::Command;
use tokio::sync::oneshot::error::RecvError;
use tracing::{error, warn};
@ -200,37 +202,47 @@ async fn main() -> Result<(), RouterError> {
));
}
// TODO: check if we use the same cache of Server
// check if llamacpp is faster
let tokenizer = {
let token = std::env::var("HF_TOKEN")
.or_else(|_| std::env::var("HUGGING_FACE_HUB_TOKEN"))
.ok();
let params = FromPretrainedParameters {
revision: args.revision.clone(),
token,
..Default::default()
};
Tokenizer::from_pretrained(&args.model_id, Some(params))?
let api_builder = || {
let mut builder = ApiBuilder::new().with_progress(true);
if let Ok(cache_dir) = std::env::var("HUGGINGFACE_HUB_CACHE") {
builder = builder.with_cache_dir(cache_dir.into());
}
if let Ok(token) = std::env::var("HF_TOKEN") {
builder = builder.with_token(token.into());
}
builder
};
let api_repo = api_builder().build()?.repo(Repo::with_revision(
args.model_id.clone(),
RepoType::Model,
args.revision.clone(),
));
let tokenizer_path = api_repo.get("tokenizer.json").await?;
let tokenizer = Tokenizer::from_file(&tokenizer_path)?;
let model_gguf = if let Some(model_gguf) = args.model_gguf {
model_gguf
} else {
let make_gguf = std::env::var("MAKE_GGUF").map_err(|e| {
error!("No GGUF model given and environment variable MAKE_GGUF is missing.");
RouterError::VarError(e)
})?;
let model_gguf = format!("models/{}/model.gguf", args.model_id);
let model_gguf_path = Path::new(&model_gguf);
if !Path::new(&model_gguf).exists() {
if !model_gguf_path.exists() {
let tmp_gguf = "models/tmp.gguf";
let status = Command::new(make_gguf)
if let Some(parent) = Path::new(model_gguf_path).parent() {
std::fs::create_dir_all(parent)?;
}
let cache_path = tokenizer_path.parent().unwrap();
for sibling in api_repo.info().await?.siblings {
let _ = api_repo.get(&sibling.rfilename).await?;
}
let status = Command::new("convert_hf_to_gguf.py")
.arg("--outfile")
.arg(tmp_gguf)
.arg(&args.model_id)
.arg(&args.revision)
.arg(cache_path)
.spawn()?
.wait()
.await?;
@ -327,4 +339,6 @@ enum RouterError {
QuantizeError(String),
#[error("Command error: {0}")]
CommandError(i32),
#[error("HF hub error: {0}")]
HubError(#[from] hf_hub::api::tokio::ApiError),
}

View File

@ -1,7 +1,6 @@
use crate::llamacpp;
use std::ffi::CString;
use std::path::Path;
#[repr(u32)]
#[derive(Debug, Clone, Copy)]
@ -15,9 +14,6 @@ pub fn model(
ftype: QuantizeType,
n_threads: usize,
) -> Result<(), String> {
if !Path::new(input_path).exists() {
return Err(format!("Input file '{}' does not exist", input_path));
}
let c_input_path =
CString::new(input_path).map_err(|e| format!("Failed to convert input path: {}", e))?;