mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-25 20:12:07 +00:00
Remove make-gguf.sh
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
3849223340
commit
46feaf6296
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -4754,6 +4754,7 @@ dependencies = [
|
||||
"async-trait",
|
||||
"bindgen 0.71.1",
|
||||
"clap 4.5.30",
|
||||
"hf-hub",
|
||||
"num_cpus",
|
||||
"pkg-config",
|
||||
"text-generation-router",
|
||||
|
@ -79,9 +79,6 @@ COPY --from=builder /usr/lib/libllama.so /usr/lib/
|
||||
COPY --from=builder /usr/lib/libggml*.so /usr/lib/
|
||||
COPY --from=builder /app/target/release/text-generation-router-llamacpp /usr/bin/
|
||||
|
||||
COPY backends/llamacpp/make-gguf.sh make-gguf.sh
|
||||
ENV MAKE_GGUF=./make-gguf.sh
|
||||
|
||||
ENV HF_HUB_ENABLE_HF_TRANSFER=1
|
||||
|
||||
ENTRYPOINT ["text-generation-router-llamacpp"]
|
||||
|
@ -12,6 +12,7 @@ pkg-config = "0.3.31"
|
||||
[dependencies]
|
||||
async-trait = "0.1.85"
|
||||
clap = "4.5.27"
|
||||
hf-hub.workspace = true
|
||||
num_cpus = "1.16.0"
|
||||
text-generation-router = { path = "../../router" }
|
||||
thiserror = "2.0.11"
|
||||
|
@ -1,30 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
[ "$#" -ge 2 ] || {
|
||||
echo "Usage: $0 <GGUF> <MODEL_ID> [<REV>]" >&2
|
||||
return 1
|
||||
}
|
||||
|
||||
case "$1" in (*?.gguf) ;; (*)
|
||||
echo "Not a valid GGUF file: $1"
|
||||
return 1;
|
||||
esac
|
||||
|
||||
GGUF="$1"
|
||||
GGUF_DIR=$(dirname -- "$GGUF")
|
||||
MODEL_ID="$2"
|
||||
MODEL_DIR="model.src/$2"
|
||||
REV="${3-main}"
|
||||
|
||||
mkdir -p model.src "$GGUF_DIR"
|
||||
|
||||
huggingface-cli download \
|
||||
--revision "$REV" \
|
||||
--local-dir "$MODEL_DIR" \
|
||||
"$MODEL_ID" &&
|
||||
|
||||
convert_hf_to_gguf.py \
|
||||
--outfile "$GGUF" \
|
||||
"$MODEL_DIR"
|
||||
|
||||
rm -rf -- model.src
|
@ -9,10 +9,12 @@ use backend::{
|
||||
LlamacppSplitMode,
|
||||
};
|
||||
use clap::Parser;
|
||||
use hf_hub::api::tokio::ApiBuilder;
|
||||
use hf_hub::{Repo, RepoType};
|
||||
use std::path::Path;
|
||||
use text_generation_router::{logging, server, usage_stats};
|
||||
use thiserror::Error;
|
||||
use tokenizers::{FromPretrainedParameters, Tokenizer};
|
||||
use tokenizers::Tokenizer;
|
||||
use tokio::process::Command;
|
||||
use tokio::sync::oneshot::error::RecvError;
|
||||
use tracing::{error, warn};
|
||||
@ -200,37 +202,47 @@ async fn main() -> Result<(), RouterError> {
|
||||
));
|
||||
}
|
||||
|
||||
// TODO: check if we use the same cache of Server
|
||||
// check if llamacpp is faster
|
||||
let tokenizer = {
|
||||
let token = std::env::var("HF_TOKEN")
|
||||
.or_else(|_| std::env::var("HUGGING_FACE_HUB_TOKEN"))
|
||||
.ok();
|
||||
let params = FromPretrainedParameters {
|
||||
revision: args.revision.clone(),
|
||||
token,
|
||||
..Default::default()
|
||||
};
|
||||
Tokenizer::from_pretrained(&args.model_id, Some(params))?
|
||||
let api_builder = || {
|
||||
let mut builder = ApiBuilder::new().with_progress(true);
|
||||
|
||||
if let Ok(cache_dir) = std::env::var("HUGGINGFACE_HUB_CACHE") {
|
||||
builder = builder.with_cache_dir(cache_dir.into());
|
||||
}
|
||||
if let Ok(token) = std::env::var("HF_TOKEN") {
|
||||
builder = builder.with_token(token.into());
|
||||
}
|
||||
builder
|
||||
};
|
||||
let api_repo = api_builder().build()?.repo(Repo::with_revision(
|
||||
args.model_id.clone(),
|
||||
RepoType::Model,
|
||||
args.revision.clone(),
|
||||
));
|
||||
|
||||
let tokenizer_path = api_repo.get("tokenizer.json").await?;
|
||||
let tokenizer = Tokenizer::from_file(&tokenizer_path)?;
|
||||
|
||||
let model_gguf = if let Some(model_gguf) = args.model_gguf {
|
||||
model_gguf
|
||||
} else {
|
||||
let make_gguf = std::env::var("MAKE_GGUF").map_err(|e| {
|
||||
error!("No GGUF model given and environment variable MAKE_GGUF is missing.");
|
||||
RouterError::VarError(e)
|
||||
})?;
|
||||
|
||||
let model_gguf = format!("models/{}/model.gguf", args.model_id);
|
||||
let model_gguf_path = Path::new(&model_gguf);
|
||||
|
||||
if !Path::new(&model_gguf).exists() {
|
||||
if !model_gguf_path.exists() {
|
||||
let tmp_gguf = "models/tmp.gguf";
|
||||
|
||||
let status = Command::new(make_gguf)
|
||||
if let Some(parent) = Path::new(model_gguf_path).parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
let cache_path = tokenizer_path.parent().unwrap();
|
||||
|
||||
for sibling in api_repo.info().await?.siblings {
|
||||
let _ = api_repo.get(&sibling.rfilename).await?;
|
||||
}
|
||||
let status = Command::new("convert_hf_to_gguf.py")
|
||||
.arg("--outfile")
|
||||
.arg(tmp_gguf)
|
||||
.arg(&args.model_id)
|
||||
.arg(&args.revision)
|
||||
.arg(cache_path)
|
||||
.spawn()?
|
||||
.wait()
|
||||
.await?;
|
||||
@ -327,4 +339,6 @@ enum RouterError {
|
||||
QuantizeError(String),
|
||||
#[error("Command error: {0}")]
|
||||
CommandError(i32),
|
||||
#[error("HF hub error: {0}")]
|
||||
HubError(#[from] hf_hub::api::tokio::ApiError),
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
use crate::llamacpp;
|
||||
|
||||
use std::ffi::CString;
|
||||
use std::path::Path;
|
||||
|
||||
#[repr(u32)]
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
@ -15,9 +14,6 @@ pub fn model(
|
||||
ftype: QuantizeType,
|
||||
n_threads: usize,
|
||||
) -> Result<(), String> {
|
||||
if !Path::new(input_path).exists() {
|
||||
return Err(format!("Input file '{}' does not exist", input_path));
|
||||
}
|
||||
let c_input_path =
|
||||
CString::new(input_path).map_err(|e| format!("Failed to convert input path: {}", e))?;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user