Mirror of https://github.com/huggingface/text-generation-inference.git
Remove make-gguf.sh

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

Commit 46feaf6296 (parent 3849223340)
Cargo.lock (generated)
@@ -4754,6 +4754,7 @@ dependencies = [
  "async-trait",
  "bindgen 0.71.1",
  "clap 4.5.30",
+ "hf-hub",
  "num_cpus",
  "pkg-config",
  "text-generation-router",
Dockerfile (llama.cpp backend image)

@@ -79,9 +79,6 @@ COPY --from=builder /usr/lib/libllama.so /usr/lib/
 COPY --from=builder /usr/lib/libggml*.so /usr/lib/
 COPY --from=builder /app/target/release/text-generation-router-llamacpp /usr/bin/
 
-COPY backends/llamacpp/make-gguf.sh make-gguf.sh
-ENV MAKE_GGUF=./make-gguf.sh
-
 ENV HF_HUB_ENABLE_HF_TRANSFER=1
 
 ENTRYPOINT ["text-generation-router-llamacpp"]
backends/llamacpp/Cargo.toml

@@ -12,6 +12,7 @@ pkg-config = "0.3.31"
 [dependencies]
 async-trait = "0.1.85"
 clap = "4.5.27"
+hf-hub.workspace = true
 num_cpus = "1.16.0"
 text-generation-router = { path = "../../router" }
 thiserror = "2.0.11"
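Note: hf-hub.workspace = true pulls in the async Hugging Face Hub client that the router now uses to fetch files directly. A minimal standalone sketch of that API (not the router's code), assuming the hf-hub crate with its tokio feature plus tokio, and a placeholder model id:

use hf_hub::api::tokio::ApiBuilder;
use hf_hub::{Repo, RepoType};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Build a Hub client; a token or custom cache dir could be added here.
    let api = ApiBuilder::new().with_progress(true).build()?;
    // Point at a model repo pinned to a revision ("gpt2"/"main" are placeholders).
    let repo = api.repo(Repo::with_revision(
        "gpt2".to_string(),
        RepoType::Model,
        "main".to_string(),
    ));
    // Download tokenizer.json (or reuse the cached copy) and get its local path.
    let path = repo.get("tokenizer.json").await?;
    println!("tokenizer at {}", path.display());
    Ok(())
}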
backends/llamacpp/make-gguf.sh (deleted)

@@ -1,30 +0,0 @@
-#!/bin/sh
-
-[ "$#" -ge 2 ] || {
-    echo "Usage: $0 <GGUF> <MODEL_ID> [<REV>]" >&2
-    return 1
-}
-
-case "$1" in (*?.gguf) ;; (*)
-    echo "Not a valid GGUF file: $1"
-    return 1;
-esac
-
-GGUF="$1"
-GGUF_DIR=$(dirname -- "$GGUF")
-MODEL_ID="$2"
-MODEL_DIR="model.src/$2"
-REV="${3-main}"
-
-mkdir -p model.src "$GGUF_DIR"
-
-huggingface-cli download \
-    --revision "$REV" \
-    --local-dir "$MODEL_DIR" \
-    "$MODEL_ID" &&
-
-convert_hf_to_gguf.py \
-    --outfile "$GGUF" \
-    "$MODEL_DIR"
-
-rm -rf -- model.src
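Note: the deleted script downloaded a snapshot with huggingface-cli and converted it with convert_hf_to_gguf.py. The same conversion step now runs from the router itself, as the src/main.rs hunks below show. A rough sketch of that invocation with tokio, assuming convert_hf_to_gguf.py is on PATH; the function and parameter names are illustrative only:

use tokio::process::Command;

// Run the llama.cpp conversion script on an already-downloaded model directory
// and report whether it exited successfully.
async fn convert_to_gguf(outfile: &str, model_dir: &str) -> std::io::Result<bool> {
    let status = Command::new("convert_hf_to_gguf.py")
        .arg("--outfile")
        .arg(outfile)
        .arg(model_dir)
        .spawn()?
        .wait()
        .await?;
    Ok(status.success())
}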
backends/llamacpp/src/main.rs

@@ -9,10 +9,12 @@ use backend::{
     LlamacppSplitMode,
 };
 use clap::Parser;
+use hf_hub::api::tokio::ApiBuilder;
+use hf_hub::{Repo, RepoType};
 use std::path::Path;
 use text_generation_router::{logging, server, usage_stats};
 use thiserror::Error;
-use tokenizers::{FromPretrainedParameters, Tokenizer};
+use tokenizers::Tokenizer;
 use tokio::process::Command;
 use tokio::sync::oneshot::error::RecvError;
 use tracing::{error, warn};
@@ -200,37 +202,47 @@ async fn main() -> Result<(), RouterError> {
         ));
     }
 
-    // TODO: check if we use the same cache of Server
-    // check if llamacpp is faster
-    let tokenizer = {
-        let token = std::env::var("HF_TOKEN")
-            .or_else(|_| std::env::var("HUGGING_FACE_HUB_TOKEN"))
-            .ok();
-        let params = FromPretrainedParameters {
-            revision: args.revision.clone(),
-            token,
-            ..Default::default()
-        };
-        Tokenizer::from_pretrained(&args.model_id, Some(params))?
+    let api_builder = || {
+        let mut builder = ApiBuilder::new().with_progress(true);
+        if let Ok(cache_dir) = std::env::var("HUGGINGFACE_HUB_CACHE") {
+            builder = builder.with_cache_dir(cache_dir.into());
+        }
+        if let Ok(token) = std::env::var("HF_TOKEN") {
+            builder = builder.with_token(token.into());
+        }
+        builder
     };
+    let api_repo = api_builder().build()?.repo(Repo::with_revision(
+        args.model_id.clone(),
+        RepoType::Model,
+        args.revision.clone(),
+    ));
+
+    let tokenizer_path = api_repo.get("tokenizer.json").await?;
+    let tokenizer = Tokenizer::from_file(&tokenizer_path)?;
 
     let model_gguf = if let Some(model_gguf) = args.model_gguf {
         model_gguf
     } else {
-        let make_gguf = std::env::var("MAKE_GGUF").map_err(|e| {
-            error!("No GGUF model given and environment variable MAKE_GGUF is missing.");
-            RouterError::VarError(e)
-        })?;
 
         let model_gguf = format!("models/{}/model.gguf", args.model_id);
+        let model_gguf_path = Path::new(&model_gguf);
 
-        if !Path::new(&model_gguf).exists() {
+        if !model_gguf_path.exists() {
             let tmp_gguf = "models/tmp.gguf";
 
-            let status = Command::new(make_gguf)
+            if let Some(parent) = Path::new(model_gguf_path).parent() {
+                std::fs::create_dir_all(parent)?;
+            }
+            let cache_path = tokenizer_path.parent().unwrap();
+
+            for sibling in api_repo.info().await?.siblings {
+                let _ = api_repo.get(&sibling.rfilename).await?;
+            }
+            let status = Command::new("convert_hf_to_gguf.py")
+                .arg("--outfile")
                 .arg(tmp_gguf)
-                .arg(&args.model_id)
-                .arg(&args.revision)
+                .arg(cache_path)
                 .spawn()?
                 .wait()
                 .await?;
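Note: with MAKE_GGUF gone, the router materializes the model itself: it lists the repo's files through api_repo.info() and fetches each sibling into the Hub cache before running the converter against the cached snapshot. A self-contained sketch of that download loop, assuming an already-built ApiRepo handle (the helper name is illustrative):

use hf_hub::api::tokio::{ApiError, ApiRepo};

// Fetch every file listed for the repo so a local conversion step can read a
// complete snapshot; files already present in the cache are reused.
async fn fetch_all_files(api_repo: &ApiRepo) -> Result<(), ApiError> {
    for sibling in api_repo.info().await?.siblings {
        let _local_path = api_repo.get(&sibling.rfilename).await?;
    }
    Ok(())
}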
@@ -327,4 +339,6 @@ enum RouterError {
     QuantizeError(String),
     #[error("Command error: {0}")]
     CommandError(i32),
+    #[error("HF hub error: {0}")]
+    HubError(#[from] hf_hub::api::tokio::ApiError),
 }
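Note: the new HubError variant uses thiserror's #[from], so an hf_hub::api::tokio::ApiError returned by the download calls is converted into RouterError by the ? operator, with no manual map_err. A minimal sketch of the pattern with a stand-in error type and a hypothetical helper:

use std::path::PathBuf;
use thiserror::Error;

#[derive(Debug, Error)]
enum DemoError {
    // #[from] generates From<ApiError>, which is what `?` relies on.
    #[error("HF hub error: {0}")]
    HubError(#[from] hf_hub::api::tokio::ApiError),
}

// The ApiError from get() converts into DemoError automatically.
async fn fetch_config(repo: &hf_hub::api::tokio::ApiRepo) -> Result<PathBuf, DemoError> {
    Ok(repo.get("config.json").await?)
}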
backends/llamacpp/src/quantize.rs

@@ -1,7 +1,6 @@
 use crate::llamacpp;
 
 use std::ffi::CString;
-use std::path::Path;
 
 #[repr(u32)]
 #[derive(Debug, Clone, Copy)]
@@ -15,9 +14,6 @@ pub fn model(
     ftype: QuantizeType,
     n_threads: usize,
 ) -> Result<(), String> {
-    if !Path::new(input_path).exists() {
-        return Err(format!("Input file '{}' does not exist", input_path));
-    }
     let c_input_path =
         CString::new(input_path).map_err(|e| format!("Failed to convert input path: {}", e))?;
 
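Note: quantize::model no longer pre-checks that input_path exists before calling into llama.cpp; the only validation left at this point is the CString conversion. A small sketch of that remaining step (the helper name is illustrative):

use std::ffi::CString;

// Convert a Rust path string for FFI; this only fails if the string
// contains an interior NUL byte.
fn to_c_path(input_path: &str) -> Result<CString, String> {
    CString::new(input_path).map_err(|e| format!("Failed to convert input path: {}", e))
}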