Merge branch 'habana-main' into 2.3.0

This commit is contained in:
yuanwu 2024-12-17 02:06:22 +00:00
commit 15de6c9195
4 changed files with 6 additions and 7 deletions

View File

@ -155,7 +155,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// We need to download it outside of the Tokio runtime // We need to download it outside of the Tokio runtime
let params = FromPretrainedParameters { let params = FromPretrainedParameters {
revision, revision,
auth_token, token: auth_token,
..Default::default() ..Default::default()
}; };
Tokenizer::from_pretrained(tokenizer_name.clone(), Some(params)).unwrap() Tokenizer::from_pretrained(tokenizer_name.clone(), Some(params)).unwrap()

View File

@ -22,7 +22,7 @@ To run benchmark use below command:
python run_generation --model_id MODEL_ID python run_generation --model_id MODEL_ID
``` ```
where `MODEL_ID` should be set to the same value as in the TGI server instance. where `MODEL_ID` should be set to the same value as in the TGI server instance.
> For gated models such as [Llama](https://huggingface.co/meta-llama) or [StarCoder](https://huggingface.co/bigcode/starcoder), you will have to set environment variable `HUGGING_FACE_HUB_TOKEN=<token>` with a valid Hugging Face Hub read token. > For gated models such as [Llama](https://huggingface.co/meta-llama) or [StarCoder](https://huggingface.co/bigcode/starcoder), you will have to set environment variable `HF_TOKEN=<token>` with a valid Hugging Face Hub read token.
All possible parameters are described in the below table: All possible parameters are described in the below table:
<div align="left"> <div align="left">

View File

@ -32,7 +32,7 @@ from text_generation.types import (
) )
DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", None) DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", None)
HF_TOKEN = os.getenv("HF_TOKEN", None) HUGGING_FACE_HUB_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")
DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", "/data") DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", "/data")
DOCKER_DEVICES = os.getenv("DOCKER_DEVICES") DOCKER_DEVICES = os.getenv("DOCKER_DEVICES")
@ -498,8 +498,8 @@ def launcher(event_loop):
if attention is not None: if attention is not None:
env["ATTENTION"] = attention env["ATTENTION"] = attention
if HF_TOKEN is not None: if HUGGING_FACE_HUB_TOKEN is not None:
env["HF_TOKEN"] = HF_TOKEN env["HF_TOKEN"] = HUGGING_FACE_HUB_TOKEN
volumes = [] volumes = []
if DOCKER_VOLUME: if DOCKER_VOLUME:

View File

@ -689,7 +689,6 @@ async fn completions(
.. ..
} = req; } = req;
let max_new_tokens = max_tokens.or(Some(100));
let stop = stop.unwrap_or_default(); let stop = stop.unwrap_or_default();
// enable greedy only when temperature is 0 // enable greedy only when temperature is 0
let (do_sample, temperature) = match temperature { let (do_sample, temperature) = match temperature {
@ -740,7 +739,7 @@ async fn completions(
top_p: req.top_p, top_p: req.top_p,
typical_p: None, typical_p: None,
do_sample, do_sample,
max_new_tokens, max_new_tokens: max_tokens,
return_full_text: None, return_full_text: None,
stop: stop.clone(), stop: stop.clone(),
truncate: None, truncate: None,