Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-11 12:24:53 +00:00
Remove some scaffolding.
parent a78b6fd1e8
commit ca8a115adc
@@ -80,7 +80,7 @@ fn get_config(
     };

     let content = std::fs::read_to_string(filename)?;
-    let config: RawConfig = serde_json::from_str(&content).expect("?");
+    let config: RawConfig = serde_json::from_str(&content)?;

     let config: Config = config.into();
     Ok(config)
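The Rust hunk swaps a panic for error propagation: `.expect("?")` aborts the launcher on malformed JSON, while `?` converts the `serde_json::Error` into the function's error type (via `From`) and hands it back to the caller. A rough Python analogue of the two behaviors, with hypothetical names, purely illustrative:

import json

def load_config_panicking(content: str) -> dict:
    # Analogue of `.expect("?")`: malformed JSON kills the process
    # with an opaque message instead of returning a recoverable error.
    try:
        return json.loads(content)
    except json.JSONDecodeError as err:
        raise SystemExit(f"?: {err}")

def load_config_propagating(content: str) -> dict:
    # Analogue of `?`: the decode error propagates to the caller,
    # which can report the bad config file and decide how to recover.
    return json.loads(content)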
@@ -2112,10 +2112,6 @@ class FlashCausalLM(Model):
 
         if prefill and prefill_logprobs:
             # Get prefill logprobs with inplace softmax (avoid copying the `out` tensor (max_batch_prefill_tokens * vocab_size))
-            free_memory = get_free_memory(self.device, MEMORY_FRACTION)
-            logger.info(f"Free memory {free_memory / 1e9}GB")
-            logmemory = out.nelement() * out.element_size()
-            logger.info(f"Log memory {logmemory / 1e9}GB")
             torch.log_softmax(out, -1, out=out)
             prefill_logprobs_tensor = out
             prefill_logprobs = torch.gather(
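The removed lines are the debug scaffolding the commit message refers to; the comment kept in the hunk is the point of the surviving code: computing `log_softmax` in place spares a second allocation the size of `out`. A minimal sketch of that trade-off; the sizes and the `token_ids` index tensor below are assumptions for illustration, not values from the TGI code:

import torch

# Illustrative sizes: 4096 prefill tokens x 32000 vocab in fp32 is ~0.5 GB,
# so a second copy of `out` is worth avoiding.
num_tokens, vocab_size = 4096, 32000
out = torch.randn(num_tokens, vocab_size)

# An out-of-place log_softmax would allocate another
# num_tokens * vocab_size tensor; writing the result back into `out` does not.
torch.log_softmax(out, -1, out=out)
prefill_logprobs_tensor = out

# Gather each token's logprob along the vocab dimension, mirroring the
# torch.gather(...) call the hunk truncates; `token_ids` is a stand-in.
token_ids = torch.randint(0, vocab_size, (num_tokens, 1))
prefill_logprobs = torch.gather(prefill_logprobs_tensor, 1, token_ids)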