mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
0.98
This commit is contained in:
parent
406b094002
commit
2934543a59
@@ -743,9 +743,9 @@ class FlashCausalLM(Model):

 total_gpu_memory = torch.cuda.get_device_properties(self.device).total_memory

-# 0.985 to add some wiggle room
+# 0.98 to add some wiggle room
 num_blocks = (
-int((total_gpu_memory * 0.985 - peak_memory) // total_cache_size)
+int((total_gpu_memory * 0.98 - peak_memory) // total_cache_size)
 # Add batch.blocks as we allocated it above, so it is included in the peak memory.
 + batch.blocks
 )
|
Loading…
Reference in New Issue
Block a user