diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 0530d521..1a3c5c39 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -80,7 +80,7 @@ fn get_config(
     };
 
     let content = std::fs::read_to_string(filename)?;
-    let config: RawConfig = serde_json::from_str(&content).expect("?");
+    let config: RawConfig = serde_json::from_str(&content)?;
 
     let config: Config = config.into();
     Ok(config)
diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 98c6f419..6e941a4e 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -2112,10 +2112,6 @@ class FlashCausalLM(Model):
 
         if prefill and prefill_logprobs:
             # Get prefill logprobs with inplace softmax (avoid copying the `out` tensor (max_batch_prefill_tokens * vocab_size))
-            free_memory = get_free_memory(self.device, MEMORY_FRACTION)
-            logger.info(f"Free memory {free_memory / 1e9}GB")
-            logmemory = out.nelement() * out.element_size()
-            logger.info(f"Log memory {logmemory / 1e9}GB")
             torch.log_softmax(out, -1, out=out)
             prefill_logprobs_tensor = out
             prefill_logprobs = torch.gather(
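
Notes on the two hunks. The launcher change swaps `.expect("?")` for `?`, so a malformed `config.json` now propagates as a recoverable error instead of panicking the process. The Python change drops temporary debug logging around the in-place `log_softmax`; the comment it leaves behind explains the point of the in-place call: an out-of-place log-softmax would materialize a second tensor the size of `out` (`max_batch_prefill_tokens * vocab_size`). Below is a minimal, self-contained sketch of that memory effect; the shapes and the gathered token ids are illustrative stand-ins, not values from the server.

```python
import torch

# Stand-in for the prefill logits tensor `out`
# (max_batch_prefill_tokens x vocab_size in the server; sizes here are made up).
out = torch.randn(4096, 32000)

# Out-of-place variant: allocates a second 4096 x 32000 tensor, roughly
# doubling peak memory for this step.
#   logprobs = torch.log_softmax(out, -1)

# In-place variant (as in the diff): writes the log-probabilities back into
# the existing buffer, so no extra allocation is needed. This is safe only
# because the raw logits are not read again afterwards.
torch.log_softmax(out, -1, out=out)
prefill_logprobs_tensor = out

# Hypothetical index tensor standing in for the prefill token ids: gather
# one log-probability per row, mirroring the torch.gather call in the diff.
token_ids = torch.zeros(out.shape[0], 1, dtype=torch.long)
prefill_logprobs = torch.gather(prefill_logprobs_tensor, 1, token_ids).squeeze(1)
```

The design choice rests on the logits being dead after this point: once `prefill_logprobs` has been gathered, nothing downstream needs the pre-softmax values, so overwriting `out` trades nothing away for a tensor-sized reduction in peak memory.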