mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-22 07:22:07 +00:00
* Attempt at automatic max batch prefill.
* Taking into account number of shards.
* Adding more cards.
* Adding A100 + H100
* Adding a few more cards.
* Logprobs cost too much.
* h100 better name, and keep factor of 2
* Damn inflated sparse tflops.
* Typo in h100.
* Updated the flops calculation (checked with fvcore).
* chunking by default.
* Fix prefix caching for chat completion since we removed logprobs.
* More tests.
* Dropping all the prefill logprobs.
* Add a flag that enables users to get logprobs back.
* Repairing prompt token counting.
* Fixing a few tests.
* Remove some scaffolding.
* Attempting to reduce the issues (workarounds for now).
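The "automatic max batch prefill" items above amount to sizing the prefill token budget from the card's compute throughput, scaled by the number of shards, using dense (not the ~2x-inflated sparse) TFLOPS figures. A minimal sketch of that idea, assuming the common 2 * params FLOPs-per-token forward-pass approximation; the card names, numbers, and the helper below are illustrative assumptions, not the tables actually shipped in text-generation-inference:

# Hypothetical sketch: derive a prefill token budget from GPU throughput.
# Values here are illustrative; TGI's real card tables may differ.
DENSE_TFLOPS = {
    "a100": 312.0,  # dense BF16 TFLOPS (sparse figures are ~2x higher)
    "h100": 989.0,
}

def estimate_max_prefill_tokens(
    card: str,
    num_params_billion: float,
    num_shards: int,
    target_latency_s: float = 1.0,
) -> int:
    """Rough upper bound on prefill tokens per forward pass.

    Uses FLOPs per token ~= 2 * parameter count (forward only),
    with the work divided across tensor-parallel shards.
    """
    total_tflops = DENSE_TFLOPS[card] * num_shards
    flops_per_token = 2 * num_params_billion * 1e9
    return int(target_latency_s * total_tflops * 1e12 / flops_per_token)

# e.g. a 70B model sharded over 4 H100s
print(estimate_max_prefill_tokens("h100", 70, 4))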
23 lines · 706 B · Python
import os
import json

# Walk the JSON test fixtures and clear the recorded prefill tokens,
# since prefill logprobs are no longer returned by default.
for root, dirs, files in os.walk("."):
    for filename in files:
        if filename.endswith(".json"):
            with open(os.path.join(root, filename), "r") as f:
                data = json.load(f)

            print(os.path.join(root, filename))
            try:
                if filename.endswith("_load.json"):
                    # Load-test fixtures hold a list of responses.
                    for i in range(len(data)):
                        data[i]["details"]["prefill"] = []
                else:
                    data["details"]["prefill"] = []
            except Exception:
                pass

            with open(os.path.join(root, filename), "w") as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
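For context, a minimal before/after of the transformation the script applies to a single fixture. The fixture shape here is a guessed example reduced to the fields the script touches, not an actual file from the repository:

import json

# Hypothetical fixture, trimmed to the fields the script rewrites.
fixture = {
    "generated_text": "Hello",
    "details": {
        "prefill": [{"id": 1, "text": "Hi", "logprob": -0.5}],
    },
}
fixture["details"]["prefill"] = []
print(json.dumps(fixture, indent=2, ensure_ascii=False))
# "prefill" is now [], matching the new default of not
# returning prefill logprobs.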