# Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-21 14:52:20 +00:00) - 20 lines, 426 B, Python
|
import datasets
|
||
|
import json
|
||
|
|
||
|
|
||
|
# Not referenced anywhere in this script; kept so the module still exposes
# the name it always has.
max_new_tokens = 50


def build_conversations(items):
    """Wrap each item's report in a single-turn summarization conversation.

    Args:
        items: Iterable of mappings, each providing the report text under
            the "report" key.

    Returns:
        A list with one ``{"conversations": [message]}`` dict per item, in
        input order. ``message`` has ``"from": "human"`` and a ``"value"``
        holding the summarization prompt with the report fenced in triple
        backticks.

    Raises:
        KeyError: If an item has no "report" key.
    """
    return [
        {
            "conversations": [
                {
                    "from": "human",
                    "value": f"Summarize this report: ```{item['report']}```",
                }
            ]
        }
        for item in items
    ]


def main(output_path="long.json"):
    """Build prompts from the dataset's "test" split and dump them as JSON.

    Args:
        output_path: File to write; defaults to "long.json" in the current
            working directory.
    """
    dataset = datasets.load_dataset("ccdv/govreport-summarization")
    conversations = build_conversations(dataset["test"])

    # json.dump escapes non-ASCII by default, so the bytes written are the
    # same under any encoding; naming one just removes the locale dependency.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(conversations, f, indent=4)


# Only download and write when run as a script, so importing this module
# (e.g. to reuse build_conversations) has no side effects.
if __name__ == "__main__":
    main()
|