mirror of
				https://github.com/huggingface/text-generation-inference.git
				synced 2025-10-20 12:25:23 +00:00 
			
		
		
		
	
		
			
	
	
		
			20 lines
		
	
	
		
			426 B
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			20 lines
		
	
	
		
			426 B
		
	
	
	
		
			Python
		
	
	
	
	
	
|  | import datasets | ||
|  | import json | ||
|  | 
 | ||
|  | 
 | ||
# Load the "ccdv/govreport-summarization" dataset through the `datasets` library.
dataset = datasets.load_dataset("ccdv/govreport-summarization")
# NOTE(review): not referenced anywhere else in the visible script.
max_new_tokens = 50

# Lazily pull the "report" field out of every row of the "test" split.
reports = (entry["report"] for entry in dataset["test"])

# One single-turn conversation per report: a lone "human" message asking for
# a summary, with the report text wrapped in triple backticks.
conversations = [
    {
        "conversations": [
            {"from": "human", "value": f"Summarize this report: ```{report}```"}
        ]
    }
    for report in reports
]

# Serialize the whole list to long.json as JSON indented by four spaces.
with open("long.json", "w") as out_file:
    json.dump(conversations, out_file, indent=4)