langchain-chat-with-milvus/import_from_wordpress.py

41 lines
1.3 KiB
Python
Raw Normal View History

2023-11-15 14:24:03 +00:00
import html2text
import requests
# Import every post of a WordPress site into the local document API.
#
# For each post the rendered HTML body is converted to Markdown-style text,
# prefixed with the post ID and permalink, and POSTed as one document.

# NOTE(security): the token below is committed in plain text. It is kept
# unchanged so the script behaves as before, but it should be moved out of
# source control (environment variable / secret store) and rotated.
wordpress_url = "https://ivampiresp.com"
api_url = wordpress_url + "/wp-json/wp/v2/posts"
leaf_api_url = "http://localhost:8080/api/documents"
jwt = "eyJpc3MiOiJvYXV0aCIsImlhdCI6MTY5OTk2OTQyOCwiZXhwIjoxNzAwNDAxNDI4LCJuYmYiOjE2OTk5Njk0MjgsImp0aSI6IjJXZUJtQWdVZGtPRUQ2am0iLCJzdWIiOiIyIiwicHJ2IjoiMjNiZDVjODk0OWY2MDBhZGIzOWU3MDFjNDAwODcyZGI3YTU5NzZmNyIsInRlYW1faWQiOm51bGwsInVzZXIiOnsiaWQiOjIsInV1aWQiOiI0NmY0ODNkYi03Y2M3LTQwYmUtOTljZC04NTY4NThiYTg3YmUiLCJuYW1lIjoiMSIsImVtYWlsIjoiaW1AaXZhbXBpcmVzcC5jb20iLCJlbWFpbF92ZXJpZmllZF9hdCI6IjIwMjMtMTEtMDVUMTE6Mzc6NTcuMDAwMDAwWiIsInJlYWxfbmFtZV92ZXJpZmllZF9hdCI6bnVsbH0sImFwcCI6InRvZG8ifQ"

# requests never times out unless told to; without this a stalled server
# would hang the import indefinitely.
request_timeout = 30
# The posts endpoint is paginated (10 posts per page by default, at most 100),
# so ask for the largest page to keep the number of round trips low.
posts_per_page = 100

# Fetch all posts, one page at a time.
page = 1
while True:
    res = requests.get(
        api_url,
        params={"page": page, "per_page": posts_per_page},
        timeout=request_timeout,
    )
    # Fail loudly on an HTTP error instead of trying to iterate over an
    # error payload as if it were a list of posts.
    res.raise_for_status()
    res_json = res.json()

    for post in res_json:
        title = post["title"]["rendered"]
        post_id = post["id"]
        url = post["link"]
        # A fresh converter is created for every post, as in the original,
        # so nothing is shared between conversions.
        text = html2text.HTML2Text().handle(post["content"]["rendered"])

        # The literal is kept flush-left on purpose: any indentation inside
        # the triple quotes would become part of the stored document.
        content = f"""
文章ID: {post_id}
链接: {url}
---
{text}
"""

        # The token is sent in the X-Jwt-Payload header; the original left a
        # standard "Authorization: Bearer <jwt>" header disabled.
        result = requests.post(
            leaf_api_url,
            json={
                "Title": title,
                "Content": content,
                "LibraryId": 9,
            },
            headers={
                "X-Jwt-Payload": jwt,
            },
            timeout=request_timeout,
        )

        # Report the API's answer; a non-JSON body (e.g. a proxy error page)
        # must not abort the import of the remaining posts.
        try:
            print(result.json())
        except ValueError:
            print(result.status_code, result.text)

    # WordPress reports the page count in a response header; if it is
    # missing, stop after the current page.
    total_pages = int(res.headers.get("X-WP-TotalPages", page))
    if page >= total_pages:
        break
    page += 1