更改 依赖项目
This commit is contained in:
parent
66754e0016
commit
9374e45e1e
@ -1,5 +1,9 @@
|
|||||||
import os
|
import os
|
||||||
from concurrent import futures
|
from concurrent import futures
|
||||||
|
|
||||||
|
import langchain
|
||||||
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
|
|
||||||
import document_query_pb2
|
import document_query_pb2
|
||||||
import document_query_pb2_grpc
|
import document_query_pb2_grpc
|
||||||
import grpc
|
import grpc
|
||||||
@ -11,6 +15,10 @@ from langchain.schema.document import Document
|
|||||||
from langchain.embeddings import OpenAIEmbeddings
|
from langchain.embeddings import OpenAIEmbeddings
|
||||||
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
||||||
|
|
||||||
|
from langchain.cache import InMemoryCache
|
||||||
|
|
||||||
|
langchain.llm_cache = InMemoryCache()
|
||||||
|
|
||||||
|
|
||||||
class AIServer(document_query_pb2_grpc.DocumentQuery):
|
class AIServer(document_query_pb2_grpc.DocumentQuery):
|
||||||
def Query(self, request, context):
|
def Query(self, request, context):
|
||||||
@ -50,17 +58,27 @@ class AIServer(document_query_pb2_grpc.DocumentQuery):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
|
||||||
print(real_document)
|
# print(real_document)
|
||||||
|
|
||||||
|
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=0)
|
||||||
|
all_splits = text_splitter.split_documents(real_document)
|
||||||
|
|
||||||
|
print("real_document: ", all_splits)
|
||||||
|
|
||||||
|
# 文档长度
|
||||||
|
# print("文档长度: ", len(all_splits))
|
||||||
|
|
||||||
print("正在调用 LLM: " + question + "...")
|
print("正在调用 LLM: " + question + "...")
|
||||||
chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="map_reduce",
|
|
||||||
|
chain = load_qa_with_sources_chain(OpenAI(temperature=0, max_tokens=4097), chain_type="map_reduce",
|
||||||
return_intermediate_steps=False,
|
return_intermediate_steps=False,
|
||||||
verbose=False)
|
verbose=False)
|
||||||
output = chain({"input_documents": real_document, "question": question}, return_only_outputs=False)
|
output = chain({"input_documents": all_splits, "question": question}, return_only_outputs=False)
|
||||||
print("回复:" + output["output_text"])
|
print("回复:" + output["output_text"])
|
||||||
|
|
||||||
return document_query_pb2.QueryResponse(
|
return document_query_pb2.QueryResponse(
|
||||||
text=output["output_text"]
|
text=output["output_text"]
|
||||||
|
# text = "test"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -75,5 +93,3 @@ def serve():
|
|||||||
server.add_insecure_port(_ADDR)
|
server.add_insecure_port(_ADDR)
|
||||||
server.start()
|
server.start()
|
||||||
server.wait_for_termination()
|
server.wait_for_termination()
|
||||||
|
|
||||||
|
|
||||||
|
@ -32,4 +32,4 @@ def sync_documents():
|
|||||||
print(update_vector_id_response)
|
print(update_vector_id_response)
|
||||||
print("更新向量完成")
|
print("更新向量完成")
|
||||||
|
|
||||||
time.sleep(1 * 60)
|
time.sleep(1 * 5)
|
||||||
|
40
import_from_wordpress.py
Normal file
40
import_from_wordpress.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
# Import posts from a WordPress site into the document API.
#
# Every post returned by the WordPress REST API is converted from HTML to
# Markdown-style text with html2text and POSTed as one document.
import os

import html2text
import requests

wordpress_url = "https://ivampiresp.com"

api_url = wordpress_url + "/wp-json/wp/v2/posts"

leaf_api_url = "http://localhost:8080/api/documents"

# NOTE(review): this credential is committed to source control. It can be
# overridden through the LEAF_JWT_PAYLOAD environment variable; the literal is
# kept only as a backward-compatible default and should be removed/rotated.
jwt = os.environ.get(
    "LEAF_JWT_PAYLOAD",
    "eyJpc3MiOiJvYXV0aCIsImlhdCI6MTY5OTk2OTQyOCwiZXhwIjoxNzAwNDAxNDI4LCJuYmYiOjE2OTk5Njk0MjgsImp0aSI6IjJXZUJtQWdVZGtPRUQ2am0iLCJzdWIiOiIyIiwicHJ2IjoiMjNiZDVjODk0OWY2MDBhZGIzOWU3MDFjNDAwODcyZGI3YTU5NzZmNyIsInRlYW1faWQiOm51bGwsInVzZXIiOnsiaWQiOjIsInV1aWQiOiI0NmY0ODNkYi03Y2M3LTQwYmUtOTljZC04NTY4NThiYTg3YmUiLCJuYW1lIjoiMSIsImVtYWlsIjoiaW1AaXZhbXBpcmVzcC5jb20iLCJlbWFpbF92ZXJpZmllZF9hdCI6IjIwMjMtMTEtMDVUMTE6Mzc6NTcuMDAwMDAwWiIsInJlYWxfbmFtZV92ZXJpZmllZF9hdCI6bnVsbH0sImFwcCI6InRvZG8ifQ",
)

# Seconds to wait on any single HTTP request; without a timeout, requests
# blocks indefinitely when a server stops responding.
request_timeout = 30


def _fetch_all_posts(url, per_page=100):
    """Return every post from the WordPress REST collection at *url*.

    The collection is paginated, so pages are requested one after another
    until the page count reported in the ``X-WP-TotalPages`` response header
    is reached. When the header is absent only the first page is returned.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    posts = []
    page = 1
    while True:
        res = requests.get(
            url,
            params={"page": page, "per_page": per_page},
            timeout=request_timeout,
        )
        # Fail loudly here instead of indexing an error object as a post list.
        res.raise_for_status()
        posts.extend(res.json())
        total_pages = int(res.headers.get("X-WP-TotalPages", 1))
        if page >= total_pages:
            return posts
        page += 1


# Fetch all posts
res_json = _fetch_all_posts(api_url)

for post in res_json:
    title = post["title"]["rendered"]

    post_id = post["id"]
    url = post["link"]

    # A new converter is created for each post, as in the original script.
    text = html2text.HTML2Text().handle(post["content"]["rendered"])
    content = f"""
文章ID: {post_id}
链接: {url}
---
{text}
"""

    result = requests.post(
        leaf_api_url,
        json={
            "Title": title,
            "Content": content,
            "LibraryId": 9,
        },
        headers={
            # The bearer-token form is disabled; the payload is sent as-is:
            # "Authorization": f"Bearer {jwt}"
            "X-Jwt-Payload": jwt,
        },
        timeout=request_timeout,
    )

    print(result.json())
|
||||||
|
|
Loading…
Reference in New Issue
Block a user