更改 依赖项目

This commit is contained in:
iVamp 2023-11-15 22:24:03 +08:00
parent 66754e0016
commit 9374e45e1e
3 changed files with 62 additions and 6 deletions

View File

@ -1,5 +1,9 @@
import os import os
from concurrent import futures from concurrent import futures
import langchain
from langchain.text_splitter import RecursiveCharacterTextSplitter
import document_query_pb2 import document_query_pb2
import document_query_pb2_grpc import document_query_pb2_grpc
import grpc import grpc
@ -11,6 +15,10 @@ from langchain.schema.document import Document
from langchain.embeddings import OpenAIEmbeddings from langchain.embeddings import OpenAIEmbeddings
from langchain.chains.qa_with_sources import load_qa_with_sources_chain from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.cache import InMemoryCache
langchain.llm_cache = InMemoryCache()
class AIServer(document_query_pb2_grpc.DocumentQuery): class AIServer(document_query_pb2_grpc.DocumentQuery):
def Query(self, request, context): def Query(self, request, context):
@ -50,17 +58,27 @@ class AIServer(document_query_pb2_grpc.DocumentQuery):
except Exception as e: except Exception as e:
print(e) print(e)
print(real_document) # print(real_document)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=0)
all_splits = text_splitter.split_documents(real_document)
print("real_document: ", all_splits)
# 文档长度
# print("文档长度: ", len(all_splits))
print("正在调用 LLM: " + question + "...") print("正在调用 LLM: " + question + "...")
chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="map_reduce",
chain = load_qa_with_sources_chain(OpenAI(temperature=0, max_tokens=4097), chain_type="map_reduce",
return_intermediate_steps=False, return_intermediate_steps=False,
verbose=False) verbose=False)
output = chain({"input_documents": real_document, "question": question}, return_only_outputs=False) output = chain({"input_documents": all_splits, "question": question}, return_only_outputs=False)
print("回复:" + output["output_text"]) print("回复:" + output["output_text"])
return document_query_pb2.QueryResponse( return document_query_pb2.QueryResponse(
text=output["output_text"] text=output["output_text"]
# text = "test"
) )
@ -75,5 +93,3 @@ def serve():
server.add_insecure_port(_ADDR) server.add_insecure_port(_ADDR)
server.start() server.start()
server.wait_for_termination() server.wait_for_termination()

View File

@ -32,4 +32,4 @@ def sync_documents():
print(update_vector_id_response) print(update_vector_id_response)
print("更新向量完成") print("更新向量完成")
time.sleep(1 * 60) time.sleep(1 * 5)

40
import_from_wordpress.py Normal file
View File

@ -0,0 +1,40 @@
"""Import WordPress posts into the Leaf document API.

Fetches posts from the WordPress REST API, converts each post's rendered
HTML body to text with html2text, and uploads each one as a document via
a POST to the local Leaf API.
"""
import html2text
import requests

wordpress_url = "https://ivampiresp.com"
api_url = wordpress_url + "/wp-json/wp/v2/posts"
leaf_api_url = "http://localhost:8080/api/documents"

# SECURITY(review): a real JWT is hard-coded and committed to the repo.
# Rotate this token and load it from an environment variable or a config
# file that is excluded from version control.
jwt = "eyJpc3MiOiJvYXV0aCIsImlhdCI6MTY5OTk2OTQyOCwiZXhwIjoxNzAwNDAxNDI4LCJuYmYiOjE2OTk5Njk0MjgsImp0aSI6IjJXZUJtQWdVZGtPRUQ2am0iLCJzdWIiOiIyIiwicHJ2IjoiMjNiZDVjODk0OWY2MDBhZGIzOWU3MDFjNDAwODcyZGI3YTU5NzZmNyIsInRlYW1faWQiOm51bGwsInVzZXIiOnsiaWQiOjIsInV1aWQiOiI0NmY0ODNkYi03Y2M3LTQwYmUtOTljZC04NTY4NThiYTg3YmUiLCJuYW1lIjoiMSIsImVtYWlsIjoiaW1AaXZhbXBpcmVzcC5jb20iLCJlbWFpbF92ZXJpZmllZF9hdCI6IjIwMjMtMTEtMDVUMTE6Mzc6NTcuMDAwMDAwWiIsInJlYWxfbmFtZV92ZXJpZmllZF9hdCI6bnVsbH0sImFwcCI6InRvZG8ifQ"

# Fetch all posts.
# NOTE(review): this gets only the first page — the WP REST API paginates
# (10 posts per page by default); follow the `X-WP-TotalPages` response
# header or pass `?per_page=&page=` to import everything.
res = requests.get(api_url)
res.raise_for_status()  # fail fast on HTTP errors instead of at .json()

for post in res.json():
    title = post["title"]["rendered"]
    post_id = post["id"]
    url = post["link"]
    # Convert the rendered HTML body to plain text.
    text = html2text.HTML2Text().handle(post["content"]["rendered"])
    content = f"""
文章ID: {post_id}
链接: {url}
---
{text}
"""
    result = requests.post(leaf_api_url, json={
        "Title": title,
        "Content": content,
        "LibraryId": 9
    }, headers={
        # "Authorization": f"Bearer {jwt}"
        "X-Jwt-Payload": f"{jwt}"
    })
    print(result.json())