# langchain-chat-with-milvus/document_ai/search.py
# 2023-11-15 16:20:30 +08:00
#
# 80 lines
# 2.3 KiB
# Python

import json
import documents_pb2
from langchain import text_splitter
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms.openai import OpenAI
from langchain.schema.document import Document
from pymilvus import (
connections,
utility,
FieldSchema,
CollectionSchema,
DataType,
Collection,
)
import init
import doc_client
# Sample user question used by this script: the text of a PowerShell error
# printed when running `yarn`, followed by a request in Chinese asking what
# the problem is and how to solve it. The same string is embedded for the
# vector search and passed to the QA chain, so its contents are runtime data
# and must not be reformatted.
question = """
yarn : File C:\\Users\\ivamp\\AppData\\Roaming\\npm\\yarn.ps1 cannot be loaded because running scripts is disabled on this sy
stem. For more information, see about_Execution_Policies at https:/go.microsoft.com/fwlink/?LinkID=135170.
At line:1 char:1
+ yarn config set registry https://registry.npm.taobao.org/
+ ~~~~
+ CategoryInfo : SecurityError: (:) [], PSSecurityException
+ FullyQualifiedErrorId : UnauthorizedAccess
是什么问题,该怎么解决
"""
# Turn the question into an embedding vector for the similarity search.
# NOTE(review): the "(must reply in Chinese)" suffix is appended only to the
# text that gets embedded; the question later handed to the QA chain does not
# carry it — confirm this is intended.
vec = init.text_to_vector(question + " (必须使用中文回复)")

# Alternative kept from the original as a note: instead of embedding on every
# run, a precomputed vector could be read from "../question_vec.json" with
# json.load().

# Keyword arguments for the collection search: one query vector matched
# against the "vector" field with the L2 metric, at most 10 hits, filtered by
# the boolean expression below, returning the listed fields with each hit.
search_param = dict(
    data=[vec],
    anns_field="vector",
    param={"metric_type": "L2"},
    limit=10,
    expr="user_id == 2",
    output_fields=[
        "todo_id",
        "title",
        "source",
        "todo_description",
        "language",
        "text",
        "user_id",
    ],
)
res = init.collection.search(**search_param)
document_ids = []
real_document = []

# Walk the hits of the first result set and fetch each document's content
# from the document service, wrapping it as a LangChain Document.
for hit in res[0]:
    hit_id = hit.id
    print(f"正在获取 {hit_id} 的内容...")
    try:
        request = documents_pb2.GetDocumentByIdRequest(id=hit_id)
        fetched = doc_client.stub.GetDocumentById(request)
        # Page content is the title, a newline, then the body; the title also
        # serves as the "source" that the QA-with-sources chain cites.
        full_text = "\n".join((fetched.title, fetched.content))
        real_document.append(
            Document(page_content=full_text, metadata={"source": fetched.title})
        )
    except Exception as e:
        # Best effort: report the failure and carry on with the next hit.
        print(e)

print(real_document)
# Answer the question over the fetched documents with a map_reduce
# QA-with-sources chain, then print the final answer text.
print("正在调用 LLM...")
llm = OpenAI(temperature=0)
chain = load_qa_with_sources_chain(
    llm,
    chain_type="map_reduce",
    return_intermediate_steps=True,
    verbose=True,
)
chain_inputs = {"input_documents": real_document, "question": question}
output = chain(chain_inputs, return_only_outputs=False)
answer = output["output_text"]
print("回复:" + answer)