"""Answer a natural-language question with an LLM, grounded in document chunks
retrieved from a Milvus vector store.

Pipeline:
    1. Embed the question via ``init.text_to_vector``.
    2. Vector-search the Milvus collection for the user's nearest chunks.
    3. Fetch each chunk's full content over gRPC from the document service.
    4. Run a ``map_reduce`` QA-with-sources chain over the retrieved documents.
"""
# stdlib
import json  # retained: may be used by other entry points of this module

# third-party
from langchain import text_splitter
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms.openai import OpenAI
from langchain.schema.document import Document
from pymilvus import (
    connections,
    utility,
    FieldSchema,
    CollectionSchema,
    DataType,
    Collection,
)

# project-local
import proto.documents_pb2
import init
import doc_client

# Search tuning knobs (previously hard-coded inline in the search call).
USER_ID = 2        # restrict the search to this user's chunks
TOP_K = 10         # number of nearest chunks to retrieve
METRIC_TYPE = "L2" # distance metric — presumably matches the index; confirm

QUESTION = """
为什么我会在 WHMCS 下开发摸不着头脑
"""


def search_chunks(vector, user_id=USER_ID, limit=TOP_K):
    """Vector-search the Milvus collection for a user's nearest chunks.

    Args:
        vector: The question embedding produced by ``init.text_to_vector``.
        user_id: Only chunks owned by this user are matched.
        limit: Maximum number of hits to return.

    Returns:
        The first (and only) hit list from the search; each hit's ``.id``
        is a chunk id resolvable via the document service.
    """
    results = init.collection.search(
        data=[vector],
        anns_field="vector",
        param={"metric_type": METRIC_TYPE},
        limit=limit,
        expr=f"user_id == {user_id}",
        output_fields=["todo_id", "title", "source", "todo_description",
                       "language", "text", "user_id"],
    )
    return results[0]


def fetch_documents(hits):
    """Fetch each hit's full chunk content over gRPC as LangChain Documents.

    Best-effort: a chunk that fails to fetch is reported and skipped rather
    than aborting the whole run.
    """
    documents = []
    for hit in hits:
        print("正在获取分块 " + str(hit.id) + " 的内容...")
        try:
            chunk = doc_client.stub.GetDocumentChunk(
                proto.documents_pb2.GetDocumentChunkByIdRequest(id=hit.id)
            )
            documents.append(
                Document(page_content=chunk.content,
                         metadata={"source": "chunked content"})
            )
        except Exception as e:  # best-effort: skip unfetchable chunks
            print(e)
    return documents


def main():
    """Run the full retrieve-then-answer pipeline for ``QUESTION``."""
    vec = init.text_to_vector(QUESTION)
    hits = search_chunks(vec)
    real_document = fetch_documents(hits)
    print(real_document)

    print("正在调用 LLM...")
    chain = load_qa_with_sources_chain(
        OpenAI(temperature=0),
        chain_type="map_reduce",
        return_intermediate_steps=True,
        verbose=True,
    )
    # Prefix forces the model to reply in Chinese (runtime prompt — unchanged).
    question = "必须使用中文回复:" + QUESTION
    output = chain(
        {"input_documents": real_document, "question": question},
        return_only_outputs=False,
    )
    print("回复:" + output["output_text"])


if __name__ == "__main__":
    main()