报错: USER_AGENT environment variable not set, consider setting it to identify your requests.-CSDN博客
C. 完整代码
# https://coreyclip.github.io/Ollama-Web-Summaries/
import os

# Set USER_AGENT before the langchain_community imports that follow, so the
# variable is already present when that library is loaded; otherwise it warns
# "USER_AGENT environment variable not set, consider setting it to identify
# your requests."
os.environ['USER_AGENT'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0'

import argparse
import os
import datetime
from langchain_community.llms import Ollama
from langchain_community.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
from dotenv import load_dotenv# set OLLAMA_MODEL env var or create a .env file with OLLAMA_MODEL set to the model of your choice
load_dotenv()

# Name of the Ollama model to use; taken from the OLLAMA_MODEL environment
# variable (or a .env entry loaded above), falling back to "qwen2:7b".
ollama_model = os.getenv("OLLAMA_MODEL", "qwen2:7b")


def _render_markdown(title, summary_text, url):
    """Return the markdown report (header block plus summary) as one string.

    The header carries the page title, the source URL, the current local
    timestamp and the whitespace-separated word count of ``summary_text``.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    word_count = len(summary_text.split())
    return (
        f"# {title}\n\n"
        f"**Source URL:** {url}\n\n"
        f"**Timestamp:** {timestamp}\n\n"
        f"**Word Count:** {word_count}\n\n"
        "---\n\n"
        f"{summary_text}"
    )


def save_to_markdown(title, content, url, filename):
    """Write the summary report to ``filename`` as UTF-8 markdown.

    Args:
        title: Heading to use for the report.
        content: Chain result mapping; its ``"output_text"`` entry is the
            summary text that gets written.
        url: Source URL recorded in the report header.
        filename: Path of the markdown file to create or overwrite.
    """
    with open(filename, "w", encoding="utf-8") as f:
        f.write(_render_markdown(title, content["output_text"], url))


def main():
    """Summarize the web page given on the command line with an Ollama model.

    The summary is saved to the file passed via ``-o/--output`` when present;
    otherwise the same markdown report is printed to stdout.
    """
    # Set up command-line arguments.
    parser = argparse.ArgumentParser(
        description="Summarize a webpage via a llm model available via ollama"
    )
    parser.add_argument(
        "website",
        type=str,
        help="The URL of the website to summarize.",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        help="Output markdown file to save the summary. If not provided, output will be printed to stdout.",
    )
    args = parser.parse_args()

    # Load the page into LangChain documents.
    loader = WebBaseLoader(args.website)
    docs = loader.load()

    # Run the "stuff" summarization chain against the selected model.
    llm = Ollama(model=ollama_model)
    chain = load_summarize_chain(llm, chain_type="stuff")
    result = chain.invoke(docs)

    # Use the page title from the first document's metadata when available.
    title = "Webpage Summary"  # default title if none is found
    if docs and docs[0].metadata and "title" in docs[0].metadata:
        title = docs[0].metadata["title"]

    if args.output:
        save_to_markdown(title, result, args.website, args.output)
    else:
        # Print only the summary text (not the whole result mapping), so the
        # stdout report matches what save_to_markdown writes to a file.
        print(_render_markdown(title, result["output_text"], args.website))


if __name__ == "__main__":
    main()
D. 参考文献
Summarization with LangChain. Stuff — Map_reduce — Refine | by Abonia Sojasingarayar | Medium
python-agents-tutorial/2_local_agent.py at main · run-llama/python-agents-tutorial
Summarizing Web Pages with Ollama – The Blog of Corey Kiyoshi Clippinger – Think pieces, tutorials, and other content about Data Science, Web Development, and the Big Wide World
Integrating LangGraph with Ollama for Advanced LLM Applications | by Aleksandr Lifanov | Medium