网站建设合同管辖地,百度竞价包年推广公司,公司网络搭建,淘宝做网站的都是模板
本文介绍了Llama2模型集成LangChain框架的具体实现，这样可更方便地基于Llama2开发文档检索、问答机器人和智能体应用等。
1.调用Llama2类 针对LangChain[1]框架封装的Llama2 LLM类见examples/llama2_for_langchain.py，调用代码如下所示：…
本文介绍了Llama2模型集成LangChain框架的具体实现，这样可更方便地基于Llama2开发文档检索、问答机器人和智能体应用等。
1.调用Llama2类 针对LangChain[1]框架封装的Llama2 LLM类见examples/llama2_for_langchain.py，调用代码如下所示：
from llama2_for_langchain import Llama2
# 这里以调用4bit量化压缩的Llama2-Chinese参数FlagAlpha/Llama2-Chinese-13b-Chat-4bit为例
llm = Llama2(model_name_or_path='FlagAlpha/Llama2-Chinese-13b-Chat-4bit', bit4=True)
while True:
    human_input = input("Human: ")
    response = llm(human_input)
    print(f"Llama2: {response}")

2.Llama2 LLM类具体实现 主要是def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str函数实现。LangChain八股文也不难，实现如下所示：
from langchain.llms.base import LLM
from typing import Dict, List, Any, Optional
import torch,sys,os
from transformers import AutoTokenizer


class Llama2(LLM):  # LLM是一个抽象类，需要实现_call方法
    max_token: int = 2048  # 最大token数
    temperature: float = 0.1  # 生成温度
    top_p: float = 0.95  # 生成概率
    tokenizer: Any  # 分词器
    model: Any  # 模型

    def __init__(self, model_name_or_path, bit4=True):
        super().__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=False)
        self.tokenizer.pad_token = self.tokenizer.eos_token
        if bit4 == False:  # 32bit
            from transformers import AutoModelForCausalLM
            self.model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map='auto', torch_dtype=torch.float16, load_in_8bit=True)
            self.model.eval()
        else:  # 4bit
            from auto_gptq import AutoGPTQForCausalLM
            self.model = AutoGPTQForCausalLM.from_quantized(model_name_or_path, low_cpu_mem_usage=True, device="cuda:0", use_triton=False, inject_fused_attention=False, inject_fused_mlp=False)
        if torch.__version__ >= "2" and sys.platform != "win32":
            self.model = torch.compile(self.model)

    @property  # property装饰器将方法转换为属性
    def _llm_type(self) -> str:
        return "Llama2"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        print('prompt:', prompt)
        input_ids = self.tokenizer(prompt, return_tensors="pt", add_special_tokens=False).input_ids.to('cuda')
        generate_input = {
            "input_ids": input_ids,
            "max_new_tokens": 1024,
            "do_sample": True,
            "top_k": 50,
            "top_p": self.top_p,
            "temperature": self.temperature,
            "repetition_penalty": 1.2,
            "eos_token_id": self.tokenizer.eos_token_id,
            "bos_token_id": self.tokenizer.bos_token_id,
            "pad_token_id": self.tokenizer.pad_token_id,
        }
        generate_ids = self.model.generate(**generate_input)
        generate_ids = [item[len(input_ids[0]):-1] for item in generate_ids]
        result_message = self.tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        return result_message  # 返回生成的文本

参考文献
[1] https://github.com/FlagAlpha/Llama2-Chinese/blob/main/examples/llama2_for_langchain.py
[2] https://github.com/langchain-ai/langchain