1. Setting up the environment
2. Preparing the dataset
3. Downloading the model
4. Registering for SwanLab
5. Fine-tuning
6. Visualizing the training process

1. Setting up the environment
This post uses the 2B model, so a single RTX 3090 is enough; for a larger model, provision GPUs according to your actual needs.
Install Python 3.8.
Install the libraries required by Qwen2-VL:
pip install modelscope==1.18.0
pip install transformers==4.46.2
pip install accelerate==1.1.1
pip install datasets==2.18.0
pip install peft==0.13.2
pip install qwen-vl-utils==0.0.8
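The training script in section 5 also imports swanlab for experiment tracking, so install it as well (pip install swanlab). As an optional sanity check of the environment, the following minimal sketch (not part of the original tutorial) confirms the key packages import and that a GPU is visible:

import torch
import transformers, datasets, peft, modelscope

print("CUDA available:", torch.cuda.is_available())  # should be True on the 3090
print("transformers:", transformers.__version__)
print("datasets:", datasets.__version__)
print("peft:", peft.__version__)
print("modelscope:", modelscope.__version__)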
2. Preparing the dataset
This post uses a subset of the coco_2014_caption dataset, converted into a JSON format (an example entry is shown after the conversion script below). The dataset can be downloaded with the following script, which also produces a CSV file:
from modelscope.msdatasets import MsDataset
import os
import pandas as pd

MAX_DATA_NUMBER = 500

if not os.path.exists("coco_2014_caption"):
    # Download the COCO 2014 image-caption dataset from ModelScope
    ds = MsDataset.load("modelscope/coco_2014_caption", subset_name="coco_2014_caption", split="train")
    print(len(ds))
    total = min(MAX_DATA_NUMBER, len(ds))
    os.makedirs("coco_2014_caption", exist_ok=True)
    image_paths = []
    captions = []
    for i in range(total):
        # Read the fields of each sample
        item = ds[i]
        image_id = item["image_id"]
        caption = item["caption"]
        image = item["image"]
        # Save the image and record its path
        image_path = os.path.abspath(f"coco_2014_caption/{image_id}.jpg")
        image.save(image_path)
        # Append the path and caption to the lists
        image_paths.append(image_path)
        captions.append(caption)
        # Print progress every 50 images
        if (i + 1) % 50 == 0:
            print(f"Processing {i+1}/{total} images ({(i+1)/total*100:.1f}%)")
    # Save the image paths and captions as a CSV file
    df = pd.DataFrame({
        "image_path": image_paths,
        "caption": captions,
    })
    df.to_csv("./coco-2024-dataset.csv", index=False)
    print(f"Data processing complete: {total} images processed")
else:
    print("The coco_2014_caption directory already exists; skipping data processing")

We need the data as JSON, so run the following script to generate it:
import pandas as pd
import json

# Load the CSV file
df = pd.read_csv("./coco-2024-dataset.csv")
conversations = []

# Build the conversation entries
for i in range(len(df)):
    conversations.append({
        "id": f"identity_{i+1}",
        "conversations": [
            {
                "from": "user",
                "value": f"COCO Yes: <|vision_start|>{df.iloc[i]['image_path']}<|vision_end|>"
            },
            {
                "from": "assistant",
                "value": df.iloc[i]["caption"]
            }
        ]
    })

# Save as JSON
with open("data_vl.json", "w", encoding="utf-8") as f:
    json.dump(conversations, f, ensure_ascii=False, indent=2)

That completes the dataset preparation. If you use a custom dataset, prepare it in the same format as above.
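For reference, each entry in the generated data_vl.json contains an id plus a two-turn conversation, with the image path wrapped in <|vision_start|> and <|vision_end|> tags in the user turn. A quick way to confirm the format (an optional check, not part of the original tutorial):

import json

with open("data_vl.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Expected shape of each entry:
# {"id": "identity_1",
#  "conversations": [
#    {"from": "user", "value": "COCO Yes: <|vision_start|>/abs/path/to/image.jpg<|vision_end|>"},
#    {"from": "assistant", "value": "a caption describing the image"}]}
print(json.dumps(data[0], ensure_ascii=False, indent=2))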
3. Downloading the model
This post uses the Qwen2-VL-2B model from the ModelScope community.
from modelscope import snapshot_download, AutoTokenizer
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq, Qwen2VLForConditionalGeneration, AutoProcessor
import torch

model_dir = snapshot_download("Qwen/Qwen2-VL-2B-Instruct", cache_dir="./", revision="master")
tokenizer = AutoTokenizer.from_pretrained("./Qwen/Qwen2-VL-2B-Instruct/", use_fast=False, trust_remote_code=True)
model = Qwen2VLForConditionalGeneration.from_pretrained("./Qwen/Qwen2-VL-2B-Instruct/", device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True)
model.enable_input_require_grads()
4. Registering for SwanLab
To be able to check the training metrics at any time during fine-tuning, register a SwanLab account, copy your API key, and paste it when prompted during the run.
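If you prefer to authenticate ahead of time instead of pasting the key when prompted, SwanLab also provides a login helper in Python; treat the exact call below as an assumption to verify against the SwanLab documentation, and note the key string is a placeholder:

import swanlab
swanlab.login(api_key="your-api-key")  # placeholder key; verify this helper in the SwanLab docs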
5. Fine-tuning
Before fine-tuning, make sure the current folder contains the files prepared above (the data_vl.json dataset and the downloaded Qwen/Qwen2-VL-2B-Instruct model directory). The code for train.py is as follows:
import torch
from datasets import Dataset
from modelscope import snapshot_download, AutoTokenizer
from swanlab.integration.transformers import SwanLabCallback
from qwen_vl_utils import process_vision_info
from peft import LoraConfig, TaskType, get_peft_model, PeftModel
from transformers import (
    TrainingArguments,
    Trainer,
    DataCollatorForSeq2Seq,
    Qwen2VLForConditionalGeneration,
    AutoProcessor,
)
import swanlab
import json


def process_func(example):
    """Preprocess a single dataset sample."""
    MAX_LENGTH = 8192
    input_ids, attention_mask, labels = [], [], []
    conversation = example["conversations"]
    input_content = conversation[0]["value"]
    output_content = conversation[1]["value"]
    file_path = input_content.split("<|vision_start|>")[1].split("<|vision_end|>")[0]  # extract the image path
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": f"{file_path}",
                    "resized_height": 280,
                    "resized_width": 280,
                },
                {"type": "text", "text": "COCO Yes:"},
            ],
        }
    ]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)  # build the prompt text
    image_inputs, video_inputs = process_vision_info(messages)  # preprocess the image inputs
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = {key: value.tolist() for key, value in inputs.items()}  # tensor -> list, to make concatenation easier
    instruction = inputs
    response = tokenizer(f"{output_content}", add_special_tokens=False)
    input_ids = instruction["input_ids"][0] + response["input_ids"] + [tokenizer.pad_token_id]
    attention_mask = instruction["attention_mask"][0] + response["attention_mask"] + [1]
    labels = [-100] * len(instruction["input_ids"][0]) + response["input_ids"] + [tokenizer.pad_token_id]
    if len(input_ids) > MAX_LENGTH:  # truncate overly long sequences
        input_ids = input_ids[:MAX_LENGTH]
        attention_mask = attention_mask[:MAX_LENGTH]
        labels = labels[:MAX_LENGTH]

    input_ids = torch.tensor(input_ids)
    attention_mask = torch.tensor(attention_mask)
    labels = torch.tensor(labels)
    inputs["pixel_values"] = torch.tensor(inputs["pixel_values"])
    inputs["image_grid_thw"] = torch.tensor(inputs["image_grid_thw"]).squeeze(0)  # (1, h, w) -> (h, w)
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
        "pixel_values": inputs["pixel_values"],
        "image_grid_thw": inputs["image_grid_thw"],
    }


def predict(messages, model):
    # Prepare the inputs for inference
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to("cuda")

    # Generate the output
    generated_ids = model.generate(**inputs, max_new_tokens=128)
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return output_text[0]
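# Added note (not in the original script): in process_func above, the prompt portion of
# the sequence is labeled with -100 so the loss ignores it and is computed only on the
# caption tokens; pixel_values and image_grid_thw are returned alongside input_ids so the
# vision encoder receives the image during training.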
# Download the Qwen2-VL model from ModelScope to a local directory
model_dir = snapshot_download("Qwen/Qwen2-VL-2B-Instruct", cache_dir="./", revision="master")

# Load the model weights with Transformers
tokenizer = AutoTokenizer.from_pretrained("./Qwen/Qwen2-VL-2B-Instruct/", use_fast=False, trust_remote_code=True)
processor = AutoProcessor.from_pretrained("./Qwen/Qwen2-VL-2B-Instruct")

model = Qwen2VLForConditionalGeneration.from_pretrained("./Qwen/Qwen2-VL-2B-Instruct/", device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True)
model.enable_input_require_grads()  # required when gradient checkpointing is enabled
# Process the dataset: read the JSON file, split it into train and test sets,
# and save them as data_vl_train.json and data_vl_test.json
train_json_path = "data_vl.json"
with open(train_json_path, "r") as f:
    data = json.load(f)
    train_data = data[:-4]
    test_data = data[-4:]

with open("data_vl_train.json", "w") as f:
    json.dump(train_data, f)

with open("data_vl_test.json", "w") as f:
    json.dump(test_data, f)

train_ds = Dataset.from_json("data_vl_train.json")
train_dataset = train_ds.map(process_func)
# Configure LoRA
config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=False,  # training mode
    r=64,  # LoRA rank
    lora_alpha=16,  # LoRA alpha; see the LoRA paper for its exact role
    lora_dropout=0.05,  # dropout ratio
    bias="none",
)

# Wrap the base model with LoRA adapters
peft_model = get_peft_model(model, config)
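# Optional addition (not in the original script): report how many parameters LoRA trains.
# print_trainable_parameters() is a standard PEFT method on the wrapped model.
peft_model.print_trainable_parameters()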
# Configure the training arguments
args = TrainingArguments(
    output_dir="./output/Qwen2-VL-2B",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    logging_steps=10,
    logging_first_step=5,
    num_train_epochs=2,
    save_steps=100,
    learning_rate=1e-4,
    save_on_each_node=True,
    gradient_checkpointing=True,
    report_to="none",
)
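# Added note: with per_device_train_batch_size=4 and gradient_accumulation_steps=4 on a
# single GPU, the effective batch size is 4 * 4 = 16 samples per optimizer step.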
# Set up the SwanLab callback
swanlab_callback = SwanLabCallback(
    project="Qwen2-VL-finetune",
    experiment_name="qwen2-vl-coco2014",
    config={
        "model": "https://modelscope.cn/models/Qwen/Qwen2-VL-2B-Instruct",
        "dataset": "https://modelscope.cn/datasets/modelscope/coco_2014_caption/quickstart",
        "github": "https://github.com/datawhalechina/self-llm",
        "prompt": "COCO Yes: ",
        "train_data_number": len(train_data),
        "lora_rank": 64,
        "lora_alpha": 16,
        "lora_dropout": 0.1,
    },
)
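# Added note: the "lora_dropout": 0.1 entry above is only experiment metadata logged to
# SwanLab; the LoraConfig actually used for training sets lora_dropout=0.05.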
# Configure the Trainer
trainer = Trainer(
    model=peft_model,
    args=args,
    train_dataset=train_dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
    callbacks=[swanlab_callback],
)
# Start training
trainer.train()
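# Added note: with the default 500 samples (496 used for training), a per-device batch of 4,
# and 4 gradient-accumulation steps, each epoch is about 496 / 16 = 31 optimizer steps, so
# 2 epochs end at step 62, which is why checkpoint-62 is loaded below. Adjust the checkpoint
# path if your data size or training settings differ.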
# Configure the LoRA parameters for evaluation
val_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=True,  # inference mode
    r=64,  # LoRA rank
    lora_alpha=16,  # LoRA alpha; see the LoRA paper for its exact role
    lora_dropout=0.05,  # dropout ratio
    bias="none",
)
# Load the fine-tuned model for evaluation
val_peft_model = PeftModel.from_pretrained(model, model_id="./output/Qwen2-VL-2B/checkpoint-62", config=val_config)
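# Optional addition (not in the original script): for deployment you can merge the LoRA
# weights into the base model and save a standalone copy; merge_and_unload() is a standard
# PEFT method, and the output path below is only an example.
# merged_model = val_peft_model.merge_and_unload()
# merged_model.save_pretrained("./output/Qwen2-VL-2B-merged")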
# Read the test data
with open("data_vl_test.json", "r") as f:
    test_dataset = json.load(f)

test_image_list = []
for item in test_dataset:
    input_image_prompt = item["conversations"][0]["value"]
    # Strip the surrounding <|vision_start|> and <|vision_end|> tags
    origin_image_path = input_image_prompt.split("<|vision_start|>")[1].split("<|vision_end|>")[0]

    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": origin_image_path},
            {"type": "text", "text": "COCO Yes:"},
        ],
    }]

    response = predict(messages, val_peft_model)
    messages.append({"role": "assistant", "content": f"{response}"})
    print(messages[-1])

    test_image_list.append(swanlab.Image(origin_image_path, caption=response))

swanlab.log({"Prediction": test_image_list})
swanlab.finish()

6. Visualizing the training process
Once training is running, the loss curve and the test-image predictions logged above can be viewed in the SwanLab project (Qwen2-VL-finetune).