Fine-tuning GPT-2 with an adapter
1. Install dependencies:
pip install -U adapter-transformers
pip install datasets
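Note that adapter-transformers is a drop-in fork of Hugging Face transformers, so it installs in place of (not alongside) the stock package. A quick way to confirm the right package is active is to import one of the adapter-specific classes; this check is an addition of mine, not part of the original steps:

# AdapterTrainer exists only in the adapter-transformers fork, so this
# import raises ImportError if the stock transformers package is installed.
from transformers import AdapterTrainer
import transformers
print(transformers.__version__)  # version reported by the installed fork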
2. Training code:
from datasets import load_dataset
from transformers import AutoModelForCausalLM
from transformers import GPT2Tokenizer
from transformers import AdapterTrainer, TrainingArguments

dataset = load_dataset("poem_sentiment")
print(dataset)

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# The GPT-2 tokenizer does not have a padding token. In order to process the data
# in batches we set one here.
tokenizer.pad_token = tokenizer.eos_token

def encode_batch(batch):
    """Encodes a batch of input data using the model tokenizer."""
    encoding = tokenizer(batch["verse_text"])
    # For language modeling the labels need to be the input_ids; they are
    # created from the input_ids in group_texts() below.
    return encoding

column_names = dataset["train"].column_names
dataset = dataset.map(encode_batch, remove_columns=column_names, batched=True)

block_size = 50
# Main data processing function that concatenates all texts from the dataset
# and generates chunks of block_size.
def group_texts(examples):
    # Concatenate all texts.
    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
    total_length = len(concatenated_examples[list(examples.keys())[0]])
    # We drop the small remainder; we could add padding instead of dropping
    # if the model supported it. Customize this part to your needs.
    total_length = (total_length // block_size) * block_size
    # Split into chunks of block_size.
    result = {
        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
        for k, t in concatenated_examples.items()
    }
    result["labels"] = result["input_ids"].copy()
    return result

dataset = dataset.map(group_texts, batched=True)
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

model = AutoModelForCausalLM.from_pretrained("gpt2")
# Add a new adapter
model.add_adapter("poem")
# Activate the adapter for training
model.train_adapter("poem")

training_args = TrainingArguments(
    output_dir="./examples",
    do_train=True,
    remove_unused_columns=False,
    learning_rate=5e-4,
    num_train_epochs=3,
)

trainer = AdapterTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
)

trainer.train()
model.save_adapter("adapter_poem", "poem")
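Because train_adapter("poem") freezes the base GPT-2 weights, only a small fraction of parameters should be trainable. A quick sanity check before launching training, added here as a sketch rather than part of the original script:

# Count trainable vs. total parameters; after train_adapter("poem") only the
# adapter weights should still require gradients.
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable params: {trainable:,} / {total:,} ({100 * trainable / total:.2f}%)")

After training, trainer.evaluate() returns the validation loss, from which perplexity can be computed as math.exp(eval_loss).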
3. Test code:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# You can also load your locally trained adapter
model.load_adapter("adapter_poem")
model.set_active_adapters("poem")

PREFIX = "In the night"
encoding = tokenizer(PREFIX, return_tensors="pt")

output_sequence = model.generate(
    input_ids=encoding["input_ids"],
    attention_mask=encoding["attention_mask"],
    do_sample=True,
    num_return_sequences=5,
    max_length=50,
)

for generated_sequence_idx, generated_sequence in enumerate(output_sequence):
    print("=== GENERATED SEQUENCE {} ===".format(generated_sequence_idx + 1))
    generated_sequence = generated_sequence.tolist()
    # Decode text
    text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)
    # Truncate at the first end-of-sequence token, if one was generated
    eos_index = text.find(tokenizer.eos_token)
    if eos_index != -1:
        text = text[:eos_index]
    print(text)
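To gauge what the adapter actually learned, you can generate from the same prefix with the adapter switched off and compare against the outputs below. This sketch assumes the adapter-transformers convention that passing None to set_active_adapters deactivates all adapters:

# Fall back to the plain GPT-2 weights by deactivating all adapters
model.set_active_adapters(None)
baseline = model.generate(
    input_ids=encoding["input_ids"],
    attention_mask=encoding["attention_mask"],
    do_sample=True,
    max_length=50,
)
print(tokenizer.decode(baseline[0].tolist()))
# Re-activate the adapter to restore the fine-tuned behavior
model.set_active_adapters("poem")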
4. Sample output:
=== GENERATED SEQUENCE 1 ===
In the night, he would go;and she is the queen, and a mistress,and she keeps in the nightthe king who died" (the "giant," said the ancient, as a poet)and a child in his home
=== GENERATED SEQUENCE 2 ===
In the night,when one thinks of the war upon the world, and of men who live in it;that's all you have, though, that's all, that's what you want. and that makes me want, but here's th
=== GENERATED SEQUENCE 3 ===
In the night, she was the first, for once, the girl of good cheer!--of the people, the love of her life, she has not come to see her sister again;yet i think if i could not have loved her I wer
=== GENERATED SEQUENCE 4 ===
In the night, she sang the sweetest lullaby of morning-the very sound he heard:the silent and delicate voice of the holy sea,that his face would not come to grief.a quiet and silent night,the song as always i
=== GENERATED SEQUENCE 5 ===
In the nighttime, the king says:but there can be no peace or sorrow if that night's not a blessing,the only hope to her heart lies in the bright day.a good old fool, like a son of a friend,ho