Error when trying to load PEFT model after v0.2.3 release #151
Hey, thanks for reporting. Strange, because this also runs fine, but the output is not the same with PEFT, while it is the same without it:

```python
# import tempfile
import torch, os
from transformers import AutoModelForCausalLM, HqqConfig
from peft import get_peft_model, LoraConfig, PeftModel

device = 'cuda:0'
enable_peft = True

torch.manual_seed(0)
x = torch.randint(0, 100, (1, 100), device=device)

model_id = "facebook/opt-125m"  # other models also fail
model = AutoModelForCausalLM.from_pretrained(model_id,
    torch_dtype=torch.bfloat16,
    device_map=device,
    quantization_config=HqqConfig(nbits=4, group_size=64),
).eval()

if(enable_peft):
    config = LoraConfig(
        target_modules=["q_proj", "v_proj"],
        task_type="CAUSAL_LM",
        init_lora_weights=False,
    )
    model = get_peft_model(model, config)

    # layers = model.model.model.decoder.layers
    # for i in range(len(layers)):
    #     assert 'W_q' in layers[i].self_attn.q_proj.base_layer.state_dict()
    #     assert 'W_q' in layers[i].self_attn.k_proj.state_dict()
    #     assert 'W_q' in layers[i].self_attn.v_proj.base_layer.state_dict()
    #     assert 'W_q' in layers[i].self_attn.out_proj.state_dict()
    #     assert 'W_q' in layers[i].fc1.state_dict()
    #     assert 'W_q' in layers[i].fc2.state_dict()

with torch.no_grad():
    out = model(x).logits

quant_path = 'quant_model'
os.system('rm -R ' + quant_path)
model.save_pretrained(quant_path)

############################################################################################################
# from hqq.models.hf.base import AutoHQQHFModel
# AutoHQQHFModel.save_to_safetensors(model, quant_path, num_blocks_per_file=10000)

model_loaded = AutoModelForCausalLM.from_pretrained(quant_path,
    torch_dtype=torch.bfloat16,
    device_map=device,
).eval()

if(enable_peft):
    model_loaded = PeftModel.from_pretrained(model_loaded, quant_path).eval()

with torch.no_grad():
    out_loaded = model_loaded(x).logits

print((out_loaded - out).abs().mean())  # 0.3691
```
It turns out PEFT loading is not working properly: it doesn't seem to restore the HQQLinear layers that have no PEFT adapter applied to them:

```python
In [16]: model.model.model.decoder.layers[i].self_attn.k_proj
Out[16]: HQQLinear(in_features=768, out_features=768, bias=True)

In [17]: model_loaded.model.model.decoder.layers[i].self_attn.k_proj
Out[17]: Linear(in_features=768, out_features=768, bias=True)
```

I don't really know what's going on since I didn't implement this. Do you know what could be the issue?
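For a whole-model view of the same symptom, here is a small sketch (assuming the `model` and `model_loaded` objects from the snippet above, and that `HQQLinear` is importable from `hqq.core.quantize` in this hqq version) that counts how many linear layers stayed quantized after loading:

```python
# Sketch: count HQQLinear vs. plain nn.Linear modules to see which layers lost quantization.
from hqq.core.quantize import HQQLinear  # assumed import path for this hqq version
import torch.nn as nn

def count_layer_types(m):
    counts = {"HQQLinear": 0, "nn.Linear": 0}
    for name, module in m.named_modules():
        if isinstance(module, HQQLinear):
            counts["HQQLinear"] += 1
        elif type(module) is nn.Linear:
            counts["nn.Linear"] += 1
    return counts

print(count_layer_types(model))         # before saving: quantized layers show up as HQQLinear
print(count_layer_types(model_loaded))  # after loading with PEFT: some come back as plain nn.Linear
```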
It seems that the issue is caused by the adapter files located in the same folder, which is strange.

```python
import torch, os
from transformers import AutoModelForCausalLM, HqqConfig
from peft import get_peft_model, LoraConfig, PeftModel, PeftConfig

######################################################################################################
def load_adapter(model_loaded, adapter_dir):
    from safetensors import safe_open

    # Load config
    model_loaded = get_peft_model(model_loaded, PeftConfig.from_pretrained(adapter_dir))

    # Load weights
    tensors = {}
    with safe_open(adapter_dir + "/adapter_model.safetensors", framework="pt", device=model_loaded.device.type) as f:
        for key in f.keys():
            base, param = '.'.join(key.split('.')[:-1]) + '.default', key.split('.')[-1]
            if(base not in tensors):
                tensors[base] = {}
            tensors[base][param] = torch.nn.Parameter(f.get_tensor(key))

    # Full module name
    for name, module in model_loaded.named_modules():
        module.name = name

    # Assign weights
    def _patch(model_loaded):
        for name, layer in model_loaded.named_children():
            if(layer.name in tensors):
                for p, v in tensors[layer.name].items():
                    setattr(layer, p, v)
            _patch(layer)

    _patch(model_loaded)
######################################################################################################

device = 'cuda:0'
enable_peft = True

torch.manual_seed(0)
x = torch.randint(0, 100, (1, 100), device=device)

quant_path = 'quant_model'
os.system('rm -R ' + quant_path)

model_id = "facebook/opt-125m"
model = AutoModelForCausalLM.from_pretrained(model_id,
    torch_dtype=torch.bfloat16,
    device_map=device,
    quantization_config=HqqConfig(nbits=4, group_size=64),
)

# Save base model
model.save_pretrained(quant_path)

if(enable_peft):
    config = LoraConfig(target_modules=["q_proj", "v_proj"], task_type="CAUSAL_LM", lora_dropout=0.)
    model = get_peft_model(model, config)
    # Save adapter
    model.save_pretrained('adapter')

model = model.eval()
with torch.no_grad():
    out = model(x).logits

############################################################################################################
model_loaded = AutoModelForCausalLM.from_pretrained(quant_path, torch_dtype=torch.bfloat16, device_map=device)

if(enable_peft):
    load_adapter(model_loaded, 'adapter')

model_loaded = model_loaded.eval()
with torch.no_grad():
    out_loaded = model_loaded(x).logits

print((out_loaded - out).abs().mean())
# tensor(0., device='cuda:0', dtype=torch.bfloat16)
```
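Given that the adapter files sitting in the same folder seem to be the trigger, keeping the adapter in its own folder and attaching it with the stock PEFT loader might also be worth trying. This is only an unverified sketch under that assumption, reusing the `quant_model`/`adapter` paths and device from the snippet above:

```python
# Sketch (unverified): load the quantized base model from its own folder, then attach the
# adapter that was saved to a *separate* folder, using the standard PEFT API.
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained('quant_model',
    torch_dtype=torch.bfloat16,
    device_map='cuda:0',
).eval()
loaded = PeftModel.from_pretrained(base, 'adapter').eval()
```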
Since the v0.2.3 release, a PEFT unit test involving HQQ is failing. I could boil down the reproducer to this:
The error is:
The commit that caused this is most likely this one: 73cb373.
I jumped into the debugger for a bit more context:
Note that the `base_model.model.` part of the prefix stems from PEFT wrapping the original model. I don't really have enough knowledge about HQQ to debug further from here.
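For context on that prefix, here is a minimal sketch (model id borrowed from the reproducers above, no quantization involved; the printed key names are examples) showing how wrapping a model with PEFT prepends `base_model.model.` to the parameter names:

```python
# Sketch: illustrate the `base_model.model.` prefix that get_peft_model adds to parameter names.
from transformers import AutoModelForCausalLM
from peft import get_peft_model, LoraConfig

base = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
print(next(iter(base.state_dict())))
# e.g. 'model.decoder.embed_tokens.weight'

peft_model = get_peft_model(base, LoraConfig(target_modules=["q_proj", "v_proj"], task_type="CAUSAL_LM"))
print(next(iter(peft_model.state_dict())))
# e.g. 'base_model.model.model.decoder.embed_tokens.weight'  <- note the added 'base_model.model.' prefix
```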