【人工智能课程】计算机科学博士作业三
来源:李宏毅2022课程第10课的作业
1 图片攻击概念
图片攻击是指故意对数字图像进行修改,以使机器学习模型产生错误的输出或者产生预期之外的结果。这种攻击是通过将微小的、通常对人类难以察觉的扰动应用于输入图像来实现的。图片攻击是对深度学习系统中的鲁棒性和安全性的一种测试,也可以用于欺骗、隐私侵犯、对抗性水印等。以下是一些常见的图片攻击的目的:
- 对抗样本研究:通过图片攻击,研究人员可以探索深度学习模型的鲁棒性和对抗样本的生成方法,以便改进模型的安全性和鲁棒性。
- 欺骗机器学习模型:攻击者可能希望通过修改图片使其被错误地分类,从而欺骗机器学习模型,例如将停车标志误识别为速限标志。
- 隐私相关:通过对图片添加扰动,可以干扰识别模型对图中敏感信息的识别;这既可能被攻击者用来规避检测,也可以被用户用来保护自身隐私。
- 对抗性水印:攻击者可能希望通过添加微小的、难以察觉的扰动来隐藏、篡改或转移图片中的水印;反过来,类似的扰动技术也可以用于嵌入水印,以保护知识产权或者追踪盗版图片来源。
总之,图片攻击的目的可以是为了研究、测试模型的鲁棒性,也可以是出于恶意目的,比如欺骗或者损害隐私。在实际应用中,保护和提高模型的鲁棒性,并确保数据的安全与隐私是至关重要的。
2 算法
除了FGSM、IFGSM和MIFGSM之外,还有一些其他流行的图像攻击算法,包括:
DeepFool:它是一种迭代的无目标攻击算法,每次迭代将分类器在当前样本附近线性化,并寻找使样本越过最近决策边界所需的最小扰动,以此生成对抗样本。
CW攻击(Carlini and Wagner攻击):这个算法试图最小化一种特定的损失函数,以欺骗分类器,并在L2和L∞范数下产生对抗样本。
JSMA(Jacobian-based Saliency Map Attack):JSMA算法利用模型输出对输入的雅可比矩阵构建显著图(saliency map),据此找到对分类结果影响最大的像素并加以扰动,使图像被错误分类。
EOT(Expectation over Transformation):EOT算法对输入图像施加多种随机变换,然后对这些变换下损失的期望值进行优化,生成在各种变换下仍然有效的对抗样本。
One-pixel攻击:这种攻击算法仅修改图像中的一个(或极少数几个)像素,以欺骗分类器,同时尽可能减少对原始图像的影响。
3 Python实现
3.1 下载数据
# Set up the environment
!pip install pytorchcv
!pip install imgaug
# Download the data
!wget https://github.com/DanielLin94144/ML-attack-dataset/files/8167812/data.zip
# Unpack the archive, then remove it
!unzip ./data.zip
!rm ./data.zip
3.2 参数设置
import torch
import torch.nn as nn
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 8

# Per-channel statistics computed on the CIFAR-10 dataset.
cifar_10_mean = (0.491, 0.482, 0.447)  # mean of the three image channels
cifar_10_std = (0.202, 0.199, 0.201)  # standard deviation of the three channels

# Reshape mean/std to (3, 1, 1) so they broadcast over (C, H, W) images.
mean = torch.tensor(cifar_10_mean).to(device).view(3, 1, 1)
std = torch.tensor(cifar_10_std).to(device).view(3, 1, 1)

# Perturbation budget of 8/255 in pixel space, divided by std so that it is
# expressed in the same normalized units as the model input.
epsilon = 8/255/std

# Directory holding the benign images. The archive is unpacked with
# `unzip ./data.zip` in the working directory and the visualization reads
# from './data', so the path must be relative rather than the absolute '/data'.
root = './data'
3.3 导入数据
import os
import glob
import shutil
import numpy as np
from PIL import Image
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader
# Preprocessing pipeline: convert the PIL image to a tensor, then normalize
# each channel with the CIFAR-10 statistics.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=cifar_10_mean, std=cifar_10_std),
    ]
)
class AdvDataset(Dataset):
    """Image dataset laid out as ``data_dir/<class_dir>/<image file>``.

    Class directories are visited in sorted order; the position of a
    directory in that order is the integer label of every file inside it.
    """

    def __init__(self, data_dir, transform):
        self.transform = transform
        self.images = []  # full path of every image file
        self.labels = []  # integer label, parallel to self.images
        self.names = []   # path relative to data_dir, parallel to self.images
        class_dirs = sorted(glob.glob(f'{data_dir}/*'))
        for label, class_dir in enumerate(class_dirs):
            files = sorted(glob.glob(f'{class_dir}/*'))
            self.images.extend(files)
            self.labels.extend(label for _ in files)
            self.names.extend(os.path.relpath(f, data_dir) for f in files)

    def __getitem__(self, idx):
        """Return ``(transformed image, label)`` for position ``idx``."""
        path = self.images[idx]
        return self.transform(Image.open(path)), self.labels[idx]

    def __getname__(self):
        """Return the list of image paths relative to ``data_dir``."""
        return self.names

    def __len__(self):
        """Return the number of image files found."""
        return len(self.images)
# Build the dataset and a loader over the benign images. The loader keeps the
# dataset order (shuffle=False), which is the same order as adv_names.
adv_set = AdvDataset(root, transform=transform)
adv_loader = DataLoader(adv_set, batch_size=batch_size, shuffle=False)
adv_names = adv_set.__getname__()
print(f'number of images = {len(adv_set)}')
3.4 工具函数
(1)评估模型在良性图像上的性能
def epoch_benign(model, loader, loss_fn):
    """Evaluate ``model`` on the unmodified (benign) images.

    Args:
        model: network to evaluate; it is switched to eval mode.
        loader: iterable of ``(x, y)`` batches that exposes ``loader.dataset``.
        loss_fn: callable ``loss_fn(logits, y)`` returning a scalar tensor.

    Returns:
        ``(accuracy, mean_loss)``, both divided by ``len(loader.dataset)``.
    """
    model.eval()
    n_correct, loss_sum = 0.0, 0.0
    # Evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            n_correct += (logits.argmax(dim=1) == y).sum().item()
            # Weight the batch loss by the batch size so the final division
            # gives a per-sample average (assumes loss_fn returns a batch
            # mean, as nn.CrossEntropyLoss does by default).
            loss_sum += loss_fn(logits, y).item() * x.shape[0]
    n_total = len(loader.dataset)
    return n_correct / n_total, loss_sum / n_total
(2)生成对抗样本并评估攻击效果
def gen_adv_examples(model, loader, attack, loss_fn):
    """Attack every batch in ``loader`` and collect the adversarial images.

    Args:
        model: network under attack; it is switched to eval mode.
        loader: iterable of ``(x, y)`` batches that exposes ``loader.dataset``.
        attack: callable ``attack(model, x, y, loss_fn)`` returning the
            adversarial batch in the same normalized space as ``x``.
        loss_fn: callable ``loss_fn(logits, y)`` returning a scalar tensor.

    Returns:
        ``(adv_examples, accuracy, mean_loss)``. ``adv_examples`` is a NumPy
        array of shape ``(N, H, W, C)`` with rounded values in ``[0, 255]``;
        accuracy and loss are measured on the un-rounded adversarial tensors.

    Raises:
        ValueError: if ``loader`` yields no batches (nothing to concatenate).
    """
    model.eval()
    batches = []
    n_correct, loss_sum = 0.0, 0.0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        x_adv = attack(model, x, y, loss_fn)  # obtain adversarial examples
        # Scoring the attack needs no gradients.
        with torch.no_grad():
            logits = model(x_adv)
            loss = loss_fn(logits, y)
        n_correct += (logits.argmax(dim=1) == y).sum().item()
        loss_sum += loss.item() * x.shape[0]
        # Undo the normalization and convert to the 0-255 pixel scale.
        pixels = (x_adv.detach() * std + mean).clamp(0, 1)
        pixels = (pixels * 255).clamp(0, 255)
        pixels = pixels.cpu().numpy().round()  # drop the fractional part
        # (bs, C, H, W) -> (bs, H, W, C)
        batches.append(pixels.transpose((0, 2, 3, 1)))
    # Concatenate once at the end instead of re-copying the array per batch.
    adv_examples = np.concatenate(batches, axis=0)
    n_total = len(loader.dataset)
    return adv_examples, n_correct / n_total, loss_sum / n_total
def create_dir(data_dir, adv_dir, adv_examples, adv_names):
    """Write adversarial images into ``adv_dir``.

    If ``adv_dir`` does not exist yet it is first created as a copy of
    ``data_dir``. Each example is then saved at ``adv_dir/<name>``,
    overwriting any file already at that path.
    """
    if not os.path.exists(adv_dir):
        shutil.copytree(data_dir, adv_dir)
    for pixels, rel_path in zip(adv_examples, adv_names):
        out_path = os.path.join(adv_dir, rel_path)
        # Image data has to be uint8 before it is handed to PIL.
        Image.fromarray(pixels.astype(np.uint8)).save(out_path)
3.5 攻击算法
(1)fgsm算法
def fgsm(model, x, y, loss_fn, epsilon=epsilon):
    """Single-step FGSM: shift ``x`` by ``epsilon`` along the sign of the loss gradient.

    Args:
        model: network under attack.
        x: input batch; a detached copy is perturbed, ``x`` itself is untouched.
        y: labels passed to ``loss_fn``.
        loss_fn: callable ``loss_fn(logits, y)`` returning a scalar tensor.
        epsilon: step size, broadcastable against ``x``.

    Returns:
        The perturbed batch ``x + epsilon * sign(grad)``.
    """
    # Start from a detached copy of the benign batch and track its gradient.
    x_adv = x.detach().clone().requires_grad_(True)
    loss_fn(model(x_adv), y).backward()
    # Gradient ascent on the input: step in the direction that raises the loss.
    step = epsilon * x_adv.grad.detach().sign()
    return x_adv + step
(2)ifgsm算法
# Step size for the iterative attacks. alpha and num_iter are free choices.
alpha = 0.8 / 255 / std


def ifgsm(model, x, y, loss_fn, epsilon=epsilon, alpha=alpha, num_iter=20):
    """Iterative FGSM: repeat small FGSM steps, staying within ``epsilon`` of ``x``.

    Args:
        model: network under attack.
        x: benign input batch.
        y: labels passed to ``loss_fn``.
        loss_fn: callable ``loss_fn(logits, y)`` returning a scalar tensor.
        epsilon: maximum per-element deviation from ``x``.
        alpha: step size of each FGSM step.
        num_iter: number of FGSM steps.

    Returns:
        The adversarial batch after ``num_iter`` clipped steps.
    """
    lower, upper = x - epsilon, x + epsilon
    x_adv = x
    for _ in range(num_iter):
        # One FGSM step with epsilon set to alpha.
        x_adv = fgsm(model, x_adv, y, loss_fn, alpha)
        # Clip x_adv back into [x - epsilon, x + epsilon].
        x_adv = torch.min(x_adv, upper)
        x_adv = torch.max(x_adv, lower)
    return x_adv
(3)mifgsm算法
def mifgsm(model, x, y, loss_fn, epsilon=epsilon, alpha=alpha, num_iter=20, decay=1.0):
    """Momentum Iterative FGSM (MI-FGSM).

    Each iteration normalizes the input gradient by its per-sample L1 norm,
    accumulates it as ``momentum = decay * momentum + grad``, and steps along
    the sign of the accumulated momentum. The result is clipped to
    ``[x - epsilon, x + epsilon]`` after every step.

    Args:
        model: network under attack.
        x: benign input batch, with the batch dimension first.
        y: labels passed to ``loss_fn``.
        loss_fn: callable ``loss_fn(logits, y)`` returning a scalar tensor.
        epsilon: maximum per-element deviation from ``x``.
        alpha: step size of each iteration.
        num_iter: number of iterations.
        decay: momentum decay factor; with ``decay=0`` the update direction is
            the sign of the current gradient only.

    Returns:
        The adversarial batch after ``num_iter`` clipped steps.
    """
    x_adv = x
    # Accumulated gradient direction; zeros_like keeps x's device and dtype.
    momentum = torch.zeros_like(x)
    for _ in range(num_iter):
        x_adv = x_adv.detach().clone()
        x_adv.requires_grad = True
        loss = loss_fn(model(x_adv), y)
        loss.backward()
        grad = x_adv.grad.detach()
        # Normalize by the per-sample L1 norm so gradients from different
        # iterations contribute on a comparable scale; the clamp guards
        # against division by zero for an all-zero gradient.
        l1_norm = grad.abs().flatten(start_dim=1).sum(dim=1).clamp_min(1e-12)
        grad = grad / l1_norm.view(-1, *([1] * (grad.dim() - 1)))
        # The previous code added (1 - decay) * momentum, which vanishes at
        # the default decay=1.0 and reduced the attack to plain I-FGSM.
        momentum = decay * momentum + grad
        x_adv = x_adv + alpha * momentum.sign()
        # Clip x_adv back into [x - epsilon, x + epsilon].
        x_adv = torch.max(torch.min(x_adv, x + epsilon), x - epsilon)
    return x_adv
(4)EOT算法
def eot_attack(model, x, y, loss_fn, epsilon=0.03, num_samples=10, sigma=0.1):
    """Random-restart FGSM: attack several noisy copies of ``x``, keep the strongest.

    Despite the name, this does not average gradients over transformations.
    It draws ``num_samples`` Gaussian-noised copies of ``x``, applies one FGSM
    step to each, and returns the candidate with the highest loss.

    NOTE(review): the Gaussian noise is unbounded, so the result is not
    guaranteed to stay within ``epsilon`` of ``x``.

    Args:
        model: network under attack.
        x: benign input batch in normalized space, shaped ``(B, 3, H, W)``.
        y: labels passed to ``loss_fn``.
        loss_fn: callable ``loss_fn(logits, y)`` returning a scalar tensor.
        epsilon: FGSM step size (in normalized units).
        num_samples: number of noisy starting points to try.
        sigma: standard deviation of the Gaussian noise.

    Returns:
        The detached candidate with the highest loss, or ``x`` (detached)
        when ``num_samples <= 0``.
    """
    # Valid pixel range [0, 1] expressed in the normalized space of x. The
    # previous code clamped normalized values to [0, 1] directly, which wiped
    # out every value below the channel mean.
    lower, upper = (0 - mean) / std, (1 - mean) / std
    best_adv_example = x.detach()
    best_adv_loss = float('-inf')
    for _ in range(num_samples):
        noise = torch.randn_like(x) * sigma
        x_start = torch.max(torch.min(x + noise, upper), lower)
        # One FGSM step from the noisy starting point.
        x_adv = fgsm(model, x_start, y, loss_fn, epsilon)
        with torch.no_grad():
            adv_loss = loss_fn(model(x_adv), y).item()
        # An attack wants the loss as large as possible, so keep the maximum.
        if adv_loss > best_adv_loss:
            best_adv_example = x_adv.detach()
            best_adv_loss = adv_loss
    return best_adv_example
3.6 算法评估
准确率越低越好,损失越大越好
(1)基准模型
from pytorchcv.model_provider import get_model as ptcv_get_model
# Model the attacks below are run against: pretrained 'resnet110_cifar10'.
model = ptcv_get_model('resnet110_cifar10', pretrained=True)
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()

# Accuracy and loss on the benign images, used as the baseline.
benign_acc, benign_loss = epoch_benign(model, loader=adv_loader, loss_fn=loss_fn)
print(f'[ Base(未Attack图片评估) ] benign_acc = {benign_acc:.5f}, benign_loss = {benign_loss:.5f}')
[ Base(未Attack图片评估) ] benign_acc = 0.95000, benign_loss = 0.22678
(2)FGSM算法
# Run the single-step FGSM attack over the whole loader and report the result.
adv_examples, fgsm_acc, fgsm_loss = gen_adv_examples(
    model, adv_loader, attack=fgsm, loss_fn=loss_fn
)
print(f'[ Attack(FGSM Attack图片评估) ] fgsm_acc = {fgsm_acc:.5f}, fgsm_loss = {fgsm_loss:.5f}')
# Saving the FGSM examples is disabled; uncomment to write them to ./fgsm.
# create_dir(root, 'fgsm', adv_examples, adv_names)
[ Attack(FGSM Attack图片评估) ] fgsm_acc = 0.59000, fgsm_loss = 2.49272
(3)I-FGSM算法
# Run the iterative FGSM attack, report the result, and save the examples.
adv_examples, ifgsm_acc, ifgsm_loss = gen_adv_examples(
    model, adv_loader, attack=ifgsm, loss_fn=loss_fn
)
print(f'[ Attack(I-FGSM Attack图片评估) ] ifgsm_acc = {ifgsm_acc:.5f}, ifgsm_loss = {ifgsm_loss:.5f}')
create_dir(data_dir=root, adv_dir='ifgsm', adv_examples=adv_examples, adv_names=adv_names)
[ Attack(I-FGSM Attack图片评估) ] ifgsm_acc = 0.01000, ifgsm_loss = 17.30204
(4)EOT算法
# Run eot_attack, report the result, and save the examples. The label and
# variable names now say EOT; they previously reused the I-FGSM ones.
adv_examples, eot_acc, eot_loss = gen_adv_examples(model, adv_loader, eot_attack, loss_fn)
print(f'[ Attack(EOT Attack图片评估) ] eot_acc = {eot_acc:.5f}, eot_loss = {eot_loss:.5f}')
create_dir(root, 'eot', adv_examples, adv_names)
[ Attack(I-FGSM Attack图片评估) ] ifgsm_acc = 0.21000, ifgsm_loss = 5.35198
(5)基于Ensemble 模型的IFGSM算法
class ensembleNet(nn.Module):
    """Ensemble that averages the outputs of several pretrained models."""

    def __init__(self, model_names):
        super().__init__()
        members = [ptcv_get_model(name, pretrained=True) for name in model_names]
        self.models = nn.ModuleList(members)
        # Not used by forward(); kept as an attribute of the module.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the element-wise mean of every member's output for ``x``."""
        total = sum(member(x) for member in self.models)
        return total / len(self.models)
# Members of the ensemble, identified by their pytorchcv model names.
model_names = [
    'nin_cifar10',
    'resnet20_cifar10',
    'preresnet20_cifar10',
]
ensemble_model = ensembleNet(model_names).to(device)
loss_fn = nn.CrossEntropyLoss()

# Run I-FGSM against the ensemble and report the result.
adv_examples, ifgsm_acc, ifgsm_loss = gen_adv_examples(ensemble_model, adv_loader, ifgsm, loss_fn)
print(f'[ Attack(I-FGSM Attack图片评估) ] ifgsm_acc = {ifgsm_acc:.5f}, ifgsm_loss = {ifgsm_loss:.5f}')
# The visualization reads images from ./ensemble_model_ifgsm, so the examples
# have to be written out here (this call was previously commented out).
create_dir(root, 'ensemble_model_ifgsm', adv_examples, adv_names)
[ Attack(I-FGSM Attack图片评估) ] ifgsm_acc = 0.00000, ifgsm_loss = 13.37727
(6)基于Ensemble 模型的EOT算法
class ensembleNet(nn.Module):
    """Ensemble that averages the outputs of several pretrained models."""

    def __init__(self, model_names):
        super().__init__()
        members = [ptcv_get_model(name, pretrained=True) for name in model_names]
        self.models = nn.ModuleList(members)
        # Not used by forward(); kept as an attribute of the module.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the element-wise mean of every member's output for ``x``."""
        total = sum(member(x) for member in self.models)
        return total / len(self.models)
# Members of the ensemble, identified by their pytorchcv model names.
model_names = [
    'nin_cifar10',
    'resnet20_cifar10',
    'preresnet20_cifar10',
]
ensemble_model = ensembleNet(model_names).to(device)
loss_fn = nn.CrossEntropyLoss()

# Run eot_attack against the ensemble, report the result, and save the
# examples. The label and variable names now say EOT instead of I-FGSM.
adv_examples, eot_acc, eot_loss = gen_adv_examples(ensemble_model, adv_loader, eot_attack, loss_fn)
print(f'[ Attack(EOT Attack图片评估) ] eot_acc = {eot_acc:.5f}, eot_loss = {eot_loss:.5f}')
create_dir(root, 'ensemble_model_eot', adv_examples, adv_names)
[ Attack(I-FGSM Attack图片评估) ] ifgsm_acc = 0.08000, ifgsm_loss = 3.68992
3.7 可视化
import matplotlib.pyplot as plt
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# For each class, show the first benign image next to its adversarial
# counterpart, each titled with the model's predicted class and probability.
# Eval mode makes layers such as BatchNorm/Dropout (if present) behave
# deterministically for single-image inference.
model.eval()
plt.figure(figsize=(10, 20))
cnt = 0
for cls_name in classes:
    path = f'{cls_name}/{cls_name}1.png'
    # Benign image first, then the attacked image saved under
    # ./ensemble_model_ifgsm.
    for tag, img_dir in (('benign', './data'), ('adversarial', './ensemble_model_ifgsm')):
        cnt += 1
        plt.subplot(len(classes), 4, cnt)
        im = Image.open(f'{img_dir}/{path}')
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            logit = model(transform(im).unsqueeze(0).to(device))[0]
        predict = logit.argmax(-1).item()
        prob = logit.softmax(-1)[predict].item()
        plt.title(f'{tag}: {cls_name}1.png\n{classes[predict]}: {prob:.2%}')
        plt.axis('off')
        plt.imshow(np.array(im))
plt.tight_layout()
plt.show()
从图中可以看到,图片的识别出现了错误,说明图片攻击成功。