实验在 30 名年龄为 19-48 岁的志愿者中进行。每人在腰部佩戴智能手机(三星 Galaxy S II)完成六项活动(步行、上楼、下楼、坐、站、躺)。实验以 50Hz 的恒定采样率采集三轴线性加速度和三轴角速度。
- 训练集共8000条数据;
- 测试集共2000条数据;
import pandas as pd
import numpy as np

# Load the zipped training CSV from the working directory.
train = pd.read_csv('train.csv.zip')
- 数据说明:选手需要提交测试集的活动类别(Activity)预测,具体的提交格式如下:
- 评估指标:本次竞赛使用准确率(accuracy)进行评分,数值越高表示预测越准确,评估代码参考:
from sklearn.metrics import accuracy_score

# Toy example of the competition metric: two of the four predictions match
# their labels.
y_true = [0, 1, 2, 3]
y_pred = [0, 2, 1, 3]
accuracy_score(y_true, y_pred)
import pandas as pd import paddle import numpy as np %pylab inline import seaborn as sns train_df = pd.read_csv('data/data137267/train.csv.zip') test_df = pd.read_csv('data/data137267/test.csv.zip')
(8000, 562)
Index(['tBodyAcc-mean()-X', 'tBodyAcc-mean()-Y', 'tBodyAcc-mean()-Z', 'tBodyAcc-std()-X', 'tBodyAcc-std()-Y', 'tBodyAcc-std()-Z', 'tBodyAcc-mad()-X', 'tBodyAcc-mad()-Y', 'tBodyAcc-mad()-Z', 'tBodyAcc-max()-X', ... 'fBodyBodyGyroJerkMag-skewness()', 'fBodyBodyGyroJerkMag-kurtosis()', 'angle(tBodyAccMean,gravity)', 'angle(tBodyAccJerkMean),gravityMean)', 'angle(tBodyGyroMean,gravityMean)', 'angle(tBodyGyroJerkMean,gravityMean)', 'angle(X,gravityMean)', 'angle(Y,gravityMean)', 'angle(Z,gravityMean)', 'Activity'], dtype='object', length=562)
# Box plot of one feature grouped by activity label.
plt.figure(figsize=(10, 5))
sns.boxplot(data=train_df, x='Activity', y='tBodyAcc-mean()-X')
plt.tight_layout()
# Encode the activity names as integer class ids 0..5; the position in the
# tuple is the id assigned to that name.
train_df['Activity'] = train_df['Activity'].map(
    {
        name: code
        for code, name in enumerate((
            'LAYING',
            'STANDING',
            'SITTING',
            'WALKING',
            'WALKING_UPSTAIRS',
            'WALKING_DOWNSTAIRS',
        ))
    }
)
from sklearn.preprocessing import StandardScaler

# Fit the scaler on every training column except the last one (the label),
# then apply the same transform to the training features and to all test
# columns.
scaler = StandardScaler().fit(train_df.values[:, :-1])
train_df.iloc[:, :-1] = scaler.transform(train_df.values[:, :-1])
test_df.iloc[:, :] = scaler.transform(test_df.values)
class Classifier(paddle.nn.Layer):
    """1-D convolutional classifier producing scores for 6 classes.

    Three Conv1D -> ReLU -> MaxPool1D stages (with dropout after the second
    and third) feed a single linear layer.

    ``forward`` returns raw, unnormalised logits. ``paddle.nn.CrossEntropyLoss``
    applies softmax internally by default, so normalising here as well would
    apply softmax twice, and a ReLU on the logits would clip every negative
    score to zero. ``argmax`` over the logits picks the same class as
    ``argmax`` over their softmax. ``self.softmax`` is kept as an attribute
    for callers that need probabilities.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = paddle.nn.Conv1D(in_channels=1, out_channels=16, kernel_size=3)
        self.conv2 = paddle.nn.Conv1D(in_channels=16, out_channels=32, kernel_size=3)
        self.conv3 = paddle.nn.Conv1D(in_channels=32, out_channels=64, kernel_size=3)
        self.flatten = paddle.nn.Flatten()
        self.dropout = paddle.nn.Dropout()
        # 128 = 64 channels x 2 positions, assuming a length-561 input:
        # 561 -> 559 -> 93 -> 91 -> 15 -> 13 -> 2 through the conv/pool stages.
        self.fc = paddle.nn.Linear(in_features=128, out_features=6)
        self.relu = paddle.nn.ReLU()
        self.pool = paddle.nn.MaxPool1D(6)
        self.softmax = paddle.nn.Softmax()

    def forward(self, inputs):
        """Return class logits for ``inputs``.

        Assumes ``inputs`` is shaped (batch, 1, 561) as the in_channels=1
        convolution and the 128-wide linear layer require - TODO confirm
        against the caller.
        """
        x = self.pool(self.relu(self.conv1(inputs)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.dropout(x)
        x = self.pool(self.relu(self.conv3(x)))
        x = self.dropout(x)
        x = self.flatten(x)
        # No ReLU/softmax here: the loss normalises the logits itself.
        return self.fc(x)
class Classifier(paddle.nn.Layer):
    """Single linear layer mapping 561 features to scores for 6 classes.

    ``forward`` returns raw, unnormalised logits. ``paddle.nn.CrossEntropyLoss``
    applies softmax internally by default, so normalising here as well would
    apply softmax twice, and a ReLU on the logits would clip every negative
    score to zero. ``self.relu`` and ``self.softmax`` are kept as attributes
    for callers that need them.
    """

    def __init__(self):
        super().__init__()
        self.fc = paddle.nn.Linear(in_features=561, out_features=6)
        self.relu = paddle.nn.ReLU()
        self.softmax = paddle.nn.Softmax()

    def forward(self, inputs):
        """Return class logits; the linear layer acts on the last axis of ``inputs``."""
        return self.fc(inputs)
class Classifier(paddle.nn.Layer):
    """1-D convolutional classifier producing scores for 6 classes.

    Three Conv1D -> ReLU -> MaxPool1D stages (with dropout after the second
    and third) feed a single linear layer.

    ``forward`` returns raw, unnormalised logits. ``paddle.nn.CrossEntropyLoss``
    applies softmax internally by default, so normalising here as well would
    apply softmax twice, and a ReLU on the logits would clip every negative
    score to zero. ``argmax`` over the logits picks the same class as
    ``argmax`` over their softmax. ``self.softmax`` is kept as an attribute
    for callers that need probabilities.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = paddle.nn.Conv1D(in_channels=1, out_channels=16, kernel_size=3)
        self.conv2 = paddle.nn.Conv1D(in_channels=16, out_channels=32, kernel_size=3)
        self.conv3 = paddle.nn.Conv1D(in_channels=32, out_channels=64, kernel_size=3)
        self.flatten = paddle.nn.Flatten()
        self.dropout = paddle.nn.Dropout()
        # 128 = 64 channels x 2 positions, assuming a length-561 input:
        # 561 -> 559 -> 93 -> 91 -> 15 -> 13 -> 2 through the conv/pool stages.
        self.fc = paddle.nn.Linear(in_features=128, out_features=6)
        self.relu = paddle.nn.ReLU()
        self.pool = paddle.nn.MaxPool1D(6)
        self.softmax = paddle.nn.Softmax()

    def forward(self, inputs):
        """Return class logits for ``inputs``.

        Assumes ``inputs`` is shaped (batch, 1, 561) as the in_channels=1
        convolution and the 128-wide linear layer require - TODO confirm
        against the caller.
        """
        x = self.pool(self.relu(self.conv1(inputs)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.dropout(x)
        x = self.pool(self.relu(self.conv3(x)))
        x = self.dropout(x)
        x = self.flatten(x)
        # No ReLU/softmax here: the loss normalises the logits itself.
        return self.fc(x)
# Build the network in training mode, with a cross-entropy loss and a plain
# SGD optimizer over all of its parameters.
model = Classifier()
model.train()
loss_fn = paddle.nn.CrossEntropyLoss()
opt = paddle.optimizer.SGD(parameters=model.parameters(), learning_rate=0.005)
EPOCH_NUM = 10000  # number of passes over the training data
BATCH_SIZE = 512   # samples per mini-batch

# Hold out the last 1000 rows for validation and train on the rest. Each
# split is cast to float32 and reshaped to (samples, 1, 562); the label is
# the last of the 562 columns.
training_data = train_df.iloc[:-1000].values.astype(np.float32).reshape(-1, 1, 562)
val_data = train_df.iloc[-1000:].values.astype(np.float32).reshape(-1, 1, 562)
# Mini-batch training loop with periodic validation.
#
# The validation inputs and labels never change, so they are built once here
# instead of on every logging step.
val_features = paddle.to_tensor(val_data[:, :, :-1])
val_labels = val_data[:, :, -1].flatten()

for epoch_id in range(EPOCH_NUM):
    # Shuffle the samples (first axis) in place before every epoch.
    np.random.shuffle(training_data)

    # Split the shuffled data into mini-batches of at most BATCH_SIZE samples.
    mini_batches = [
        training_data[k:k + BATCH_SIZE]
        for k in range(0, len(training_data), BATCH_SIZE)
    ]

    for iter_id, mini_batch in enumerate(mini_batches):
        # Validation below switches to eval mode, so re-enable training mode
        # at the start of every step.
        model.train()

        # The last column of each row is the label; everything before it is
        # the feature vector.
        features = paddle.to_tensor(mini_batch[:, :, :-1])
        labels = paddle.to_tensor(mini_batch[:, :, -1:]).flatten().astype('int64')

        # Forward pass and loss.
        predicts = model(features)
        loss = loss_fn(predicts, labels)
        avg_loss = paddle.mean(loss)

        # Backward pass, one optimizer step, then reset the gradients.
        avg_loss.backward()
        opt.step()
        opt.clear_grad()

        # Report every 10th epoch. With fewer than 2000 batches per epoch the
        # iter_id condition only holds for the first batch.
        if iter_id % 2000 == 0 and epoch_id % 10 == 0:
            acc = (predicts.argmax(1) == labels).astype('float64').mean()

            model.eval()
            # No gradients are needed for validation.
            with paddle.no_grad():
                val_predict = model(val_features).argmax(1)
            val_acc = np.mean(val_predict.numpy() == val_labels)

            print("epoch: {}, iter: {}, loss is: {}, acc is {} / {}".format(
                epoch_id, iter_id, avg_loss.numpy(), acc.numpy(), val_acc))
epoch: {}, iter: {}, loss is: {}, acc is {} / {}".format(
epoch_id, iter_id, avg_loss.numpy(), acc.numpy(), val_acc))
# Switch to inference mode and predict a class id for every test row.
model.eval()
test_data = paddle.to_tensor(
    test_df.values.astype(np.float32).reshape(-1, 1, 561)
)
test_predict = model(test_data).argmax(1).numpy()
# Wrap the predicted class ids in a one-column frame and translate them back
# to activity names; the position in the tuple is the class id.
test_predict = pd.DataFrame({'Activity': test_predict})
test_predict['Activity'] = test_predict['Activity'].map(
    dict(enumerate((
        'LAYING',
        'STANDING',
        'SITTING',
        'WALKING',
        'WALKING_UPSTAIRS',
        'WALKING_DOWNSTAIRS',
    )))
)
test_predict.to_csv('submission.csv', index=None) !zip submission.zip submission.csv
- 模型可以加入残差结构,参考 ResNet。
- 可以对数据做数据增强,比如加入噪声。