
Project Notes

Import the packages for analyzing the raw traffic data

import csv
from scapy.all import *
import os
# Inspect the structure of the captured traffic
pkts = rdpcap("./data/test.pcap")
pkts[0].show()
###[ Ethernet ]### 
  dst       = 52:54:00:12:34:56
  src       = 52:55:10:00:02:02
  type      = IPv4
###[ IP ]### 
     version   = 4
     ihl       = 5
     tos       = 0x0
     len       = 48
     id        = 645
     flags     = DF
     frag      = 0
     ttl       = 128
     proto     = tcp
     chksum    = 0xb48e
     src       = 192.168.0.67
     dst       = 115.223.14.234
     \options   \
###[ TCP ]### 
        sport     = 1040
        dport     = https
        seq       = 1155960802
        ack       = 0
        dataofs   = 7
        reserved  = 0
        flags     = S
        window    = 65535
        chksum    = 0x64d7
        urgptr    = 0
        options   = [('MSS', 1460), ('NOP', None), ('NOP', None), ('SAckOK', b'')]
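Each of these fields can also be read programmatically as layer attributes, which is what the CSV extraction below relies on. A minimal sketch against the same capture:

# Header fields are attributes of the corresponding layer
first = pkts[0]
print(first[Ether].src, first[Ether].dst)            # MAC addresses
print(first[IP].src, first[IP].dst, first[IP].ttl)   # IP addresses and TTL
print(first[TCP].sport, first[TCP].dport, str(first[TCP].flags))  # ports and flags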
# [Ethernet]
# dst: destination MAC address
# src: source MAC address
# type: EtherType (IPv4)

# [IP]
# version: IP version
# ihl: Internet Header Length (IHL)
# tos: type of service
# len: total length of the IP packet
# id: identification field
# flags: flags field
# frag: fragment offset
# ttl: time to live (drops packets that linger in the network too long)
# proto: protocol
# chksum: header checksum
# src: source IP address
# dst: destination IP address

# [TCP]
# sport: source port
# dport: destination port
# seq: sequence number
# ack: acknowledgment number
# dataofs: data offset
# reserved: reserved bits
# flags: TCP flags
# window: window size
# chksum: checksum
# urgptr: urgent pointer
# options: header options field

Raw traffic dataset:

train.py

  • black (1,500 pcap files)
  • white (1,500 pcap files)

test.py (no labels provided)
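The train.csv used below is a flattened per-packet table built from these captures. A sketch of how such a conversion could look with scapy (the folder layout, label encoding, and exact column order are assumptions, not the project's actual script):

import csv
import glob
from scapy.all import rdpcap, Ether, IP, TCP

with open("./data/train.csv", "w", newline="") as f:
    writer = csv.writer(f)
    # assumed layout: black pcaps labeled 1, white pcaps labeled 0
    for folder, label in [("./data/black", 1), ("./data/white", 0)]:
        for path in glob.glob(folder + "/*.pcap"):
            for pkt in rdpcap(path):
                if IP not in pkt or TCP not in pkt:
                    continue  # keep TCP/IP packets only, matching the table below
                ip, tcp = pkt[IP], pkt[TCP]
                writer.writerow([
                    pkt[Ether].dst, pkt[Ether].src, pkt[Ether].type,
                    ip.version, ip.ihl, ip.tos, ip.len, ip.id, str(ip.flags),
                    ip.frag, ip.ttl, ip.proto, ip.chksum,
                    tcp.sport, tcp.dport, tcp.seq, tcp.ack, tcp.dataofs,
                    tcp.reserved, str(tcp.flags), tcp.window, tcp.chksum,
                    tcp.urgptr, label])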

import pandas as pd
data = pd.read_csv("./data/train.csv")
data.shape
(1752060, 24)
data.head()

dst src type version ihl tos len id flags frag ... dport seq ack dataofs reserved flags.1 window chksum.1 urgptr label
0 52:54:00:12:34:56 52:55:10:00:02:02 2048 4 5 0 48 196 DF 0 ... 443 3781184629 0 7 0 S 65535 15493 0 1
1 52:54:00:12:34:56 52:55:10:00:02:02 2048 4 5 0 48 197 DF 0 ... 443 3781184629 0 7 0 S 65535 15493 0 1
2 52:54:00:12:34:56 52:55:10:00:02:02 2048 4 5 0 48 198 DF 0 ... 443 3781184629 0 7 0 S 65535 15493 0 1
3 52:54:00:12:34:56 52:55:10:00:02:02 2048 4 5 0 48 199 DF 0 ... 443 2676329384 0 7 0 S 65535 16171 0 1
4 52:55:10:00:02:02 52:54:00:12:34:56 2048 4 5 0 44 3 NaN 0 ... 1034 2752001 2676329385 6 0 SA 8192 13814 0 1

5 rows × 24 columns

data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1752060 entries, 0 to 1752059
Data columns (total 24 columns):
dst         object
src         object
type        int64
version     int64
ihl         int64
tos         int64
len         int64
id          int64
flags       object
frag        int64
ttl         int64
proto       int64
chksum      int64
sport       int64
dport       int64
seq         int64
ack         int64
dataofs     int64
reserved    int64
flags.1     object
window      int64
chksum.1    int64
urgptr      int64
label       int64
dtypes: int64(20), object(4)
memory usage: 320.8+ MB
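Per the column order above, the iloc indices used in the training code below select the 18 numeric header fields, skipping the string columns (dst, src, flags, flags.1) plus frag and the label. An equivalent name-based selection, for readability:

num_cols = ["type", "version", "ihl", "tos", "len", "id", "ttl", "proto",
            "chksum", "sport", "dport", "seq", "ack", "dataofs", "reserved",
            "window", "chksum.1", "urgptr"]
x = data[num_cols]
y = data["label"]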
# DNN model
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.preprocessing import Normalizer
import pandas as pd
import numpy as np
np.random.seed(1337) # for reproducibility
from keras.models import Sequential
from keras.layers import Dense, Activation

data = pd.read_csv("./data/train.csv")
data = shuffle(data)
# Select the 18 numeric features for training (see the name-based mapping above)
x = data.iloc[:,[2,3,4,5,6,7,10,11,12,13,14,15,16,17,18,20,21,22]]
y = data.iloc[:,-1]

# Split the dataset 7:3
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# Normalize each sample to unit norm (Normalizer is stateless, so fitting
# train and test separately is harmless)
scaler = Normalizer().fit(x_train)
x_train = scaler.transform(x_train)

scaler = Normalizer().fit(x_test)
x_test = scaler.transform(x_test)

# Convert to numpy arrays
x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)

batch_size = 1024

# 1. define the network
model = Sequential()
model.add(Dense(128, input_dim=18, activation='relu'))
model.add(Dense(64))
model.add(Dense(32))
model.add(Dense(1))
model.add(Activation('sigmoid'))

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=200, verbose=2)  # `nb_epoch` was renamed `epochs`
loss, accuracy = model.evaluate(x_test, y_test, verbose=2)
print(loss, accuracy)
Epoch 1/200
 - 4s - loss: 0.6592 - acc: 0.6275
Epoch 2/200
 - 4s - loss: 0.6573 - acc: 0.6311
Epoch 3/200
 - 4s - loss: 0.6571 - acc: 0.6313
Epoch 4/200
 - 4s - loss: 0.6567 - acc: 0.6316
Epoch 5/200
 - 4s - loss: 0.6566 - acc: 0.6319
Epoch 6/200
 - 4s - loss: 0.6563 - acc: 0.6322
Epoch 7/200
 - 4s - loss: 0.6558 - acc: 0.6328
Epoch 8/200
 - 4s - loss: 0.6553 - acc: 0.6337
Epoch 9/200
 - 4s - loss: 0.6542 - acc: 0.6354
Epoch 10/200
 - 4s - loss: 0.6512 - acc: 0.6384
Epoch 11/200
 - 4s - loss: 0.6401 - acc: 0.6442
Epoch 12/200
 - 4s - loss: 0.6149 - acc: 0.6652
Epoch 13/200
 - 4s - loss: 0.5915 - acc: 0.6933
Epoch 14/200
 - 5s - loss: 0.5746 - acc: 0.7140
Epoch 15/200
 - 4s - loss: 0.5644 - acc: 0.7247
Epoch 16/200
 - 4s - loss: 0.5575 - acc: 0.7296
Epoch 17/200
 - 4s - loss: 0.5538 - acc: 0.7319
Epoch 18/200
 - 4s - loss: 0.5506 - acc: 0.7333
Epoch 19/200
 - 4s - loss: 0.5482 - acc: 0.7356
Epoch 20/200
 - 4s - loss: 0.5430 - acc: 0.7372
Epoch 21/200
 - 4s - loss: 0.5399 - acc: 0.7395
Epoch 22/200
 - 4s - loss: 0.5370 - acc: 0.7411
Epoch 23/200
 - 4s - loss: 0.5327 - acc: 0.7432
Epoch 24/200
 - 4s - loss: 0.5298 - acc: 0.7449
Epoch 25/200
 - 4s - loss: 0.5289 - acc: 0.7456
Epoch 26/200
 - 4s - loss: 0.5257 - acc: 0.7463
Epoch 27/200
 - 4s - loss: 0.5235 - acc: 0.7478
Epoch 28/200
 - 4s - loss: 0.5215 - acc: 0.7489
Epoch 29/200
 - 4s - loss: 0.5174 - acc: 0.7517
Epoch 30/200
 - 4s - loss: 0.5151 - acc: 0.7527
Epoch 31/200
 - 4s - loss: 0.5133 - acc: 0.7539
Epoch 32/200
 - 4s - loss: 0.5111 - acc: 0.7555
Epoch 33/200
 - 4s - loss: 0.5078 - acc: 0.7561
Epoch 34/200
 - 4s - loss: 0.5096 - acc: 0.7555
Epoch 35/200
 - 4s - loss: 0.5022 - acc: 0.7599
Epoch 36/200
 - 4s - loss: 0.5017 - acc: 0.7608
Epoch 37/200
 - 4s - loss: 0.4988 - acc: 0.7619
Epoch 38/200
 - 4s - loss: 0.4981 - acc: 0.7617
Epoch 39/200
 - 4s - loss: 0.4982 - acc: 0.7629
Epoch 40/200
 - 4s - loss: 0.4971 - acc: 0.7619
Epoch 41/200
 - 4s - loss: 0.4942 - acc: 0.7634
Epoch 42/200
 - 4s - loss: 0.4947 - acc: 0.7623
Epoch 43/200
 - 4s - loss: 0.4920 - acc: 0.7627
Epoch 44/200
 - 4s - loss: 0.4914 - acc: 0.7634
Epoch 45/200
 - 4s - loss: 0.4896 - acc: 0.7651
Epoch 46/200
 - 4s - loss: 0.4880 - acc: 0.7670
Epoch 47/200
 - 4s - loss: 0.4865 - acc: 0.7667
Epoch 48/200
 - 4s - loss: 0.4906 - acc: 0.7659
Epoch 49/200
 - 4s - loss: 0.4893 - acc: 0.7669
Epoch 50/200
 - 4s - loss: 0.4933 - acc: 0.7648
Epoch 51/200
 - 4s - loss: 0.4879 - acc: 0.7680
Epoch 52/200
 - 4s - loss: 0.4879 - acc: 0.7681
Epoch 53/200
 - 4s - loss: 0.4865 - acc: 0.7692
Epoch 54/200
 - 4s - loss: 0.4845 - acc: 0.7696
Epoch 55/200
 - 4s - loss: 0.4852 - acc: 0.7691
Epoch 56/200
 - 4s - loss: 0.4821 - acc: 0.7712
Epoch 57/200
 - 4s - loss: 0.4844 - acc: 0.7692
Epoch 58/200
 - 4s - loss: 0.4782 - acc: 0.7731
Epoch 59/200
 - 4s - loss: 0.4776 - acc: 0.7738
Epoch 60/200
 - 4s - loss: 0.4777 - acc: 0.7733
Epoch 61/200
 - 4s - loss: 0.4772 - acc: 0.7733
Epoch 62/200
 - 4s - loss: 0.4768 - acc: 0.7741
Epoch 63/200
 - 4s - loss: 0.4734 - acc: 0.7757
Epoch 64/200
 - 4s - loss: 0.4725 - acc: 0.7770
Epoch 65/200
 - 4s - loss: 0.4707 - acc: 0.7782
Epoch 66/200
 - 4s - loss: 0.4701 - acc: 0.7777
Epoch 67/200
 - 4s - loss: 0.4674 - acc: 0.7793
Epoch 68/200
 - 4s - loss: 0.4666 - acc: 0.7802
Epoch 69/200
 - 4s - loss: 0.4652 - acc: 0.7798
Epoch 70/200
 - 4s - loss: 0.4648 - acc: 0.7804
Epoch 71/200
 - 4s - loss: 0.4640 - acc: 0.7814
Epoch 72/200
 - 4s - loss: 0.4640 - acc: 0.7811
Epoch 73/200
 - 4s - loss: 0.4605 - acc: 0.7836
Epoch 74/200
 - 4s - loss: 0.4617 - acc: 0.7820
Epoch 75/200
 - 4s - loss: 0.4598 - acc: 0.7834
Epoch 76/200
 - 4s - loss: 0.4599 - acc: 0.7832
Epoch 77/200
 - 4s - loss: 0.4575 - acc: 0.7847
Epoch 78/200
 - 4s - loss: 0.4563 - acc: 0.7853
Epoch 79/200
 - 4s - loss: 0.4581 - acc: 0.7836
Epoch 80/200
 - 4s - loss: 0.4565 - acc: 0.7851
Epoch 81/200
 - 4s - loss: 0.4549 - acc: 0.7861
Epoch 82/200
 - 4s - loss: 0.4534 - acc: 0.7870
Epoch 83/200
 - 4s - loss: 0.4533 - acc: 0.7876
Epoch 84/200
 - 4s - loss: 0.4536 - acc: 0.7871
Epoch 85/200
 - 4s - loss: 0.4522 - acc: 0.7882
Epoch 86/200
 - 4s - loss: 0.4503 - acc: 0.7884
Epoch 87/200
 - 4s - loss: 0.4498 - acc: 0.7897
Epoch 88/200
 - 4s - loss: 0.4487 - acc: 0.7893
Epoch 89/200
 - 4s - loss: 0.4489 - acc: 0.7890
Epoch 90/200
 - 4s - loss: 0.4494 - acc: 0.7890
Epoch 91/200
 - 4s - loss: 0.4479 - acc: 0.7899
Epoch 92/200
 - 4s - loss: 0.4468 - acc: 0.7908
Epoch 93/200
 - 4s - loss: 0.4476 - acc: 0.7902
Epoch 94/200
 - 4s - loss: 0.4426 - acc: 0.7939
Epoch 95/200
 - 4s - loss: 0.4442 - acc: 0.7930
Epoch 96/200
 - 4s - loss: 0.4403 - acc: 0.7953
Epoch 97/200
 - 4s - loss: 0.4436 - acc: 0.7929
Epoch 98/200
 - 4s - loss: 0.4407 - acc: 0.7948
Epoch 99/200
 - 4s - loss: 0.4418 - acc: 0.7950
Epoch 100/200
 - 4s - loss: 0.4601 - acc: 0.7886
Epoch 101/200
 - 4s - loss: 0.4545 - acc: 0.7913
Epoch 102/200
 - 4s - loss: 0.4562 - acc: 0.7895
Epoch 103/200
 - 4s - loss: 0.4569 - acc: 0.7879
Epoch 104/200
 - 4s - loss: 0.4516 - acc: 0.7910
Epoch 105/200
 - 4s - loss: 0.4516 - acc: 0.7910
Epoch 106/200
 - 4s - loss: 0.4638 - acc: 0.7840
Epoch 107/200
 - 4s - loss: 0.4592 - acc: 0.7868
Epoch 108/200
 - 4s - loss: 0.4649 - acc: 0.7850
Epoch 109/200
 - 4s - loss: 0.4640 - acc: 0.7861
Epoch 110/200
 - 4s - loss: 0.4616 - acc: 0.7878
Epoch 111/200
 - 4s - loss: 0.4576 - acc: 0.7889
Epoch 112/200
 - 4s - loss: 0.4551 - acc: 0.7898
Epoch 113/200
 - 4s - loss: 0.4532 - acc: 0.7910
Epoch 114/200
 - 4s - loss: 0.4514 - acc: 0.7910
Epoch 115/200
 - 4s - loss: 0.4478 - acc: 0.7934
Epoch 116/200
 - 4s - loss: 0.4485 - acc: 0.7925
Epoch 117/200
 - 4s - loss: 0.4453 - acc: 0.7946
Epoch 118/200
 - 4s - loss: 0.4452 - acc: 0.7936
Epoch 119/200
 - 4s - loss: 0.4439 - acc: 0.7945
Epoch 120/200
 - 4s - loss: 0.4410 - acc: 0.7960
Epoch 121/200
 - 4s - loss: 0.4427 - acc: 0.7951
Epoch 122/200
 - 4s - loss: 0.4411 - acc: 0.7952
Epoch 123/200
 - 4s - loss: 0.4400 - acc: 0.7955
Epoch 124/200
 - 4s - loss: 0.4372 - acc: 0.7974
Epoch 125/200
 - 4s - loss: 0.4378 - acc: 0.7966
Epoch 126/200
 - 4s - loss: 0.4360 - acc: 0.7973
Epoch 127/200
 - 4s - loss: 0.4344 - acc: 0.7985
Epoch 128/200
 - 4s - loss: 0.4316 - acc: 0.8000
Epoch 129/200
 - 4s - loss: 0.4344 - acc: 0.7976
Epoch 130/200
 - 4s - loss: 0.4335 - acc: 0.7984
Epoch 131/200
 - 4s - loss: 0.4303 - acc: 0.8008
Epoch 132/200
 - 4s - loss: 0.4341 - acc: 0.7980
Epoch 133/200
 - 4s - loss: 0.4331 - acc: 0.7978
Epoch 134/200
 - 4s - loss: 0.4296 - acc: 0.8006
Epoch 135/200
 - 4s - loss: 0.4279 - acc: 0.8015
Epoch 136/200
 - 4s - loss: 0.4277 - acc: 0.8012
Epoch 137/200
 - 4s - loss: 0.4296 - acc: 0.8000
Epoch 138/200
 - 4s - loss: 0.4253 - acc: 0.8032
Epoch 139/200
 - 4s - loss: 0.4264 - acc: 0.8026
Epoch 140/200
 - 4s - loss: 0.4292 - acc: 0.8005
Epoch 141/200
 - 4s - loss: 0.4341 - acc: 0.7956
Epoch 142/200
 - 4s - loss: 0.4346 - acc: 0.7946
Epoch 143/200
 - 4s - loss: 0.4349 - acc: 0.7942
Epoch 144/200
 - 4s - loss: 0.4463 - acc: 0.7891
Epoch 145/200
 - 4s - loss: 0.4489 - acc: 0.7876
Epoch 146/200
 - 4s - loss: 0.4474 - acc: 0.7882
Epoch 147/200
 - 4s - loss: 0.4441 - acc: 0.7905
Epoch 148/200
 - 4s - loss: 0.4460 - acc: 0.7889
Epoch 149/200
 - 4s - loss: 0.4426 - acc: 0.7906
Epoch 150/200
 - 4s - loss: 0.4411 - acc: 0.7915
Epoch 151/200
 - 4s - loss: 0.4367 - acc: 0.7946
Epoch 152/200
 - 4s - loss: 0.4392 - acc: 0.7929
Epoch 153/200
 - 4s - loss: 0.4391 - acc: 0.7937
Epoch 154/200
 - 4s - loss: 0.4421 - acc: 0.7920
Epoch 155/200
 - 4s - loss: 0.4398 - acc: 0.7931
Epoch 156/200
 - 4s - loss: 0.4394 - acc: 0.7935
Epoch 157/200
 - 4s - loss: 0.4386 - acc: 0.7935
Epoch 158/200
 - 4s - loss: 0.4362 - acc: 0.7952
Epoch 159/200
 - 4s - loss: 0.4369 - acc: 0.7948
Epoch 160/200
 - 4s - loss: 0.4344 - acc: 0.7962
Epoch 161/200
 - 4s - loss: 0.4319 - acc: 0.7975
Epoch 162/200
 - 4s - loss: 0.4324 - acc: 0.7966
Epoch 163/200
 - 4s - loss: 0.4307 - acc: 0.7981
Epoch 164/200
 - 4s - loss: 0.4295 - acc: 0.7985
Epoch 165/200
 - 4s - loss: 0.4280 - acc: 0.7997
Epoch 166/200
 - 4s - loss: 0.4279 - acc: 0.8000
Epoch 167/200
 - 4s - loss: 0.4268 - acc: 0.8000
Epoch 168/200
 - 4s - loss: 0.4278 - acc: 0.7995
Epoch 169/200
 - 4s - loss: 0.4244 - acc: 0.8014
Epoch 170/200
 - 4s - loss: 0.4235 - acc: 0.8019
Epoch 171/200
 - 4s - loss: 0.4242 - acc: 0.8007
Epoch 172/200
 - 4s - loss: 0.4229 - acc: 0.8023
Epoch 173/200
 - 4s - loss: 0.4238 - acc: 0.8014
Epoch 174/200
 - 4s - loss: 0.4237 - acc: 0.8017
Epoch 175/200
 - 4s - loss: 0.4202 - acc: 0.8036
Epoch 176/200
 - 4s - loss: 0.4359 - acc: 0.7952
Epoch 177/200
 - 4s - loss: 0.4371 - acc: 0.7935
Epoch 178/200
 - 4s - loss: 0.4340 - acc: 0.7955
Epoch 179/200
 - 4s - loss: 0.4354 - acc: 0.7955
Epoch 180/200
 - 4s - loss: 0.4367 - acc: 0.7952
Epoch 181/200
 - 4s - loss: 0.4339 - acc: 0.7964
Epoch 182/200
 - 4s - loss: 0.4369 - acc: 0.7964
Epoch 183/200
 - 4s - loss: 0.4481 - acc: 0.7963
Epoch 184/200
 - 4s - loss: 0.4412 - acc: 0.7996
Epoch 185/200
 - 4s - loss: 0.4403 - acc: 0.7982
Epoch 186/200
 - 4s - loss: 0.4388 - acc: 0.7985
Epoch 187/200
 - 4s - loss: 0.4360 - acc: 0.8002
Epoch 188/200
 - 4s - loss: 0.4433 - acc: 0.7952
Epoch 189/200
 - 4s - loss: 0.4400 - acc: 0.7971
Epoch 190/200
 - 4s - loss: 0.4426 - acc: 0.7955
Epoch 191/200
 - 4s - loss: 0.4406 - acc: 0.7968
Epoch 192/200
 - 4s - loss: 0.4408 - acc: 0.7968
Epoch 193/200
 - 4s - loss: 0.4396 - acc: 0.7972
Epoch 194/200
 - 4s - loss: 0.4397 - acc: 0.7970
Epoch 195/200
 - 4s - loss: 0.4383 - acc: 0.7981
Epoch 196/200
 - 4s - loss: 0.4387 - acc: 0.7976
Epoch 197/200
 - 4s - loss: 0.4368 - acc: 0.7990
Epoch 198/200
 - 4s - loss: 0.4402 - acc: 0.7965
Epoch 199/200
 - 4s - loss: 0.4360 - acc: 0.7991
Epoch 200/200
 - 4s - loss: 0.4360 - acc: 0.7997
0.43021377035252795 0.7874159560738833
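Training accuracy plateaus around 0.80 and occasionally regresses (e.g. around epochs 100 and 140), so 200 fixed epochs is likely wasteful. A sketch of validation-based early stopping (the patience and split values are illustrative choices, not tuned):

from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
model.fit(x_train, y_train, batch_size=batch_size, epochs=200,
          validation_split=0.1,        # hold out 10% of the training set
          callbacks=[early_stop], verbose=2)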
# Decision tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix

model=DecisionTreeClassifier()
model.fit(x_train,y_train)

predicted=model.predict(x_test)
accuracy=accuracy_score(y_test,predicted)
precision=precision_score(y_test,predicted,average='binary')
recall=recall_score(y_test,predicted,average='binary')
f1=f1_score(y_test,predicted,average='binary')
print("Accuracy:",accuracy)
print("Precision:",precision)
print("Recall:",recall)
print("F1:",f1)
Accuracy: 0.9642687274788915
Precision: 0.9260356995707014
Recall: 0.9901188017507626
F1: 0.9570056658845075
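confusion_matrix is imported above but never used; it is worth printing alongside the scores (assuming label 1 marks black traffic):

cm = confusion_matrix(y_test, predicted)
print(cm)  # rows: true class (0, 1); columns: predicted class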
# Gaussian naive Bayes
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix

model=GaussianNB()
model.fit(x_train,y_train)

predicted=model.predict(x_test)
accuracy=accuracy_score(y_test,predicted)
precision=precision_score(y_test,predicted,average='binary')
recall=recall_score(y_test,predicted,average='binary')
f1=f1_score(y_test,predicted,average='binary')
print("Accuracy:",accuracy)
print("Precision:",precision)
print("Recall:",recall)
print("F1:",f1)
Accuracy: 0.6075343690665084
Precision: 0.6645055620009555
Recall: 0.04612331129090323
F1: 0.08625936277745049
# Logistic regression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix

model=LogisticRegression(solver='liblinear')  # pin the pre-0.22 default solver to silence the FutureWarning
model.fit(x_train,y_train)

predicted=model.predict(x_test)
accuracy=accuracy_score(y_test,predicted)
precision=precision_score(y_test,predicted,average='binary')
recall=recall_score(y_test,predicted,average='binary')
f1=f1_score(y_test,predicted,average='binary')
print("Accuracy:",accuracy)
print("Precision:",precision)
print("Recall:",recall)
print("F1:",f1)
Accuracy: 0.632501170051254
Precision: 0.7340969576788603
Recall: 0.13327301665498228
F1: 0.2255907374295405
# Random forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix

model=RandomForestClassifier(n_estimators=10)  # pin the pre-0.22 default to silence the FutureWarning
model.fit(x_train,y_train)

predicted=model.predict(x_test)
accuracy=accuracy_score(y_test,predicted)
precision=precision_score(y_test,predicted,average='binary')
recall=recall_score(y_test,predicted,average='binary')
f1=f1_score(y_test,predicted,average='binary')
print("Accuracy:",accuracy)
print("Precision:",precision)
print("Recall:",recall)
print("F1:",f1)
Accuracy: 0.8054499655643452
Precision: 0.8850908164637123
Recall: 0.592535574208462
F1: 0.7098516898055539
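The pinned n_estimators=10 matches the old sklearn default; random forests usually benefit from more trees, so a variant worth trying (100 is the newer sklearn default; no results recorded here):

model = RandomForestClassifier(n_estimators=100, n_jobs=-1)  # more trees, all CPU cores
model.fit(x_train, y_train)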
# AdaBoost
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix

model=AdaBoostClassifier()
model.fit(x_train,y_train)

predicted=model.predict(x_test)
accuracy=accuracy_score(y_test,predicted)
precision=precision_score(y_test,predicted,average='binary')
recall=recall_score(y_test,predicted,average='binary')
f1=f1_score(y_test,predicted,average='binary')
print("Accuracy:",accuracy)
print("Precision:",precision)
print("Recall:",recall)
print("F1:",f1)
Accuracy: 0.7218074723468374
Precision: 0.7559626657830165
Recall: 0.45387195179718437
F1: 0.5672019061431679
# LSTM model
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.preprocessing import Normalizer
import pandas as pd
import numpy as np
np.random.seed(1337) # for reproducibility
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import LSTM

data = pd.read_csv("./data/train.csv")
data = shuffle(data)
# Select the same 18 numeric features for training
x = data.iloc[:,[2,3,4,5,6,7,10,11,12,13,14,15,16,17,18,20,21,22]]
y = data.iloc[:,-1]

# Split the dataset 7:3
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# Normalize each sample to unit norm
scaler = Normalizer().fit(x_train)
x_train = scaler.transform(x_train)

scaler = Normalizer().fit(x_test)
x_test = scaler.transform(x_test)

# Convert to numpy arrays
x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)
# reshape input to be [samples, time steps, features]
x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
x_test = np.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))


batch_size = 1024

# 1. define the network
model = Sequential()
model.add(LSTM(64, input_shape=(None, 18)))  # Keras 2 API; try using a GRU instead, for fun
model.add(Dropout(0.1))
model.add(Dense(1))
model.add(Activation('sigmoid'))
print(model.get_config())

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=50, verbose=2)  # `nb_epoch` was renamed `epochs`
loss, accuracy = model.evaluate(x_test, y_test)
print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100))
{'name': 'sequential_6', 'layers': [{'class_name': 'LSTM', 'config': {'name': 'lstm_4', 'trainable': True, 'batch_input_shape': (None, None, 18), 'dtype': 'float32', 'return_sequences': False, 'return_state': False, 'go_backwards': False, 'stateful': False, 'unroll': False, 'units': 64, 'activation': 'tanh', 'recurrent_activation': 'hard_sigmoid', 'use_bias': True, 'kernel_initializer': {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'uniform', 'seed': None}}, 'recurrent_initializer': {'class_name': 'Orthogonal', 'config': {'gain': 1.0, 'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'unit_forget_bias': True, 'kernel_regularizer': None, 'recurrent_regularizer': None, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'recurrent_constraint': None, 'bias_constraint': None, 'dropout': 0.0, 'recurrent_dropout': 0.0, 'implementation': 1}}, {'class_name': 'Dropout', 'config': {'name': 'dropout_4', 'trainable': True, 'rate': 0.1, 'noise_shape': None, 'seed': None}}, {'class_name': 'Dense', 'config': {'name': 'dense_12', 'trainable': True, 'units': 1, 'activation': 'linear', 'use_bias': True, 'kernel_initializer': {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'uniform', 'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': None, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}}, {'class_name': 'Activation', 'config': {'name': 'activation_6', 'trainable': True, 'activation': 'sigmoid'}}]}


Epoch 1/50
 - 6s - loss: 0.6720 - acc: 0.5977
Epoch 2/50
 - 5s - loss: 0.6670 - acc: 0.6082
Epoch 3/50
 - 5s - loss: 0.6618 - acc: 0.6252
Epoch 4/50
 - 5s - loss: 0.6599 - acc: 0.6294
Epoch 5/50
 - 5s - loss: 0.6590 - acc: 0.6307
Epoch 6/50
 - 5s - loss: 0.6584 - acc: 0.6316
Epoch 7/50
 - 5s - loss: 0.6580 - acc: 0.6323
Epoch 8/50
 - 5s - loss: 0.6576 - acc: 0.6326
Epoch 9/50
 - 5s - loss: 0.6573 - acc: 0.6326
Epoch 10/50
 - 5s - loss: 0.6570 - acc: 0.6327
Epoch 11/50
 - 5s - loss: 0.6569 - acc: 0.6329
Epoch 12/50
 - 5s - loss: 0.6566 - acc: 0.6331
Epoch 13/50
 - 5s - loss: 0.6564 - acc: 0.6334
Epoch 14/50
 - 5s - loss: 0.6562 - acc: 0.6334
Epoch 15/50
 - 5s - loss: 0.6559 - acc: 0.6339
Epoch 16/50
 - 5s - loss: 0.6556 - acc: 0.6344
Epoch 17/50
 - 5s - loss: 0.6554 - acc: 0.6345
Epoch 18/50
 - 5s - loss: 0.6551 - acc: 0.6349
Epoch 19/50
 - 5s - loss: 0.6549 - acc: 0.6353
Epoch 20/50
 - 5s - loss: 0.6544 - acc: 0.6359
Epoch 21/50
 - 5s - loss: 0.6541 - acc: 0.6364
Epoch 22/50
 - 5s - loss: 0.6536 - acc: 0.6372
Epoch 23/50
 - 5s - loss: 0.6529 - acc: 0.6378
Epoch 24/50
 - 5s - loss: 0.6522 - acc: 0.6385
Epoch 25/50
 - 5s - loss: 0.6513 - acc: 0.6391
Epoch 26/50
 - 5s - loss: 0.6506 - acc: 0.6393
Epoch 27/50
 - 5s - loss: 0.6497 - acc: 0.6398
Epoch 28/50
 - 5s - loss: 0.6488 - acc: 0.6401
Epoch 29/50
 - 5s - loss: 0.6476 - acc: 0.6405
Epoch 30/50
 - 5s - loss: 0.6465 - acc: 0.6406
Epoch 31/50
 - 5s - loss: 0.6452 - acc: 0.6409
Epoch 32/50
 - 5s - loss: 0.6442 - acc: 0.6410
Epoch 33/50
 - 5s - loss: 0.6432 - acc: 0.6412
Epoch 34/50
 - 5s - loss: 0.6421 - acc: 0.6415
Epoch 35/50
 - 5s - loss: 0.6412 - acc: 0.6418
Epoch 36/50
 - 5s - loss: 0.6399 - acc: 0.6422
Epoch 37/50
 - 5s - loss: 0.6389 - acc: 0.6424
Epoch 38/50
 - 5s - loss: 0.6378 - acc: 0.6428
Epoch 39/50
 - 5s - loss: 0.6365 - acc: 0.6431
Epoch 40/50
 - 5s - loss: 0.6353 - acc: 0.6436
Epoch 41/50
 - 5s - loss: 0.6341 - acc: 0.6437
Epoch 42/50
 - 5s - loss: 0.6331 - acc: 0.6444
Epoch 43/50
 - 5s - loss: 0.6317 - acc: 0.6446
Epoch 44/50
 - 5s - loss: 0.6305 - acc: 0.6451
Epoch 45/50
 - 5s - loss: 0.6289 - acc: 0.6459
Epoch 46/50
 - 5s - loss: 0.6278 - acc: 0.6465
Epoch 47/50
 - 5s - loss: 0.6264 - acc: 0.6470
Epoch 48/50
 - 5s - loss: 0.6252 - acc: 0.6482
Epoch 49/50
 - 5s - loss: 0.6239 - acc: 0.6484
Epoch 50/50
 - 5s - loss: 0.6224 - acc: 0.6497
525618/525618 [==============================] - 19s 35us/step

Loss: 0.62, Accuracy: 64.41%
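To compare the LSTM against the sklearn baselines on the same four metrics, its output probabilities can be thresholded (a sketch; a 0.5 cutoff is assumed):

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_prob = model.predict(x_test, batch_size=batch_size)
y_pred = (y_prob > 0.5).astype("int32").ravel()
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))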