MLP无法通过无特征分类的方式区分正常报文,XSS攻击报文与SQL注入攻击报文;用MLP神经网络直接训练报文ASCII码的方式,其可行性值得反思。

训练第一批样本:

1000 XSS攻击样本,3000 SQL注入样本,预测正常,XSS,SQL报文,分类结果:

(array([[0, 1]]), array([[0, 1]]), array([[0, 1]])),全部识别为SQL注入。

训练第二批样本:

10000 XSS攻击样本,3000 SQL注入样本,预测正常,XSS,SQL报文,分类结果:

(array([[1, 0]]), array([[1, 0]]), array([[1, 0]])),全部识别为XSS攻击。

完整代码:

import sys
import time
import urllib
try:
    from urllib.parse import unquote  # Python 3
except ImportError:
    from urllib import unquote  # Python 2

import numpy as np
import tensorflow as tf
import tflearn
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from tflearn.data_utils import to_categorical, pad_sequences
# Sample length limit in characters: load_file skips decoded lines longer than
# this, and train/my_test pass it as maxlen to pad_sequences.
NUM = 100
def elt(line):
    """Encode a string as a list of integer code points.

    The text is lowercased first, so upper- and lowercase letters map to the
    same values.

    Args:
        line: Text to encode.

    Returns:
        A list holding ``ord()`` of every character of ``line.lower()``;
        an empty list for an empty string.
    """
    # Lowercase the whole string instead of one character at a time: a few
    # characters (e.g. U+0130) lowercase to two code points, and calling
    # ord() on that two-character result raises TypeError.
    return [ord(c) for c in line.lower()]

def load_file(filename, label, ms=None, ns=None):
    """Read one sample per line from a file and collect encoded samples.

    Each line is stripped of surrounding whitespace and URL-decoded. Lines
    whose decoded text is longer than ``NUM`` characters are skipped; the
    rest are encoded with ``elt`` and appended to ``ms``, with the matching
    class label appended to ``ns``. The total number of collected samples is
    printed after the file has been read.

    Args:
        filename: Path of the text file to read.
        label: Class marker for every sample in the file; any truthy value
            is stored as 1, any falsy value as 0.
        ms: List that receives the encoded samples (mutated in place).
            A new list is created when omitted.
        ns: List that receives the labels (mutated in place). A new list is
            created when omitted.

    Returns:
        The ``(ms, ns)`` pair, so the result is also reachable when the
        lists were not supplied by the caller.
    """
    # Create fresh lists per call: a mutable default would be shared by
    # every call that omits the argument. Test against None (not falsiness)
    # so an empty list passed by the caller is still the one that is filled.
    if ms is None:
        ms = []
    if ns is None:
        ns = []
    n = 1 if label else 0
    with open(filename) as f:
        for line in f:
            line = unquote(line.strip())
            if len(line) <= NUM:
                ms.append(elt(line))
                ns.append(n)
    print(len(ms))
    return ms, ns

def load_files(file2, file3):
    """Load two sample files into one combined data set.

    Samples from ``file2`` are labelled 0 and samples from ``file3`` are
    labelled 1; both are loaded through ``load_file`` into the same lists.

    Args:
        file2: Path of the file whose samples get label 0.
        file3: Path of the file whose samples get label 1.

    Returns:
        A ``(samples, labels)`` tuple of parallel lists.
    """
    samples, labels = [], []
    for path, label in ((file2, 0), (file3, 1)):
        load_file(path, label, samples, labels)
    return samples, labels
def train(x, y):
    """Fit an MLP classifier on encoded samples and print its test score.

    The data is split 60/40 into training and test sets, every sample is
    passed through ``pad_sequences`` with ``maxlen=NUM`` and fill value 0,
    and the labels are converted with ``to_categorical`` into two classes.

    Args:
        x: List of encoded samples (lists of integers, as built by ``elt``).
        y: List of 0/1 labels, parallel to ``x``.

    Returns:
        The fitted ``MLPClassifier``.
    """
    # The TensorFlow graph context is retained from the original code.
    graph1 = tf.Graph()
    with graph1.as_default():
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.4, random_state=0)
        x_train = pad_sequences(x_train, maxlen=NUM, value=0.)
        x_test = pad_sequences(x_test, maxlen=NUM, value=0.)
        y_train = to_categorical(y_train, nb_classes=2)
        y_test = to_categorical(y_test, nb_classes=2)
        # solver must be the string 'sgd'; the bare name sgd is undefined
        # and raised NameError before any training could start.
        mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=10, alpha=1e-4,
                            solver='sgd', verbose=10, tol=1e-4, random_state=1,
                            learning_rate_init=.1)
        mlp.fit(x_train, y_train)
        n = mlp.score(x_test, y_test)
        print("score: %f" % n)
    return mlp

def my_test(mlp):
    """Classify three hard-coded requests and print the predictions.

    One normal path, one XSS payload and one SQL-injection payload are
    prepared the same way as the training data (URL-decoded for the two
    attack samples, encoded with ``elt``, passed through ``pad_sequences``
    with ``maxlen=NUM``) and handed to ``mlp.predict``.

    Args:
        mlp: A fitted classifier exposing ``predict``, as returned by
            ``train``.

    Returns:
        None. The three predictions are printed in the order
        normal, XSS, SQL.
    """
    normal = "/aikdhauhgda/ajdiajh/1.php"
    x_xss = "/0_1/?%22onmouseover=prompt(42873)bad=%22%3E"
    x_sql = "/wp-login.php?action=lostpassword%25%27%20LIMIT%201%2C1%20UNION%20ALL%20SELECT%20NULL%2C%20NULL%2C%20NULL%2C%20NULL%2C%20NU LL%2C%20NULL%2C%20NULL %2C%20NULL%23"

    x_xss = unquote(x_xss)
    x_sql = unquote(x_sql)

    normal = elt(normal)
    # Encode the XSS sample itself. The previous code encoded x_sql here, so
    # the XSS payload was never classified and the SQL payload was
    # predicted twice.
    x_xss = elt(x_xss)
    x_sql = elt(x_sql)

    normal = pad_sequences([normal], maxlen=NUM, value=0.)
    x_xss = pad_sequences([x_xss], maxlen=NUM, value=0.)
    x_sql = pad_sequences([x_sql], maxlen=NUM, value=0.)

    ans_normal = mlp.predict(normal)
    ans_xss = mlp.predict(x_xss)
    ans_sql = mlp.predict(x_sql)
    print(ans_normal, ans_xss, ans_sql)

if __name__ == "__main__":
    # Expects two sample files: the first is loaded with label 0, the
    # second with label 1 (see load_files).
    if len(sys.argv) < 3:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: %s <label0_samples_file> <label1_samples_file>"
                 % sys.argv[0])
    xs, ys = load_files(sys.argv[1], sys.argv[2])
    mlp = train(xs, ys)
    my_test(mlp)

推荐阅读:

相关文章