
Kernel: Python 3 (Ubuntu Linux)
import os
import re
import sys
import glob
import pickle

import numpy as np
import pandas as pd
from lxml import etree
from sklearn.model_selection import StratifiedKFold

from keras.models import Sequential, model_from_json
from keras.layers import Input, Dense, Flatten, BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import Adam, RMSprop, SGD

Data

# Map ns-3 header class names to short protocol labels.
proto_dict = {
    'ns3::WifiMacHeader': 'MAC',
    'ns3::LlcSnapHeader': 'LLC',
    'ns3::ArpHeader': 'ARP',
    'ns3::Ipv4Header': 'IPv4',
    'ns3::UdpHeader': 'UDP',
    'ns3::aodv::TypeHeader': 'AODV_Type',
    'ns3::aodv::RrepHeader': 'AODV_RREP',
    'ns3::aodv::RreqHeader': 'AODV_RREQ',
    'ns3::aodv::RrepAckHeader': 'AODV_RACK',
    'ns3::aodv::RerrHeader': 'AODV_RERR',
    'ns3::Icmpv4Header': 'ICMPv4_HEADER',
    'ns3::Icmpv4TimeExceeded': 'ICMPv4_TE',
    'ns3::Icmpv4DestinationUnreachable': 'ICMPv4_DU'
}

def parce_packets(input_file):
    """Parse an ns-3 trace file and extract per-packet header fields from the meta-info attributes."""
    with open(input_file, 'r') as f:
        data = f.read()
    data = data.split('meta-info="')
    meta_info = []
    data.pop(0)
    for b in data:
        meta_info.append(b.split('"')[0])
    packets = []
    for m in meta_info:
        # Cut out the raw text of every recognised header in this packet.
        raw = {}
        for proto in proto_dict:
            if m.find(proto) != -1:
                raw[proto_dict[proto]] = m.split(proto + ' (')[1].split(') ns3')[0]
        p = {}
        if 'MAC' in raw and 'LLC' in raw:
            p['MAC'] = {}
            p['MAC']['src'] = raw['MAC'].split('SA=')[1].split(',')[0]
            p['MAC']['dst'] = raw['MAC'].split('DA=')[1].split(',')[0]
            p['MAC']['type'] = int(raw['LLC'].split('type ')[1], 16)
        if 'ARP' in raw:
            p['ARP'] = {}
            if raw['ARP'].find('request') != -1:
                p['ARP']['type'] = 'request'
            else:
                p['ARP']['type'] = 'reply'
            p['ARP']['MAC_dst'] = raw['ARP'].split('dest mac: ')[1].split(' ')[0]
            p['ARP']['MAC_src'] = raw['ARP'].split('source mac: ')[1].split(' ')[0]
            p['ARP']['IP_src'] = raw['ARP'].split('source ipv4: ')[1].split(' ')[0]
            p['ARP']['IP_dst'] = raw['ARP'].split('dest ipv4: ')[1]
        if 'IPv4' in raw:
            p['IPv4'] = {}
            ipv4data = raw['IPv4'].split(' ')
            p['IPv4']['src'] = ipv4data[-3]
            p['IPv4']['dst'] = ipv4data[-1]
            p['IPv4']['proto'] = int(raw['IPv4'].split('protocol ')[1].split(' ')[0])
            p['IPv4']['ttl'] = int(raw['IPv4'].split('ttl ')[1].split(' ')[0])
            p['IPv4']['length'] = int(raw['IPv4'].split('length: ')[1].split(' ')[0])
        if 'UDP' in raw:
            p['UDP'] = {}
            p['UDP']['length'] = int(raw['UDP'].split('length: ')[1].split(' ')[0])
        if 'AODV_Type' in raw:
            p['AODV'] = {}
            p['AODV']['type'] = raw['AODV_Type']
        if 'AODV_RREP' in raw:
            p['AODV']['RREP'] = {}
            p['AODV']['RREP']['dst'] = raw['AODV_RREP'].split('destination: ipv4 ')[1].split(' ')[0]
            p['AODV']['RREP']['src'] = raw['AODV_RREP'].split('source ipv4 ')[1].split(' ')[0]
            p['AODV']['RREP']['SN'] = int(raw['AODV_RREP'].split('sequence number ')[1].split(' ')[0])
            p['AODV']['RREP']['lifetime'] = int(raw['AODV_RREP'].split('lifetime ')[1].split(' ')[0])
            p['AODV']['RREP']['acknowledgment'] = raw['AODV_RREP'].split('acknowledgment ')[1].split(' ')[0]
            p['AODV']['RREP']['flag'] = raw['AODV_RREP'].split('flag ')[1]
        if 'AODV_RREQ' in raw:
            p['AODV']['RREQ'] = {}
            p['AODV']['RREQ']['ID'] = int(raw['AODV_RREQ'].split('ID ')[1].split(' ')[0])
            p['AODV']['RREQ']['dst'] = raw['AODV_RREQ'].split('destination: ipv4 ')[1].split(' ')[0]
            p['AODV']['RREQ']['src'] = raw['AODV_RREQ'].split('source: ipv4 ')[1].split(' ')[0]
            p['AODV']['RREQ']['SN'] = int(raw['AODV_RREQ'].split('sequence number ')[1].split(' ')[0])
            p['AODV']['RREQ']['flags'] = raw['AODV_RREQ'].split('flags: ')[1]
        if 'AODV_RACK' in raw:
            pass  # seems to be empty
        if 'AODV_RERR' in raw:
            p['AODV']['RERR'] = raw['AODV_RERR']
        if 'ICMPv4_HEADER' in raw:
            p['ICMPv4'] = {}
            p['ICMPv4']['type'] = int(raw['ICMPv4_HEADER'].split('type=')[1].split(',')[0])
            p['ICMPv4']['code'] = int(raw['ICMPv4_HEADER'].split('code=')[1])
        if 'ICMPv4_TE' in raw:
            p['ICMPv4']['data'] = raw['ICMPv4_TE']
        if 'ICMPv4_DU' in raw:
            p['ICMPv4']['data'] = raw['ICMPv4_DU']
        if len(p) > 0:
            packets.append(p)
    return packets
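
A minimal usage sketch (the trace path below is hypothetical): each element returned by parce_packets is a nested dict keyed by protocol, so header fields can be read directly.

# Hypothetical trace path; any ns-3 trace containing meta-info="..." attributes will do.
packets = parce_packets('data/normal/output_0/trace.xml')
ipv4_packets = [p for p in packets if 'IPv4' in p]
print(len(packets), len(ipv4_packets))
# One entry is expected to look roughly like:
# {'MAC': {'src': '00:00:00:00:00:01', 'dst': '00:00:00:00:00:05', 'type': 2048},
#  'IPv4': {'src': '10.0.0.1', 'dst': '10.0.0.5', 'proto': 17, 'ttl': 64, 'length': 112},
#  'UDP': {'length': 92}}
print(ipv4_packets[0])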
def mac_to_id(mac):
    # Node id is encoded in the last octet of the MAC address (1-based in the trace).
    return int(mac.split(':')[-1], 16) - 1

def ip_to_id(ip):
    # Node id is encoded in the last octet of the IPv4 address (1-based in the trace).
    return int(ip.split('.')[-1]) - 1

def entropy(arr):
    # Shannon entropy (in bits) of the distribution induced by arr,
    # divided by log(len(arr)) as a normalisation factor.
    s = arr.sum()
    arr = arr / s
    arr = arr * np.log(arr) / np.log(2)
    return -(arr.sum() / np.log(len(arr)))
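
A quick worked example of the normalised entropy feature (toy numbers, not taken from the data): a uniform count vector gives the largest value, a concentrated one a smaller value, and any zero count yields NaN (from 0 * log 0), which is why NaNs are zero-filled before training.

counts_uniform = np.array([5.0, 5.0, 5.0, 5.0])
counts_skewed = np.array([17.0, 1.0, 1.0, 1.0])
print(entropy(counts_uniform))  # 2 bits / ln(4) ≈ 1.44
print(entropy(counts_skewed))   # ≈ 0.85 bits / ln(4) ≈ 0.61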
def load_data(path, rng):
    """Aggregate routing-table, FlowMonitor and packet-trace statistics for every
    simulation run under `path` into one feature table indexed by (Type, Test)."""
    routes = list()
    flows = list()
    packets = list()
    for i in range(rng):
        # Routing tables (.routes): one snapshot per node per time step.
        for routes_file in glob.glob('{}/output_{}/*.routes'.format(path, i)):
            info = re.findall(r'(\w+)/output_(\d+)', routes_file)
            handle = open(routes_file, 'r')
            data = handle.read()
            handle.close()
            nodes = re.split('\n\n', data)
            nodes.pop()
            for node in nodes:
                header = re.findall(r'Node:\s+(\d+)\s+Time:\s+(\d+)', node)
                lines = re.findall(r'(\d{1,3}(?:\.\d{1,3}){3})\s+(\d{1,3}(?:\.\d{1,3}){3})\s+(\d{1,3}(?:\.\d{1,3}){3})\s+(\w+)\s+(-?\d+\.\d+)\s+(\d+)', node)
                for line in lines:
                    l = list(line)
                    l[4] = float(line[4])
                    l[5] = int(line[5])
                    routes.append(header[0] + tuple(l) + info[0])
        # FlowMonitor statistics (.flowmon).
        for flowmon_file in glob.glob('{}/output_{}/*.flowmon'.format(path, i)):
            info = re.findall(r'(\w+)/output_(\d+)', flowmon_file)
            with open(flowmon_file) as fobj:
                xml = fobj.read()
            root = etree.fromstring(xml)
            for flow in root.xpath('/FlowMonitor/FlowStats/Flow'):
                attributes = list()
                for attrib in flow.attrib:
                    attr = flow.attrib[attrib]
                    if 'ns' in attr:
                        attr = re.findall(r'(\d+)', attr)[0]
                    attributes.append(int(attr))
                flows.append(tuple(attributes) + info[0])
        # Packet traces (.xml): per-run MAC/IP send-receive statistics.
        for packets_file in glob.glob('{}/output_{}/*.xml'.format(path, i)):
            if "routingtable-wireless.xml" in packets_file:
                continue
            info = re.findall(r'(\w+)/output_(\d+)', packets_file)
            packets_dict = parce_packets(packets_file)
            table_packets = []
            for p in packets_dict:
                if 'IPv4' in p:
                    table_packets.append([mac_to_id(p['MAC']['src']),
                                          mac_to_id(p['MAC']['dst']),
                                          ip_to_id(p['IPv4']['src']),
                                          ip_to_id(p['IPv4']['dst'])])
            table_packets = pd.DataFrame(table_packets, columns=['mac_src', 'mac_dst', 'ip_src', 'ip_dst'])
            table_packets = table_packets[table_packets['ip_dst'] != 254]  # drop broadcast destinations
            table_packets['input'] = 0
            table_packets['output'] = 0
            input_cnt = table_packets.groupby('mac_src').agg({'input': 'count'})
            output_cnt = table_packets.groupby('mac_dst').agg({'output': 'count'})
            io_mac = input_cnt.join(output_cnt)
            io_mac["diff"] = io_mac['input'] - io_mac['output']
            io_mac["normalized"] = io_mac["diff"] - io_mac["diff"].min() + 1
            input = io_mac.agg({'input': [entropy, 'min', 'max', 'mean', 'var']}).values.flatten()
            output = io_mac.agg({'output': [entropy, 'min', 'max', 'mean', 'var']}).values.flatten()
            diff = io_mac.agg({'diff': ['min', 'max', 'mean', 'var']}).values.flatten()
            normalized = io_mac.agg({'normalized': [entropy, 'min', 'max', 'mean', 'var']}).values.flatten()
            src_count = table_packets[table_packets['mac_src'] == table_packets['ip_src']].count()[0]
            dst_count = table_packets[table_packets['mac_dst'] == table_packets['ip_dst']].count()[0]
            packets.append(np.concatenate((input, output, diff, normalized, [src_count, dst_count], info[0])))
    # Aggregate routing-table statistics per (attack type, test run).
    routes_table = pd.DataFrame(routes, columns=['Node', 'Time', 'Destination', 'Gateway', 'Interface',
                                                 'Flag', 'Expire', 'Hops', 'Type', 'Test'])
    # print(routes_table)
    flag_count = routes_table.groupby(['Type', 'Test', 'Time', 'Flag']).agg({'Flag': ['count']})
    flag_count = flag_count.reset_index(col_level=1)
    flag_count.columns = flag_count.columns.droplevel()
    flag_agg = flag_count.groupby(['Type', 'Test', 'Flag']).agg({'count': ['max', 'min', 'mean', 'var']}).unstack().reset_index()
    flag_agg.columns = [col[0] + col[1] + col[2] for col in flag_agg.columns]
    flag_agg = flag_agg.set_index(['Type', 'Test'])
    hops_node_dest_agg = routes_table.groupby(['Type', 'Test', 'Node', 'Destination']).agg({'Hops': ['min', 'max', 'mean']})
    hops_node_agg = hops_node_dest_agg.reset_index().groupby(["Type", "Test", "Node"]).agg(['min', 'max', 'mean']).reset_index(col_level=1)
    hops_node_agg.columns = hops_node_agg.columns.droplevel()
    hops_agg = hops_node_agg.reset_index().groupby(["Type", "Test"]).agg(['min', 'max', 'mean']).reset_index(col_level=1)
    hops_agg.columns = [col[0] + col[1] + col[2] for col in hops_agg.columns]
    hops_agg = hops_agg.set_index(['Type', 'Test'])
    # Aggregate FlowMonitor statistics per (attack type, test run).
    flows_table = pd.DataFrame(flows, columns=['flowId', 'timeFirstTxPacket', 'timeFirstRxPacket',
                                               'timeLastTxPacket', 'timeLastRxPacket', 'delaySum',
                                               'jitterSum', 'lastDelay', 'txBytes', 'rxBytes',
                                               'txPackets', 'rxPackets', 'lostPackets',
                                               'timesForwarded', 'Type', 'Test'])
    lost_agg = flows_table.groupby(['Type', 'Test']).agg({'lostPackets': ['sum', 'mean']})
    forwarded_agg = flows_table.groupby(['Type', 'Test']).agg({'timesForwarded': ['sum', 'max', 'mean', 'var']})
    packets_table = pd.DataFrame(packets, columns=["entropy_input", "min_input", "max_input", "mean_input", "var_input",
                                                   "entropy_output", "min_output", "max_output", "mean_output", "var_output",
                                                   "min_diff", "max_diff", "mean_diff", "var_diff",
                                                   "entropy_normalized", "min_normalized", "max_normalized",
                                                   "mean_normalized", "var_normalized",
                                                   "src_count", "dst_count", 'Type', 'Test'])
    packets_table = packets_table.set_index(['Type', 'Test'])
    return flag_agg.join(hops_agg).join(lost_agg).join(forwarded_agg).join(packets_table)
normal = load_data("data/normal", 100)
blackhole = load_data("data/blackhole", 100)
grayhole = load_data("data/greyhole", 100)
wormhole = load_data("data/wormhole", 100)
ddos = load_data("data/ddos", 100)
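
Each call returns a DataFrame indexed by (Type, Test) with one row per simulation run, so the per-class tables can be sanity-checked before concatenation (the shapes below are what I would expect with 100 complete runs per class, not captured output):

print(normal.shape, blackhole.shape)   # expected: (100, n_features) each
print(normal.columns.tolist()[:5])     # first few aggregated feature names
normal.head()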
# X stacks four of the five loaded classes (the wormhole runs are loaded above but not used here);
# labels: 0 = normal (100 runs), 1 = attack (3 x 100 runs).
X = np.concatenate((normal, blackhole, grayhole, ddos)).astype(float)
y = np.concatenate((np.zeros((100, 1)), np.ones((300, 1))))
indexes = np.arange(len(y))
np.random.shuffle(indexes)
X = X[indexes]
y = y[indexes]
X[np.isnan(X)] = 0

Model

class GAN:
    def __init__(self, shape):
        self.SHAPE = shape
        self.OPTIMIZER = Adam()
        self.compile_models()

    def __generator(self):
        # Maps a 1000-dimensional noise vector to a synthetic feature vector.
        model = Sequential()
        model.add(Dense(2048, input_shape=(1000,)))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1024))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(256))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(self.SHAPE, activation='sigmoid'))
        # Alternative generator kept for reference:
        # model = Sequential()
        # model.add(Dense(2048, input_shape=(1000,)))
        # model.add(Dense(4096))
        # model.add(Dense(4096))
        # model.add(Dense(4096))
        # model.add(Dense(4096))
        # model.add(Dense(4096))
        # model.add(Dense(4096))
        # model.add(Dense(self.SHAPE, activation='sigmoid'))
        return model

    def __discriminator(self):
        # Binary classifier over feature vectors; also serves as the final attack detector.
        model = Sequential()
        model.add(Dense(10, input_shape=(self.SHAPE,)))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(256))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(1024))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(1, activation='sigmoid'))
        # Alternative discriminator kept for reference:
        # model = Sequential()
        # model.add(Dense(10, input_shape=(self.SHAPE,)))
        # model.add(Dense(4096))
        # model.add(Dense(4096))
        # model.add(Dense(4096))
        # model.add(Dense(4096))
        # model.add(Dense(4096))
        # model.add(Dense(4096))
        # model.add(Dense(1, activation='sigmoid'))
        return model

    def __stacked(self, generator, discriminator):
        # Generator followed by a frozen discriminator, used to train the generator.
        discriminator.trainable = False
        model = Sequential()
        model.add(generator)
        model.add(discriminator)
        return model

    def compile_models(self):
        self.generator = self.__generator()
        self.discriminator = self.__discriminator()
        self.stacked = self.__stacked(self.generator, self.discriminator)
        self.generator.compile(loss='binary_crossentropy', optimizer=self.OPTIMIZER)
        self.discriminator.compile(loss='binary_crossentropy', optimizer=self.OPTIMIZER, metrics=['accuracy'])
        self.stacked.compile(loss='binary_crossentropy', optimizer=self.OPTIMIZER)

    def save_models(self):
        self.generator.save_weights("generator_weights.h5")
        self.discriminator.save_weights("discriminator_weights.h5")
        self.stacked.save_weights("stacked_weights.h5")
        with open("generator_model.json", "w") as json_file:
            json_file.write(self.generator.to_json())
        with open("discriminator_model.json", "w") as json_file:
            json_file.write(self.discriminator.to_json())
        with open("stacked_model.json", "w") as json_file:
            json_file.write(self.stacked.to_json())

    def load_models(self):
        with open("generator_model.json", "r") as json_file:
            self.generator = model_from_json(json_file.read())
        self.generator.load_weights("generator_weights.h5")
        with open("discriminator_model.json", "r") as json_file:
            self.discriminator = model_from_json(json_file.read())
        self.discriminator.load_weights("discriminator_weights.h5")
        with open("stacked_model.json", "r") as json_file:
            self.stacked = model_from_json(json_file.read())
        self.stacked.load_weights("stacked_weights.h5")
        self.generator.compile(loss='binary_crossentropy', optimizer=self.OPTIMIZER)
        self.discriminator.compile(loss='binary_crossentropy', optimizer=self.OPTIMIZER, metrics=['accuracy'])
        self.stacked.compile(loss='binary_crossentropy', optimizer=self.OPTIMIZER)

    def train(self, X, y, epochs=200, batch=100, debug=False):
        for cnt in range(epochs):
            # Train the discriminator: real samples keep their normal/attack labels,
            # generated samples are labelled 0.
            random_index = np.random.randint(0, len(y) - batch)
            X_batch = X[random_index: random_index + batch]
            y_batch = y[random_index: random_index + batch]
            gen_noise = np.random.normal(0, 1, (batch, 1000))
            syntetic = self.generator.predict(gen_noise)
            x_combined_batch = np.concatenate((X_batch, syntetic))
            y_combined_batch = np.concatenate((y_batch, np.zeros((batch, 1))))
            d_loss = self.discriminator.train_on_batch(x_combined_batch, y_combined_batch)
            # Train the generator through the stacked model with mislabelled (all-ones) targets.
            noise = np.random.normal(0, 1, (batch, 1000))
            y_mislabled = np.ones((batch, 1))
            g_loss = self.stacked.train_on_batch(noise, y_mislabled)
            if debug:
                print('epoch: %d, [Discriminator :: d_loss: %f], [ Generator :: loss: %f]' % (cnt, d_loss[0], g_loss))
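
A quick smoke test of the class on random data (all sizes here are arbitrary) to check that the generator, discriminator and stacked model wire together before running on the real features:

# Arbitrary toy dimensions: 200 samples, 57 features.
X_dummy = np.random.rand(200, 57)
y_dummy = np.random.randint(0, 2, size=(200, 1))
gan_test = GAN(X_dummy.shape[1])
gan_test.train(X_dummy, y_dummy, epochs=2, batch=10, debug=True)
print(gan_test.discriminator.predict(X_dummy[:5]))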

Train

gan = GAN(X.shape[1])
gan.train(X, y, debug=True, epochs=100, batch=20)

Prediction

predict = gan.discriminator.predict(X)
np.mean(y == (predict > 0.5))

Metrics

from sklearn import metrics

print(metrics.confusion_matrix(y, (predict > 0.5)))
metrics.accuracy_score(y, (predict > 0.5))
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=17)
cvscores = []
for train, test in kfold.split(X, y):
    gan = GAN(X.shape[1])
    gan.train(X[train], y[train], epochs=100, batch=20, debug=False)
    predict = gan.discriminator.predict(X[test])
    acc = metrics.f1_score(y[test], (predict > 0.5))  # F1 score on the held-out fold
    cvscores.append(acc)
    print(acc)
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores) * 100, np.std(cvscores) * 100))
metrics.f1_score(y, (predict > 0.5))
predict.max()
gan.save_models()
gan.load_models()
predict = gan.discriminator.predict(X)
np.mean(y == (predict > 0.5))