Ddos Dataset: Import As Import As Import As Import As From Import
Ddos Dataset: Import As Import As Import As Import As From Import
DDos Dataset
          In [1]: import         matplotlib.pyplot as plt
import pandas as pd
import numpy as np
drive.mount('/content/drive')
Mounted at /content/drive
drive.mount('/content/drive',force_remount=True)
Mounted at /content/drive
In [4]: df =pd.read_csv('/content/drive/MyDrive/DDoS/compiled.csv')
                      /usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882:
                      DtypeWarning: Columns (85) have mixed types.Specify dtype option on import or
                      set low_memory=False.
In [5]: df.shape
In [6]: # df.to_csv('/content/drive/MyDrive/DDoS/compiled.csv',index=False)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                             1/51
6/20/22, 12:53 PM                                                            Copy_of_DDoS
In [7]: np.array(df.dtypes)
In [8]: df.head()
          Out[8]:
                           Unnamed:                                   Source      Destination    Destination
                                               Flow ID   Source IP                                             Protocol       Timestam
                                  0                                     Port               IP           Port
                                           172.16.0.5-
                                                                                                                               2018-12-0
                       0        12368    192.168.50.1-   172.16.0.5         550   192.168.50.1         1068         17
                                                                                                                          11:06:24.33969
                                          550-1068-17
                                           172.16.0.5-
                                                                                                                               2018-12-0
                       1        24112    192.168.50.1-   172.16.0.5         939   192.168.50.1        62932         17
                                                                                                                          11:06:21.1350
                                         939-62932-17
                                           172.16.0.5-
                                                                                                                               2018-12-0
                       2        23589    192.168.50.1-   172.16.0.5         564   192.168.50.1        32767         17
                                                                                                                          11:06:08.77624
                                         564-32767-17
                                           172.16.0.5-
                                                                                                                               2018-12-0
                       3        11258    192.168.50.1-   172.16.0.5         564   192.168.50.1        42118         17
                                                                                                                          11:06:19.0182
                                         564-42118-17
                                           172.16.0.5-
                                                                                                                               2018-12-0
                       4         9526    192.168.50.1-   172.16.0.5         559   192.168.50.1        10300         17
                                                                                                                          11:06:11.8384
                                         559-10300-17
5 rows × 88 columns
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                                                              2/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
In [9]: np.array(df.columns)
Out[9]: array(['Unnamed: 0', 'Flow ID', ' Source IP', ' Source Port',
' Destination IP', ' Destination Port', ' Protocol', ' Timestamp',
' Flow Duration', ' Total Fwd Packets', ' Total Backward Packets',
' Fwd Packet Length Max', ' Fwd Packet Length Min',
' Fwd Packet Length Mean', ' Fwd Packet Length Std',
' Bwd Packet Length Mean', ' Bwd Packet Length Std',
' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total',
' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', ' Fwd IAT Min',
'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std', ' Bwd IAT Max',
' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags',
' Fwd URG Flags', ' Bwd URG Flags', ' Fwd Header Length',
' Min Packet Length', ' Max Packet Length', ' Packet Length Mean',
' Packet Length Std', ' Packet Length Variance', 'FIN Flag Count',
' SYN Flag Count', ' RST Flag Count', ' PSH Flag Count',
' ACK Flag Count', ' URG Flag Count', ' CWE Flag Count',
' ECE Flag Count', ' Down/Up Ratio', ' Average Packet Size',
' Avg Fwd Segment Size', ' Avg Bwd Segment Size',
'Bwd Avg Bulk Rate', 'Subflow Fwd Packets', ' Subflow Fwd Bytes',
' Active Std', ' Active Max', ' Active Min', 'Idle Mean',
' Idle Std', ' Idle Max', ' Idle Min', 'SimillarHTTP', ' Inbound',
print(df[col].value_counts(dropna=False,normalize=True).head())
df[col].replace(np.inf,-1,inplace=True)
return df
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                       3/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
if(df[col].dtype != object):
print("*"*20)
print("Column: ",col)
IsInt = False
mx = df[col].max()
mn = df[col].min()
if not np.isfinite(df[col]).all():
df = Pre_Process_data(df,col)
asint = df[col].fillna(0).astype(np.int64)
result = (df[col]-asint)
result = result.sum()
IsInt = True
if IsInt:
if mn>=0:
if mx<255:
df[col] = df[col].astype(np.uint8)
elif mx<65535:
df[col] = df[col].astype(np.uint16)
elif mx<4294967295:
df[col] = df[col].astype(np.uint32)
else:
df[col] = df[col].astype(np.uint64)
else:
df[col] = df[col].astype(np.int8)
df[col] = df[col].astype(np.int16)
df[col] = df[col].astype(np.int32)
df[col] = df[col].astype(np.int64)
else:
df[col] = df[col].astype(np.float32)
print("*"*20)
return df
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                          4/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
In [12]: df = reduce_mem_usage(df)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  5/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
********************
Column: Unnamed: 0
********************
********************
********************
********************
********************
********************
********************
********************
********************
Column: Protocol
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
0.000000e+00 0.162598
2.944000e+09 0.112522
4.580000e+08 0.102990
1.472000e+09 0.039172
2.290000e+08 0.020842
********************
********************
2.000000e+06 0.487212
1.000000e+06 0.108950
inf 0.033670
4.166667e+04 0.019860
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  7/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
6.666667e+05 0.014640
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  8/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  9/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  10/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  11/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
Column: Init_Win_bytes_forward
********************
********************
Column: Init_Win_bytes_backward
********************
********************
Column: act_data_pkt_fwd
********************
********************
Column: min_seg_size_forward
********************
********************
********************
********************
********************
********************
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  12/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
Column: Inbound
********************
********************
Visualisation
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  13/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
In [13]: data_ = df
data = df
df[' Label'].value_counts()
sizes
Out[14]: array([39995, 39990, 39985, 39980, 39900, 39854, 39789, 39637, 39225,
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  14/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
explode = (0.3,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.3,0.2,0.1)
plt.rcParams.update({'font.size': 22})
plt.figure(figsize=(10,10))
plt.axis('equal')
plt.show()
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                          15/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
In [16]: plt.figure(figsize=(40,20))
gt = g1.twinx()
In [17]: plt.figure(figsize=(40,20))
gt = g1.twinx()
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                      16/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
In [18]: plt.figure(figsize=(40,16))
gt = g1.twinx()
In [19]: plt.figure(figsize=(40,16))
gt = g1.twinx()
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                      17/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
In [20]: plt.figure(figsize=(20,16))
gt = g1.twinx()
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                    18/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
In [21]: plt.figure(figsize=(20,16))
gt = g1.twinx()
                  df = df.drop(['Flow ID',' Source IP',' Source Port', ' Destination IP',' Desti
                  nation Port',' Timestamp','Fwd Packets/s','Flow Bytes/s','SimillarHTTP',' Labe
                  l'],axis=1)
X = StandardScaler().fit_transform(df)
X_norm = preprocessing.normalize(X)
In [24]: X_norm.shape
Out[25]: 1 395654
0 4346
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                      19/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
In [26]: f = plt.figure(figsize=(20,15))
plt.matshow(df.corr(),fignum=f.number)
plt.xticks(range(df.shape[1]),df.columns,fontsize=10,rotation=90)
plt.yticks(range(df.shape[1]),df.columns,fontsize=10)
cb = plt.colorbar()
cb.ax.tick_params(labelsize=14)
In [26]:
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  20/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
total = 0
count=0
for i in df.columns:
for j in df.columns:
if i != j :
corr, _ = stats.pearsonr(data_[i],data_[j])
total=total+1
if corr>0.9:
count = count+1
print(count,total)
print(count/total)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                     21/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
                      /usr/local/lib/python3.7/dist-packages/scipy/stats/stats.py:3508: PearsonRCon
                      stantInputWarning: An input array is constant; the correlation coefficent is
                      not defined.
warnings.warn(PearsonRConstantInputWarning())
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                             22/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
Person correlation between Flow Duration and Fwd IAT Total :1.000
Person correlation between Total Fwd Packets and Subflow Fwd Packets :1.000
                      Person correlation          between      Total Backward Packets and Subflow Bwd Packets :
                      1.000
                      Person correlation          between Total Length of Fwd Packets and               Subflow Fwd Bytes
                      :1.000
                      Person correlation          between      Total Length of Bwd Packets and           Subflow Bwd Byte
                      s :1.000
                      Person correlation          between      Fwd Packet Length Max and           Fwd Packet Length Min
                      :0.993
                      Person correlation          between      Fwd Packet Length Max and           Fwd Packet Length Mean
                      :0.997
                      Person correlation          between      Fwd Packet Length Max and           Min Packet Length :0.9
                      92
                      Person correlation          between      Fwd Packet Length Max and           Max Packet Length :0.9
                      77
                      Person correlation          between      Fwd Packet Length Max and           Packet Length Mean :0.
                      997
                      Person correlation          between      Fwd Packet Length Max and           Average Packet Size :
                      0.993
                      Person correlation          between      Fwd Packet Length Max and           Avg Fwd Segment Size :
                      0.997
                      Person correlation          between      Fwd Packet Length Min and           Fwd Packet Length Max
                      :0.993
                      Person correlation          between      Fwd Packet Length Min and           Fwd Packet Length Mean
                      :0.997
                      Person correlation          between      Fwd Packet Length Min and           Min Packet Length :1.0
                      00
                      Person correlation          between      Fwd Packet Length Min and           Max Packet Length :0.9
                      65
                      Person correlation          between      Fwd Packet Length Min and           Packet Length Mean :0.
                      997
                      Person correlation          between      Fwd Packet Length Min and           Average Packet Size :
                      0.996
                      Person correlation          between      Fwd Packet Length Min and           Avg Fwd Segment Size :
                      0.997
                      Person correlation          between      Fwd Packet Length Mean and           Fwd Packet Length Max
                      :0.997
                      Person correlation          between      Fwd Packet Length Mean and           Fwd Packet Length Min
                      :0.997
                      Person correlation          between      Fwd Packet Length Mean and           Min Packet Length :0.
                      997
                      Person correlation          between      Fwd Packet Length Mean and           Max Packet Length :0.
                      970
                      Person correlation          between      Fwd Packet Length Mean and           Packet Length Mean :
                      0.999
                      Person correlation          between      Fwd Packet Length Mean and           Average Packet Size :
                      0.996
                      Person correlation          between      Fwd Packet Length Mean and           Avg Fwd Segment Size
                      :1.000
                      Person correlation          between      Bwd Packet Length Mean and           Avg Bwd Segment Size
                      :1.000
Person correlation between Flow IAT Mean and Flow IAT Std :0.984
Person correlation between Flow IAT Mean and Flow IAT Max :0.954
Person correlation between Flow IAT Mean and Fwd IAT Mean :0.991
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                                                   23/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
Person correlation between Flow IAT Mean and Fwd IAT Std :0.975
Person correlation between Flow IAT Mean and Fwd IAT Max :0.954
Person correlation between Flow IAT Mean and Idle Mean :0.952
Person correlation between Flow IAT Mean and Idle Max :0.954
Person correlation between Flow IAT Std and Flow IAT Mean :0.984
Person correlation between Flow IAT Std and Flow IAT Max :0.969
Person correlation between Flow IAT Std and Fwd IAT Mean :0.986
Person correlation between Flow IAT Std and Fwd IAT Std :0.998
Person correlation between Flow IAT Std and Fwd IAT Max :0.969
Person correlation between Flow IAT Std and Idle Mean :0.978
Person correlation between Flow IAT Std and Idle Max :0.969
Person correlation between Flow IAT Std and Idle Min :0.926
Person correlation between Flow IAT Max and Flow IAT Mean :0.954
Person correlation between Flow IAT Max and Flow IAT Std :0.969
Person correlation between Flow IAT Max and Fwd IAT Mean :0.968
Person correlation between Flow IAT Max and Fwd IAT Std :0.974
Person correlation between Flow IAT Max and Fwd IAT Max :1.000
Person correlation between Flow IAT Max and Idle Mean :0.968
Person correlation between Flow IAT Max and Idle Max :0.998
Person correlation between Flow IAT Min and Fwd IAT Min :0.999
Person correlation between Fwd IAT Total and Flow Duration :1.000
Person correlation between Fwd IAT Mean and Flow IAT Mean :0.991
Person correlation between Fwd IAT Mean and Flow IAT Std :0.986
Person correlation between Fwd IAT Mean and Flow IAT Max :0.968
Person correlation between Fwd IAT Mean and Fwd IAT Std :0.985
Person correlation between Fwd IAT Mean and Fwd IAT Max :0.969
Person correlation between Fwd IAT Mean and Idle Mean :0.963
Person correlation between Fwd IAT Mean and Idle Max :0.968
Person correlation between Fwd IAT Std and Flow IAT Mean :0.975
Person correlation between Fwd IAT Std and Flow IAT Std :0.998
Person correlation between Fwd IAT Std and Flow IAT Max :0.974
Person correlation between Fwd IAT Std and Fwd IAT Mean :0.985
Person correlation between Fwd IAT Std and Fwd IAT Max :0.974
Person correlation between Fwd IAT Std and Idle Mean :0.984
Person correlation between Fwd IAT Std and Idle Max :0.973
Person correlation between Fwd IAT Std and Idle Min :0.933
Person correlation between Fwd IAT Max and Flow IAT Mean :0.954
Person correlation between Fwd IAT Max and Flow IAT Std :0.969
Person correlation between Fwd IAT Max and Flow IAT Max :1.000
Person correlation between Fwd IAT Max and Fwd IAT Mean :0.969
Person correlation between Fwd IAT Max and Fwd IAT Std :0.974
Person correlation between Fwd IAT Max and Idle Mean :0.968
Person correlation between Fwd IAT Max and Idle Max :0.998
Person correlation between Fwd IAT Min and Flow IAT Min :0.999
Person correlation between Bwd IAT Total and Bwd IAT Max :0.919
Person correlation between Bwd IAT Mean and Bwd IAT Std :0.995
Person correlation between Bwd IAT Mean and Bwd IAT Max :0.958
Person correlation between Bwd IAT Std and Bwd IAT Mean :0.995
Person correlation between Bwd IAT Std and Bwd IAT Max :0.976
Person correlation between Bwd IAT Max and Bwd IAT Total :0.919
Person correlation between Bwd IAT Max and Bwd IAT Mean :0.958
Person correlation between Bwd IAT Max and Bwd IAT Std :0.976
Person correlation between Fwd PSH Flags and RST Flag Count :1.000
Person correlation between Fwd Header Length and Fwd Header Length.1 :1.000
                      Person    correlation       between Min Packet Length and Fwd Packet Length Max :0.9
                      92
Person correlation between Min Packet Length and Fwd Packet Length Min :1.0
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                                               24/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
00
                      Person correlation          between      Min Packet Length and          Fwd Packet Length Mean :0.
                      997
Person correlation between Min Packet Length and Max Packet Length :0.964
Person correlation between Min Packet Length and Packet Length Mean :0.997
Person correlation between Min Packet Length and Average Packet Size :0.996
                      Person correlation          between      Min   Packet    Length   and   Avg Fwd Segment Size :0.99
                      7
                      Person correlation          between      Max Packet Length and          Fwd Packet Length Max :0.9
                      77
                      Person correlation          between      Max Packet Length and          Fwd Packet Length Min :0.9
                      65
                      Person correlation          between      Max Packet Length and          Fwd Packet Length Mean :0.
                      970
Person correlation between Max Packet Length and Min Packet Length :0.964
Person correlation between Max Packet Length and Packet Length Mean :0.975
Person correlation between Max Packet Length and Average Packet Size :0.969
                      Person correlation          between      Max   Packet    Length   and   Avg Fwd Segment Size :0.97
                      0
                      Person correlation          between      Packet Length Mean and          Fwd Packet Length Max :0.
                      997
                      Person correlation          between      Packet Length Mean and          Fwd Packet Length Min :0.
                      997
                      Person correlation          between      Packet Length Mean and          Fwd Packet Length Mean :
                      0.999
Person correlation between Packet Length Mean and Min Packet Length :0.997
Person correlation between Packet Length Mean and Max Packet Length :0.975
                      Person correlation          between      Packet Length Mean and          Average Packet Size :0.99
                      7
                      Person correlation          between      Packet Length Mean and          Avg Fwd Segment Size :0.9
                      99
Person correlation between RST Flag Count and Fwd PSH Flags :1.000
                      Person correlation          between      Average Packet Size and Fwd Packet Length Max :
                      0.993
                      Person correlation          between      Average Packet Size and          Fwd Packet Length Min :
                      0.996
                      Person correlation          between      Average Packet Size and          Fwd Packet Length Mean :
                      0.996
Person correlation between Average Packet Size and Min Packet Length :0.996
Person correlation between Average Packet Size and Max Packet Length :0.969
                      Person correlation          between      Average Packet Size and          Packet Length Mean :0.99
                      7
                      Person correlation          between      Average Packet Size and          Avg Fwd Segment Size :0.
                      996
                      Person correlation          between      Avg Fwd Segment Size and          Fwd Packet Length Max :
                      0.997
                      Person correlation          between      Avg Fwd Segment Size and          Fwd Packet Length Min :
                      0.997
                      Person correlation          between      Avg Fwd Segment Size and          Fwd Packet Length Mean
                      :1.000
                      Person correlation          between      Avg Fwd Segment Size and          Min Packet Length :0.99
                      7
                      Person correlation          between      Avg Fwd Segment Size and          Max Packet Length :0.97
                      0
                      Person correlation          between      Avg Fwd Segment Size and          Packet Length Mean :0.9
                      99
Person correlation between Avg Fwd Segment Size and Average Packet Size :0.
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                                                  25/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
996
                      Person correlation between               Avg Bwd Segment Size and       Bwd Packet Length Mean
                      :1.000
Person correlation between Fwd Header Length.1 and Fwd Header Length :1.000
Person correlation between Subflow Fwd Packets and Total Fwd Packets :1.000
                      Person correlation between               Subflow Fwd Bytes and Total Length of Fwd Packets
                      :1.000
                      Person correlation between               Subflow Bwd Packets and Total Backward Packets :
                      1.000
                      Person correlation between               Subflow Bwd Bytes and       Total Length of Bwd Packet
                      s :1.000
Person correlation between Idle Mean and Flow IAT Mean :0.952
Person correlation between Idle Mean and Flow IAT Std :0.978
Person correlation between Idle Mean and Flow IAT Max :0.968
Person correlation between Idle Mean and Fwd IAT Mean :0.963
Person correlation between Idle Mean and Fwd IAT Std :0.984
Person correlation between Idle Mean and Fwd IAT Max :0.968
Person correlation between Idle Max and Flow IAT Mean :0.954
Person correlation between Idle Max and Flow IAT Std :0.969
Person correlation between Idle Max and Flow IAT Max :0.998
Person correlation between Idle Max and Fwd IAT Mean :0.968
Person correlation between Idle Max and Fwd IAT Std :0.973
Person correlation between Idle Max and Fwd IAT Max :0.998
Person correlation between Idle Min and Flow IAT Std :0.926
Person correlation between Idle Min and Fwd IAT Std :0.933
148 6006
0.024642024642024644
mean_vec = np.mean(X,axis=0)
cov_mat = (X-mean_vec).T.dot((X-mean_vec))/(X.shape[0]-1)
Covarience matrix
0.05059552]
0.08154645]
-0.03082452]
...
-0.03363851]
-0.06401936]
1.0000025 ]]
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                                               26/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
Hypothesis Testing
                  alpha = 0.05
                  print(" P value is "+str(p))
if p > alpha :
else:
P value is 4.0030002619486493e-66
                  alpha = 0.05
                  print(" P value is "+str(p))
if p > alpha :
else:
P value is 1.0
                  alpha = 0.05
                  print(" P value is "+str(p))
if p > alpha :
else:
P value is 0.0
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                      27/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
         Out[32]:
                               Protocol     0        6        17
0 76 72324 327121
1 0 479 0
    T Test
         In [33]: from scipy.stats import ttest_ind
print(score)
Ttest_indResult(statistic=0.11945494497236958, pvalue=0.9049149630747436)
X_std = StandardScaler().fit_transform(df)
mean_vec = np.mean(X,axis= 0)
cov_mat = (X-mean_vec).T.dot((X-mean_vec))/(X.shape[0]-1)
Covarience matrix
0.05059552]
0.08154645]
-0.03082452]
...
-0.03363851]
-0.06401936]
1.0000025 ]]
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                          28/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  29/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
Eigen Vector
0. +0.j 0. +0.j]
0. +0.j 0. +0.j]
0. +0.j 0. +0.j]
...
0. +0.j 0. +0.j]
0. +0.j 0. +0.j]
0. +0.j 0. +0.j]]
Eigen Values
[ 1.42033356e+01+0.00000000e+00j 7.72649536e+00+0.00000000e+00j
6.97276476e+00+0.00000000e+00j 3.44372591e+00+0.00000000e+00j
3.36559607e+00+0.00000000e+00j 3.06290611e+00+0.00000000e+00j
2.73200811e+00+0.00000000e+00j 2.16244583e+00+0.00000000e+00j
2.04284143e+00+0.00000000e+00j 1.98999267e+00+0.00000000e+00j
1.92553259e+00+0.00000000e+00j 1.87241124e+00+0.00000000e+00j
1.57834156e+00+0.00000000e+00j 1.31971871e+00+0.00000000e+00j
1.20821059e+00+0.00000000e+00j 6.46386672e-01+0.00000000e+00j
7.93066087e-01+0.00000000e+00j 1.04245104e+00+0.00000000e+00j
1.00743115e+00+0.00000000e+00j 9.98571864e-01+0.00000000e+00j
9.40046537e-01+0.00000000e+00j 8.64781483e-01+0.00000000e+00j
8.86464711e-01+0.00000000e+00j 5.41688756e-01+0.00000000e+00j
4.35892964e-01+0.00000000e+00j 3.35441715e-01+0.00000000e+00j
2.69710392e-01+0.00000000e+00j 4.63365483e-01+0.00000000e+00j
3.84410884e-01+0.00000000e+00j 2.14326099e-01+0.00000000e+00j
2.07266827e-01+0.00000000e+00j 8.51670083e-02+0.00000000e+00j
6.95002450e-02+0.00000000e+00j 5.28876705e-02+0.00000000e+00j
3.53399844e-02+0.00000000e+00j 3.39899391e-02+0.00000000e+00j
2.15243164e-02+0.00000000e+00j 1.62337338e-02+0.00000000e+00j
1.43799765e-02+0.00000000e+00j 1.03590900e-02+0.00000000e+00j
6.89319475e-03+0.00000000e+00j 3.29125760e-03+0.00000000e+00j
2.65339198e-03+0.00000000e+00j 2.00362749e-03+0.00000000e+00j
1.30937508e-03+0.00000000e+00j 1.38314696e-03+0.00000000e+00j
1.35270413e-03+0.00000000e+00j 1.09949040e-03+0.00000000e+00j
8.90704310e-04+0.00000000e+00j 6.29806195e-04+0.00000000e+00j
5.03692830e-04+0.00000000e+00j 3.76318337e-04+0.00000000e+00j
2.14122590e-04+0.00000000e+00j 3.15230787e-04+0.00000000e+00j
1.20002518e-04+0.00000000e+00j 9.35033083e-05+0.00000000e+00j
1.66480243e-05+0.00000000e+00j 7.56703066e-06+0.00000000e+00j
1.54407558e-16+0.00000000e+00j 7.59839399e-17+6.76312141e-17j
7.59839399e-17-6.76312141e-17j 9.16402231e-17+0.00000000e+00j
-5.44726130e-17+1.81967420e-17j -5.44726130e-17-1.81967420e-17j
-8.25377906e-19+0.00000000e+00j 2.01016260e-17+0.00000000e+00j
0.00000000e+00+0.00000000e+00j 0.00000000e+00+0.00000000e+00j
0.00000000e+00+0.00000000e+00j 0.00000000e+00+0.00000000e+00j
0.00000000e+00+0.00000000e+00j 0.00000000e+00+0.00000000e+00j
0.00000000e+00+0.00000000e+00j 0.00000000e+00+0.00000000e+00j
0.00000000e+00+0.00000000e+00j 0.00000000e+00+0.00000000e+00j
0.00000000e+00+0.00000000e+00j 0.00000000e+00+0.00000000e+00j]
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                      30/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  31/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
eig_pairs.sort(key=lambda x:[0],reverse=True)
for i, j in enumerate(eig_pairs):
print(i,j[0])
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                      32/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
0 14.203335635459023
1 7.72649536453649
2 6.972764758518834
3 3.4437259145677888
4 3.3655960738401918
5 3.0629061126132973
6 2.732008106736874
7 2.162445831484391
8 2.0428414264157237
9 1.989992674733237
10 1.9255325868241058
11 1.872411238759887
12 1.5783415578920403
13 1.3197187145205351
14 1.2082105909293108
15 0.6463866715041711
16 0.7930660872085342
17 1.0424510364507815
18 1.0074311525518724
19 0.9985718638465273
20 0.9400465373681084
21 0.8647814834581833
22 0.8864647110533768
23 0.5416887563792396
24 0.4358929638916721
25 0.3354417151908467
26 0.2697103922117089
                      27 0.46336548291789303
                      28 0.38441088408307755
                      29 0.21432609904951572
                      30 0.20726682701012103
                      31 0.0851670083082053
                      32 0.06950024499568283
                      33 0.05288767048344181
                      34 0.035339984422644515
                      35 0.03398993913758611
                      36 0.02152431638474884
                      37 0.01623373382139512
                      38 0.014379976511664358
39 0.010359090026185399
40 0.006893194750722876
41 0.0032912575989725934
42 0.0026533919781355636
43 0.002003627487092827
44 0.0013093750777122082
                      45 0.00138314696307466
                      46 0.001352704129549723
47 0.0010994903982587496
48 0.0008907043103310611
49 0.0006298061948454105
50 0.0005036928300685212
51 0.0003763183374628182
52 0.00021412258994108418
53 0.0003152307866004254
54 0.00012000251762704192
55 9.350330830589868e-05
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  33/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
56 1.6648024281554622e-05
                      57   7.5670306649343e-06
                      58   1.5440755813161445e-16
59 1.0172286002046191e-16
60 1.0172286002046191e-16
61 9.164022310312424e-17
62 5.743158529421716e-17
63 5.743158529421716e-17
64 8.253779064973264e-19
65 2.0101625965361303e-17
66 0.0
67 0.0
68 0.0
69 0.0
70 0.0
71 0.0
72 0.0
73 0.0
74 0.0
75 0.0
76 0.0
77 0.0
In [37]: eig_pairs[0][1]
Explained Variance
In [38]: tot=sum(eig_vals)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                                        34/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
plt.figure(figsize=(30,30))
                    plt.bar(range(78),var_exp,alpha=0.5,align="center",label="individual explain
                  ed variance")
plt.xlabel("Principal Component")
plt.legend(loc="best")
plt.tight_layout()
                      /usr/local/lib/python3.7/dist-packages/matplotlib/transforms.py:789: ComplexW
                      arning: Casting complex values to real discards the imaginary part
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                             35/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
         In [40]: matrix_w=np.hstack((eig_pairs[0][1].reshape(78,1),eig_pairs[1][1].reshape(78,1
                  )))
print("Matrix W: \n",matrix_w)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                      36/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
Matrix W:
[[-1.36443195e-01+0.j -6.22649911e-02+0.j]
[ 1.85219855e-01+0.j 2.75809848e-02+0.j]
[-2.22020551e-01+0.j 3.69437371e-03+0.j]
[-9.58716391e-04+0.j 2.68948940e-03+0.j]
[-5.31782329e-02+0.j 2.93186133e-01+0.j]
[ 2.64032641e-02+0.j 2.46285459e-03+0.j]
[-2.03674472e-02+0.j 2.70705165e-01+0.j]
[ 1.59768349e-01+0.j 9.77303616e-02+0.j]
[ 1.60797920e-01+0.j 8.02322353e-02+0.j]
[ 1.62009163e-01+0.j 8.43531223e-02+0.j]
[-1.76346755e-02+0.j 1.23578115e-01+0.j]
[-3.93283262e-02+0.j 3.23065708e-01+0.j]
[-5.17699479e-03+0.j 4.97207168e-02+0.j]
[-3.51036243e-02+0.j 2.90787160e-01+0.j]
[-4.01112340e-02+0.j 2.93186523e-01+0.j]
[ 9.05963286e-02+0.j -9.91649673e-03+0.j]
[-2.30173147e-01+0.j -2.48931229e-02+0.j]
[-2.32136421e-01+0.j -1.55999806e-02+0.j]
[-2.37368521e-01+0.j 6.69290113e-03+0.j]
[-4.04002103e-03+0.j 9.35728608e-04+0.j]
[-2.21882711e-01+0.j 3.16978406e-03+0.j]
[-2.34409551e-01+0.j -2.02868225e-02+0.j]
[-2.33253515e-01+0.j -1.11587362e-02+0.j]
[-2.37167823e-01+0.j 5.91666077e-03+0.j]
[-3.98763123e-03+0.j 8.68998842e-04+0.j]
[-9.46949890e-02+0.j 9.76610626e-02+0.j]
[-8.65356604e-02+0.j 3.31227630e-02+0.j]
[-8.89126885e-02+0.j 4.09135207e-02+0.j]
[-9.42693534e-02+0.j 6.42026251e-02+0.j]
[-4.06034182e-02+0.j 4.41208073e-03+0.j]
[-1.14390720e-02+0.j 1.76059773e-02+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[-4.67306418e-03+0.j 3.33247609e-04+0.j]
[ 2.59700180e-04+0.j -1.17091172e-03+0.j]
[-7.03104829e-03+0.j -3.50336382e-04+0.j]
[ 1.60830839e-01+0.j 7.96550072e-02+0.j]
[ 1.49144245e-01+0.j 1.61194665e-01+0.j]
[ 1.61128010e-01+0.j 9.31440822e-02+0.j]
[-3.29054579e-02+0.j 2.78601320e-01+0.j]
[-2.52655579e-02+0.j 3.05088181e-01+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[-1.69732333e-03+0.j 8.65537371e-04+0.j]
[-1.14390720e-02+0.j 1.76059773e-02+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[-1.84399501e-01+0.j -3.28368343e-02+0.j]
[-1.43509487e-02+0.j 2.27161780e-02+0.j]
[-9.28137823e-03+0.j 1.66573978e-02+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[-2.59693125e-02+0.j 6.79906605e-02+0.j]
[ 1.58066480e-01+0.j 8.85095299e-02+0.j]
[ 1.62009163e-01+0.j 8.43531223e-02+0.j]
[-3.51036243e-02+0.j 2.90787160e-01+0.j]
[-4.67306418e-03+0.j 3.33247609e-04+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  37/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[-9.58716391e-04+0.j 2.68948940e-03+0.j]
[ 2.64032641e-02+0.j 2.46285459e-03+0.j]
[-5.31782329e-02+0.j 2.93186133e-01+0.j]
[-2.03674472e-02+0.j 2.70705165e-01+0.j]
[-1.42732184e-01+0.j 7.40240838e-02+0.j]
[-1.44776217e-02+0.j 4.07840410e-02+0.j]
[ 1.57499410e-02+0.j -2.33859330e-03+0.j]
[-2.13709539e-02+0.j -3.97657590e-03+0.j]
[-3.40706178e-02+0.j 4.25100042e-02+0.j]
[-4.45546699e-02+0.j 3.16233496e-02+0.j]
[-4.66047215e-02+0.j 4.08394136e-02+0.j]
[-1.70703123e-02+0.j 3.08096393e-02+0.j]
[-2.33357423e-01+0.j 7.66424312e-03+0.j]
[-1.95649654e-01+0.j -1.47155175e-02+0.j]
[-2.36660758e-01+0.j 1.15920396e-03+0.j]
[-2.15315572e-01+0.j 1.45766138e-02+0.j]
[ 2.79204962e-02+0.j -1.08012869e-01+0.j]]
In [41]: Y=X_std.dot(matrix_w)
...,
[ -1.93651008+0.j, -1.22422617+0.j],
[ -2.03393674+0.j, -1.26868622+0.j],
[-11.84870487+0.j, -1.02759442+0.j]])
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  38/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
pca=PCA().fit(X_std)
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlim(0,78,1)
plt.xlabel("Number of components")
In [43]: sklearn_pca=PCA(n_components=30)
Y_sklearn=sklearn_pca.fit_transform(X_std)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  39/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
In [44]: pca=PCA(n_components=2)
principalComponents=pca.fit_transform(X_norm)
plt.figure(figsize=(16,16))
                  g1=sns.scatterplot(principalComponents[:,0],principalComponents[:,1],s=100,hue
                  =data_[" Label"],cmap="Spectral",alpha=0.7)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                      40/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
                      /usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarni
                      ng: Pass the following variables as keyword args: x, y. From version 0.12, th
                      e only valid positional argument will be `data`, and passing other arguments
                      without an explicit keyword will result in an error or misinterpretation.
FutureWarning
                      /usr/local/lib/python3.7/dist-packages/google/colab/_event_manager.py:28: Use
                      rWarning: Creating legend with loc="best" can be slow with large amounts of d
                      ata.
func(*args, **kwargs)
                      /usr/local/lib/python3.7/dist-packages/IPython/core/pylabtools.py:125: UserWa
                      rning: Creating legend with loc="best" can be slow with large amounts of dat
                      a.
fig.canvas.print_figure(bytes_io, **kw)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                             41/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
y=LabelEncoder().fit_transform(y)
oversample=SMOTE()
X,y=oversample.fit_resample(X,y)
counter=Counter(y)
for k, v in counter.items():
per=v/len(y)*100
pyplot.bar(counter.keys(),counter.values())
pyplot.show()
In [46]: y.shape
Out[46]: (479940,)
In [47]: X.shape
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  42/51
6/20/22, 12:53 PM                                                            Copy_of_DDoS
                  X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state
                  =1)
model=LogisticRegression(max_iter=440000)
ovr=OneVsRestClassifier(model)
ovr.fit(X_train,y_train)
y_pred=ovr.predict(X_test)
print(classification_report(y_test,y_pred))
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                             43/51
6/20/22, 12:53 PM                                                            Copy_of_DDoS
classifier=DecisionTreeClassifier()
classifier.fit(X_train,y_train)
y_pred=classifier.predict(X_test)
print(classification_report(y_test,y_pred))
In [ ]: print(accuracy_score(y_test,y_pred))
0.7284327207567612
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                             44/51
6/20/22, 12:53 PM                                                            Copy_of_DDoS
classifier.fit(X_train,y_train)
Y_pred = classifier.predict(X_test)
print(classification_report(y_test,Y_pred))
In [ ]: print(confusion_matrix(y_test,y_pred))
[[9727 2 1 0 4 0 0 1 0 1 5 0]
[ 7 45 1 3 9951 2 0 0 4 0 0 11]
[ 0 9 0 2 0 3800 6341 1 1 0 2 0]
[ 0 0 0 0 0 0 0 0 1 8060 1922 0]
[ 0 1 0 1 3 1 0 1 2 0 0 9945]]
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                             45/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
In [ ]: skplt.metrics.plot_confusion_matrix(y_test,y_pred,figsize=(16,16))
Feature Scaling
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  46/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
import pandas as pd
x=df.iloc[:,1:3].values
min_max_scaler=preprocessing.MinMaxScaler(feature_range=(0,1))
x_after_min_max_scaler=min_max_scaler.fit_transform(x)
Standardisation=preprocessing.StandardScaler()
x_after_Standardisation=Standardisation.fit_transform(x)
                        [       17         1]
                        [       17        44]
                        ...
                        [        6         1]
                        [        6         1]
                        [        6 112584179]]
[1.00000000e+00 8.33333646e-09]
[1.00000000e+00 3.66666804e-07]
...
[3.52941176e-01 8.33333646e-09]
[3.52941176e-01 8.33333646e-09]
[3.52941176e-01 9.38201843e-01]]
after standardisation :
[[ 0.47191611 -0.22334861]
[ 0.47191611 -0.22334861]
[ 0.47191611 -0.22334626]
...
[-2.11674636 -0.22334861]
[-2.11674636 -0.22334861]
[-2.11674636 5.91918411]]
import pandas as pd
In [60]: y.shape
Out[60]: (479940,)
In [61]: print(X.shape)
(479940, 78)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                  47/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
X_train,X_test,y_train,y_test =train_test_split(X,y,test_size=0.20)
scaler =StandardScaler()
scaler.fit(X_train)
X_train=scaler.transform(X_train)
X_test=scaler.transform(X_test)
extra_tree =ExtraTreeClassifier(random_state=0)
cls=BaggingClassifier(extra_tree,random_state=0).fit(X_train,y_train)
cls.score(X_test,y_test)
Out[65]: 0.7575738633995917
In [66]: y_pred=cls.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
[[7988 1 0 0 0 0 0 0 0 0 0 0]
[ 5 31 1 0 7844 1 0 0 3 1 1 14]
[ 0 4 0 1 1 2560 5379 0 1 0 1 0]
[ 1 0 0 0 0 0 0 0 0 6624 1465 0]
[ 0 0 0 0 0 0 0 0 0 0 0 8139]]
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                           48/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
                  X,y=make_classification(n_samples=1000,n_features=11,n_informative=3,n_redunda
                  nt=0,n_repeated=0,n_classes=2,random_state=0,shuffle=False)
forest=ExtraTreesClassifier(n_estimators=250,random_state=0)
forest.fit(X,y)
importances =forest.feature_importances_
axis=0)
indices=np.argsort(importances)[::-1]
print("Feature ranking:")
# for f in range(X.shape[1]):
                  plt.figure()
                  plt.title("Feature importances")
plt.bar(range(X.shape[1]),importances[indices],
color="r",yerr=std[indices],align="center")
plt.xticks(range(X.shape[1]),indices)
plt.xlim([-1,x.shape[1]])
plt.show()
Feature ranking:
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                      49/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
def get_dataset():
                    X,y=make_classification(n_samples=1000,n_features=20,n_informative=15,n_redu
                  ndant=5,random_state=4)
return X,y
def get_models():
models=dict()
n_trees=[10,50,100,500,1000,5000]
for n in n_trees:
models[str(n)]=ExtraTreesClassifier(n_estimators=n)
return models
def evaluate_model(model,X,y):
cv=RepeatedStratifiedKFold(n_splits=10,n_repeats=3,random_state=1)
scores=cross_val_score(model,X,y,scoring="accuracy",cv=cv,n_jobs=-1)
return scores
X,y=get_dataset()
models =get_models()
results,names =list(),list()
scores=evaluate_model(model,X,y)
results.append(scores)
names.append(name)
pyplot.boxplot(results,labels=names,showmeans=True)
pyplot.show()
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false                      50/51
6/20/22, 12:53 PM                                                           Copy_of_DDoS
                      /usr/local/lib/python3.7/dist-packages/joblib/externals/loky/process_executo
                      r.py:705: UserWarning: A worker stopped while some jobs were given to the exe
                      cutor. This can be caused by a too short worker timeout or by a memory leak.
In [ ]:
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 51/51