# Importing Python libraries needed for data analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
import math
import plotly.io as pio
pio.renderers.default='notebook'
from sklearn.cluster import KMeans
import scipy.cluster.hierarchy as sch
from sklearn.cluster import AgglomerativeClustering
%matplotlib inline
# Load the workbook into a DataFrame, then discard its last column.
dataset = pd.read_excel('filename.xlsx')
dataset = dataset.iloc[:, :-1]
! python --version
Python 3.7.12
# Dimensions of the loaded frame as (rows, columns).
dataset.shape
(189, 46)
# fixing columns spaces
# Flatten the two-row header into single column names.
# Step 1: a header cell that pandas labelled 'Unnamed...' inherits the name of
# the nearest named column to its left (presumably blanks left by merged group
# headers in the workbook -- confirm against the file).
cols = []
for col in dataset.columns:
    if col.startswith('Unnamed') and cols:
        cols.append(cols[-1])
    else:
        # Also reached when the very first column is unnamed: there is nothing
        # to inherit from, so keep its own label instead of raising IndexError
        # on cols[-1].
        cols.append(col.strip())
dataset.columns = cols

# Step 2: row 0 carries the sub-header labels. Append each non-empty one as
# '<name>--<sub>'. The separator is only added when there is a sub-label, so
# nothing has to be stripped afterwards (str.strip('--') removes *any* leading
# or trailing '-' characters, not just the separator).
sub_headers = dataset.iloc[0, :].fillna('').values
flat_cols = []
for name, sub in zip(dataset.columns, sub_headers):
    sub = str(sub).strip()
    flat_cols.append(f'{name}--{sub}' if sub else name)
dataset.columns = flat_cols
# Data transforming
# Coerce 'Consciousness Time' to numbers; cells that cannot be parsed become NaN.
dataset['Consciousness Time'] = pd.to_numeric(
    dataset['Consciousness Time'], errors='coerce'
)
# Remove the row labelled 0, drop every row that still contains a missing
# value, and renumber the remaining rows from 0.
dataset = dataset.drop(index=0).dropna().reset_index(drop=True)
# Show the column labels after the header clean-up steps above.
dataset.columns
Index(['Nr', 'Location', 'Sex', 'Age', 'Age onset', 'Years with ES', 'Seizure Type', 'Laterality', 'Behavior before', 'Same day ES before', 'ES before', 'Ictal Seconds', 'Ictal signs and symtoms--MA', 'Ictal signs and symtoms--OA', 'Ictal signs and symtoms--SMA', 'Ictal signs and symtoms--Laughing', 'Ictal signs and symtoms--Coughing', 'Ictal signs and symtoms--NRR', 'Ictal signs and symtoms--NRL', 'Ictal signs and symtoms--Vo', 'Ictal signs and symtoms--Gaze', 'Ictal signs and symtoms--VA', 'Ictal signs and symtoms--Hiccup', 'Consciousness Time', 'Postictal signs and symptoms--MA', 'Postictal signs and symptoms--OA', 'Postictal signs and symptoms--NRR', 'Postictal signs and symptoms--NRL', 'Postictal signs and symptoms--Smacking', 'Postictal signs and symptoms--Smile', 'Postictal signs and symptoms--Laughing', 'Postictal signs and symptoms--Coughing', 'Postictal signs and symptoms--Vo', 'Postictal signs and symptoms--Gape', 'Postictal signs and symptoms--Hipcup', 'Postictal signs and symptoms--Motor restless', 'Postictal signs and symptoms--Speaks incomprehensible', 'Postictal signs and symptoms--Cloni Arm', 'Postictal signs and symptoms--Stand up', 'Level of Consciousness', 'Coughing Time seconds--Coughing #1', 'Coughing Time seconds--Coughing #2', 'Coughing Time seconds--Coughing #3', 'Coughing Time seconds--Coughing #4', 'Disnomia seconds', 'Aphasia TT'], dtype='object')
# Examining the dataframe: display its first 25 rows.
dataset.head(25)
Nr | Location | Sex | Age | Age onset | Years with ES | Seizure Type | Laterality | Behavior before | Same day ES before | ... | Postictal signs and symptoms--Speaks incomprehensible | Postictal signs and symptoms--Cloni Arm | Postictal signs and symptoms--Stand up | Level of Consciousness | Coughing Time seconds--Coughing #1 | Coughing Time seconds--Coughing #2 | Coughing Time seconds--Coughing #3 | Coughing Time seconds--Coughing #4 | Disnomia seconds | Aphasia TT | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | TR | 2.0 | 20.0 | 9.0 | 11.0 | 4.0 | 1.0 | 1.0 | 2.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
1 | 2.0 | TR | 1.0 | 25.0 | 22.0 | 3.0 | 9.0 | 1.0 | 1.0 | 3.0 | ... | 0 | 0 | 0 | 5.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
2 | 3.0 | TR | 2.0 | 28.0 | 6.0 | 22.0 | 10.0 | 1.0 | 2.0 | 0.0 | ... | 0 | 0 | 0 | 2.0 | 39 | 0 | 0 | 0 | 0.0 | 0.0 |
3 | 4.0 | TR | 2.0 | 42.0 | 1.0 | 41.0 | 11.0 | 1.0 | 1.0 | 2.0 | ... | 0 | 0 | 0 | 5.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
4 | 5.0 | TR | 2.0 | 35.0 | 10.0 | 25.0 | 4.0 | 1.0 | 1.0 | 0.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
5 | 6.0 | TR | 2.0 | 29.0 | 20.0 | 9.0 | 4.0 | 2.0 | 2.0 | 0.0 | ... | 0 | 0 | 0 | 4.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
6 | 7.0 | TR | 1.0 | 57.0 | 3.0 | 54.0 | 4.0 | 1.0 | 1.0 | 0.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
7 | 8.0 | TR | 1.0 | 24.0 | 4.0 | 20.0 | 2.0 | 1.0 | 1.0 | 2.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
8 | 9.0 | TR | 1.0 | 34.0 | 7.0 | 27.0 | 2.0 | 1.0 | 1.0 | 4.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
9 | 10.0 | TR | 2.0 | 42.0 | 1.0 | 41.0 | 2.0 | 1.0 | 1.0 | 4.0 | ... | 0 | 1 | 0 | 5.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
10 | 11.0 | TR | 1.0 | 22.0 | 13.0 | 9.0 | 2.0 | 1.0 | 2.0 | 0.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
11 | 12.0 | TR | 2.0 | 20.0 | 10.0 | 10.0 | 2.0 | 1.0 | 2.0 | 1.0 | ... | 0 | 0 | 0 | 2.0 | 2 | 0 | 0 | 0 | 0.0 | 0.0 |
12 | 13.0 | TR | 1.0 | 19.0 | 14.0 | 5.0 | 2.0 | 1.0 | 2.0 | 1.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
13 | 14.0 | TR | 2.0 | 21.0 | 10.0 | 11.0 | 6.0 | 2.0 | 1.0 | 1.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
14 | 15.0 | TR | 1.0 | 43.0 | 16.0 | 27.0 | 7.0 | 2.0 | 1.0 | 0.0 | ... | 0 | 0 | 0 | 4.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
15 | 16.0 | TR | 2.0 | 20.0 | 10.0 | 10.0 | 3.0 | 1.0 | 1.0 | 0.0 | ... | 0 | 0 | 0 | 10.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
16 | 18.0 | TR | 1.0 | 61.0 | 30.0 | 31.0 | 1.0 | 1.0 | 2.0 | 2.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 46.0 | 0.0 |
17 | 19.0 | TR | 2.0 | 20.0 | 9.0 | 11.0 | 1.0 | 1.0 | 1.0 | 5.0 | ... | 0 | 0 | 0 | 5.0 | 0 | 0 | 0 | 0 | 0.0 | 60.0 |
18 | 20.0 | TR | 1.0 | 24.0 | 23.0 | 1.0 | 1.0 | 1.0 | 2.0 | 6.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
19 | 21.0 | TR | 2.0 | 30.0 | 16.0 | 14.0 | 1.0 | 1.0 | 1.0 | 1.0 | ... | 0 | 0 | 0 | 4.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
20 | 24.0 | TR | 2.0 | 18.0 | 16.0 | 2.0 | 1.0 | 1.0 | 1.0 | 1.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
21 | 25.0 | TR | 2.0 | 53.0 | 51.0 | 2.0 | 1.0 | 1.0 | 1.0 | 1.0 | ... | 0 | 0 | 0 | 2.0 | 7 | 0 | 0 | 0 | 0.0 | 0.0 |
22 | 26.0 | TR | 1.0 | 24.0 | 22.0 | 2.0 | 1.0 | 1.0 | 1.0 | 1.0 | ... | 0.083333 | 0.125 | 0.166667 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
23 | 27.0 | TR | 2.0 | 53.0 | 14.0 | 39.0 | 1.0 | 1.0 | 2.0 | 0.0 | ... | 1 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
24 | 28.0 | TR | 1.0 | 52.0 | 1.0 | 51.0 | 1.0 | 3.0 | 1.0 | 1.0 | ... | 0 | 0 | 0 | 4.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
25 rows × 46 columns
# scatterplot graph , features transforming
# Take an explicit copy: df1 is a column slice of `dataset`, and assigning into
# a slice raises SettingWithCopyWarning and leaves it ambiguous whether
# `dataset` itself gets modified.
df1 = dataset.loc[:, 'Sex':'Ictal Seconds'].copy()
# Turn every column strictly between the first and last of the slice into
# integer-valued strings (e.g. 2.0 -> '2').
for col in df1.columns[1:-1]:
    df1[col] = df1[col].astype(int).astype(str)
# The loop above also covers 'Age' when it lies inside that range (it does in
# the column listing shown earlier); convert it back to integers for plotting.
df1['Age'] = df1['Age'].astype(int)
# Bring in 'Consciousness Time' from the full frame; rows align on the index.
df1['Consciousness Time'] = pd.to_numeric(dataset['Consciousness Time'])
sns.set(rc={'figure.figsize': (20, 20)})
# One scatter panel per 'Sex' value: 'Consciousness Time' (x) against 'Age' (y).
g = sns.FacetGrid(df1, col="Sex", height=8.27, aspect=11.7/8.27)
g.map(sns.scatterplot, 'Consciousness Time', 'Age', alpha=.7)
g.add_legend()
<seaborn.axisgrid.FacetGrid at 0x1d080c16fa0>
# Bar plot of 'Consciousness Time' by 'Sex', one panel per 'Laterality' value.
g = sns.FacetGrid(df1, col="Laterality", height=4, aspect=.5)
# Pass an explicit bar order. Without `order`, each panel derives its bar
# positions from the categories present in its own subset, so the same x
# position can stand for different 'Sex' values in different panels (this is
# what seaborn's "without specifying `order`" warning is about).
g.map(sns.barplot, "Sex", "Consciousness Time",
      order=sorted(df1["Sex"].unique()))
C:\Users\tommy\anaconda3\lib\site-packages\seaborn\axisgrid.py:670: UserWarning: Using the barplot function without specifying `order` is likely to produce an incorrect plot.
<seaborn.axisgrid.FacetGrid at 0x1d0814e39a0>
# Bar plot of 'Consciousness Time' by 'Sex', one panel per 'Behavior before' value.
g = sns.FacetGrid(df1, col="Behavior before", height=4, aspect=.5)
# Pass an explicit bar order. Without `order`, each panel derives its bar
# positions from the categories present in its own subset, so the same x
# position can stand for different 'Sex' values in different panels (this is
# what seaborn's "without specifying `order`" warning is about).
g.map(sns.barplot, "Sex", "Consciousness Time",
      order=sorted(df1["Sex"].unique()))
C:\Users\tommy\anaconda3\lib\site-packages\seaborn\axisgrid.py:670: UserWarning: Using the barplot function without specifying `order` is likely to produce an incorrect plot.
<seaborn.axisgrid.FacetGrid at 0x1d08198d520>
## distribution (KDE) of 'Consciousness Time', one row of the grid per 'Sex' value
g = sns.FacetGrid(df1, row="Sex", height=1.7, aspect=4)
g.map(sns.kdeplot, "Consciousness Time")
<seaborn.axisgrid.FacetGrid at 0x1d0819a0bb0>
# Pairwise correlation of the columns from 'Sex' through 'Consciousness Time',
# rendered as an interactive heatmap.
df = dataset.loc[:, 'Sex':'Consciousness Time']
corr_df = df.corr()
fig = px.imshow(corr_df)
fig.update_layout(
    title='Correlation comparion for main features',
    width=1000,
    height=1000,
)
fig.show()
# fig.write_html(r'D:\Machine learning\Epilepsia SUSANA\correlacion.html')
#with open("correlacion.html", "r", encoding='utf-8') as f:
# text= f.read()
import codecs
# f = codecs.open("correlacion.html", 'r', 'utf-8')