Generated datasets

## Reading the dataset using pandas
import pandas as pd

# Location of the raw CTG.csv file on GitHub.
url = 'https://raw.githubusercontent.com/davidrkearney/colab-notebooks/main/datasets/CTG.csv'
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0
# (it now raises TypeError). `on_bad_lines='skip'` is the supported way to
# drop malformed rows instead of aborting the parse.
df = pd.read_csv(url, on_bad_lines='skip')
# Display the loaded frame (notebook cell output).
df
FileName Date SegFile b e LBE LB AC FM UC ... C D E AD DE LD FS SUSP CLASS NSP
0 Variab10.txt 12/1/1996 CTG0001.txt 240.0 357.0 120.0 120.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 9.0 2.0
1 Fmcs_1.txt 5/3/1996 CTG0002.txt 5.0 632.0 132.0 132.0 4.0 0.0 4.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 6.0 1.0
2 Fmcs_1.txt 5/3/1996 CTG0003.txt 177.0 779.0 133.0 133.0 2.0 0.0 5.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 6.0 1.0
3 Fmcs_1.txt 5/3/1996 CTG0004.txt 411.0 1192.0 134.0 134.0 2.0 0.0 6.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 6.0 1.0
4 Fmcs_1.txt 5/3/1996 CTG0005.txt 533.0 1147.0 132.0 132.0 4.0 0.0 5.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2124 S8001045.dsp 6/6/1998 CTG2127.txt 1576.0 3049.0 140.0 140.0 1.0 0.0 9.0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 5.0 2.0
2125 S8001045.dsp 6/6/1998 CTG2128.txt 2796.0 3415.0 142.0 142.0 1.0 1.0 5.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0
2126 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2127 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2128 NaN NaN NaN NaN NaN NaN NaN NaN 564.0 23.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

2129 rows × 40 columns

## Dropping the columns we don't need
# Discard the listed columns; `columns=` is the keyword form of passing the
# labels together with `axis=1`.
df = df.drop(columns=["FileName", "Date", "SegFile", "b", "e"])
# Preview the first rows of the trimmed frame.
df.head()
LBE LB AC FM UC ASTV MSTV ALTV MLTV DL ... C D E AD DE LD FS SUSP CLASS NSP
0 120.0 120.0 0.0 0.0 0.0 73.0 0.5 43.0 2.4 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 9.0 2.0
1 132.0 132.0 4.0 0.0 4.0 17.0 2.1 0.0 10.4 2.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 6.0 1.0
2 133.0 133.0 2.0 0.0 5.0 16.0 2.1 0.0 13.4 2.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 6.0 1.0
3 134.0 134.0 2.0 0.0 6.0 16.0 2.4 0.0 23.0 2.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 6.0 1.0
4 132.0 132.0 4.0 0.0 5.0 16.0 2.4 0.0 19.9 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 1.0

5 rows × 35 columns

# Show the 'C' column as a Series (all rows, single label).
df.loc[:, 'C']
0       0.0
1       0.0
2       0.0
3       0.0
4       0.0
       ... 
2124    0.0
2125    0.0
2126    NaN
2127    NaN
2128    NaN
Name: C, Length: 2129, dtype: float64
# Bind the 'C' column to X and echo it as the cell output.
X = df.loc[:, 'C']
X
0       0.0
1       0.0
2       0.0
3       0.0
4       0.0
       ... 
2124    0.0
2125    0.0
2126    NaN
2127    NaN
2128    NaN
Name: C, Length: 2129, dtype: float64
# Bind the 'NSP' column to y and echo it as the cell output.
y = df.loc[:, 'NSP']
y
0       2.0
1       1.0
2       1.0
3       1.0
4       1.0
       ... 
2124    2.0
2125    1.0
2126    NaN
2127    NaN
2128    NaN
Name: NSP, Length: 2129, dtype: float64
import seaborn as sns
# Apply seaborn's default theme with font elements scaled by 1.5 for the
# plots drawn below.
sns.set_theme(font_scale=1.5)
# List the column labels currently in the frame (notebook cell output).
df.columns
Index(['LBE', 'LB', 'AC', 'FM', 'UC', 'ASTV', 'MSTV', 'ALTV', 'MLTV', 'DL',
       'DS', 'DP', 'DR', 'Width', 'Min', 'Max', 'Nmax', 'Nzeros', 'Mode',
       'Mean', 'Median', 'Variance', 'Tendency', 'A', 'B', 'C', 'D', 'E', 'AD',
       'DE', 'LD', 'FS', 'SUSP', 'CLASS', 'NSP'],
      dtype='object')
# Relational plot of 'B' against 'NSP'; the trailing semicolon suppresses
# the notebook's echo of the returned grid object.
sns.relplot(
    x='B',
    y='NSP',
    data=df,
    height=6,
);
# Kernel-density plot of 'B', one curve per 'NSP' value, drawn twice as wide
# as it is tall.
sns.displot(
    x='B',
    hue='NSP',
    data=df,
    kind='kde',
    aspect=2,
);
/home/david/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:305: UserWarning: Dataset has 0 variance; skipping density estimate.
  warnings.warn(msg, UserWarning)
# Joint plot with 'B' on the x-axis and 'C' on the y-axis, coloured by 'NSP';
# the trailing semicolon suppresses the echo of the returned grid object.
sns.jointplot(
    x="B",
    y="C",
    hue='NSP',
    data=df,
    height=10,
);
/home/david/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:305: UserWarning: Dataset has 0 variance; skipping density estimate.
  warnings.warn(msg, UserWarning)
/home/david/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:305: UserWarning: Dataset has 0 variance; skipping density estimate.
  warnings.warn(msg, UserWarning)
# Same joint plot with the axes swapped ('C' on x, 'B' on y). There is no
# trailing semicolon, so the notebook echoes the returned JointGrid.
sns.jointplot(
    data=df,
    x="C",
    y="B",
    hue='NSP',
    height=10,
)
/home/david/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:305: UserWarning: Dataset has 0 variance; skipping density estimate.
  warnings.warn(msg, UserWarning)
/home/david/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:305: UserWarning: Dataset has 0 variance; skipping density estimate.
  warnings.warn(msg, UserWarning)
<seaborn.axisgrid.JointGrid at 0x7f72c8970460>