Example Results#
Example 1#
An example of using the GPR algorithm on the sample data from the original article.
import random
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from gpr_algorithm import GPR

# Seed the standard-library RNG before the model is built and fitted
# (presumably what makes the printed rules reproducible -- confirm against GPR).
random.seed(1)

# Five samples with two features each, and one binary label per sample.
labels = np.array([1, 0, 1, 1, 1])
attributes = np.array([
    [-1.0, 2.5],
    [2.5, 2.0],
    [3.0, 7.0],
    [-0.2, 6.3],
    [0.5, 5.0],
])

# Rescale the features with MinMaxScaler before handing them to GPR.
scaler = MinMaxScaler()
attributes_normalized = scaler.fit_transform(attributes)

gpr = GPR(
    feature_names=['y1', 'y2'],
    max_n_of_rules=2,
    max_n_of_ands=2,
    verbose=False,
)
gpr.fit(attributes_normalized, labels)

# Score the model on the same samples it was fitted on.
predicted_labels = gpr.predict(attributes_normalized)
accuracy = accuracy_score(labels, predicted_labels)

print('Accuracy:')
print(accuracy)
print('Rules:')
for rule in gpr.rules:
    print(rule)
Terminal output
Accuracy:
1.0
Rules:
IF y2 is High THEN 1 | Support: 0.6400
IF y1 is Low THEN 1 | Support: 0.6062
ELSE 0
Example 2#
An example of using the GPR algorithm on the diabetes dataset.
import random
from pathlib import Path
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from gpr_algorithm import GPR

# Seed the standard-library RNG before the model is built and fitted
# (presumably what makes the printed rule reproducible -- confirm against GPR).
random.seed(0)

# The CSV is expected in a "data" directory next to this script.
csv_path = Path(__file__).parent / 'data' / 'type1diabetes.csv'
df = pd.read_csv(csv_path)

target_names = ['sick', 'healthy']
feature_names = [
    'age',
    'weight',
    'height',
    'step_count',
    'sedentary',
    'light',
    'moderate',
    'vigorious',
]

# The 'healthy' column is the target; the listed columns are the inputs.
labels = df['healthy'].values
attributes = df[feature_names].values

# Rescale the features with MinMaxScaler before handing them to GPR.
scaler = MinMaxScaler()
attributes_normalized = scaler.fit_transform(attributes)

gpr = GPR(
    target_names=target_names,
    feature_names=feature_names,
    max_n_of_rules=1,
    eval_fun=accuracy_score,
    verbose=False,
)
gpr.fit(attributes_normalized, labels)
predicted_labels = gpr.predict(attributes_normalized)

# Print every rule held by the fitted model.
for rule in gpr.rules:
    print(rule)
Terminal output
IF step_count is High THEN healthy | Support: 0.5288
ELSE sick
Example 3#
An example of using the GPR algorithm on the Breast Cancer Wisconsin (BCW) dataset.
import random
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from gpr_algorithm import GPR

# Seed the standard-library RNG before the model is built and fitted
# (presumably what makes the printed rules reproducible -- confirm against GPR).
random.seed(0)

# The CSV is expected in a "data" directory next to this script.
df = pd.read_csv(
    Path(__file__).parent.joinpath('data').joinpath('bcw.csv')
)

target_names = ['benign', 'malignant']
feature_names = [
    'Clump Thickness', 'Uniformity of Cell Size',
    'Uniformity of Cell Shape', 'Marginal Adhesion',
    'Single Epithelial Cell Size', 'Bare Nuclei',
    'Bland Chromatin', 'Normal Nucleoli', 'Mitoses'
]

# Work on an explicit, writable copy of the 'Class' column: assigning into
# `df['Class'].values` would either write through to the DataFrame or fail on
# a read-only array when pandas Copy-on-Write is active.
labels = df['Class'].to_numpy(copy=True)
# Re-encode the class codes 2 and 4 as 0 and 1 (presumably the indices into
# target_names, i.e. 0 = benign, 1 = malignant -- confirm against GPR).
labels[labels == 2] = 0
labels[labels == 4] = 1

attributes = df[feature_names].values
# Rescale the features with MinMaxScaler before handing them to GPR.
attributes_normalized = MinMaxScaler().fit_transform(attributes)

gpr = GPR(
    target_names=target_names,
    feature_names=feature_names,
    max_n_of_rules=3,
    max_n_of_ands=3,
    n_generations=20,
    n_populations=20,
    verbose=False
)

gpr.fit(attributes_normalized, labels)
pred_labels = gpr.predict(attributes_normalized)

# Print the learned rules; without this loop the script produces none of the
# rule listing documented as this example's terminal output.
for rule in gpr.rules:
    print(rule)
Terminal output
IF Bare Nuclei is High THEN malignant | Support: 0.7340
IF Uniformity of Cell Size is High THEN malignant | Support: 0.6192
ELSE benign