51 lines
1.9 KiB
Python
51 lines
1.9 KiB
Python
import random
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score,recall_score,precision_score
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
|
|
# `%matplotlib inline` is IPython-only syntax and a SyntaxError under plain
# Python, so the inline backend is requested through the IPython API instead.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # Deliberate: `get_ipython` only exists inside an IPython session. When
    # the file runs as an ordinary script, keep matplotlib's default backend.
    pass

# Root directory of the project checkout. Data file paths are built by plain
# string concatenation onto this value, so the trailing slash is required.
PREFIX_DIR = "/Users/Leo/Documents/github/GradProj/"
|
|
|
|
|
|
def RF(score_df, f1_score_list, recall_score_list, precision_score_list,
       x_train, y_train, x_test, y_test):
    """Train a random forest on one split and record its running scores.

    Fits a default ``RandomForestClassifier`` on the training data, predicts
    the test data, and appends the micro-averaged F1, recall and precision of
    that prediction to the three score lists. The means of the (accumulated)
    lists are then written to the ``'RandomForest'`` row of ``score_df``,
    the frame is drawn as a bar chart, and the means are printed.

    Args:
        score_df: Frame with a ``'RandomForest'`` row and three columns, filled
            in the order precision, recall, f1. Modified in place.
        f1_score_list: Running list of F1 scores. Appended to in place.
        recall_score_list: Running list of recall scores. Appended to in place.
        precision_score_list: Running list of precision scores. Appended to
            in place.
        x_train: Training features.
        y_train: Training labels.
        x_test: Test features.
        y_test: Test labels.
    """
    classifier = RandomForestClassifier()
    classifier.fit(x_train, y_train)
    y_pred = classifier.predict(x_test)

    f1_score_list.append(f1_score(y_test, y_pred, average='micro'))
    recall_score_list.append(recall_score(y_test, y_pred, average='micro'))
    precision_score_list.append(precision_score(y_test, y_pred, average='micro'))

    # Means are taken over every score collected so far, not just this split,
    # so the row converges as the caller repeats the experiment.
    scores = [
        np.mean(precision_score_list),
        np.mean(recall_score_list),
        np.mean(f1_score_list),
    ]
    score_df.loc['RandomForest'] = scores

    # NOTE(review): this draws a new bar chart on every call; if only the
    # final chart is wanted, the plot belongs in the caller after its loop.
    score_df.plot.bar()
    print(scores)
|
|
|
|
def main():
    """Score a random forest over 50 random 75/25 splits of one day's examples.

    The date is taken from the first command-line argument and selects
    ``<PREFIX_DIR>Experiment/statFeature/csvFile/<date>/examples.csv``. The
    counts of the ``label`` column are plotted, then the rows are shuffled 50
    times; after each shuffle the first 75% of the rows train the model and
    the remainder test it. ``RF`` accumulates the scores across the splits.

    Raises:
        SystemExit: If no date argument was given on the command line.
    """
    if len(sys.argv) < 2:
        # Fail with a usage hint instead of a bare IndexError on sys.argv[1].
        raise SystemExit('usage: {} <date>'.format(sys.argv[0]))
    date = sys.argv[1]

    example_csv_file = PREFIX_DIR + 'Experiment/statFeature/csvFile/' + date + '/examples.csv'
    examples_df = pd.read_csv(example_csv_file)

    # Called for the plot only; the returned axes object is not needed.
    examples_df['label'].value_counts().plot.bar()

    # Copy so the in-place shuffles below never touch the DataFrame's data.
    examples = examples_df.values.copy()

    score_df = pd.DataFrame(
        np.zeros((5, 3)),
        index=['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'],
        columns=['precision', 'recall', 'f1'],
    )
    f1_score_list = list()
    recall_score_list = list()
    precision_score_list = list()

    # Shuffling does not change the row count, so the split point is fixed.
    split_at = int(len(examples) * 0.75)

    for _ in range(50):
        np.random.shuffle(examples)
        examples_train = examples[:split_at]
        examples_test = examples[split_at:]

        # The last column is used as the target and all others as features.
        # NOTE(review): presumably `label` is the last CSV column - confirm.
        x_train = examples_train[:, 0:-1]
        y_train = examples_train[:, -1]
        x_test = examples_test[:, 0:-1]
        y_test = examples_test[:, -1]

        RF(score_df, f1_score_list, recall_score_list, precision_score_list,
           x_train, y_train, x_test, y_test)
|
|
|
|
|
|
|
|
# Run the experiment only when executed as a script, not when imported.
if __name__ == '__main__':
    main()