This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
cuiyiming-gradproj/Experiment/statFeature/mlAlgo.py
2019-12-16 21:53:05 +08:00

51 lines
1.9 KiB
Python

import random
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score,recall_score,precision_score
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
%matplotlib inline
# Root directory of the project; input file paths are built by appending a
# relative path to this string, so the trailing slash is significant.
# NOTE(review): hard-coded absolute path — presumably specific to one machine.
PREFIX_DIR = "/Users/Leo/Documents/github/GradProj/"
def RF(score_df, f1_score_list, recall_score_list, precision_score_list,
       x_train, y_train, x_test, y_test):
    """Fit a random forest on one split and update the running scores.

    A fresh ``RandomForestClassifier`` (default settings) is trained on the
    training split and evaluated on the test split.  The micro-averaged
    F1, recall and precision of this run are appended to the three score
    lists, and the means of those lists are written to the
    ``'RandomForest'`` row of ``score_df``.

    Args:
        score_df: DataFrame whose ``'RandomForest'`` row receives
            ``[mean precision, mean recall, mean f1]``; modified in place.
        f1_score_list: List accumulating F1 scores; appended to in place.
        recall_score_list: List accumulating recall scores; appended to in
            place.
        precision_score_list: List accumulating precision scores; appended
            to in place.
        x_train: Training features passed to ``fit``.
        y_train: Training targets passed to ``fit``.
        x_test: Test features passed to ``predict``.
        y_test: True targets the predictions are scored against.

    Returns:
        None.  Results are reported through the mutated arguments, a bar
        chart of ``score_df`` and a printed list of the current means.
    """
    classifier = RandomForestClassifier()
    classifier.fit(x_train, y_train)
    y_pred = classifier.predict(x_test)

    # Record this run's scores so the means below cover every run so far.
    f1_score_list.append(f1_score(y_test, y_pred, average='micro'))
    recall_score_list.append(recall_score(y_test, y_pred, average='micro'))
    precision_score_list.append(precision_score(y_test, y_pred, average='micro'))

    # Order matches the score_df columns as written: precision, recall, f1.
    scores = [
        np.mean(precision_score_list),
        np.mean(recall_score_list),
        np.mean(f1_score_list),
    ]
    score_df.loc['RandomForest'] = scores
    # NOTE(review): a new bar chart is drawn on every call; if this is
    # invoked in a loop, consider plotting once after the final run.
    score_df.plot.bar()
    print(scores)
def main():
    """Evaluate the random forest over 50 shuffled 75/25 splits.

    The first command-line argument is a date string used to locate
    ``Experiment/statFeature/csvFile/<date>/examples.csv`` under
    ``PREFIX_DIR``.  The CSV is loaded, the counts of its ``'label'``
    column are plotted, and the rows are then repeatedly shuffled and
    split into 75% training / 25% test data that is handed to ``RF``.

    Raises:
        SystemExit: If no date argument was supplied on the command line.
    """
    if len(sys.argv) < 2:
        # Fail with a usage hint instead of a bare IndexError.
        raise SystemExit('usage: ' + sys.argv[0] + ' <date>')
    date = sys.argv[1]

    example_csv_file = PREFIX_DIR + 'Experiment/statFeature/csvFile/' + date + '/examples.csv'
    examples_df = pd.read_csv(example_csv_file)
    # Plot how many examples each label has.
    examples_df['label'].value_counts().plot.bar()

    # Work on a copy so in-place shuffling never touches the DataFrame.
    examples = examples_df.values.copy()

    score_df = pd.DataFrame(
        np.zeros((5, 3)),
        index=['LogisticRegression', 'SVM', 'GaussianNB', 'tree', 'RandomForest'],
        columns=['precision', 'recall', 'f1'],
    )
    f1_score_list = []
    recall_score_list = []
    precision_score_list = []

    # The row count never changes, so the 75% cut point is computed once.
    split_at = int(len(examples) * 0.75)

    for _ in range(50):
        np.random.shuffle(examples)
        examples_train = examples[:split_at]
        examples_test = examples[split_at:]
        # The last column is used as the target and all others as features
        # (presumably the 'label' column is last — confirm against the CSV).
        x_train = examples_train[:, 0:-1]
        y_train = examples_train[:, -1]
        x_test = examples_test[:, 0:-1]
        y_test = examples_test[:, -1]
        RF(score_df, f1_score_list, recall_score_list, precision_score_list,
           x_train, y_train, x_test, y_test)
# Script entry point: run the evaluation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()