""" Felix Muzny DS 2000 Lecture 22 November 26, 2024 - ml between animals - test out a few classifiers - knn - linear regression """ # install sklearn from sklearn.neighbors import KNeighborsClassifier from sklearn.linear_model import LinearRegression import csv TRAIN_FILE = "animal_train.csv" TEST_FILE = "animal_test.csv" K = 3 FEATURES = ["number_legs", "lifespan", "can_fly"] def read_file(filename, features = FEATURES, label_name = "label"): # first sublist will store the features # second sublist will store the labels all_data = [[], []] with open(filename, "r") as file: reader = csv.DictReader(file) for line in reader: # store the features # convert them all to floats feats = [float(line[feat]) for feat in features] all_data[0].append(feats) # store the label all_data[1].append(line[label_name]) return all_data def main(): print("Lec 22 - classifying some animals!") #KNN all_data = read_file(TRAIN_FILE, label_name="label") #Linear # all_data = read_file(TRAIN_FILE, label_name="label_int") X = all_data[0] y = all_data[1] print("train features:\n", X) print("train labels:\n", y) print() classifier = KNeighborsClassifier(n_neighbors=K) # classifier = LinearRegression() # fit the classifier to train it # X is a 2-dimensional list of features, number of examples by number of features # like # [[feat1, feat2, ...], # [feat1, feat2, ...], # [feat1, feat2, ...]] # y is a single dimensional list with labels for all the examples in X, in the same order classifier.fit(X, y) # same format as the training data # knn test_data = read_file(TEST_FILE, label_name="label") # linear # test_data = read_file(TEST_FILE, label_name="label_int") test_feats = test_data[0] test_labels = test_data[1] # see the guesses print("predictions: ", classifier.predict(test_feats)) print("actual labels: ", test_labels) print("accuracy: ", classifier.score(test_feats, test_labels)) main()