"""
Felix Muzny
DS 2000
Lecture 22
November 26, 2024

- ml to tell animals apart (classification)
- test out a few classifiers
- knn
- linear regression

"""

# requires scikit-learn (pip install scikit-learn)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression
import csv


# CSV file with the examples used to fit the classifier
TRAIN_FILE = "animal_train.csv"
# CSV file with the examples used for predictions and scoring
TEST_FILE = "animal_test.csv"
# number of neighbors handed to KNeighborsClassifier
K = 3
# CSV column names that read_file converts to float features by default
FEATURES = ["number_legs", "lifespan", "can_fly"]

def read_file(filename, features=None, label_name="label"):
    """
    Read a CSV file of examples into parallel feature and label lists.

    Parameters:
        filename - path to a CSV file whose first row is a header
        features - column names to use as features, in order; when None
            (the default) the module-level FEATURES list is used
        label_name - name of the column that holds each example's label

    Returns:
        a list of two sublists, [feature_rows, labels]: feature_rows holds
        one list of floats per CSV row, labels holds the label string of
        each row, in the same order

    Raises:
        KeyError if a requested column is not in the file
        ValueError if a feature value cannot be converted to a float
    """
    # look the default up at call time instead of binding the shared
    # module-level list into the signature
    if features is None:
        features = FEATURES

    feature_rows = []
    labels = []
    # newline="" is what the csv module asks for: the reader then handles
    # line endings itself, so quoted values with embedded newlines are
    # kept exactly as written
    with open(filename, "r", newline="") as file:
        for row in csv.DictReader(file):
            # every feature is converted to a float
            feature_rows.append([float(row[feat]) for feat in features])
            # labels are kept as the strings found in the file
            labels.append(row[label_name])
    return [feature_rows, labels]

def main():
    """
    Fit a k-nearest-neighbors classifier on the training file, then print
    its predictions, the true labels, and its score on the test file.
    """
    print("Lec 22 - classifying some animals!")

    # read_file hands back [features, labels]
    # (for linear regression, read with label_name="label_int" instead)
    train_feats, train_labels = read_file(TRAIN_FILE, label_name="label")
    print("train features:\n", train_feats)
    print("train labels:\n", train_labels)
    print()

    # swap in LinearRegression() here to try the linear model
    model = KNeighborsClassifier(n_neighbors=K)

    # training step
    # the features are a 2-dimensional list, one row per example:
    # [[feat1, feat2, ...],
    #  [feat1, feat2, ...],
    #  [feat1, feat2, ...]]
    # the labels are a flat list, one per example, in the same order as the rows
    model.fit(train_feats, train_labels)

    # the test file has the same layout as the training file
    # (again, label_name="label_int" for linear regression)
    test_feats, test_labels = read_file(TEST_FILE, label_name="label")

    # compare what the model guesses against the real answers
    guesses = model.predict(test_feats)
    print("predictions: ", guesses)
    print("actual labels: ", test_labels)

    accuracy = model.score(test_feats, test_labels)
    print("accuracy: ", accuracy)

# run the demo only when this file is executed directly, so that importing
# it (e.g. to reuse read_file) does not train a model or print anything
if __name__ == "__main__":
    main()