""" Felix Muzny November 3rd/4th, 2021 DS 2001 - CS Practicum #8 The first part of implementing k-means clustering for 2-dimensional points! """ import kmeans_utils as utils import matplotlib.pyplot as plt import math # Task 4 def distance(p1, p2): """ name: distance This function takes two 2-d points and calculates the Euclidian distance between them. parameters: point1 - list of two numbers point2 - list of two numbers return: float euclidian distance between the two points (x1, y1) and (x2, y2) """ d = math.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) return d # Task 5 def closest(target_point, point_list): """ name: closest This function takes a target point and a list of points and calculates the Euclidian distance between them to find the point in the list that is closest to the target. parameters: target_point - list of a 2d point point_list - list of lists of 2d points return: the tuple point from point_list that is closest to target_point """ closest = None current_min = -1 for point in point_list: d = distance(target_point, point) if d < current_min or closest is None: closest = point current_min = d return closest if __name__ == "__main__": # Task 1 animal_ratings = {"cat": 4, "dog":4, "whale": 100} print(animal_ratings) # Task 2 points = utils.generate_points(20) centroids = utils.generate_points(2) print("data:", points) print("centroids:", centroids) print() # graphing our data plt.scatter(utils.get_column(points, 0), utils.get_column(points, 1), label="data") plt.scatter(utils.get_column(centroids, 0), utils.get_column(centroids, 1), label="centroids") plt.xlabel("x value") plt.ylabel("y value") plt.title("initial data for k-means clustering") plt.legend() plt.show() # Testing task 3 print("Testing distance function") print("distance from (0, 0) to (1, 1)", distance((0, 0), (1, 1))) print("distance from (-10, 0) to (10, 0)", distance((-10, 0), (10, 0))) print("first point in the data list:", points[0]) print("first point in the centroids list:", centroids[0]) print("distance between them:", distance(points[0], centroids[0])) print() target = [1, 1] # Testing task 4 print("Testing closest function") print("closest centroid to (0, 0):", closest((0, 0), centroids)) print("closest centroid to (10, 10):", closest((10, 10), centroids)) print() # Task 5 clusters = {} for point in points: belongs_to = closest(point, centroids) if belongs_to not in clusters: clusters[belongs_to] = 0 clusters[belongs_to] += 1 print("counts of points assigned to clusters") print(clusters)