""" John Rachlin DS 2000: Intro to Programming with Data Filename: Description: """ import matplotlib.pyplot as plt def read_data(filename): net = {} with open(filename) as file: for line in file: line = line.strip().split('\t') p1 = line[0] p2 = line[1] if p1 not in net: net[p1] = [] if p2 not in net: net[p2] = [] net[p1].append(p2) net[p2].append(p1) return net def degree_distribution(network): # extract num interactions for each protein degs = [len(network[p]) for p in network] degdist = {} for d in degs: if d not in degdist: degdist[d] = 1 else: degdist[d] += 1 return degdist def plot_degree_distribution(dd, loglog=False): """ Plot the degree distribution. Parameters: dd = degree distribution (degree:num) loglog = T/F whether to plot as log log plot Return: None """ xs = [k for k in dd] ys = [dd[k] for k in dd] plt.figure(figsize=(6,6), dpi=150) plt.grid() plt.title('PPI network degree distribution') plt.xlabel('# interactions') plt.ylabel('# proteins') plt.text(1.1,1.2,"Source: HuRI") if loglog: plt.xscale('log') plt.yscale('log') plt.scatter(xs, ys, marker='.', color='b') plt.savefig("ppi.png") plt.show() def main(): # read data - build network network = read_data('HuRI.tsv') # compute the degree distribution dd = degree_distribution(network) print(dd) # plot degree distribution plot_degree_distribution(dd, loglog=True) main()