#!/usr/bin/python
"""Read seasonal snowfalls from the Web.

The script downloads a page holding Boston's seasonal snowfall table, prints
the ten largest seasonal totals and shows a scatter plot of every season in
the order in which the rows appear on the page.

Two page layouts are recognised:

* the noaa.gov layout (or a copy of it), whose table starts at "1891-92"
  and ends at the first blank line;
* the wrcc.dri.edu layout, whose table starts at "1936" and is cut off just
  before a line starting with "2016".  This layout is first converted from
  HTML to text with the third-party ``html2text`` package.

The code runs under both Python 2 and Python 3.
"""

import sys
from contextlib import closing
from operator import itemgetter

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

# History: the web site originally used by this script was taken down in
# Spring, 2016.  The old locations were:
#   http://www.wrcc.dri.edu/cgi-bin/cliMAIN.pl?ma0770
#     (this first page produces two frames; the second frame is the one wanted)
#   http://www.wrcc.dri.edu/WRCCWrappers.py?sodxtrmts+190770+por+por+snow+none+msum+5+07+F
# The page below is the one currently read.
SNOWFALL_URL = (
    "https://course.ccs.neu.edu/cs7600/parent/python/snowfall-boston.html"
)


def to_native_str(data, encoding="utf-8"):
    """Return *data* as the interpreter's native ``str`` type.

    On Python 3 ``bytes`` are decoded; on Python 2 ``unicode`` is encoded.
    Characters that cannot be converted are dropped.  A value that is
    already a native ``str`` is returned unchanged.
    """
    if isinstance(data, str):
        return data
    if isinstance(data, bytes):
        return data.decode(encoding, "ignore")
    return data.encode(encoding, "ignore")  # Python 2 ``unicode``


def fetch_page(url=SNOWFALL_URL):
    """Download *url* and return its body as a native string."""
    # closing() guarantees the connection is released, also on Python 2
    # where the response object is not a context manager.
    with closing(urlopen(url)) as response:
        return to_native_str(response.read())


def html_to_text_rows(html):
    """Convert wrcc.dri.edu HTML into text with one table row per line."""
    # Imported here because only this page layout needs the third-party
    # package; the noaa.gov layout works without it being installed.
    import html2text

    # Set before converting; html2text documents 0 as "do not wrap lines".
    html2text.BODY_WIDTH = 0
    text = html2text.html2text(html)
    # html2text produces unicode; ASCII is good enough here, so every
    # non-ASCII character is dropped.
    text = to_native_str(text.encode("ascii", "ignore"), "ascii")
    # html2text separates the rows with '\n\n'.  Turn every newline into a
    # tab, then every pair of tabs back into one newline, so that each row
    # ends up on its own line.
    return text.replace("\n", "\t").replace("\t\t", "\n")


def extract_table(html):
    """Return the snowfall table contained in *html*, one row per line.

    The result starts at the first-row marker of the detected layout and
    stops right before that layout's end marker.

    Raises:
        ValueError: if *html* matches neither known page layout.
    """
    if "1891-92" in html:  # page taken from noaa.gov, or a copy of it
        first_row, end_marker = "1891-92", "\n\n"
    elif "1936" in html:  # page taken from wrcc.dri.edu
        first_row, end_marker = "1936", "\n2016"
        html = html_to_text_rows(html)
    else:
        # Nothing below can work without the markers, so stop here instead
        # of failing later on an undefined name.
        raise ValueError("*** unknown format ***")

    # split() drops the marker itself, so put it back in front.
    table = first_row + html.split(first_row)[1]
    return table.split(end_marker)[0]


def parse_snowfalls(table):
    """Parse table rows into ``(inches, season)`` tuples, in row order.

    The first whitespace-separated field of a row is the season label and
    the last field is the snowfall total.  Blank rows are skipped.

    Raises:
        ValueError: if the last field of a row is not a number.
    """
    records = []
    for row in table.split("\n"):
        fields = row.split()
        if not fields:
            continue  # blank or whitespace-only row
        records.append((float(fields[-1]), fields[0]))
    return records


def largest_snowfalls(records, count=10):
    """Return the *count* largest entries as ``(season, inches)`` tuples.

    *records* is a list of ``(inches, season)`` tuples as produced by
    :func:`parse_snowfalls`.  The largest total comes first.
    """
    # Ascending sort followed by reverse(): among equal totals the row that
    # appears later in *records* is listed first.
    ranked = sorted(records, key=itemgetter(0))
    ranked.reverse()
    return [(season, inches) for inches, season in ranked[:count]]


def plot_snowfalls(records):
    """Show a scatter plot of the snowfall totals in row order.

    *records* is a list such as ``[(46.8, '1891-92'), ...]``.
    """
    import matplotlib.pyplot as plt

    seasons = [season for _, season in records]
    inches = [amount for amount, _ in records]

    plt.xlabel("Year")
    # Label every 10th season only.
    plt.xticks(range(0, len(records), 10), seasons[::10])
    _, labels = plt.xticks()  # get the label objects of the x-axis
    plt.setp(labels, rotation=45)  # rotate the labels 45 degrees
    plt.axis(xmin=0, xmax=len(records) + 1)
    plt.ylabel("Snowfall (in.)")
    # This must be done after adjusting the x- and y-axes.
    plt.tight_layout(w_pad=6, h_pad=6)  # padding in width, height
    plt.scatter(range(len(records)), inches)
    plt.title("Seasonal snowfall in Boston")
    plt.show()


def main():
    """Fetch the data, print the ten largest snowfalls and plot all of them.

    Returns the process exit status: 0 on success, 1 when the downloaded
    page has an unknown format.
    """
    try:
        table = extract_table(fetch_page())
    except ValueError as err:
        print(err)
        return 1

    records = parse_snowfalls(table)
    top_ten = largest_snowfalls(records)
    print("Ten largest snowfalls:\n" + "\n".join(str(entry) for entry in top_ten))
    plot_snowfalls(records)
    return 0


if __name__ == "__main__":
    sys.exit(main())