#!/usr/bin/python
# Read seasonal snowfalls from the Web.

import urllib
import html2text

# Unfortunately, NOAA has stopped making this information _easily_ available.
# The first URL below no longer works, so we fall back to a copy of the old web page.
# html = urllib.urlopen("http://www.erh.noaa.gov/box/climate/bossnw.shtml").read()
# html = urllib.urlopen("http://www.ccs.neu.edu/course/cs3650/snowfall-boston.html").read()
# The first URL produces two frames; we want the second frame.

# This version gets snowfall for Buffalo, NY.
# html = urllib.urlopen("http://www.wrcc.dri.edu/cgi-bin/cliMAIN.pl?ny1012").read()
# If you prefer that source, uncomment the line above and set delim = "1944".

html = urllib.urlopen("http://www.wrcc.dri.edu/WRCCWrappers.py?sodxtrmts+301012+por+por+snow+none+msum+5+07+F").read()

#=======================================
# Figure out which page format we got, and convert the raw html to text.

if "1891-92" in html:            # URL taken from noaa.gov, or a copy of it
    delim = "1891-92"
    delim_end = "\n\n"
elif "1936" in html:             # URL taken from wrcc.dri.edu
    delim = "1936"
    delim_end = "\n\n"           # assumed end-of-table marker for this format
elif "1944" in html:             # URL taken from wrcc.dri.edu
    delim = "1944"
    delim_end = "\n2016"
    # Convert the raw html to text.
    html2text.BODY_WIDTH = 0     # stop html2text from re-wrapping lines with '\n\n'
    html = html2text.html2text(html)
    # html2text produces unicode.  ASCII is good enough in the US of A. :-)
    html = html.encode("ascii", "ignore")   # drop non-ASCII characters
    # html2text was still writing '\n\n'; finish patching the output.
    html = html.replace('\n', '\t')
    html = html.replace('\t\t', '\n')
else:
    print "*** unknown format ***"
    raise SystemExit             # delim would be undefined below; stop here

#=======================================
# Now process the text data, to get the record years.

data1 = delim + html.split(delim)[1]   # keep everything from the first season onward
data2 = data1.split(delim_end)[0]      # and drop everything after the last season

a = data2.split('\n')                  # one table row per line
b = []
for x in a:
    x = x.split()
    if not x:                          # skip blank lines
        continue
    try:
        float(x[-1])                   # is the last field a seasonal total?
    except ValueError:
        del x[-1]                      # no: drop the trailing non-numeric field
    b.append( (x[0], x[-1]) )          # (season, seasonal snowfall)

c = [ (float(x[-1]), x[0]) for x in b ]   # (snowfall as a number, season)
d = sorted( c, key=lambda x: x[0] )
d.reverse()                               # largest snowfalls first

print( "Ten largest snowfalls:\n" +
       '\n'.join( [str((x[1], x[0])) for x in d[:10]] ) )

#=================================
# NOTE: if using Python3, do this instead for html:
#   import urllib.request
#   html = urllib.request.urlopen("http://www.erh.noaa.gov/box/climate/bossnw.shtml").read().decode('utf-8')
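
#=================================
# A minimal Python3 sketch of the fetch step, assuming the wrcc.dri.edu URL
# above still responds.  Under Python3, urlopen() returns bytes, so the page
# must be decoded into a string before the "in" tests and split() calls above
# will work.
#
# import urllib.request
#
# url = ("http://www.wrcc.dri.edu/WRCCWrappers.py?"
#        "sodxtrmts+301012+por+por+snow+none+msum+5+07+F")
# html = urllib.request.urlopen(url).read().decode("utf-8")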
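
#=================================
# A tiny sketch of the html-to-text conversion used above (same module-level
# html2text API; the table fragment here is made up for illustration only):
#
# import html2text
# html2text.BODY_WIDTH = 0        # do not re-wrap long lines
# fragment = "<table><tr><td>1944-45</td><td>70.3</td></tr></table>"
# text = html2text.html2text(fragment)
# print(text)                     # Markdown-flavored plain text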
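
#=================================
# A small, self-contained sketch of the sort-and-report step above, using a
# hypothetical three-season sample (the seasons and totals are made up):
#
# rows = [("1995-96", "60.2"), ("2014-15", "100.0"), ("1993-94", "80.5")]
# pairs = [(float(total), season) for (season, total) in rows]
# for total, season in sorted(pairs, reverse=True):
#     print((season, total))      # largest seasonal snowfall first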