#!/usr/bin/python
# Read seasonal snowfalls from the Web.

import urllib
import html2text

# Unfortunately, NOAA has stopped making this information _easily_ available.
# The first URL below no longer works, so we fall back to a copy of the old web page.
# html = urllib.urlopen("http://www.erh.noaa.gov/box/climate/bossnw.shtml").read()
# html = urllib.urlopen("http://www.ccs.neu.edu/course/cs3650/snowfall-boston.html").read()
# The first URL produces two frames; we want the second frame.

# This version gets snowfall for Buffalo, NY.
# html = urllib.urlopen("http://www.wrcc.dri.edu/cgi-bin/cliMAIN.pl?ny1012").read()
# If you prefer that source, uncomment the line above and set delim = "1944".

html = urllib.urlopen("http://www.wrcc.dri.edu/WRCCWrappers.py?sodxtrmts+301012+por+por+snow+none+msum+5+07+F").read()

#=======================================
# Figure out which page format we got, and convert the raw html to text.

if "1891-92" in html:            # URL taken from noaa.gov, or a copy of it
    delim = "1891-92"
    delim_end = "\n\n"
elif "1936" in html:             # URL taken from wrcc.dri.edu
    delim = "1936"
    delim_end = "\n\n"           # assumed end-of-table marker for this format
elif "1944" in html:             # URL taken from wrcc.dri.edu
    delim = "1944"
    delim_end = "\n2016"
    # Convert the raw html to text.
    html2text.BODY_WIDTH = 0     # stop html2text from re-wrapping lines with '\n\n'
    html = html2text.html2text(html)
    # html2text produces unicode.  ASCII is good enough in the US of A. :-)
    html = html.encode("ascii", "ignore")   # drop non-ASCII characters
    # html2text was still writing '\n\n'; finish patching the output.
    html = html.replace('\n', '\t')
    html = html.replace('\t\t', '\n')
else:
    print "*** unknown format ***"
    raise SystemExit             # delim would be undefined below; stop here

#=======================================
# Now process the text data, to get the record years.

data1 = delim + html.split(delim)[1]   # keep everything from the first season onward
data2 = data1.split(delim_end)[0]      # and drop everything after the last season

a = data2.split('\n')                  # one table row per line
b = []
for x in a:
    x = x.split()
    if not x:                          # skip blank lines
        continue
    try:
        float(x[-1])                   # is the last field a seasonal total?
    except ValueError:
        del x[-1]                      # no: drop the trailing non-numeric field
    b.append( (x[0], x[-1]) )          # (season, seasonal snowfall)

c = [ (float(x[-1]), x[0]) for x in b ]   # (snowfall as a number, season)
d = sorted( c, key=lambda x: x[0] )
d.reverse()                               # largest snowfalls first

print( "Ten largest snowfalls:\n" +
       '\n'.join( [str((x[1], x[0])) for x in d[:10]] ) )

#=================================
# NOTE: if using Python3, do this instead for html:
#   import urllib.request
#   html = urllib.request.urlopen("http://www.erh.noaa.gov/box/climate/bossnw.shtml").read().decode('utf-8')
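
#=================================
# A minimal Python3 sketch of the fetch step, assuming the wrcc.dri.edu URL
# above still responds.  Under Python3, urlopen() returns bytes, so the page
# must be decoded into a string before the "in" tests and split() calls above
# will work.
#
# import urllib.request
#
# url = ("http://www.wrcc.dri.edu/WRCCWrappers.py?"
#        "sodxtrmts+301012+por+por+snow+none+msum+5+07+F")
# html = urllib.request.urlopen(url).read().decode("utf-8")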
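
#=================================
# A tiny sketch of the html-to-text conversion used above (same module-level
# html2text API; the table fragment here is made up for illustration only):
#
# import html2text
# html2text.BODY_WIDTH = 0        # do not re-wrap long lines
# fragment = "<table><tr><td>1944-45</td><td>70.3</td></tr></table>"
# text = html2text.html2text(fragment)
# print(text)                     # Markdown-flavored plain text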
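
#=================================
# A small, self-contained sketch of the sort-and-report step above, using a
# hypothetical three-season sample (the seasons and totals are made up):
#
# rows = [("1995-96", "60.2"), ("2014-15", "100.0"), ("1993-94", "80.5")]
# pairs = [(float(total), season) for (season, total) in rows]
# for total, season in sorted(pairs, reverse=True):
#     print((season, total))      # largest seasonal snowfall first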