# -*- coding: utf-8 -*- """ @author: John Rachlin @date : September 13, 2021 @file : cherry.py Analysis of the full-flowering day-of-year for cherry blossums in Kyoto Japan. Why is this significant? Scientists are interested in understanding periodic biological phenomena (flowering, breeding, migration) especially as it relates to climatic conditions. (The scientific study of these types of phenomena is known as PHENOLOGY.) Data source: https://www.ncei.noaa.gov/pub/data/paleo/historical/phenology/japan/LatestVersion/KyotoFullFlower7.xls Data for 2020 and 2021 added to data set. 2020: March 30 - based on Japan Meterological Corporation (JMC) 2020 forecast. 2021: March 26 - based on reported actual (Osaka University) """ import csv import matplotlib.pyplot as plt def read_cherry_data(filename): """ Read cherry blossum full-flowering day-of-year data. filename - name of data file return - column lists: years, blossom-day-of-years] Return a list mapping year to day-of-year (of full blossums) """ years = [] # measurement year doys = [] # day of the year (kyoto full blossoming) with open(filename, 'r') as infile: infile.readline() # skip header for line in infile.readlines(): # extract values vals = line.strip().split(",") year = int(vals[0]) doy = vals[1] # Missing data converted to zero if doy == '': doy = 0 else: doy = int(doy) # Store values years.append(year) doys.append(doy) return years, doys def impute_values(L, missing_value = 0): """ Fill in missing values with the previous value in the list L - list of numeric values missing_value - code for a missing value return new list with imputed values """ imputed = L[:] # copy the original list for i in range(len(imputed)): if imputed[i] == missing_value and i != 0: imputed[i] = imputed[i-1] return imputed def get_window(L, idx, window_size=1): """ Extract a window of values of specified size centered on the specified index L: List of values idx: Center index window_size: window size """ minrange = max(idx - window_size // 2, 0) maxrange = idx + window_size // 2 + (window_size % 2) return L[minrange:maxrange] def moving_average(L, window_size=1): """ Compute a moving average over the list L using the specified window size L: List of values window_size - The window size (default=1) return - A new list with smoothed values """ mavg = [] for i in range(len(L)): window = get_window(L, i, window_size) avg = sum(window) / len(window) mavg.append(avg) return mavg def plot_blossum_day_history(years, doys): """ Plot the day-of-year full blossuming by year for all available years """ # fill in missing values imputed = impute_values(doys) moving = moving_average(imputed, window_size = 30) plt.figure(figsize=(10,6), dpi=200) plt.title("Cherry Blossoms: Full Blossom Date (Kyoto, Japan)") plt.xlabel("Year") plt.ylabel("Date") plt.scatter(years, doys, marker='.', color='#FF66B2', label='Full blossom date') # The color of cherry blossums! plt.plot(years, moving, color='b', label='30 year moving avg') plt.yticks([80,90,100,110,120], ['Mar 21', 'Mar 31', 'Apr 10', 'Apr 20', 'Apr 30']) # Highlight 2021 (Record) plt.text(1940, 84.5, '2021') # Outliers are interesting! # There was a global temperature reduction at that time. # See global_temps.png plt.text(1333,124, '1323 (May 4th)') plt.text(1416, 86, '1409') # Until 2021, this was the previous record year # Annotate figure with "The little ice-age" - a period of cooling # occurring between 1400-1800, though, according to wikipedia, # some experts prefer the alternative timespan of 1300-1850 plt.text(1300,84, '<- - - - - -------- The "Little Ice Age" ---------- - ->') # Include a legend plt.legend(facecolor='lightgrey', edgecolor='black') plt.ylim(80,130) plt.grid() plt.savefig('cherry.png') plt.show() def main(): # Read the data into a dictionary (year -> doy) years, doys = read_cherry_data('cherry.csv') # Visualize full-blossum day-of-year (Kyoto, Japan) plot_blossum_day_history(years, doys) if __name__ == '__main__': main()