"""
@author John Rachlin
@class DS2000: Programming with Data
@file crud.py

Crud Operations and Data Structures
"""

#%% INTRO

"""
Data structures: Ways of organizing collections of data

Operations on Data (CRUD):
    Create:   How do we initialize and add data to the structure
    Retrieve: How do we fetch / lookup data we want
    Update:   Modify data in our collection
    Delete:   Remove data from our collection

So far, the data structures we've encountered include lists and tuples
"""

# CREATE
# Each assignment below rebinds my_list; only the last one survives.
my_list = []                                    # empty list
my_list = [1, 'hello', ['a', 'b', 'c'], True]   # mixed types, even a nested list
my_list = "hello world".split()                 # a list of words
my_list = list("Python")                        # a list of single characters
print(my_list)

# RETRIEVE
# using an integer index that denotes the POSITION of the
# item we want
an_item = my_list[2]       # positions start at 0, so this is the third item
last_item = my_list[-1]    # negative indexes count back from the end
several = my_list[2:4]     # a slice: positions 2 and 3 (the stop index is excluded)

# UPDATE
print(my_list)
my_list[0] = 'p'           # replace the item at position 0
my_list.append(3)          # add a new item at the end
print(my_list)

# DELETE
L = ['a', 'b', 'c', 'd']
del L[2]                   # delete by position; nothing is returned
print(L)
last = L.pop()             # remove the last item AND return it
print(L, last)

#%%
# New today: Dictionaries!
#
# Dictionaries are used when we want to look up a VALUE using some KEY.
# Examples:
#    Given a word (key) fetch the definition (value)
#    Given an account number (key) fetch the orders (value)
#    Given a name (key) fetch the phone number (value)
#    .
#    .
# Fortunately Python provides a more efficient data type for exactly this
# sort of structure!
#
# These are called dictionaries. They work by associating keys with values, and
# they provide simple means of accessing and manipulating these (key,value) pairs.

#%% The alternative: Use lists of tuples (not as good!)
# to be or not to be
counts = [['to', 2], ['be', 2], ['or', 1], ['not', 1]]

# Conceptually, this is a sort of dictionary: given a word, look up
# how many times it occurs
# BUT, the lookup might be slow:


def lookup_count(word_counts, word):
    """Return the count stored for word in a list of [word, count] pairs.

    The pairs are scanned from the front, and each pair examined is
    printed so the cost of the search is visible.

    Parameters:
        word_counts: a list of [word, count] pairs
        word: the word to search for

    Returns:
        The count paired with the first matching word, or 0 if the
        word does not appear in word_counts.
    """
    for wc in word_counts:
        print(wc)   # show every pair we had to look at along the way
        if wc[0] == word:
            return wc[1]
    return 0


word = 'be'
count = lookup_count(counts, word)
print("The word '" + word + "' occurs", count, "times.")

# Now imagine I have 100,000 words in a dictionary or several billion
# Facebook accounts! The lookup could be very slow.

#%% GOALS
# 1) Efficient lookup of the value given a key
# 2) Keys should be unique so we don't get ambiguous results
#    (Do real dictionaries follow this rule? Actually yes!
#    The word occurs once. Multiple definitions if they exist
#    are all found in the same place.)
#    The value in a dictionary can be a LIST!

#%% CREATE DICTIONARIES

# create an empty dictionary
d = {}
print(d)

# Define the keys and values directly
d = {'to': 2, 'be': 2, 'or': 1, 'not': 1}
print(d)
print(d['be'])

#%%
# Add some more keys
# to be or not to be that is the question
d['that'] = 1
d['is'] = 1
d['the'] = 1
d['question'] = 1
print(d)

#%% UPDATING DICTIONARIES
d['to'] = d['to'] + 1
d

# But if we update a word not yet in our dictionary....
# d['computer'] = d['computer'] + 1   # KeyError

#%% IN and NOT IN
'to' in d             # True
'computer' in d       # False
'computer' not in d   # True


def count_word(d, word):
    """Add one to the count of word in d, starting at 1 for a new word.

    Parameters:
        d: a dictionary mapping words to counts (modified in place)
        word: the word that was just seen
    """
    if word in d:
        d[word] += 1
    else:
        d[word] = 1


count_word(d, 'computer')
d

#%%
# A simpler approach using get (which allows you to specify a default
# return value if the key is not in the dictionary)
d.get('foobar', 0)   # 0 -- and 'foobar' is NOT added to d


# This definition replaces the version above; both produce the same counts.
def count_word(d, word):
    """Add one to the count of word in d, starting at 1 for a new word.

    Parameters:
        d: a dictionary mapping words to counts (modified in place)
        word: the word that was just seen
    """
    d[word] = d.get(word, 0) + 1


#%% DELETING (by key)
# Deleting a key deletes both its key and value!
# But you need to test that it actually exists first!
if 'computer' in d:
    del d['computer']   # del is a statement: it returns no value!
# Display the word-count dictionary d (built in the preceding section)
d

#%% A simple translator

# We can initialize a dictionary with some pre-defined key-value pairs
eng_fr = {"one": "un", "two": "deux", "three": "trois", "four": "quatre",
          "I": "je", "you": "tu", "and": "et", "cats": "chats",
          "love": "aime", "like": "aime bien", "my": "mes"}


def translate(phrase, eng_fr):
    """Translate phrase one word at a time using a lookup dictionary.

    Parameters:
        phrase: a string of words separated by whitespace
        eng_fr: a dictionary mapping each source word to its translation
                (any word-to-word dictionary works, not only English-French)

    Returns:
        The translated words joined by single spaces.

    Raises:
        KeyError: if a word in phrase is not a key of the dictionary.
    """
    return " ".join(eng_fr[word] for word in phrase.split())


translate("I like my three cats", eng_fr)
# oops -- presumably because the word-by-word result ("je aime tu")
# is not how a French speaker would say it
translate("I love you", eng_fr)

#%% Convert to a french english dictionary
# Swap every (key, value) pair. If two English words shared the same French
# translation, only the later pair would survive, because keys are unique.
# Note that the key "aime bien" contains a space, so translate() can never
# match it: split() only produces single words.
fr_eng = {v: k for k, v in eng_fr.items()}
translate("je aime mes trois chats", fr_eng)

#%% Keys are unique
# (Named names rather than dict so the built-in dict type is not hidden.)
names = {}
names[23] = "john"   # adding a value for key 23
names[23] = "mike"   # now we are CHANGING the value associated with the key
print(names)

#%% Numbers, strings, tuples can be keys, but NOT lists! WHY?
# Keys must be HASHABLE, which in practice means IMMUTABLE
mixed_keys = {}
mixed_keys[4] = "four"
mixed_keys[3.14159] = "pi"
mixed_keys["pi"] = 3.14159
mixed_keys[(0, 0)] = "origin"
print(mixed_keys)

# mixed_keys[[1, 2, 3]] = "one-two-three"   # TypeError: unhashable type: 'list'

# But lists can be the values!
mixed_keys["123"] = [1, 2, 3]
print(mixed_keys["123"])