Think about the real world: you efficiently look up information (a value) using a unique, unchanging identifier (a key).
In Python, a dictionary is a collection (like a list) that efficiently allows you to look up values associated with keys. Conceptually you can think of it as an unordered collection of key-value pairs ([(key1, value1), (key2, value2), (key3, value3), ...]) -- since Python 3.7 a dictionary does remember insertion order, but you still look things up by key, not by position. Note that the keys are unique and cannot change (they are immutable), and that looking up a value by its key is typically much faster than searching an entire list of pairs for that key.
Dictionary use is going to be quite similar to lists...
# Two equivalent spellings of an empty dictionary: the literal and the constructor
empty_literal = {}
empty_from_constructor = dict()
print(empty_literal)
print(empty_from_constructor)

# A dictionary literal with some initial key/value pairs
mydict = {
    'a': 1,
    'b': 2,
}
print(mydict)

# dict() also accepts a sequence of (key, value) pairs
number_names = [(1, 'one'), (2, 'two'), (3, 'three')]
mydict = dict(number_names)
print(mydict)
# Assigning to a key that does not exist yet inserts a new entry
mydict = dict(a=1, b=2)
print(mydict)
mydict['c'] = 3
print(mydict)

# Keys are unique, so assigning to an existing key replaces its value
# instead of adding a second entry
mydict['c'] = 300
print(mydict)
# The `in` operator tests whether a key (not a value) is present
myvotes = dict(alice=300, bob=111, carol=22)
for candidate in ("carol", "dan"):
    print(candidate in myvotes)
# Because trying to get the value of an invalid key is :(
myvotes = {"alice": 300, "bob": 111, "carol": 22}
print(myvotes['carol'])
try:
    print(myvotes['dan'])
except KeyError as missing_key:
    # Indexing with a key that is not in the dictionary raises KeyError.
    # Catch it here so the failure is still shown but the rest of the
    # script keeps running.
    print("KeyError:", missing_key)
# A common pattern: .get(key, default) never raises -- it returns the stored
# value when the key exists and the supplied default otherwise
myvotes = dict(alice=300, bob=111, carol=22)
carol_votes = myvotes.get('carol', 0)
dan_votes = myvotes.get('dan', 0)
print(carol_votes)
print(dan_votes)
# Like list.pop(), dict.pop(key) removes the entry and hands back its value
myvotes = dict(alice=300, bob=111, carol=22)
print(myvotes)

removed_votes = myvotes.pop("bob")

print(myvotes)
print(removed_votes)
myvotes = dict(alice=300, bob=111, carol=22)

# .keys() gives just the keys (wrapped in list() so the contents print)
vote_keys = list(myvotes.keys())
print(vote_keys)

# .values() gives just the values
vote_values = list(myvotes.values())
print(vote_values)

# .items() gives (key, value) pairs
vote_items = list(myvotes.items())
print(vote_items)

# len() reports the number of key/value pairs
entry_count = len(myvotes)
print(entry_count)
myvotes = {"alice": 300, "bob": 111, "carol": 22}

# Looping over a dictionary directly yields its keys
for element in myvotes:
    print(element)

# Commonly you loop over .items(), which yields (key, value) pairs that are
# unpacked here into two loop variables
for key, value in myvotes.items():
    print("{} = {}".format(key, value))
# Dictionary comprehensions build a dict in one expression:
# here, each even number from 0 through 10 mapped to its square
even_squares = {n: n ** 2 for n in range(11) if n % 2 == 0}
print(even_squares)
Commonly you want to group together data by some common attribute -- this is where dictionaries shine! As an example, let's go back to a task from last week: counting words in candidate speeches...
import os
# Directory (relative to the working directory) holding the speech transcripts
transcript_directory = "political-speech-files"


def get_speech_path(politician):
    """Return the path of the transcript file for *politician*.

    The file name is ``<politician>-speeches.txt`` inside
    ``transcript_directory``. The path is only built here; nothing checks
    that the file exists.
    """
    return os.path.join(transcript_directory, "{}-speeches.txt".format(politician))


print(get_speech_path("obama"))
# If you haven't used nltk before, run the following
import nltk
nltk.download('stopwords')
import string
from nltk.corpus import stopwords
def preprocess(word):
    """Return *word* lowercased with ASCII punctuation and digits removed.

    The apostrophe is the one punctuation character that is kept, so a
    contraction such as "don't" comes through intact. The result may be an
    empty string if every character was stripped.
    """
    bad_letters = string.punctuation + string.digits
    # Iterate over the lowercased string directly; no intermediate list needed
    return "".join(
        letter
        for letter in word.lower()
        if letter == "'" or letter not in bad_letters
    )
def get_all_words(politician):
    """Return the cleaned, stop-word-free words from a politician's speeches.

    Reads the transcript located by ``get_speech_path(politician)`` as UTF-8,
    splits it on whitespace, runs every token through ``preprocess``, and
    drops tokens that end up empty or that are stop words. Word order and
    duplicates are preserved.
    """
    with open(get_speech_path(politician), 'r', encoding='utf8') as f:
        all_words = f.read().split()
    preprocessed = [preprocess(word) for word in all_words]
    # Common words we don't care about: NLTK's English stop words plus the
    # em and en dashes. A set makes each membership test below a hash lookup
    # instead of a scan of the whole stop-word list.
    stop_words = set(stopwords.words('english')) | {'—', '–'}
    return [word for word in preprocessed if word and word not in stop_words]
# Load the cleaned word list for Obama's speeches and report how many words it holds
obama_words = get_all_words(politician='obama')
print(len(obama_words))
def add_word_to_count(word, counts):
    """Record one more occurrence of *word* in *counts*, in place.

    *counts* is a list of ``[word, count]`` two-element lists. If an entry
    for *word* already exists its count is incremented; otherwise a new
    ``[word, 1]`` entry is appended. Returns ``None``.

    Finding the entry means scanning the list from the start, so each call
    takes time proportional to the number of distinct words seen so far.
    """
    # try to add to existing count
    for count in counts:
        if count[0] == word:
            count[1] += 1
            return
    # if no match, set count to 1
    counts.append([word, 1])
def word_count(words):
    """Count occurrences of each word using a list of ``[word, count]`` pairs.

    Returns the list of pairs, ordered by each word's first appearance in
    *words*. Every word is handed to ``add_word_to_count``, which scans the
    list built so far, so this is deliberately the slow, list-based approach.
    """
    counts = []
    for word in words:
        add_word_to_count(word, counts)
    return counts
# Takes ~1 minute: every word triggers a scan of the growing list of counts
obama_count = word_count(words=obama_words)
print(len(obama_count))
# Now let's find specific words
def find_word_count(counts, word):
    """Return the count stored for *word* in a list of ``[word, count]`` pairs.

    The list is scanned from the start and the count of the first matching
    entry is returned; if no entry matches, the result is 0.
    """
    for count in counts:
        if count[0] == word:
            return count[1]
    return 0
# Look up the counts of a couple of sample words in the list-based result
for query in ("people", "python"):
    print(find_word_count(obama_count, query))
Now try this with a dictionary...
def word_count_dictionary(words):
    """Count occurrences of each word using a dictionary.

    Returns a dict mapping each distinct word in *words* to the number of
    times it appears. An empty input gives an empty dict.
    """
    counts = {}
    for word in words:
        # .get supplies 0 the first time a word is seen, so no membership
        # check is needed before incrementing
        counts[word] = counts.get(word, 0) + 1
    return counts
obama_count_dict = word_count_dictionary(obama_words) # instantaneous
# Report the size of the dictionary just built, not the list-based obama_count
print(len(obama_count_dict))
def find_word_count_dictionary(counts, word):
    """Return the count stored for *word* in the *counts* dict, or 0 if absent."""
    return counts.get(word, 0)
# Look up the counts of the same sample words in the dictionary-based result
for query in ("people", "python"):
    print(find_word_count_dictionary(obama_count_dict, query))