'''
    DS2000
    Spring 2023
    Sample code from class -- quick example on web scraping
'''


# import urllib for grabbing web pages and BeautifulSoup for parsing
from urllib.request import urlopen
from bs4 import BeautifulSoup

def main():
    '''
        Fetch two web pages and print selected pieces of them.

        First prints the raw bytes of a very simple page, then parses the
        DS2000 course page with BeautifulSoup and prints its first h1,
        its title, the text of every paragraph, its first image tag, and
        the text of every link.

        Takes no parameters and returns nothing; all output goes to stdout.
        Requires network access -- urlopen raises (e.g. urllib.error.URLError)
        if a page cannot be fetched.
    '''
    simple_url = "https://www.khoury.northeastern.edu/home/laney/simple.html"
    course_url = "https://course.ccs.neu.edu/ds2000/"

    # Print the raw contents of a simple page. read() gives back bytes,
    # so this shows up as a bytes literal rather than decoded text.
    # The with-block closes the connection as soon as we're done reading.
    with urlopen(simple_url) as response:
        print(response.read())

    # Find the first h1 header in our DS2000 course website
    with urlopen(course_url) as response:
        bs = BeautifulSoup(response.read(), "html.parser")
    print(bs.h1)

    # Find the title in our DS2000 web page
    # (WARNING! People who make web pages are not necessarily consistent
    # with their use of tags. :)
    print(bs.title)

    # Find all the text enclosed in paragraph tags
    # Note that there's not much in paragraph tags because
    # we are not smart or consistent
    graphs = bs.find_all("p")
    for p in graphs:
        print(p.get_text())

    # Find the first image on the website (it's our Khoury logo!)
    print(bs.img)

    # Find all the link text in our website. For accessibility standards,
    # these should be descriptive.
    # href=True keeps only <a> tags that actually have an href attribute,
    # i.e. real links. (Passing a dict like {"a": "href"} as the tag name
    # only uses the key, so the "href" part would be silently ignored.)
    links = bs.find_all("a", href=True)
    for link in links:
        print(link.get_text())
        
# Only run the demo when this file is executed as a script, so that
# importing it from another module does not trigger network requests.
if __name__ == "__main__":
    main()