# Winter Olympics medal-standings scraper
import html

import requests
from bs4 import BeautifulSoup
def _country_name(row):
    """Return the stripped country name of a medal-table row, or None.

    The name is read from the first ``<a>`` inside the row's second ``<td>``.
    ``None`` is returned when the row does not have that structure, so callers
    can scan arbitrary rows without crashing on separators or malformed rows.
    """
    cells = row.find_all('td')
    if len(cells) < 2:
        return None
    link = cells[1].find('a')
    if link is None:
        return None
    return link.text.strip()


def _parse_medal_row(row):
    """Return ``[country, gold, silver, bronze, total]`` for one table row.

    Cells 2, 3 and 4 of the row hold the gold, silver and bronze counts inside
    a ``<div>``; ``total`` is their sum.  All five items are plain strings.

    Raises:
        ValueError, IndexError, AttributeError: if the row does not have the
            expected cell layout or a count is not an integer.
    """
    cells = row.find_all('td')  # looked up once instead of once per column
    country = cells[1].find('a').text.strip()
    gold, silver, bronze = (
        cell.find('div').get_text(strip=True) for cell in cells[2:5]
    )
    total = str(int(gold) + int(silver) + int(bronze))
    return [country, gold, silver, bronze, total]


def rank(url, top_n=5, featured="South Korea"):
    """Scrape the medal table at *url* and return the leading standings.

    The page is expected to contain a ``<table class="grid-table">`` whose
    ``<tbody>`` has one ``<tr>`` per country.

    Args:
        url: Address of the medal-standings page.
        top_n: Number of leading rows to include (default 5).
        featured: Country that is always reported.  If it is not among the
            first *top_n* rows, its row is looked up in the rest of the table
            and appended.  Pass ``None`` to disable.

    Returns:
        A flat list with five strings per country, in table order:
        ``[country, gold, silver, bronze, total, country, gold, ...]``.
        If *featured* is not found anywhere in the table, only the leading
        rows are returned.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            HTTP status.
        ValueError: if the medal table is missing from the page.
    """
    # A timeout keeps the script from hanging forever on a stalled server.
    page = requests.get(url, timeout=30)
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    table = soup.find('table', class_="grid-table")
    if table is None or table.tbody is None:
        raise ValueError("medal table ('table.grid-table') not found at " + url)
    rows = table.tbody.find_all('tr')

    ranklist = []
    featured_in_top = False
    for row in rows[:top_n]:
        medal = _parse_medal_row(row)
        if medal[0] == featured:
            featured_in_top = True
        ranklist.extend(medal)

    if featured is not None and not featured_in_top:
        # Match on the stripped name, the same way the leading rows are
        # compared, rather than on an exact whitespace-sensitive text node.
        for row in rows[top_n:]:
            if _country_name(row) == featured:
                ranklist.extend(_parse_medal_row(row))
                break

    return ranklist
# rank() returns a flat list, five items per country:
# [noc, gold, silver, bronze, total, noc, gold, silver, bronze, total, ...]
rank_list = rank('http://www.nbcolympics.com/medals')
print(rank_list)

# Write the standings as one <ul class="medal_list"> per country.
# rank_list is flat, five consecutive items per country:
# country, gold, silver, bronze, total.
# NOTE: "no" is the position in rank_list, not the official rank -- a country
# appended after the leading rows is numbered right after them.
entry_template = (
    '<ul class="medal_list">\n'
    '<li class="no">{no}</li>\n'
    '<li class="country">{0}</li>\n'
    '<li class="gold">{1}</li>\n'
    '<li class="silver">{2}</li>\n'
    '<li class="bronze">{3}</li>\n'
    '<li class="all">{4}</li>\n'
    '</ul>\n'
)

# "with" guarantees the file is closed even if a write fails part-way.
with open('medal_standing.asp', 'w', encoding='utf8') as fh:
    for no, start in enumerate(range(0, len(rank_list), 5), start=1):
        # The values come from a remote page, so escape them before
        # embedding them in HTML (e.g. a country name containing "&").
        fields = [html.escape(str(value)) for value in rank_list[start:start + 5]]
        fh.write(entry_template.format(*fields, no=no))