1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
| import re import requests import bs4 from bs4 import BeautifulSoup
def getHTMLText(url):
    """Download *url* and return the response body decoded as text.

    The request is sent with a browser-like User-Agent header and a
    10 second timeout.  The body is decoded using the encoding that
    requests detects from the content (``apparent_encoding``) rather than
    the one declared in the HTTP headers.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text on success, or the literal string "产生HTTPError"
        when the request fails (connection problem, timeout, invalid URL,
        or a 4xx/5xx status reported by ``raise_for_status``).
    """
    kv = {'User-Agent': 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132'}
    try:
        response = requests.get(url, timeout=10, headers=kv)
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        return response.text
    except requests.RequestException:
        # Only request-level failures are converted into the sentinel
        # string; KeyboardInterrupt, SystemExit and programming errors are
        # allowed to propagate instead of being silently swallowed.
        return "产生HTTPError"
def fillUnivList(ulist, html):
    """Append one tuple per table row found in *html* to *ulist*.

    The HTML is parsed with BeautifulSoup's "html.parser" backend.  For
    every tag that is a direct child of the first ``<tbody>``, the
    ``.string`` values of its first three ``<td>`` cells are appended to
    *ulist* as a 3-tuple.

    Args:
        ulist: List that is extended in place.
        html: HTML source text to parse.

    Returns:
        None.  *ulist* is left untouched when the document has no
        ``<tbody>``; rows with fewer than three ``<td>`` cells are skipped.
    """
    document = BeautifulSoup(html, "html.parser")
    table_body = document.tbody
    if table_body is None:
        # No table in the document (e.g. the caller passed an error
        # message instead of a page): nothing to collect.
        return
    for item in table_body.children:
        # Children also include whitespace text nodes between rows.
        if not isinstance(item, bs4.element.Tag):
            continue
        tds = item.find_all('td')
        if len(tds) < 3:
            # Header or malformed row without the three expected cells.
            continue
        ulist.append((tds[0].string, tds[1].string, tds[2].string))
def printUnivList(ulist, num):
    """Print a header line followed by at most *num* rows of *ulist*.

    Each line has three tab-separated columns, every one centred in a
    field of width 10.  The second column uses ``char_blank`` as its fill
    character.

    Args:
        ulist: Sequence of rows; the first three items of each row are
            printed.
        num: Maximum number of rows to print.  When *ulist* holds fewer
            rows, only the available ones are printed; a value of zero or
            less prints the header only.

    Returns:
        None.  Output goes to standard output.
    """
    tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
    char_blank = ' '
    print(tplt.format("排名", "学校", "省市", char_blank))
    # Slice instead of indexing range(num) so a short list cannot raise
    # IndexError; max() keeps a negative num from slicing from the end.
    for u in ulist[:max(num, 0)]:
        # A missing cell value (None) cannot be formatted with "^10", so it
        # is rendered as empty text.
        rank, name, region = ("" if cell is None else str(cell) for cell in u[:3])
        print(tplt.format(rank, name, region, char_blank))
def main():
    """Fetch the 2018 ranking page, parse it, and print the first 30 rows."""
    source_url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2018.html"
    rankings = []
    page_text = getHTMLText(source_url)
    # fillUnivList extends `rankings` in place.
    fillUnivList(rankings, page_text)
    printUnivList(rankings, 30)


# NOTE(review): the script runs unconditionally at module level, so it also
# executes when the file is imported.
main()
|