Python - 初級爬蟲實驗1
今天實驗試作一個爬蟲。
抓Yahoo首頁查特定文字出現次數...
執行環境Jupyter Notebook。
# [Source code]
get_ipython().run_line_magic('matplotlib', 'inline')
import re  # regular expressions
import time  # was missing: time.strftime() below raised NameError

import matplotlib.pyplot as plt
import numpy as np
import requests
from matplotlib import font_manager

# Use a font that contains Chinese glyphs; otherwise the chart cannot
# render the Chinese tick labels correctly.
myfont = font_manager.FontProperties(fname='mingliu.ttc')

searchUrl = 'http://tw.yahoo.com/'
# A timeout keeps the notebook cell from hanging forever on a stalled connection.
htmlfile = requests.get(searchUrl, timeout=10)
name = np.array(["柯文哲","蔡英文","郭台銘","韓國瑜","館長"])
# One counter per keyword, sized from the keyword list rather than hard-coded.
timeValue = [0] * len(name)

# Timestamp formatted like 2019-08-28 11:45:39
localtime = time.strftime("%Y-%m-%d %H:%M:%S")
print("目前時間:", localtime)

# Scrape: count how often each keyword occurs in the downloaded page.
if htmlfile.status_code == requests.codes.ok:
    for i, pattern in enumerate(name):
        # re.escape makes the keyword match literally even if it contains
        # regex metacharacters. re.findall always returns a list (never
        # None), so an absent keyword simply yields an empty list.
        times = re.findall(re.escape(pattern), htmlfile.text)
        if times:
            print("搜尋 %s 成功" % pattern)
        else:
            print("搜尋 %s 失敗" % pattern)
        print("%s 出現過 %d 次" % (pattern, len(times)))
        timeValue[i] = len(times)
else:
    print("網頁下載失敗")

# Plot: draw the bars at integer positions first, then label those
# positions with the keywords using the Chinese-capable font.
x = range(len(name))
plt.bar(x, timeValue, width=0.35)
plt.xticks(x, name, fontproperties=myfont)
plt.show()
[執行結果]
抓Yahoo首頁查特定文字出現次數...
執行環境Jupyter Notebook。
# [Source code]
get_ipython().run_line_magic('matplotlib', 'inline')
import re  # regular expressions
import time  # was missing: time.strftime() below raised NameError

import matplotlib.pyplot as plt
import numpy as np
import requests
from matplotlib import font_manager

# Use a font that contains Chinese glyphs; otherwise the chart cannot
# render the Chinese tick labels correctly.
myfont = font_manager.FontProperties(fname='mingliu.ttc')

searchUrl = 'http://tw.yahoo.com/'
# A timeout keeps the notebook cell from hanging forever on a stalled connection.
htmlfile = requests.get(searchUrl, timeout=10)
name = np.array(["柯文哲","蔡英文","郭台銘","韓國瑜","館長"])
# One counter per keyword, sized from the keyword list rather than hard-coded.
timeValue = [0] * len(name)

# Timestamp formatted like 2019-08-28 11:45:39
localtime = time.strftime("%Y-%m-%d %H:%M:%S")
print("目前時間:", localtime)

# Scrape: count how often each keyword occurs in the downloaded page.
if htmlfile.status_code == requests.codes.ok:
    for i, pattern in enumerate(name):
        # re.escape makes the keyword match literally even if it contains
        # regex metacharacters. re.findall always returns a list (never
        # None), so an absent keyword simply yields an empty list.
        times = re.findall(re.escape(pattern), htmlfile.text)
        if times:
            print("搜尋 %s 成功" % pattern)
        else:
            print("搜尋 %s 失敗" % pattern)
        print("%s 出現過 %d 次" % (pattern, len(times)))
        timeValue[i] = len(times)
else:
    print("網頁下載失敗")

# Plot: draw the bars at integer positions first, then label those
# positions with the keywords using the Chinese-capable font.
x = range(len(name))
plt.bar(x, timeValue, width=0.35)
plt.xticks(x, name, fontproperties=myfont)
plt.show()
[執行結果]


留言
張貼留言