python爬虫抓取天气
已失效
一个小小小爬虫
由于天气 API 需要付费,就自己写了个爬虫把天气数据爬下来,
然后挂在 OpenShift 上。
http://i.zxc.science/weatherapi?citycode=101190102&type=today
爬虫代码如下:
简单的正则匹配,效率应该很低,基于 Python 3。
#coding:utf-8
import urllib.request
import re
__author__ = 'Taosky'
def now(citycode, *, fetch=None):
    """Scrape the current-conditions page of one city into a flat dict.

    Args:
        citycode: City identifier as a string; it is concatenated into the
            ``id`` query parameter of the request URL.
        fetch: Optional callable ``fetch(url) -> str`` that returns the
            decoded HTML of the page. When omitted, the page is downloaded
            with ``urllib.request.urlopen`` and decoded as UTF-8. Supplying
            it lets callers add caching, timeouts or custom headers, or
            parse a saved copy of the page.

    Returns:
        A dict whose ``today_*`` keys map to the text captured from the
        page markup (all values are strings).

    Raises:
        AttributeError: If the markup of a single-valued field is not found
            (``re.search`` returns ``None``).
        IndexError: If the page has fewer than two ``textdesc`` paragraphs
            or fewer than four ``num`` paragraphs.
    """
    url = 'http://m.weathercn.com/todayweather.do?id=' + citycode + '&partner=m'
    if fetch is None:
        # The context manager closes the HTTP response even when reading or
        # decoding fails.
        with urllib.request.urlopen(url) as response:
            html = response.read().decode('UTF-8')
    else:
        html = fetch(url)

    def first(pattern):
        # Text of the first capture group of the first match in the page.
        return re.search(pattern, html).group(1)

    today = {
        'today_date': first(r'<span class="date">(.*?)</span>'),
        'today_weekday': first(r'<span class="weekday">(.*?)</span>'),
        # Capture group 1, like every other field: only the inner text,
        # not the surrounding <span> tag.
        'today_solarTerm': first(r'<span class="solarTerm">(.*?)</span>'),
        # \s is the whitespace class; a literal "/s" would require a slash
        # in the markup and never match.
        'today_curtime': first(r'<div class="curtime">\s*?<span>(.*?)</span>'),
        # The capture runs up to </sup>, so it still contains the opening
        # <sup> tag, which is removed here.
        'today_curtemp': first(
            r'<span class="cur-temp">(.*?)</sup>').replace('<sup>', ''),
        'today_description': first(r'<span class="description">(.*?)</span>'),
        'today_sunrise': first(
            r'<div class="sunrise"><i></i><span>.*?</span>'
            r'<span>(.*?)</span></div>'),
        'today_sunset': first(
            r'<div class="sunset"><i></i><span>.*?</span>'
            r'<span>(.*?)</span></div>'),
    }

    # The page carries repeated "textdesc" and "num" paragraphs; they are
    # mapped to fields purely by their position in the document.
    textdesc = re.findall(r'<p\s+class="textdesc">(.*?)</p>', html)
    # NOTE: this key is spelled with a hyphen, unlike its siblings; it is
    # kept unchanged because consumers of the dict may rely on it.
    today['today_wind-direction'] = textdesc[0]
    today['today_wind_level'] = textdesc[1]

    num = re.findall(r'<p class="num">(.*?)</p>', html)
    today['today_air_pressure'] = num[0]
    today['today_humidity'] = num[1]
    today['today_wind_speed'] = num[2]
    today['today_visibility'] = num[3]
    return today
def days(citycode, *, fetch=None):
    """Scrape the multi-day forecast table of one city into a flat dict.

    Args:
        citycode: City identifier as a string; it is concatenated into the
            ``id`` query parameter of the request URL.
        fetch: Optional callable ``fetch(url) -> str`` that returns the
            decoded HTML of the page. When omitted, the page is downloaded
            with ``urllib.request.urlopen`` and decoded as UTF-8. Supplying
            it lets callers add caching, timeouts or custom headers, or
            parse a saved copy of the page.

    Returns:
        A dict with the keys ``days_date<N>``, ``days_tem<N>`` and
        ``days_desc<N>`` for every ``date`` cell found, where ``N`` counts
        from 1 in document order. Temperature and description values have
        all tabs, newlines, carriage returns and spaces removed. The dict
        is empty when the page has no ``date`` cells.

    Raises:
        IndexError: If the page has fewer ``temp`` or ``desc`` cells than
            ``date`` cells.
    """
    url = 'http://m.weathercn.com/index.do?id=' + citycode + '&partner=m'
    if fetch is None:
        # The context manager closes the HTTP response even when reading or
        # decoding fails.
        with urllib.request.urlopen(url) as response:
            html = response.read().decode('UTF-8')
    else:
        html = fetch(url)

    dates = re.findall(r'<td class="date">(.*?)</td>', html)
    # \s is the whitespace class (a literal "/s" would never match). It lets
    # a cell's content sit on its own line; leading indentation on that line
    # can still land inside the capture and is stripped below.
    temps = re.findall(r'<td class="temp">\s*?(.*?)\s*?</td>', html)
    descs = re.findall(r'<td class="desc">\s*?(.*?)\s*?</td>', html)

    # One pass that deletes tab, newline, carriage return and space.
    strip_whitespace = str.maketrans('', '', '\t\n\r ')

    forecast = {}
    # temps/descs are indexed rather than zipped so that a page with
    # misaligned columns raises IndexError instead of being silently
    # truncated.
    for index, date in enumerate(dates):
        suffix = str(index + 1)
        forecast['days_date' + suffix] = date
        forecast['days_tem' + suffix] = temps[index].translate(strip_whitespace)
        forecast['days_desc' + suffix] = descs[index].translate(strip_whitespace)
    return forecast
def hours(citycode, *, fetch=None):
    """Scrape the hourly forecast page of one city into a flat dict.

    Args:
        citycode: City identifier as a string; it is concatenated into the
            ``id`` query parameter of the request URL.
        fetch: Optional callable ``fetch(url) -> str`` that returns the
            decoded HTML of the page. When omitted, the page is downloaded
            with ``urllib.request.urlopen`` and decoded as UTF-8. Supplying
            it lets callers add caching, timeouts or custom headers, or
            parse a saved copy of the page.

    Returns:
        A dict with the keys ``hours_time<N>``, ``hours_tem<N>`` and
        ``hours_desc<N>`` for every ``time`` block found, where ``N`` counts
        from 1 in document order. The dict is empty when the page has no
        ``time`` blocks.

    Raises:
        IndexError: If the page has fewer ``temp`` or ``desc`` blocks than
            ``time`` blocks.
    """
    # NOTE(review): "partner" is sent empty here, whereas the other page
    # URLs in this file send "partner=m" -- confirm this is intentional.
    url = 'http://m.weathercn.com/eachhours.do?id=' + citycode + '&partner='
    if fetch is None:
        # The context manager closes the HTTP response even when reading or
        # decoding fails.
        with urllib.request.urlopen(url) as response:
            html = response.read().decode('UTF-8')
    else:
        html = fetch(url)

    times = re.findall(r'<div class="time">(.*?)</div>', html)
    temps = re.findall(r'<div class="temp">(.*?)</div>', html)
    descs = re.findall(r'<div class="desc">(.*?)</div>', html)

    hourly = {}
    # temps/descs are indexed rather than zipped so that a page with
    # misaligned columns raises IndexError instead of being silently
    # truncated.
    for index, time_text in enumerate(times):
        suffix = str(index + 1)
        hourly['hours_time' + suffix] = time_text
        hourly['hours_tem' + suffix] = temps[index]
        hourly['hours_desc' + suffix] = descs[index]
    return hourly