# coding=utf-8
#code by xi4okv QQ:48011203 site:xiaokui.cc
import urllib2 as url
import urllib2
import string
import urllib
import re
import sys
def help():
    """Print the command-line usage line for this script.

    Note: this intentionally keeps the original name, which shadows the
    built-in ``help`` inside this module.
    """
    # Call form with a single string prints the same text on Python 2
    # (parenthesised expression) and Python 3 (function call).
    print('python baidu.py keyword page')
def baidu_search(keyword, pn):
    """Fetch one Baidu result page and return its raw HTML.

    Args:
        keyword: The search term, un-escaped. It is percent-encoded here, so
            callers must not pre-encode it.
        pn: Value sent as the ``pn`` query parameter.

    Returns:
        The response body exactly as read from the server.

    Raises:
        Whatever ``urllib2.urlopen`` raises on network or HTTP failure; this
        function does not catch it.
    """
    # Encode the query so spaces, '&', '#' or non-ASCII bytes in the keyword
    # cannot break or alter the URL. A list of pairs keeps the original
    # parameter order (wd first, then pn).
    query = urllib.urlencode([('wd', keyword), ('pn', pn)])
    res = url.urlopen('http://www.baidu.com/s?' + query)
    try:
        return res.read()
    finally:
        # Release the connection even if the read fails.
        res.close()
# Matches fragments of the form  url":"<target>"}  and captures <target>.
# NOTE(review): the pattern in the received listing had unbalanced quotes
# (r'url':'(.*?)'}'), so the original quote character could not be recovered
# with certainty; either quote style is accepted here -- confirm against a
# real result page.
_SITE_URL_RE = re.compile(r'''url["']:["'](.*?)["']\}''')


def get_url(html):
    """Extract the link targets embedded in a result page.

    Args:
        html: Page source as returned by ``baidu_search``; may be empty.

    Returns:
        A list with one captured string per match, in document order. The
        list is empty when ``html`` is empty/None or contains no match, so
        callers can always iterate the result.
    """
    if not html:
        print('ERROR!')
        return []
    return _SITE_URL_RE.findall(html)
def baidu_url(xk_url):
    """Follow a scheme-less link and return the URL it finally resolves to.

    Args:
        xk_url: Link text to which ``'http:'`` is prepended before opening.

    Returns:
        The final URL reported by the opened response (after any redirects
        the opener followed), or ``None`` if the link could not be opened.
    """
    try:
        response = urllib2.urlopen('http:' + xk_url)
    except Exception:
        # Deliberate best-effort: one dead link must not abort the crawl.
        # ``Exception`` (rather than a bare except) lets Ctrl-C and
        # SystemExit propagate.
        print('ERROR!')
        return None
    try:
        return response.geturl()
    finally:
        # Only the final address is needed; do not keep the socket open.
        response.close()
def main():
    """Search Baidu for a keyword and save the resolved result links.

    Reads ``keyword`` from ``sys.argv[1]`` and the number of result pages
    from ``sys.argv[2]``. Every extracted link containing ``'link?url'`` is
    resolved with ``baidu_url``; each resolved address is printed and written
    on its own line to ``result.lst`` (overwritten on every run).

    If the arguments are missing or the page count is not an integer, the
    usage line is printed and nothing else happens.
    """
    if len(sys.argv) < 3:
        help()
        return
    keyword = sys.argv[1]
    try:
        page = int(sys.argv[2])
    except ValueError:
        help()
        return

    print('search ' + keyword + ' in baidu:')
    # ``with`` guarantees the result file is flushed and closed even if a
    # request raises part-way through the crawl.
    with open('result.lst', 'w') as f:
        # NOTE(review): the loop header in the received listing was truncated
        # ("while count <>"); this fetches ``page`` pages. It also starts at
        # pn=0, assuming ``pn`` is a zero-based result offset in steps of 10
        # (the listing incremented before use and so began at pn=20) --
        # confirm both against the intended behaviour.
        for page_index in range(page):
            html = baidu_search(keyword, 10 * page_index)
            # ``or []`` tolerates a get_url that returns None for empty input.
            for xk_url in get_url(html) or []:
                if 'link?url' not in xk_url:
                    continue
                result = baidu_url(xk_url)
                if result:
                    f.write(result + '\n')
                    print(result)
                else:
                    # The link could not be resolved; skip it and carry on.
                    print('ERROR')
# Run the crawler only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
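# NOTE: the copied listing ended with stray web-page text ("联系客服", i.e. "contact customer service"); it is not part of the script.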