您的评价: |
|
收藏该经验 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2012 Channing Wong
#
# @mail: channing.wong@yahoo.com
# @home: http://blog.3363.me/
# @date: Mar 3, 2012
#
010 |
|
011 |
import json |
012 |
import sys |
013 |
import time |
014 |
import types |
015 |
import urllib |
016 |
|
017 |
# Python 2 only: ``site`` deletes ``sys.setdefaultencoding`` during start-up,
# so ``sys`` has to be reloaded before the function can be called again.
# NOTE(review): presumably the UTF-8 default is what lets the unicode text
# decoded from the JSON responses be written to the plain output file
# without an explicit ``.encode()`` -- confirm before removing this hack.
reload(sys)
sys.setdefaultencoding('utf-8')
019 |
|
020 |
|
021 |
class BaiduMap:
    """Download Baidu Map search results for a keyword into a text file.

    Constructing an instance sends an initial search request and stores the
    list of matching cities in ``self.city``.  ``get_all`` then requests the
    result pages of every city and appends one tab-separated line
    (city name, place name, address, telephone) per result to
    ``<keyword>.txt``.

    NOTE(review): the query parameters are copied from requests made by the
    map.baidu.com web client; the meaning of the individual keys is not
    visible in this file.
    """

    def __init__(self, keyword):
        """Open the output file and load the city list for *keyword*.

        ``<keyword>.txt`` is opened for writing (truncating any existing
        file) and ``_get_city`` is called, which terminates the process via
        ``sys.exit()`` when the response carries no result list.
        """
        self.keyword = keyword
        self.query = [
            ('b', '(-1599062.039999999,811604.75;24779177.96,8168020.75)'),
            ('c', '1'),
            ('from', 'webmap'),
            ('ie', 'utf-8'),
            ('l', '4'),
            ('newmap', '1'),
            ('qt', 's'),
            ('src', '0'),
            ('sug', '0'),
            ('t', int(time.time())),
            ('tn', 'B_NORMAL_MAP'),
            ('wd', keyword),
            ('wd2', ''),
        ]
        self.mapurl = 'http://map.baidu.com/'
        self.file = open('%s.txt' % keyword, 'w')
        self.count = 0        # results written so far, over all cities
        self.count_c = 0      # results written so far for the current city
        self.total_num = 0    # sum of the per-city 'num' values

        try:
            self._get_city()
        except BaseException:
            # Also covers the SystemExit raised for an unusable keyword:
            # do not leave the output file handle open on the way out.
            self.file.close()
            raise

    def _fetch(self, query=None, json=True):
        """Request ``self.mapurl`` with *query* and return the response.

        :param query: sequence of ``(key, value)`` pairs (or a mapping) that
            is URL-encoded into the query string; ``None`` sends no
            parameters.
        :param json: when true the body is decoded with ``_tojson`` (so the
            result may be ``None``); otherwise the raw body is returned.
        """
        url = self.mapurl + '?' + urllib.urlencode(query or [])
        response = urllib.FancyURLopener().open(url)
        try:
            body = response.read()
        finally:
            response.close()

        if json:
            return self._tojson(body)
        return body

    def _tojson(self, data):
        """Decode the JSON text *data*; return ``None`` if it cannot be parsed."""
        try:
            # No positional encoding argument: it is unnecessary on Python 2
            # (UTF-8 is the default) and rejected by Python 3.9+.
            return json.loads(data)
        except (TypeError, ValueError):
            return None

    def _get_city(self):
        """Run the initial search and populate ``self.city`` / ``self.total_num``.

        ``self.city`` becomes the response's ``content`` list extended with
        the ``city`` lists of every ``more_city`` entry, and the ``num``
        field of each city is added to ``self.total_num``.  If the response
        is not a JSON object whose ``content`` is a list, an error is
        printed and the process exits.
        """
        data = self._fetch(self.query)

        if not isinstance(data, dict) or not isinstance(data.get('content'), list):
            print('keyworld error.')
            sys.exit()

        self.city = data['content']
        for group in data.get('more_city', []):
            self.city.extend(group['city'])

        self.total_num += sum(city['num'] for city in self.city)

    def _get_data(self, city, page=0):
        """Fetch one result page for *city* and return the decoded response.

        :param city: city record from ``self.city``; its ``geo`` and ``code``
            fields are used (the part of ``geo`` after the first ``|`` is
            sent as the ``b`` parameter).
        :param page: zero-based page index sent as ``pn``.
        :returns: the decoded JSON response, or ``None`` if it was not
            valid JSON.
        """
        query = [
            ('addr', '0'),
            ('b', '(%s)' % city['geo'].split('|')[1]),
            ('c', city['code']),
            ('db', '0'),
            ('gr', '3'),
            ('ie', 'utf-8'),
            ('l', '9'),
            ('newmap', '1'),
            ('on_gel', '1'),
            ('pn', page),
            ('qt', 'con'),
            ('src', '7'),
            ('sug', '0'),
            ('t', int(time.time())),
            ('tn', 'B_NORMAL_MAP'),
            ('wd', self.keyword),
            ('wd2', ''),
        ]
        return self._fetch(query)

    def _save(self, content, city):
        """Write every entry of *content* to the output file and log progress.

        Each entry produces one ``city<TAB>name<TAB>addr<TAB>tel`` line; a
        missing ``addr`` or ``tel`` is written as an empty field.  Both
        counters are advanced and a progress line is printed per entry.
        """
        for entry in content:
            self.count += 1
            self.count_c += 1

            line = '%s\t%s\t%s\t%s\n' % (
                city['name'],
                entry['name'],
                entry.get('addr', ''),
                entry.get('tel', ''),
            )
            self.file.write(line)
            print('(%s/%s) %s[%s/%s]' % (
                self.count, self.total_num, city['name'],
                self.count_c, city['num'],
            ))

    def get(self, city):
        """Fetch and save every result page of *city*.

        The page count is ``city['num']`` divided by ten, rounded up.  Pages
        whose response is not a JSON object with a ``content`` key are
        skipped.
        """
        self.count_c = 0
        # Ceiling division that does not depend on Python 2's integer "/".
        pages = -(-city['num'] // 10)
        for page in range(pages):
            data = self._get_data(city, page)
            if isinstance(data, dict) and 'content' in data:
                self._save(data['content'], city)

    def get_all(self):
        """Fetch every city in ``self.city``, then close the output file.

        The file is closed even when a request or write fails part-way.
        """
        try:
            for city in self.city:
                self.get(city)
        finally:
            self.file.close()
133 |
|
134 |
|
135 |
if __name__ == '__main__':
    # The search keyword is the first command-line argument; without one the
    # built-in sample keyword is used.
    keyword = sys.argv[1] if len(sys.argv) > 1 else '钻石'

    baidumap = BaiduMap(keyword)
    # Summary of what the initial search found, printed before the download.
    print('_' * 20)
    print('CITY: %s' % len(baidumap.city))
    print('DATA: %s' % baidumap.total_num)
    baidumap.get_all()
联系客服