BitTorrent文件使用bencode编码,其中包括了4种数据类型:
'd' 开头表示是dict类型,'e'表示结束
'l' (小写字母L)开头表示是list类型,'e'表示结束
'i'开头表示是integer类型,'e'表示结束,可以表示负数
以数字开头表示string类型,数字为string长度,长度与string内容以':'分割
默认所有text类型的属性为utf-8编码,但是大多数BitTorrent包含codepage 和 encoding属性,指定了text的编码格式
"announce" -- tracker服务器的地址,为string
"info" ---文件信息,为dict类型
"name" --单文件模式,表示文件名,多文件模式表示根目录名。
"length" --单文件模式表示文件长度,多文件模式不存在
"piece length" --文件分片大小
"pieces" --为一个长string,每20个字节表示一个分片的SHA1 hash值,按照文件分片的顺序排列。
分片是按照所有文件组合在一起进行的,即一个分片可能会跨越多个文件。
"files" -- 多文件模式存在,为一个文件列表,每个文件为一个dict类型
"path" -- 文件目录列表,最后一项为文件名
"length" --文件长度
"piece length" --分片大小
以下为draft bep定义的属性
"codepage" --代码页编号,在没有"encoding"属性时用于确定Text属性的编码(如 936 对应 cp936)
"announce-list" --tracker列表,为二维数组,即将tracker服务器分为多个组
"encoding" -- Text属性的编码类型,string 类型,如 UTF-8
"publisher" -- 发布者
"publisher-url" --发布者 URL
"created by" --创建者,如btcomet,btspirit
"creation date" --创建日期,为UTC的Unix时间戳(自1970-01-01起的秒数),需要转化为本地时区可读格式
"comment" --注释
"nodes" -- DHT 节点列表
BitTorrent的标准参见:http://www.bittorrent.org/beps/bep_0003.html
以下是自己写的Python实现,初学Python,代码写起来还都是C/C++风格,慢慢改进吧。
from datetime import datetime

import bcodec

# Passed to file.read(); a negative size means "read until end of file".
_READ_MAX_LEN = -1

# Length in bytes of each SHA-1 digest stored in the "pieces" string.
_PIECE_HASH_LEN = 20


class BTFormatError(Exception):
    """Raised by TorrentFile.read_file when the data is not a bencoded dict."""


def _piece_span(start, length, piece_length):
    """Locate a file inside the sequence of pieces.

    ``start`` is the offset of the file's first byte within the
    concatenation of all files and ``length`` is its size in bytes.

    Returns ``((first_index, first_offset), (last_index, last_offset))``;
    the second pair addresses the final byte of the file.
    """
    first = divmod(start, piece_length)
    # The final byte sits at start + length - 1.  For a zero-length file this
    # position precedes ``first``, which is what the earlier branch-based
    # computation produced as well.
    last = divmod(start + length - 1, piece_length)
    return first, last


def _show(label, value):
    """Print ``label`` and ``value`` separated by a single space.

    A single pre-formatted argument makes ``print(...)`` behave the same
    under Python 2 and Python 3.
    """
    print('%s %s' % (label, value))


class TorrentFile(object):
    """Read-only view of the metainfo dictionary stored in a .torrent file."""

    # Class-level defaults keep the getters usable on an instance created
    # without running __init__ (they then simply report "nothing loaded").
    __metainfo = {}
    __file_name = ''

    def __init__(self):
        # Give every instance its own state instead of the shared defaults.
        self.__metainfo = {}
        self.__file_name = ''

    def read_file(self, filename):
        """Load and decode the torrent file at ``filename``.

        Raises IOError (from ``open``/``read``) if the file cannot be read
        and BTFormatError if its contents do not decode to a dictionary.
        """
        # ``with`` closes the handle even when read() raises.
        with open(filename, 'rb') as torrent_file:
            data = torrent_file.read(_READ_MAX_LEN)

        metainfo = bcodec.bdecode(data)
        if not isinstance(metainfo, dict):
            raise BTFormatError(
                '%s does not contain a bencoded dictionary' % (filename,))

        self.__file_name = filename
        self.__metainfo = metainfo

    def __is_singlefile(self):
        """Return True when the info dictionary carries a 'length' entry."""
        return self.__get_meta_info('length') is not None

    def __decode_text(self, text):
        """Decode a byte string using the torrent's declared encoding.

        The 'encoding' entry wins, then 'cp' + the 'codepage' entry, then
        utf-8.  Returns None for an empty or missing value, and the value
        unchanged when it is not a byte string or cannot be decoded.
        """
        if not text:
            return None
        if not isinstance(text, bytes):
            # Already text (or not a string at all): nothing to decode.
            return text

        encoding = self.get_encoding()
        if encoding is None:
            codepage = self.get_codepage()
            encoding = 'utf-8' if codepage is None else 'cp' + str(codepage)

        try:
            return text.decode(encoding)
        except (ValueError, LookupError, TypeError):
            # ValueError: bytes invalid for the codec; LookupError: unknown
            # codec name; TypeError: 'encoding' entry is not a string.
            return text

    def __get_meta_top(self, key):
        """Return the top-level metainfo entry ``key`` or None if absent."""
        return self.__metainfo.get(key)

    def __get_meta_info(self, key):
        """Return entry ``key`` of the 'info' dictionary or None if absent."""
        meta_info = self.__get_meta_top('info')
        if not isinstance(meta_info, dict):
            return None
        return meta_info.get(key)

    def get_codepage(self):
        """Return the raw 'codepage' entry, or None."""
        return self.__get_meta_top('codepage')

    def get_encoding(self):
        """Return the raw 'encoding' entry, or None."""
        return self.__get_meta_top('encoding')

    def get_announces(self):
        """Return the tracker tiers as a list of lists.

        The 'announce-list' entry is returned as is when present; otherwise
        a single tier wrapping the 'announce' entry (``[[]]`` if neither
        exists).
        """
        announces = self.__get_meta_top('announce-list')
        if announces is not None:
            return announces

        single = self.__get_meta_top('announce')
        return [[single]] if single else [[]]

    def get_publisher(self):
        """Return the decoded 'publisher' entry, or None."""
        return self.__decode_text(self.__get_meta_top('publisher'))

    def get_publisher_url(self):
        """Return the decoded 'publisher-url' entry, or None."""
        return self.__decode_text(self.__get_meta_top('publisher-url'))

    def get_creater(self):
        """Return the decoded 'created by' entry, or None."""
        return self.__decode_text(self.__get_meta_top('created by'))

    def get_creation_date(self):
        """Return 'creation date' as a naive UTC datetime, or None if absent."""
        utc_date = self.__get_meta_top('creation date')
        if utc_date is None:
            return None
        return datetime.utcfromtimestamp(utc_date)

    def get_comment(self):
        """Return the decoded 'comment' entry, or None.

        Decoded like the other text entries so that all text getters honour
        the torrent's declared encoding.
        """
        return self.__decode_text(self.__get_meta_top('comment'))

    def get_nodes(self):
        """Return the raw 'nodes' entry, or None."""
        return self.__get_meta_top('nodes')

    def get_piece_length(self):
        """Return the 'piece length' entry of the info dictionary, or None."""
        return self.__get_meta_info('piece length')

    def get_piece(self, index):
        """Return the 20-byte hash of piece ``index``.

        Returns None when there is no 'pieces' entry or ``index`` is outside
        the stored hashes (including negative values).
        """
        pieces = self.__get_meta_info('pieces')
        if pieces is None or index < 0:
            return None

        offset = index * _PIECE_HASH_LEN
        if offset + _PIECE_HASH_LEN > len(pieces):
            return None
        return pieces[offset:offset + _PIECE_HASH_LEN]

    def get_files(self):
        """Describe every file of the torrent.

        Returns a list of dicts with the keys 'name' (list of path
        components, starting with the torrent name), 'length',
        'first-piece' and 'last-piece'; the latter two are
        ``(piece_index, offset_in_piece)`` tuples.  The list is empty when
        'name' or a usable 'piece length' is missing.
        """
        files = []
        name = self.__decode_text(self.__get_meta_info('name'))
        piece_length = self.get_piece_length()
        if name is None:
            return files
        if not isinstance(piece_length, int) or piece_length <= 0:
            # Without a positive piece length no piece position exists.
            return files

        if self.__is_singlefile():
            file_length = self.__get_meta_info('length')
            if not file_length:
                return files
            first, last = _piece_span(0, file_length, piece_length)
            files.append({
                'name': [name],
                'length': file_length,
                'first-piece': first,
                'last-piece': last,
            })
            return files

        meta_files = self.__get_meta_info('files')
        if not isinstance(meta_files, list):
            return files

        total_length = 0
        for one_file in meta_files:
            # An entry without path or length makes the offsets of every
            # later file unknown, so stop at the first incomplete entry.
            if not isinstance(one_file, dict):
                break
            if 'path' not in one_file or 'length' not in one_file:
                break

            path_list = [name]
            path_list.extend(self.__decode_text(part)
                             for part in one_file['path'])

            length = one_file['length']
            first, last = _piece_span(total_length, length, piece_length)
            total_length += length

            files.append({
                'name': path_list,
                'length': length,
                'first-piece': first,
                'last-piece': last,
            })
        return files


def _main():
    """Dump the metadata of a sample torrent file to standard output."""
    filename = r".\huapi2.torrent"

    torrent = TorrentFile()

    print("begin to read file")
    try:
        torrent.read_file(filename)
    except (IOError, BTFormatError) as reason:
        print("Read bittorrent file error! Error:%s" % (reason,))

    print("end to read file")

    _show("announces: ", torrent.get_announces())
    _show("peace length:", torrent.get_piece_length())
    _show("code page:", torrent.get_codepage())
    _show("encoding:", torrent.get_encoding())
    _show("publisher:", torrent.get_publisher())
    _show("publisher url:", torrent.get_publisher_url())
    _show("creater:", torrent.get_creater())
    _show("creation date:", torrent.get_creation_date())
    _show("commnent:", torrent.get_comment())
    _show("nodes:", torrent.get_nodes())
    for one_file in torrent.get_files():
        _show('name:', '\\'.join(one_file['name']))
        _show('length:', one_file['length'])
        _show('first-piece:', one_file['first-piece'])
        _show('last-piece:', one_file['last-piece'])


if __name__ == '__main__':
    _main()
'''
Bencode decoder and encoder operating on str data.

Created on 2012-9-30

@author: ddt
'''

try:
    _INTEGER_TYPES = (int, long)  # Python 2: large values are ``long``
except NameError:
    _INTEGER_TYPES = (int,)  # Python 3 has a single integer type


class _DecodeError(ValueError):
    """Internal signal that the input is not well-formed bencode."""


def bdecode(data):
    """Decode the first bencoded value found at the start of ``data``.

    ``data`` is a str or any iterable of one-character strings.  Returns a
    dict, list, str or integer; returns None when the data is empty or
    malformed.  Anything following the first complete value is ignored.
    """
    if not isinstance(data, str):
        data = list(data)
    try:
        value, _ = _read_chunk(data, 0)
    except _DecodeError:
        return None
    return value


def _is_digit(char):
    """Return True for the ASCII digits '0'..'9'."""
    return '0' <= char <= '9'


def _read_chunk(data, pos):
    """Decode the value starting at ``pos``; return ``(value, next_pos)``.

    A cursor is threaded through all readers instead of popping items off
    the front of a list, which keeps decoding linear in the input size.
    """
    if pos >= len(data):
        raise _DecodeError('unexpected end of data')

    lead = data[pos]
    if _is_digit(lead):
        return _read_string(data, pos)
    if lead == 'd':
        return _read_dict(data, pos)
    if lead == 'i':
        return _read_integer(data, pos)
    if lead == 'l':
        return _read_list(data, pos)
    raise _DecodeError('unknown type prefix %r' % (lead,))


def _read_dict(data, pos):
    """Decode a 'd...e' dictionary starting at ``pos``."""
    size = len(data)
    pos += 1  # skip the leading 'd'
    result = {}
    while pos < size and data[pos] != 'e':
        key, pos = _read_chunk(data, pos)
        if not isinstance(key, str):
            raise _DecodeError('dictionary keys must be strings')
        value, pos = _read_chunk(data, pos)

        # A repeated key merges list with list and dict with dict; for any
        # other combination the later value replaces the earlier one.
        previous = result.get(key)
        if isinstance(previous, list) and isinstance(value, list):
            previous.extend(value)
        elif isinstance(previous, dict) and isinstance(value, dict):
            previous.update(value)
        else:
            result[key] = value

    if pos >= size:
        raise _DecodeError('unterminated dictionary')
    return result, pos + 1  # step over the closing 'e'


def _read_list(data, pos):
    """Decode an 'l...e' list starting at ``pos``."""
    size = len(data)
    pos += 1  # skip the leading 'l'
    items = []
    while pos < size and data[pos] != 'e':
        value, pos = _read_chunk(data, pos)
        items.append(value)

    if pos >= size:
        raise _DecodeError('unterminated list')
    return items, pos + 1  # step over the closing 'e'


def _read_string(data, pos):
    """Decode a '<length>:<content>' string starting at ``pos``."""
    size = len(data)
    colon = pos
    while colon < size and _is_digit(data[colon]):
        colon += 1
    if colon == pos or colon >= size or data[colon] != ':':
        raise _DecodeError('malformed string length')

    start = colon + 1
    end = start + int(''.join(data[pos:colon]))
    if end > size:
        raise _DecodeError('string runs past the end of the data')
    # join() turns a slice of a character list back into a str and leaves a
    # str slice unchanged.
    return ''.join(data[start:end]), end


def _read_integer(data, pos):
    """Decode an 'i<digits>e' integer (optional leading '-') at ``pos``."""
    size = len(data)
    start = pos + 1  # skip the leading 'i'
    cursor = start
    if cursor < size and data[cursor] == '-':
        cursor += 1

    first_digit = cursor
    while cursor < size and _is_digit(data[cursor]):
        cursor += 1

    # At least one digit is required, so a bare 'i-e' or 'ie' is rejected
    # here instead of failing inside int().
    if cursor == first_digit or cursor >= size or data[cursor] != 'e':
        raise _DecodeError('malformed integer')
    return int(''.join(data[start:cursor])), cursor + 1


def bencode(data):
    """Encode a dict, list, str or integer as a bencoded str.

    Returns None when ``data`` (or anything nested inside it) has another
    type, including bool, or when a dictionary key is not a str.
    """
    if isinstance(data, bool):
        # bool is an int subclass but has no bencode representation.
        return None
    if isinstance(data, dict):
        return _write_dict(data)
    if isinstance(data, list):
        return _write_list(data)
    if isinstance(data, str):
        return _write_string(data)
    if isinstance(data, _INTEGER_TYPES):
        return _write_integer(data)
    return None


def _write_dict(data):
    """Encode a dictionary with its keys in sorted order."""
    # Checking the keys first also keeps sorted() from comparing mixed types.
    if not all(isinstance(key, str) for key in data):
        return None

    parts = ['d']
    for key in sorted(data):
        value_encode = bencode(data[key])
        if value_encode is None:
            return None
        parts.append(_write_string(key))
        parts.append(value_encode)
    parts.append('e')
    return ''.join(parts)


def _write_list(data):
    """Encode a list; None if any element cannot be encoded."""
    parts = ['l']
    for value in data:
        value_encode = bencode(value)
        if value_encode is None:
            return None
        parts.append(value_encode)
    parts.append('e')
    return ''.join(parts)


def _write_string(data):
    """Encode a string as '<length>:<content>'."""
    return '%d:%s' % (len(data), data)


def _write_integer(data):
    """Encode an integer as 'i<value>e'."""
    return 'i%de' % data
联系客服