parser.py 38 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030
  1. import struct
  2. import io
  3. import datetime
  4. import re
  5. from rdbtools.encodehelpers import STRING_ESCAPE_RAW, apply_escape_bytes, bval
  6. from .compat import range, str2regexp
  7. from .iowrapper import IOWrapper
  8. try:
  9. try:
  10. from cStringIO import StringIO as BytesIO
  11. except ImportError:
  12. from StringIO import StringIO as BytesIO
  13. except ImportError:
  14. from io import BytesIO
  15. try:
  16. import lzf
  17. HAS_PYTHON_LZF = True
  18. except ImportError:
  19. HAS_PYTHON_LZF = False
  20. REDIS_RDB_6BITLEN = 0
  21. REDIS_RDB_14BITLEN = 1
  22. REDIS_RDB_32BITLEN = 0x80
  23. REDIS_RDB_64BITLEN = 0x81
  24. REDIS_RDB_ENCVAL = 3
  25. REDIS_RDB_OPCODE_AUX = 250
  26. REDIS_RDB_OPCODE_RESIZEDB = 251
  27. REDIS_RDB_OPCODE_EXPIRETIME_MS = 252
  28. REDIS_RDB_OPCODE_EXPIRETIME = 253
  29. REDIS_RDB_OPCODE_SELECTDB = 254
  30. REDIS_RDB_OPCODE_EOF = 255
  31. REDIS_RDB_TYPE_STRING = 0
  32. REDIS_RDB_TYPE_LIST = 1
  33. REDIS_RDB_TYPE_SET = 2
  34. REDIS_RDB_TYPE_ZSET = 3
  35. REDIS_RDB_TYPE_HASH = 4
  36. REDIS_RDB_TYPE_ZSET_2 = 5 # ZSET version 2 with doubles stored in binary.
  37. REDIS_RDB_TYPE_MODULE = 6
  38. REDIS_RDB_TYPE_MODULE_2 = 7
  39. REDIS_RDB_TYPE_HASH_ZIPMAP = 9
  40. REDIS_RDB_TYPE_LIST_ZIPLIST = 10
  41. REDIS_RDB_TYPE_SET_INTSET = 11
  42. REDIS_RDB_TYPE_ZSET_ZIPLIST = 12
  43. REDIS_RDB_TYPE_HASH_ZIPLIST = 13
  44. REDIS_RDB_TYPE_LIST_QUICKLIST = 14
  45. REDIS_RDB_ENC_INT8 = 0
  46. REDIS_RDB_ENC_INT16 = 1
  47. REDIS_RDB_ENC_INT32 = 2
  48. REDIS_RDB_ENC_LZF = 3
  49. REDIS_RDB_MODULE_OPCODE_EOF = 0 # End of module value.
  50. REDIS_RDB_MODULE_OPCODE_SINT = 1
  51. REDIS_RDB_MODULE_OPCODE_UINT = 2
  52. REDIS_RDB_MODULE_OPCODE_FLOAT = 3
  53. REDIS_RDB_MODULE_OPCODE_DOUBLE = 4
  54. REDIS_RDB_MODULE_OPCODE_STRING = 5
  55. DATA_TYPE_MAPPING = {
  56. 0 : "string", 1 : "list", 2 : "set", 3 : "sortedset", 4 : "hash", 5 : "sortedset", 6 : "module", 7: "module",
  57. 9 : "hash", 10 : "list", 11 : "set", 12 : "sortedset", 13 : "hash", 14 : "list"}
  58. class RdbCallback(object):
  59. """
  60. A Callback to handle events as the Redis dump file is parsed.
  61. This callback provides a serial and fast access to the dump file.
  62. """
  63. def __init__(self, string_escape):
  64. if string_escape is None:
  65. self._escape = STRING_ESCAPE_RAW
  66. else:
  67. self._escape = string_escape
  68. def encode_key(self, key):
  69. """
  70. Escape a given key bytes with the instance chosen escape method.
  71. Key is not escaped if it contains only 'ASCII printable' bytes.
  72. """
  73. return apply_escape_bytes(key, self._escape, skip_printable=True)
  74. def encode_value(self, val):
  75. """Escape a given value bytes with the instance chosen escape method."""
  76. return apply_escape_bytes(val, self._escape)
  77. def start_rdb(self):
  78. """
  79. Called once we know we are dealing with a valid redis dump file
  80. """
  81. pass
  82. def aux_field(self, key, value):
  83. """"
  84. Called in the beginning of the RDB with various meta data fields such as:
  85. redis-ver, redis-bits, ctime, used-mem
  86. exists since redis 3.2 (RDB v7)
  87. """
  88. pass
  89. def start_database(self, db_number):
  90. """
  91. Called to indicate database the start of database `db_number`
  92. Once a database starts, another database cannot start unless
  93. the first one completes and then `end_database` method is called
  94. Typically, callbacks store the current database number in a class variable
  95. """
  96. pass
  97. def start_module(self, key, module_name, expiry):
  98. """
  99. Called to indicate start of a module key
  100. :param key: string
  101. :param module_name: string
  102. :param expiry:
  103. :return: boolean to indicate whatever to record the full buffer or not
  104. """
  105. return False
  106. def handle_module_data(self, key, opcode, data):
  107. pass
  108. def end_module(self, key, buffer_size, buffer=None):
  109. pass
  110. def db_size(self, db_size, expires_size):
  111. """
  112. Called per database before the keys, with the key count in the main dictioney and the total voletaile key count
  113. exists since redis 3.2 (RDB v7)
  114. """
  115. pass
  116. def set(self, key, value, expiry, info):
  117. """
  118. Callback to handle a key with a string value and an optional expiry
  119. `key` is the redis key
  120. `value` is a string or a number
  121. `expiry` is a datetime object. None and can be None
  122. `info` is a dictionary containing additional information about this object.
  123. """
  124. pass
  125. def start_hash(self, key, length, expiry, info):
  126. """Callback to handle the start of a hash
  127. `key` is the redis key
  128. `length` is the number of elements in this hash.
  129. `expiry` is a `datetime` object. None means the object does not expire
  130. `info` is a dictionary containing additional information about this object.
  131. After `start_hash`, the method `hset` will be called with this `key` exactly `length` times.
  132. After that, the `end_hash` method will be called.
  133. """
  134. pass
  135. def hset(self, key, field, value):
  136. """
  137. Callback to insert a field=value pair in an existing hash
  138. `key` is the redis key for this hash
  139. `field` is a string
  140. `value` is the value to store for this field
  141. """
  142. pass
  143. def end_hash(self, key):
  144. """
  145. Called when there are no more elements in the hash
  146. `key` is the redis key for the hash
  147. """
  148. pass
  149. def start_set(self, key, cardinality, expiry, info):
  150. """
  151. Callback to handle the start of a hash
  152. `key` is the redis key
  153. `cardinality` is the number of elements in this set
  154. `expiry` is a `datetime` object. None means the object does not expire
  155. `info` is a dictionary containing additional information about this object.
  156. After `start_set`, the method `sadd` will be called with `key` exactly `cardinality` times
  157. After that, the `end_set` method will be called to indicate the end of the set.
  158. Note : This callback handles both Int Sets and Regular Sets
  159. """
  160. pass
  161. def sadd(self, key, member):
  162. """
  163. Callback to inser a new member to this set
  164. `key` is the redis key for this set
  165. `member` is the member to insert into this set
  166. """
  167. pass
  168. def end_set(self, key):
  169. """
  170. Called when there are no more elements in this set
  171. `key` the redis key for this set
  172. """
  173. pass
  174. def start_list(self, key, expiry, info):
  175. """
  176. Callback to handle the start of a list
  177. `key` is the redis key for this list
  178. `expiry` is a `datetime` object. None means the object does not expire
  179. `info` is a dictionary containing additional information about this object.
  180. After `start_list`, the method `rpush` will be called with `key` exactly `length` times
  181. After that, the `end_list` method will be called to indicate the end of the list
  182. Note : This callback handles both Zip Lists and Linked Lists.
  183. """
  184. pass
  185. def rpush(self, key, value):
  186. """
  187. Callback to insert a new value into this list
  188. `key` is the redis key for this list
  189. `value` is the value to be inserted
  190. Elements must be inserted to the end (i.e. tail) of the existing list.
  191. """
  192. pass
  193. def end_list(self, key, info):
  194. """
  195. Called when there are no more elements in this list
  196. `key` the redis key for this list
  197. `info` is a dictionary containing additional information about this object that wasn't known in start_list.
  198. """
  199. pass
  200. def start_sorted_set(self, key, length, expiry, info):
  201. """
  202. Callback to handle the start of a sorted set
  203. `key` is the redis key for this sorted
  204. `length` is the number of elements in this sorted set
  205. `expiry` is a `datetime` object. None means the object does not expire
  206. `info` is a dictionary containing additional information about this object.
  207. After `start_sorted_set`, the method `zadd` will be called with `key` exactly `length` times.
  208. Also, `zadd` will be called in a sorted order, so as to preserve the ordering of this sorted set.
  209. After that, the `end_sorted_set` method will be called to indicate the end of this sorted set
  210. Note : This callback handles sorted sets in that are stored as ziplists or skiplists
  211. """
  212. pass
  213. def zadd(self, key, score, member):
  214. """Callback to insert a new value into this sorted set
  215. `key` is the redis key for this sorted set
  216. `score` is the score for this `value`
  217. `value` is the element being inserted
  218. """
  219. pass
  220. def end_sorted_set(self, key):
  221. """
  222. Called when there are no more elements in this sorted set
  223. `key` is the redis key for this sorted set
  224. """
  225. pass
  226. def end_database(self, db_number):
  227. """
  228. Called when the current database ends
  229. After `end_database`, one of the methods are called -
  230. 1) `start_database` with a new database number
  231. OR
  232. 2) `end_rdb` to indicate we have reached the end of the file
  233. """
  234. pass
  235. def end_rdb(self):
  236. """Called to indicate we have completed parsing of the dump file"""
  237. pass
  238. class RdbParser(object):
  239. """
  240. A Parser for Redis RDB Files
  241. This class is similar in spirit to a SAX parser for XML files.
  242. The dump file is parsed sequentially. As and when objects are discovered,
  243. appropriate methods in the callback are called.
  244. Typical usage :
  245. callback = MyRdbCallback() # Typically a subclass of RdbCallback
  246. parser = RdbParser(callback)
  247. parser.parse('/var/redis/6379/dump.rdb')
  248. filter is a dictionary with the following keys
  249. {"dbs" : [0, 1], "keys" : "foo.*", "types" : ["hash", "set", "sortedset", "list", "string"]}
  250. If filter is None, results will not be filtered
  251. If dbs, keys or types is None or Empty, no filtering will be done on that axis
  252. """
  253. def __init__(self, callback, filters = None) :
  254. """
  255. `callback` is the object that will receive parse events
  256. """
  257. self._callback = callback
  258. self._key = None
  259. self._expiry = None
  260. self.init_filter(filters)
  261. self._rdb_version = 0
  262. def parse(self, filename):
  263. """
  264. Parse a redis rdb dump file, and call methods in the
  265. callback object during the parsing operation.
  266. """
  267. self.parse_fd(open(filename, "rb"))
  268. def parse_fd(self, fd):
  269. with fd as f:
  270. self.verify_magic_string(f.read(5))
  271. self.verify_version(f.read(4))
  272. self._callback.start_rdb()
  273. is_first_database = True
  274. db_number = 0
  275. while True :
  276. self._expiry = None
  277. data_type = read_unsigned_char(f)
  278. if data_type == REDIS_RDB_OPCODE_EXPIRETIME_MS :
  279. self._expiry = to_datetime(read_unsigned_long(f) * 1000)
  280. data_type = read_unsigned_char(f)
  281. elif data_type == REDIS_RDB_OPCODE_EXPIRETIME :
  282. self._expiry = to_datetime(read_unsigned_int(f) * 1000000)
  283. data_type = read_unsigned_char(f)
  284. if data_type == REDIS_RDB_OPCODE_SELECTDB :
  285. if not is_first_database :
  286. self._callback.end_database(db_number)
  287. is_first_database = False
  288. db_number = self.read_length(f)
  289. self._callback.start_database(db_number)
  290. continue
  291. if data_type == REDIS_RDB_OPCODE_AUX:
  292. aux_key = self.read_string(f)
  293. aux_val = self.read_string(f)
  294. ret = self._callback.aux_field(aux_key, aux_val)
  295. if ret:
  296. break # TODO: make all callbacks return abort flag
  297. continue
  298. if data_type == REDIS_RDB_OPCODE_RESIZEDB:
  299. db_size = self.read_length(f)
  300. expire_size = self.read_length(f)
  301. self._callback.db_size(db_size, expire_size)
  302. continue
  303. if data_type == REDIS_RDB_OPCODE_EOF:
  304. self._callback.end_database(db_number)
  305. self._callback.end_rdb()
  306. if self._rdb_version >= 5:
  307. f.read(8)
  308. break
  309. if self.matches_filter(db_number):
  310. self._key = self.read_string(f)
  311. if self.matches_filter(db_number, self._key, data_type):
  312. self.read_object(f, data_type)
  313. else:
  314. self.skip_object(f, data_type)
  315. else :
  316. self.skip_key_and_object(f, data_type)
  317. def read_length_with_encoding(self, f):
  318. length = 0
  319. is_encoded = False
  320. bytes = []
  321. bytes.append(read_unsigned_char(f))
  322. enc_type = (bytes[0] & 0xC0) >> 6
  323. if enc_type == REDIS_RDB_ENCVAL:
  324. is_encoded = True
  325. length = bytes[0] & 0x3F
  326. elif enc_type == REDIS_RDB_6BITLEN:
  327. length = bytes[0] & 0x3F
  328. elif enc_type == REDIS_RDB_14BITLEN:
  329. bytes.append(read_unsigned_char(f))
  330. length = ((bytes[0] & 0x3F) << 8) | bytes[1]
  331. elif bytes[0] == REDIS_RDB_32BITLEN:
  332. length = read_unsigned_int_be(f)
  333. elif bytes[0] == REDIS_RDB_64BITLEN:
  334. length = read_unsigned_long_be(f)
  335. else:
  336. raise Exception('read_length_with_encoding', "Invalid string encoding %s (encoding byte 0x%X)" % (enc_type, bytes[0]))
  337. return (length, is_encoded)
  338. def read_length(self, f) :
  339. return self.read_length_with_encoding(f)[0]
  340. def read_string(self, f) :
  341. tup = self.read_length_with_encoding(f)
  342. length = tup[0]
  343. is_encoded = tup[1]
  344. val = None
  345. if is_encoded :
  346. if length == REDIS_RDB_ENC_INT8 :
  347. val = read_signed_char(f)
  348. elif length == REDIS_RDB_ENC_INT16 :
  349. val = read_signed_short(f)
  350. elif length == REDIS_RDB_ENC_INT32 :
  351. val = read_signed_int(f)
  352. elif length == REDIS_RDB_ENC_LZF :
  353. clen = self.read_length(f)
  354. l = self.read_length(f)
  355. val = self.lzf_decompress(f.read(clen), l)
  356. else:
  357. raise Exception('read_string', "Invalid string encoding %s"%(length))
  358. else :
  359. val = f.read(length)
  360. return val
  361. def read_float(self, f):
  362. dbl_length = read_unsigned_char(f)
  363. if dbl_length == 253:
  364. return float('nan')
  365. elif dbl_length == 254:
  366. return float('inf')
  367. elif dbl_length == 255:
  368. return float('-inf')
  369. data = f.read(dbl_length)
  370. if isinstance(data, str):
  371. return float(data)
  372. return data # bug?
  373. # Read an object for the stream
  374. # f is the redis file
  375. # enc_type is the type of object
  376. def read_object(self, f, enc_type) :
  377. if enc_type == REDIS_RDB_TYPE_STRING :
  378. val = self.read_string(f)
  379. self._callback.set(self._key, val, self._expiry, info={'encoding':'string'})
  380. elif enc_type == REDIS_RDB_TYPE_LIST :
  381. # A redis list is just a sequence of strings
  382. # We successively read strings from the stream and create a list from it
  383. # The lists are in order i.e. the first string is the head,
  384. # and the last string is the tail of the list
  385. length = self.read_length(f)
  386. self._callback.start_list(self._key, self._expiry, info={'encoding':'linkedlist' })
  387. for count in range(0, length) :
  388. val = self.read_string(f)
  389. self._callback.rpush(self._key, val)
  390. self._callback.end_list(self._key, info={'encoding':'linkedlist' })
  391. elif enc_type == REDIS_RDB_TYPE_SET:
  392. # A redis list is just a sequence of strings
  393. # We successively read strings from the stream and create a set from it
  394. # Note that the order of strings is non-deterministic
  395. length = self.read_length(f)
  396. self._callback.start_set(self._key, length, self._expiry, info={'encoding':'hashtable'})
  397. for count in range(0, length):
  398. val = self.read_string(f)
  399. self._callback.sadd(self._key, val)
  400. self._callback.end_set(self._key)
  401. elif enc_type == REDIS_RDB_TYPE_ZSET or enc_type == REDIS_RDB_TYPE_ZSET_2 :
  402. length = self.read_length(f)
  403. self._callback.start_sorted_set(self._key, length, self._expiry, info={'encoding':'skiplist'})
  404. for count in range(0, length):
  405. val = self.read_string(f)
  406. score = read_double(f) if enc_type == REDIS_RDB_TYPE_ZSET_2 else self.read_float(f)
  407. self._callback.zadd(self._key, score, val)
  408. self._callback.end_sorted_set(self._key)
  409. elif enc_type == REDIS_RDB_TYPE_HASH:
  410. length = self.read_length(f)
  411. self._callback.start_hash(self._key, length, self._expiry, info={'encoding':'hashtable'})
  412. for count in range(0, length):
  413. field = self.read_string(f)
  414. value = self.read_string(f)
  415. self._callback.hset(self._key, field, value)
  416. self._callback.end_hash(self._key)
  417. elif enc_type == REDIS_RDB_TYPE_HASH_ZIPMAP:
  418. self.read_zipmap(f)
  419. elif enc_type == REDIS_RDB_TYPE_LIST_ZIPLIST:
  420. self.read_ziplist(f)
  421. elif enc_type == REDIS_RDB_TYPE_SET_INTSET:
  422. self.read_intset(f)
  423. elif enc_type == REDIS_RDB_TYPE_ZSET_ZIPLIST:
  424. self.read_zset_from_ziplist(f)
  425. elif enc_type == REDIS_RDB_TYPE_HASH_ZIPLIST:
  426. self.read_hash_from_ziplist(f)
  427. elif enc_type == REDIS_RDB_TYPE_LIST_QUICKLIST:
  428. self.read_list_from_quicklist(f)
  429. elif enc_type == REDIS_RDB_TYPE_MODULE:
  430. raise Exception('read_object', 'Unable to read Redis Modules RDB objects (key %s)' % self._key)
  431. elif enc_type == REDIS_RDB_TYPE_MODULE_2:
  432. self.read_module(f)
  433. else:
  434. raise Exception('read_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
  435. def skip_key_and_object(self, f, data_type):
  436. self.skip_string(f)
  437. self.skip_object(f, data_type)
  438. def skip_string(self, f):
  439. tup = self.read_length_with_encoding(f)
  440. length = tup[0]
  441. is_encoded = tup[1]
  442. bytes_to_skip = 0
  443. if is_encoded :
  444. if length == REDIS_RDB_ENC_INT8 :
  445. bytes_to_skip = 1
  446. elif length == REDIS_RDB_ENC_INT16 :
  447. bytes_to_skip = 2
  448. elif length == REDIS_RDB_ENC_INT32 :
  449. bytes_to_skip = 4
  450. elif length == REDIS_RDB_ENC_LZF :
  451. clen = self.read_length(f)
  452. l = self.read_length(f)
  453. bytes_to_skip = clen
  454. else :
  455. bytes_to_skip = length
  456. skip(f, bytes_to_skip)
  457. def skip_float(self, f):
  458. dbl_length = read_unsigned_char(f)
  459. if dbl_length < 253:
  460. skip(f, dbl_length)
  461. def skip_binary_double(self, f):
  462. skip(f, 8)
  463. def skip_object(self, f, enc_type):
  464. skip_strings = 0
  465. if enc_type == REDIS_RDB_TYPE_STRING :
  466. skip_strings = 1
  467. elif enc_type == REDIS_RDB_TYPE_LIST :
  468. skip_strings = self.read_length(f)
  469. elif enc_type == REDIS_RDB_TYPE_SET :
  470. skip_strings = self.read_length(f)
  471. elif enc_type == REDIS_RDB_TYPE_ZSET or enc_type == REDIS_RDB_TYPE_ZSET_2 :
  472. length = self.read_length(f)
  473. for x in range(length):
  474. self.skip_string(f)
  475. self.skip_binary_double(f) if enc_type == REDIS_RDB_TYPE_ZSET_2 else self.skip_float(f)
  476. elif enc_type == REDIS_RDB_TYPE_HASH :
  477. skip_strings = self.read_length(f) * 2
  478. elif enc_type == REDIS_RDB_TYPE_HASH_ZIPMAP :
  479. skip_strings = 1
  480. elif enc_type == REDIS_RDB_TYPE_LIST_ZIPLIST :
  481. skip_strings = 1
  482. elif enc_type == REDIS_RDB_TYPE_SET_INTSET :
  483. skip_strings = 1
  484. elif enc_type == REDIS_RDB_TYPE_ZSET_ZIPLIST :
  485. skip_strings = 1
  486. elif enc_type == REDIS_RDB_TYPE_HASH_ZIPLIST :
  487. skip_strings = 1
  488. elif enc_type == REDIS_RDB_TYPE_LIST_QUICKLIST:
  489. skip_strings = self.read_length(f)
  490. elif enc_type == REDIS_RDB_TYPE_MODULE:
  491. raise Exception('skip_object', 'Unable to skip Redis Modules RDB objects (key %s)' % self._key)
  492. elif enc_type == REDIS_RDB_TYPE_MODULE_2:
  493. self.read_module(f)
  494. else:
  495. raise Exception('skip_object', 'Invalid object type %d for key %s' % (enc_type, self._key))
  496. for x in range(0, skip_strings):
  497. self.skip_string(f)
  498. def read_intset(self, f) :
  499. raw_string = self.read_string(f)
  500. buff = BytesIO(raw_string)
  501. encoding = read_unsigned_int(buff)
  502. num_entries = read_unsigned_int(buff)
  503. self._callback.start_set(self._key, num_entries, self._expiry, info={'encoding':'intset', 'sizeof_value':len(raw_string)})
  504. for x in range(0, num_entries) :
  505. if encoding == 8 :
  506. entry = read_signed_long(buff)
  507. elif encoding == 4 :
  508. entry = read_signed_int(buff)
  509. elif encoding == 2 :
  510. entry = read_signed_short(buff)
  511. else :
  512. raise Exception('read_intset', 'Invalid encoding %d for key %s' % (encoding, self._key))
  513. self._callback.sadd(self._key, entry)
  514. self._callback.end_set(self._key)
  515. def read_ziplist(self, f) :
  516. raw_string = self.read_string(f)
  517. buff = BytesIO(raw_string)
  518. zlbytes = read_unsigned_int(buff)
  519. tail_offset = read_unsigned_int(buff)
  520. num_entries = read_unsigned_short(buff)
  521. self._callback.start_list(self._key, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
  522. for x in range(0, num_entries) :
  523. val = self.read_ziplist_entry(buff)
  524. self._callback.rpush(self._key, val)
  525. zlist_end = read_unsigned_char(buff)
  526. if zlist_end != 255 :
  527. raise Exception('read_ziplist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
  528. self._callback.end_list(self._key, info={'encoding':'ziplist'})
  529. def read_list_from_quicklist(self, f):
  530. count = self.read_length(f)
  531. total_size = 0
  532. self._callback.start_list(self._key, self._expiry, info={'encoding': 'quicklist', 'zips': count})
  533. for i in range(0, count):
  534. raw_string = self.read_string(f)
  535. total_size += len(raw_string)
  536. buff = BytesIO(raw_string)
  537. zlbytes = read_unsigned_int(buff)
  538. tail_offset = read_unsigned_int(buff)
  539. num_entries = read_unsigned_short(buff)
  540. for x in range(0, num_entries):
  541. self._callback.rpush(self._key, self.read_ziplist_entry(buff))
  542. zlist_end = read_unsigned_char(buff)
  543. if zlist_end != 255:
  544. raise Exception('read_quicklist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
  545. self._callback.end_list(self._key, info={'encoding': 'quicklist', 'zips': count, 'sizeof_value': total_size})
  546. def read_zset_from_ziplist(self, f) :
  547. raw_string = self.read_string(f)
  548. buff = BytesIO(raw_string)
  549. zlbytes = read_unsigned_int(buff)
  550. tail_offset = read_unsigned_int(buff)
  551. num_entries = read_unsigned_short(buff)
  552. if (num_entries % 2) :
  553. raise Exception('read_zset_from_ziplist', "Expected even number of elements, but found %d for key %s" % (num_entries, self._key))
  554. num_entries = num_entries // 2
  555. self._callback.start_sorted_set(self._key, num_entries, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
  556. for x in range(0, num_entries) :
  557. member = self.read_ziplist_entry(buff)
  558. score = self.read_ziplist_entry(buff)
  559. if isinstance(score, bytes) :
  560. score = float(score)
  561. self._callback.zadd(self._key, score, member)
  562. zlist_end = read_unsigned_char(buff)
  563. if zlist_end != 255 :
  564. raise Exception('read_zset_from_ziplist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
  565. self._callback.end_sorted_set(self._key)
  566. def read_hash_from_ziplist(self, f) :
  567. raw_string = self.read_string(f)
  568. buff = BytesIO(raw_string)
  569. zlbytes = read_unsigned_int(buff)
  570. tail_offset = read_unsigned_int(buff)
  571. num_entries = read_unsigned_short(buff)
  572. if (num_entries % 2) :
  573. raise Exception('read_hash_from_ziplist', "Expected even number of elements, but found %d for key %s" % (num_entries, self._key))
  574. num_entries = num_entries // 2
  575. self._callback.start_hash(self._key, num_entries, self._expiry, info={'encoding':'ziplist', 'sizeof_value':len(raw_string)})
  576. for x in range(0, num_entries) :
  577. field = self.read_ziplist_entry(buff)
  578. value = self.read_ziplist_entry(buff)
  579. self._callback.hset(self._key, field, value)
  580. zlist_end = read_unsigned_char(buff)
  581. if zlist_end != 255 :
  582. raise Exception('read_hash_from_ziplist', "Invalid zip list end - %d for key %s" % (zlist_end, self._key))
  583. self._callback.end_hash(self._key)
  584. def read_ziplist_entry(self, f) :
  585. length = 0
  586. value = None
  587. prev_length = read_unsigned_char(f)
  588. if prev_length == 254 :
  589. prev_length = read_unsigned_int(f)
  590. entry_header = read_unsigned_char(f)
  591. if (entry_header >> 6) == 0 :
  592. length = entry_header & 0x3F
  593. value = f.read(length)
  594. elif (entry_header >> 6) == 1 :
  595. length = ((entry_header & 0x3F) << 8) | read_unsigned_char(f)
  596. value = f.read(length)
  597. elif (entry_header >> 6) == 2 :
  598. length = read_unsigned_int_be(f)
  599. value = f.read(length)
  600. elif (entry_header >> 4) == 12 :
  601. value = read_signed_short(f)
  602. elif (entry_header >> 4) == 13 :
  603. value = read_signed_int(f)
  604. elif (entry_header >> 4) == 14 :
  605. value = read_signed_long(f)
  606. elif (entry_header == 240) :
  607. value = read_24bit_signed_number(f)
  608. elif (entry_header == 254) :
  609. value = read_signed_char(f)
  610. elif (entry_header >= 241 and entry_header <= 253) :
  611. value = entry_header - 241
  612. else :
  613. raise Exception('read_ziplist_entry', 'Invalid entry_header %d for key %s' % (entry_header, self._key))
  614. return value
  615. def read_zipmap(self, f) :
  616. raw_string = self.read_string(f)
  617. buff = io.BytesIO(bytearray(raw_string))
  618. num_entries = read_unsigned_char(buff)
  619. self._callback.start_hash(self._key, num_entries, self._expiry, info={'encoding':'zipmap', 'sizeof_value':len(raw_string)})
  620. while True :
  621. next_length = self.read_zipmap_next_length(buff)
  622. if next_length is None :
  623. break
  624. key = buff.read(next_length)
  625. next_length = self.read_zipmap_next_length(buff)
  626. if next_length is None :
  627. raise Exception('read_zip_map', 'Unexepcted end of zip map for key %s' % self._key)
  628. free = read_unsigned_char(buff)
  629. value = buff.read(next_length)
  630. try:
  631. value = int(value)
  632. except ValueError:
  633. pass
  634. skip(buff, free)
  635. self._callback.hset(self._key, key, value)
  636. self._callback.end_hash(self._key)
  637. def read_zipmap_next_length(self, f) :
  638. num = read_unsigned_char(f)
  639. if num < 254:
  640. return num
  641. elif num == 254:
  642. return read_unsigned_int(f)
  643. else:
  644. return None
  645. def read_module(self, f):
  646. # this method is based on the actual implementation in redis (src/rdb.c:rdbLoadObject)
  647. iowrapper = IOWrapper(f)
  648. iowrapper.start_recording_size()
  649. iowrapper.start_recording()
  650. length, encoding = self.read_length_with_encoding(iowrapper)
  651. record_buffer = self._callback.start_module(self._key, self._decode_module_id(length), self._expiry)
  652. if not record_buffer:
  653. iowrapper.stop_recording()
  654. opcode = self.read_length(iowrapper)
  655. while opcode != REDIS_RDB_MODULE_OPCODE_EOF:
  656. if opcode == REDIS_RDB_MODULE_OPCODE_SINT or opcode == REDIS_RDB_MODULE_OPCODE_UINT:
  657. data = self.read_length(iowrapper)
  658. elif opcode == REDIS_RDB_MODULE_OPCODE_FLOAT:
  659. data = self.read_float(iowrapper)
  660. elif opcode == REDIS_RDB_MODULE_OPCODE_DOUBLE:
  661. data = read_double(iowrapper)
  662. elif opcode == REDIS_RDB_MODULE_OPCODE_STRING:
  663. data = self.read_string(iowrapper)
  664. else:
  665. raise Exception("Unknown module opcode %s" % opcode)
  666. self._callback.handle_module_data(self._key, opcode, data)
  667. # read the next item in the module data type
  668. opcode = self.read_length(iowrapper)
  669. buffer = None
  670. if record_buffer:
  671. # prepand the buffer with REDIS_RDB_TYPE_MODULE_2 type
  672. buffer = struct.pack('B', REDIS_RDB_TYPE_MODULE_2) + iowrapper.get_recorded_buffer()
  673. iowrapper.stop_recording()
  674. self._callback.end_module(self._key, buffer_size=iowrapper.get_recorded_size(), buffer=buffer)
  675. charset = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
  676. def _decode_module_id(self, module_id):
  677. """
  678. decode module id to string
  679. based on @antirez moduleTypeNameByID function from redis/src/module.c
  680. :param module_id: 64bit integer
  681. :return: string
  682. """
  683. name = [''] * 9
  684. module_id >>= 10
  685. for i in reversed(range(9)):
  686. name[i] = self.charset[module_id & 63]
  687. module_id >>= 6
  688. return ''.join(name)
  689. def verify_magic_string(self, magic_string) :
  690. if magic_string != b'REDIS' :
  691. raise Exception('verify_magic_string', 'Invalid File Format')
  692. def verify_version(self, version_str) :
  693. version = int(version_str)
  694. if version < 1 or version > 8:
  695. raise Exception('verify_version', 'Invalid RDB version number %d' % version)
  696. self._rdb_version = version
  697. def init_filter(self, filters):
  698. self._filters = {}
  699. if not filters:
  700. filters={}
  701. if not 'dbs' in filters:
  702. self._filters['dbs'] = None
  703. elif isinstance(filters['dbs'], int):
  704. self._filters['dbs'] = (filters['dbs'], )
  705. elif isinstance(filters['dbs'], list):
  706. self._filters['dbs'] = [int(x) for x in filters['dbs']]
  707. else:
  708. raise Exception('init_filter', 'invalid value for dbs in filter %s' %filters['dbs'])
  709. if not ('keys' in filters and filters['keys']):
  710. self._filters['keys'] = re.compile(b".*")
  711. else:
  712. self._filters['keys'] = str2regexp(filters['keys'])
  713. if not ('not_keys' in filters and filters['not_keys']):
  714. self._filters['not_keys'] = None
  715. else:
  716. self._filters['not_keys'] = str2regexp(filters['not_keys'])
  717. if not 'types' in filters:
  718. self._filters['types'] = ('set', 'hash', 'sortedset', 'module', 'string', 'list')
  719. elif isinstance(filters['types'], bytes):
  720. self._filters['types'] = (filters['types'], )
  721. elif isinstance(filters['types'], list):
  722. self._filters['types'] = [str(x) for x in filters['types']]
  723. else:
  724. raise Exception('init_filter', 'invalid value for types in filter %s' %filters['types'])
  725. def matches_filter(self, db_number, key=None, data_type=None):
  726. if isinstance(key, bytes):
  727. key_to_match = key
  728. elif isinstance(key, str): # bytes key in python2
  729. key_to_match = key
  730. else:
  731. key_to_match = str(key).encode('utf-8')
  732. if self._filters['dbs'] and (not db_number in self._filters['dbs']):
  733. return False
  734. if key and self._filters['not_keys'] and (self._filters['not_keys'].match(key_to_match)):
  735. return False
  736. if key and (not self._filters['keys'].match(key_to_match)):
  737. return False
  738. if data_type is not None and (not self.get_logical_type(data_type) in self._filters['types']):
  739. return False
  740. return True
  741. def get_logical_type(self, data_type):
  742. return DATA_TYPE_MAPPING[data_type]
  743. def lzf_decompress(self, compressed, expected_length):
  744. if HAS_PYTHON_LZF:
  745. return lzf.decompress(compressed, expected_length)
  746. else:
  747. in_stream = bytearray(compressed)
  748. in_len = len(in_stream)
  749. in_index = 0
  750. out_stream = bytearray()
  751. out_index = 0
  752. while in_index < in_len :
  753. ctrl = in_stream[in_index]
  754. if not isinstance(ctrl, int) :
  755. raise Exception('lzf_decompress', 'ctrl should be a number %s for key %s' % (str(ctrl), self._key))
  756. in_index = in_index + 1
  757. if ctrl < 32 :
  758. for x in range(0, ctrl + 1) :
  759. out_stream.append(in_stream[in_index])
  760. #sys.stdout.write(chr(in_stream[in_index]))
  761. in_index = in_index + 1
  762. out_index = out_index + 1
  763. else :
  764. length = ctrl >> 5
  765. if length == 7 :
  766. length = length + in_stream[in_index]
  767. in_index = in_index + 1
  768. ref = out_index - ((ctrl & 0x1f) << 8) - in_stream[in_index] - 1
  769. in_index = in_index + 1
  770. for x in range(0, length + 2) :
  771. out_stream.append(out_stream[ref])
  772. ref = ref + 1
  773. out_index = out_index + 1
  774. if len(out_stream) != expected_length :
  775. raise Exception('lzf_decompress', 'Expected lengths do not match %d != %d for key %s' % (len(out_stream), expected_length, self._key))
  776. return bytes(out_stream)
  777. def skip(f, free):
  778. if free :
  779. f.read(free)
  780. def to_datetime(usecs_since_epoch):
  781. seconds_since_epoch = usecs_since_epoch // 1000000
  782. if seconds_since_epoch > 221925052800 :
  783. seconds_since_epoch = 221925052800
  784. useconds = usecs_since_epoch % 1000000
  785. dt = datetime.datetime.utcfromtimestamp(seconds_since_epoch)
  786. delta = datetime.timedelta(microseconds = useconds)
  787. return dt + delta
  788. def read_signed_char(f) :
  789. return struct.unpack('b', f.read(1))[0]
  790. def read_unsigned_char(f) :
  791. return struct.unpack('B', f.read(1))[0]
  792. def read_signed_short(f) :
  793. return struct.unpack('h', f.read(2))[0]
  794. def read_unsigned_short(f) :
  795. return struct.unpack('H', f.read(2))[0]
  796. def read_signed_int(f) :
  797. return struct.unpack('i', f.read(4))[0]
  798. def read_unsigned_int(f) :
  799. return struct.unpack('I', f.read(4))[0]
  800. def read_unsigned_int_be(f):
  801. return struct.unpack('>I', f.read(4))[0]
  802. def read_24bit_signed_number(f):
  803. s = b'0' + f.read(3)
  804. num = struct.unpack('i', s)[0]
  805. return num >> 8
  806. def read_signed_long(f) :
  807. return struct.unpack('q', f.read(8))[0]
  808. def read_unsigned_long(f) :
  809. return struct.unpack('Q', f.read(8))[0]
  810. def read_unsigned_long_be(f) :
  811. return struct.unpack('>Q', f.read(8))[0]
  812. def read_double(f) :
  813. return struct.unpack('d', f.read(8))[0]
  814. def string_as_hexcode(string) :
  815. for s in string :
  816. if isinstance(s, int) :
  817. print(hex(s))
  818. else :
  819. print(hex(ord(s)))
  820. class DebugCallback(RdbCallback) :
  821. def start_rdb(self):
  822. print('[')
  823. def aux_field(self, key, value):
  824. print('aux:[%s:%s]' % (key, value))
  825. def start_database(self, db_number):
  826. print('{')
  827. def db_size(self, db_size, expires_size):
  828. print('db_size: %s, expires_size %s' % (db_size, expires_size))
  829. def set(self, key, value, expiry):
  830. print('"%s" : "%s"' % (str(key), str(value)))
  831. def start_hash(self, key, length, expiry):
  832. print('"%s" : {' % str(key))
  833. pass
  834. def hset(self, key, field, value):
  835. print('"%s" : "%s"' % (str(field), str(value)))
  836. def end_hash(self, key):
  837. print('}')
  838. def start_set(self, key, cardinality, expiry):
  839. print('"%s" : [' % str(key))
  840. def sadd(self, key, member):
  841. print('"%s"' % str(member))
  842. def end_set(self, key):
  843. print(']')
  844. def start_list(self, key, expiry, info):
  845. print('"%s" : [' % str(key))
  846. def rpush(self, key, value) :
  847. print('"%s"' % str(value))
  848. def end_list(self, key, info):
  849. print(']')
  850. def start_sorted_set(self, key, length, expiry):
  851. print('"%s" : {' % str(key))
  852. def zadd(self, key, score, member):
  853. print('"%s" : "%s"' % (str(member), str(score)))
  854. def end_sorted_set(self, key):
  855. print('}')
  856. def end_database(self, db_number):
  857. print('}')
  858. def end_rdb(self):
  859. print(']')