Comment analyser le fichier Manifest.mbdb dans une sauvegarde iTunes iOS 4.0

Dans iOS 4.0, Apple a repensé le processus de sauvegarde.

iTunes stockait une liste de noms de fichiers associés aux fichiers de sauvegarde dans le fichier Manifest.plist, mais dans iOS 4.0, il a déplacé ces informations vers un fichier Manifest.mbdb.

Vous pouvez voir un exemple de ce fichier en effectuant une sauvegarde avec un appareil iOS 4.0 et en regardant dans votre dossier ~/Library/Application Support/MobileSync/Backup (regardez à l’intérieur du sous-dossier dont la date est la plus récente).

Voici une capture d’écran de ce à quoi ressemble le fichier dans un éditeur de texte:

alt text http://soffr.miximages.com/extract/mbdb.png

Comment puis-je analyser ce fichier dans une application Cocoa afin de mettre à jour mon application (gratuite) iPhone Backup Extractor ( http://supercrazyawesome.com ) pour iOS 4.0 ?

Merci, user374559 et reneD – ce code et cette description sont très utiles.

Voici ma tentative en Python pour analyser le fichier et afficher les informations dans un format similaire à celui de la commande Unix ls -l :

#!/usr/bin/env python
# Parse the Manifest.mbdb / Manifest.mbdx pair of an iTunes (iOS 4) backup and
# print one line per entry in a format similar to Unix `ls -l`.
# Written to run unchanged on Python 2.6+ and Python 3.
import sys

# Fixed-width big-endian integer fields that follow the five leading strings
# of every MBDB record, in file order: (dictionary key, size in bytes).
_MBDB_INT_FIELDS = (
    ('mode', 2),
    ('unknown2', 4),
    ('unknown3', 4),
    ('userid', 4),
    ('groupid', 4),
    ('mtime', 4),
    ('atime', 4),
    ('ctime', 4),
    ('filelen', 8),
    ('flag', 1),
    ('numprops', 1),
)

# File-type bits (mode & 0xE000) => type character used in the listing.
_TYPE_CHARS = {0xA000: 'l', 0x8000: '-', 0x4000: 'd'}


def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    data is the raw file content (bytes; a plain str on Python 2).
    Returns a (value, new_offset) tuple.
    Raises IndexError when fewer than intsize bytes remain at offset.
    """
    if intsize <= 0:
        return 0, offset
    end = offset + intsize
    # bytearray yields integers on both Python 2 and Python 3.
    chunk = bytearray(data[offset:end])
    if len(chunk) != intsize:
        raise IndexError("truncated data: need %d byte(s) at offset %d"
                         % (intsize, offset))
    value = 0
    for byte in chunk:
        value = (value << 8) + byte
    return value, end


def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes; the two bytes 0xFF 0xFF stand for a blank string.
    Returns (raw_bytes, new_offset); the bytes are not decoded here.
    Raises IndexError when the data ends before the string does.
    """
    if data[offset:offset + 2] == b"\xff\xff":
        return b"", offset + 2  # Blank string
    length, offset = getint(data, offset, 2)  # 2-byte length
    value = data[offset:offset + length]
    if len(value) != length:
        raise IndexError("truncated data: need %d byte(s) at offset %d"
                         % (length, offset))
    return value, offset + length


def _text(raw):
    """Return raw as the native str type.

    On Python 2 the byte string is returned unchanged; on Python 3 the bytes
    are decoded as UTF-8, with undecodable bytes replaced.
    """
    if isinstance(raw, str):
        return raw
    return raw.decode("utf-8", "replace")


def _read_file(filename):
    """Return the whole content of filename, read in binary mode."""
    with open(filename, "rb") as handle:
        return handle.read()


def process_mbdb_file(filename):
    """Parse an MBDB file.

    Returns a dict mapping the offset of each record in the file to a dict
    describing that record. 'domain', 'filename', 'linktarget' and property
    names are native strings; 'datahash', 'unknown1' and property values are
    left as raw bytes.
    Raises Exception when the file does not start with the "mbdb" signature.
    """
    mbdb = {}  # Map offset of info in this file => file info
    data = _read_file(filename)
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for key in ('domain', 'filename', 'linktarget'):
            raw, offset = getstring(data, offset)
            fileinfo[key] = _text(raw)
        for key in ('datahash', 'unknown1'):
            fileinfo[key], offset = getstring(data, offset)
        for key, size in _MBDB_INT_FIELDS:
            fileinfo[key], offset = getint(data, offset, size)
        fileinfo['properties'] = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            fileinfo['properties'][_text(propname)] = propval
        mbdb[fileinfo['start_offset']] = fileinfo
    return mbdb


def process_mbdx_file(filename):
    """Parse an MBDX file.

    Returns a dict mapping the offset of a record in the MBDB file to its
    fileID rendered as a 40-character hexadecimal string.
    Raises Exception when the file does not start with the "mbdx" signature.
    """
    mbdx = {}  # Map offset of info in the MBDB file => fileID string
    data = _read_file(filename)
    if data[0:4] != b"mbdx":
        raise Exception("This does not look like an MBDX file")
    offset = 4
    offset = offset + 2  # value 0x02 0x00, not sure what this is
    _filecount, offset = getint(data, offset, 4)  # 4-byte count of records
    while offset < len(data):
        # 26 byte record, made up of ...
        fileID = data[offset:offset + 20]  # 20 bytes of fileID
        fileID_string = ''.join('%02x' % b for b in bytearray(fileID))
        offset = offset + 20
        mbdb_offset, offset = getint(data, offset, 4)  # 4-byte offset field
        mbdb_offset = mbdb_offset + 6  # Add 6 to get past prolog
        _mode, offset = getint(data, offset, 2)  # 2-byte mode field
        mbdx[mbdb_offset] = fileID_string
    return mbdx


def modestr(val):
    """Render the low nine permission bits of val as e.g. 'rwxr-xr-x'."""
    def mode(bits):
        r = 'r' if bits & 0x4 else '-'
        w = 'w' if bits & 0x2 else '-'
        x = 'x' if bits & 0x1 else '-'
        return r + w + x
    return mode(val >> 6) + mode(val >> 3) + mode(val)


def fileinfo_str(f, verbose=False):
    """Format one file-info dict as a single line.

    With verbose false the result is "(fileID)domain::filename"; otherwise
    it is an `ls -l`-like line, followed by the symlink target and the extra
    properties when present. f must already carry a 'fileID' entry.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    kind = _TYPE_CHARS.get(f['mode'] & 0xE000)
    if kind is None:
        sys.stderr.write("Unknown file type %04x for %s\n"
                         % (f['mode'], fileinfo_str(f, False)))
        kind = '?'  # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
             f['filelen'], f['mtime'], f['atime'], f['ctime'],
             f['fileID'], f['domain'], f['filename']))
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info


verbose = True
if __name__ == '__main__':
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = ""
            sys.stderr.write("No fileID found for %s\n"
                             % fileinfo_str(fileinfo))
        print(fileinfo_str(fileinfo, verbose))

Dans iOS 5, le fichier Manifest.mbdx a été supprimé. Pour les besoins de cet article, il était de toute façon redondant, car le domaine et le chemin sont dans Manifest.mbdb et le hachage d’identifiant peut être généré avec SHA1.

Voici ma mise à jour du code de galloglass pour que cela fonctionne avec les sauvegardes des appareils iOS 5. Les seuls changements sont l’élimination de process_mbdx_file () et l’ajout de quelques lignes dans process_mbdb_file ().

Testé avec des sauvegardes d’un iPhone 4S et d’un iPad 1, avec de nombreuses applications et fichiers.

 #!/usr/bin/env python
# Parse the Manifest.mbdb file of an iTunes backup made from an iOS 5 device
# and print one line per entry in a format similar to Unix `ls -l`.
# There is no Manifest.mbdx here: each file ID is computed as the SHA-1 of
# "<domain>-<filename>".
# Written to run unchanged on Python 2.6+ and Python 3.
import sys
import hashlib

# Map offset of a record in the MBDB file => fileID (hex SHA-1 string).
# Filled in as a side effect of process_mbdb_file().
mbdx = {}

# Fixed-width big-endian integer fields that follow the five leading strings
# of every MBDB record, in file order: (dictionary key, size in bytes).
_MBDB_INT_FIELDS = (
    ('mode', 2),
    ('unknown2', 4),
    ('unknown3', 4),
    ('userid', 4),
    ('groupid', 4),
    ('mtime', 4),
    ('atime', 4),
    ('ctime', 4),
    ('filelen', 8),
    ('flag', 1),
    ('numprops', 1),
)

# File-type bits (mode & 0xE000) => type character used in the listing.
_TYPE_CHARS = {0xA000: 'l', 0x8000: '-', 0x4000: 'd'}


def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    data is the raw file content (bytes; a plain str on Python 2).
    Returns a (value, new_offset) tuple.
    Raises IndexError when fewer than intsize bytes remain at offset.
    """
    if intsize <= 0:
        return 0, offset
    end = offset + intsize
    # bytearray yields integers on both Python 2 and Python 3.
    chunk = bytearray(data[offset:end])
    if len(chunk) != intsize:
        raise IndexError("truncated data: need %d byte(s) at offset %d"
                         % (intsize, offset))
    value = 0
    for byte in chunk:
        value = (value << 8) + byte
    return value, end


def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes; the two bytes 0xFF 0xFF stand for a blank string.
    Returns (raw_bytes, new_offset); the bytes are not decoded here.
    Raises IndexError when the data ends before the string does.
    """
    if data[offset:offset + 2] == b"\xff\xff":
        return b"", offset + 2  # Blank string
    length, offset = getint(data, offset, 2)  # 2-byte length
    value = data[offset:offset + length]
    if len(value) != length:
        raise IndexError("truncated data: need %d byte(s) at offset %d"
                         % (length, offset))
    return value, offset + length


def _text(raw):
    """Return raw as the native str type.

    On Python 2 the byte string is returned unchanged; on Python 3 the bytes
    are decoded as UTF-8, with undecodable bytes replaced.
    """
    if isinstance(raw, str):
        return raw
    return raw.decode("utf-8", "replace")


def process_mbdb_file(filename):
    """Parse an MBDB file and record a file ID for every entry.

    Returns a dict mapping the offset of each record in the file to a dict
    describing that record. 'domain', 'filename', 'linktarget' and property
    names are native strings; 'datahash', 'unknown1' and property values are
    left as raw bytes. As a side effect the module-level mbdx dict receives
    offset => hex SHA-1 of "<domain>-<filename>" for every record.
    Raises Exception when the file does not start with the "mbdb" signature.
    """
    mbdb = {}  # Map offset of info in this file => file info
    with open(filename, "rb") as handle:
        data = handle.read()
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        domain_raw, offset = getstring(data, offset)
        filename_raw, offset = getstring(data, offset)
        linktarget_raw, offset = getstring(data, offset)
        fileinfo['domain'] = _text(domain_raw)
        fileinfo['filename'] = _text(filename_raw)
        fileinfo['linktarget'] = _text(linktarget_raw)
        fileinfo['datahash'], offset = getstring(data, offset)
        fileinfo['unknown1'], offset = getstring(data, offset)
        for key, size in _MBDB_INT_FIELDS:
            fileinfo[key], offset = getint(data, offset, size)
        fileinfo['properties'] = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            fileinfo['properties'][_text(propname)] = propval
        mbdb[fileinfo['start_offset']] = fileinfo
        # Hash the raw bytes as stored in the file, so the result does not
        # depend on how the names were decoded for display.
        digest = hashlib.sha1(domain_raw + b"-" + filename_raw)
        mbdx[fileinfo['start_offset']] = digest.hexdigest()
    return mbdb


def modestr(val):
    """Render the low nine permission bits of val as e.g. 'rwxr-xr-x'."""
    def mode(bits):
        r = 'r' if bits & 0x4 else '-'
        w = 'w' if bits & 0x2 else '-'
        x = 'x' if bits & 0x1 else '-'
        return r + w + x
    return mode(val >> 6) + mode(val >> 3) + mode(val)


def fileinfo_str(f, verbose=False):
    """Format one file-info dict as a single line.

    With verbose false the result is "(fileID)domain::filename"; otherwise
    it is an `ls -l`-like line, followed by the symlink target and the extra
    properties when present. f must already carry a 'fileID' entry.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    kind = _TYPE_CHARS.get(f['mode'] & 0xE000)
    if kind is None:
        sys.stderr.write("Unknown file type %04x for %s\n"
                         % (f['mode'], fileinfo_str(f, False)))
        kind = '?'  # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
             f['filelen'], f['mtime'], f['atime'], f['ctime'],
             f['fileID'], f['domain'], f['filename']))
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info


verbose = True
if __name__ == '__main__':
    mbdb = process_mbdb_file("Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = ""
            sys.stderr.write("No fileID found for %s\n"
                             % fileinfo_str(fileinfo))
        print(fileinfo_str(fileinfo, verbose))

J’ai terminé mon travail sur ce sujet – c’est-à-dire la mise à jour iOS 4 + iTunes 9.2 de ma bibliothèque de décodeurs de sauvegarde pour Python – http://www.iki.fi/fingon/iphonebackupdb.py

Il fait ce dont j’ai besoin, peu de documentation, mais n’hésitez pas à copier des idées à partir de là 😉

(Semble fonctionner correctement avec mes sauvegardes au moins.)

Vous pouvez trouver des informations et une petite description du format MBDB / MBDX ici:

http://code.google.com/p/iphonebackupbrowser/

Ceci est mon application pour parcourir les fichiers de sauvegarde. J’ai essayé de documenter le format des nouveaux fichiers fournis avec iTunes 9.2.

Ce script python est génial.

Voici ma version Ruby (avec des améliorations mineures) et des capacités de recherche. (pour iOS 5)

 # encoding: utf-8 require 'fileutils' require 'digest/sha1' class ManifestParser def initialize(mbdb_filename, verbose = false) @verbose = verbose process_mbdb_file(mbdb_filename) end # Returns the numbers of records in the Manifest files. def record_number @mbdb.size end # Returns a huge ssortingng containing the parsing of the Manifest files. def to_s s = '' @mbdb.each do |v| s += "#{fileinfo_str(v)}\n" end s end def to_file(filename) File.open(filename, 'w') do |f| @mbdb.each do |v| f.puts fileinfo_str(v) end end end # Copy the backup files to their real path/name. # * domain_match Can be a regexp to ressortingct the files to copy. # * filename_match Can be a regexp to ressortingct the files to copy. def rename_files(domain_match = nil, filename_match = nil) @mbdb.each do |v| if v[:type] == '-' # Only rename files. if (domain_match.nil? or v[:domain] =~ domain_match) and (filename_match.nil? or v[:filename] =~ filename_match) dst = "#{v[:domain]}/#{v[:filename]}" puts "Creating: #{dst}" FileUtils.mkdir_p(File.dirname(dst)) FileUtils.cp(v[:fileID], dst) end end end end # Return the filename that math the given regexp. def search(regexp) result = Array.new @mbdb.each do |v| if "#{v[:domain]}::#{v[:filename]}" =~ regexp result << v end end result end private # Retrieve an integer (big-endian) and new offset from the current offset def getint(data, offset, intsize) value = 0 while intsize > 0 value = (value<<8) + data[offset].ord offset += 1 intsize -= 1 end return value, offset end # Retrieve a string and new offset from the current offset into the data def getstring(data, offset) return '', offset + 2 if data[offset] == 0xFF.chr and data[offset + 1] == 0xFF.chr # Blank string length, offset = getint(data, offset, 2) # 2-byte length value = data[offset...(offset + length)] return value, (offset + length) end def process_mbdb_file(filename) @mbdb = Array.new data = File.open(filename, 'rb') { |f| f.read } puts "MBDB file read. 
Size: #{data.size}" raise 'This does not look like an MBDB file' if data[0...4] != 'mbdb' offset = 4 offset += 2 # value x05 x00, not sure what this is while offset < data.size fileinfo = Hash.new fileinfo[:start_offset] = offset fileinfo[:domain], offset = getstring(data, offset) fileinfo[:filename], offset = getstring(data, offset) fileinfo[:linktarget], offset = getstring(data, offset) fileinfo[:datahash], offset = getstring(data, offset) fileinfo[:unknown1], offset = getstring(data, offset) fileinfo[:mode], offset = getint(data, offset, 2) if (fileinfo[:mode] & 0xE000) == 0xA000 # Symlink fileinfo[:type] = 'l' elsif (fileinfo[:mode] & 0xE000) == 0x8000 # File fileinfo[:type] = '-' elsif (fileinfo[:mode] & 0xE000) == 0x4000 # Dir fileinfo[:type] = 'd' else # $stderr.puts "Unknown file type %04x for #{fileinfo_str(f, false)}" % f['mode'] fileinfo[:type] = '?' end fileinfo[:unknown2], offset = getint(data, offset, 4) fileinfo[:unknown3], offset = getint(data, offset, 4) fileinfo[:userid], offset = getint(data, offset, 4) fileinfo[:groupid], offset = getint(data, offset, 4) fileinfo[:mtime], offset = getint(data, offset, 4) fileinfo[:atime], offset = getint(data, offset, 4) fileinfo[:ctime], offset = getint(data, offset, 4) fileinfo[:filelen], offset = getint(data, offset, 8) fileinfo[:flag], offset = getint(data, offset, 1) fileinfo[:numprops], offset = getint(data, offset, 1) fileinfo[:properties] = Hash.new (0...(fileinfo[:numprops])).each do |ii| propname, offset = getstring(data, offset) propval, offset = getstring(data, offset) fileinfo[:properties][propname] = propval end # Compute the ID of the file. fullpath = fileinfo[:domain] + '-' + fileinfo[:filename] fileinfo[:fileID] = Digest::SHA1.hexdigest(fullpath) # We add the file to the list of files. @mbdb << fileinfo end @mbdb end def modestr(val) def mode(val) r = (val & 0x4) ? 'r' : '-' w = (val & 0x2) ? 'w' : '-' x = (val & 0x1) ? 
'x' : '-' r + w + x end mode(val >> 6) + mode(val >> 3) + mode(val) end def fileinfo_str(f) return "(#{f[:fileID]})#{f[:domain]}::#{f[:filename]}" unless @verbose data = [f[:type], modestr(f[:mode]), f[:userid], f[:groupid], f[:filelen], f[:mtime], f[:atime], f[:ctime], f[:fileID], f[:domain], f[:filename]] info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % data info += ' -> ' + f[:linktarget] if f[:type] == 'l' # Symlink destination f[:properties].each do |k, v| info += " #{k}=#{v.inspect}" end info end end if __FILE__ == $0 mp = ManifestParser.new 'Manifest.mbdb', true mp.to_file 'filenames.txt' end 

J’ai aimé le code de galloglass, et j’ai modifié la fonction principale afin qu’elle affiche une liste triée de la taille totale par application :

 # Replacement for the main section of the iOS 4 listing script: besides the
# per-file listing it prints the total size of regular files per domain,
# smallest first. It relies on `sys`, process_mbdb_file(), process_mbdx_file()
# and fileinfo_str() from that script. Runs on both Python 2 and Python 3.
verbose = True
if __name__ == '__main__':
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    sizes = {}  # Map domain => total length of its regular files
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = ""
            sys.stderr.write("No fileID found for %s\n"
                             % fileinfo_str(fileinfo))
        print(fileinfo_str(fileinfo, verbose))
        if (fileinfo['mode'] & 0xE000) == 0x8000:  # regular files only
            domain = fileinfo['domain']
            sizes[domain] = sizes.get(domain, 0) + fileinfo['filelen']
    for domain in sorted(sizes, key=sizes.get):
        # Floor division keeps the megabyte figure an integer on Python 3 too.
        print("%-60s %11d (%dMB)"
              % (domain, sizes[domain], sizes[domain] // (1024 * 1024)))

De cette façon, vous pouvez déterminer quelle application mange tout cet espace.

Pour ceux qui recherchent une implémentation Java d’un lecteur de fichiers MBDB, il en existe plusieurs:

Merci à galloglass pour sa réponse. Le code fonctionne bien avec Python 2.7. Il n’y a qu’une seule chose que je veux mentionner : lorsque vous lisez le fichier Manifest.mbdb, vous devez utiliser le mode binaire. Sinon, tout le contenu n’est pas lu.

J’ai aussi apporté quelques modifications mineures pour que le code fonctionne avec Python 3.4. Voici le code

 #!/usr/bin/env python
# Python 3 version: parse the Manifest.mbdb file of an iTunes backup made
# from an iOS 5 device and print one line per entry, similar to Unix `ls -l`.
# Each file ID is computed as the SHA-1 of "<domain>-<filename>".
import sys
import hashlib

# Map offset of a record in the MBDB file => fileID (hex SHA-1 string).
# Filled in as a side effect of process_mbdb_file().
mbdx = {}

# Fixed-width big-endian integer fields that follow the five leading strings
# of every MBDB record, in file order: (dictionary key, size in bytes).
_MBDB_INT_FIELDS = (
    ('mode', 2),
    ('unknown2', 4),
    ('unknown3', 4),
    ('userid', 4),
    ('groupid', 4),
    ('mtime', 4),
    ('atime', 4),
    ('ctime', 4),
    ('filelen', 8),
    ('flag', 1),
    ('numprops', 1),
)

# File-type bits (mode & 0xE000) => type character used in the listing.
_TYPE_CHARS = {0xA000: 'l', 0x8000: '-', 0x4000: 'd'}


def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    data is the raw file content as bytes.
    Returns a (value, new_offset) tuple.
    Raises IndexError when fewer than intsize bytes remain at offset.
    """
    if intsize <= 0:
        return 0, offset
    end = offset + intsize
    chunk = data[offset:end]
    if len(chunk) != intsize:
        raise IndexError("truncated data: need %d byte(s) at offset %d"
                         % (intsize, offset))
    return int.from_bytes(chunk, "big"), end


def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes; the two bytes 0xFF 0xFF stand for a blank string. The bytes are
    decoded as latin-1, which maps every byte to one character, so the
    original bytes can be recovered with .encode('latin-1').
    Returns a (text, new_offset) tuple.
    """
    if data[offset:offset + 2] == b"\xff\xff":
        return '', offset + 2  # Blank string
    length, offset = getint(data, offset, 2)  # 2-byte length
    value = data[offset:offset + length]
    return value.decode(encoding='latin-1'), (offset + length)


def process_mbdb_file(filename):
    """Parse an MBDB file and record a file ID for every entry.

    Returns a dict mapping the offset of each record in the file to a dict
    describing that record. As a side effect the module-level mbdx dict
    receives offset => hex SHA-1 of "<domain>-<filename>" for every record.
    Raises Exception when the file does not start with the "mbdb" signature.
    """
    mbdb = {}  # Map offset of info in this file => file info
    # 'b' is needed to read all content at once
    with open(filename, 'rb') as handle:
        data = handle.read()
    # Compare bytes with bytes: decoding arbitrary input here could raise
    # UnicodeDecodeError instead of the intended error below.
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for key in ('domain', 'filename', 'linktarget', 'datahash',
                    'unknown1'):
            fileinfo[key], offset = getstring(data, offset)
        for key, size in _MBDB_INT_FIELDS:
            fileinfo[key], offset = getint(data, offset, size)
        fileinfo['properties'] = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            fileinfo['properties'][propname] = propval
        mbdb[fileinfo['start_offset']] = fileinfo
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        # Encode with the codec used for decoding so the SHA-1 is taken over
        # the exact bytes stored in the file; the default UTF-8 encoding
        # would alter every non-ASCII byte.
        digest = hashlib.sha1(fullpath.encode('latin-1'))
        mbdx[fileinfo['start_offset']] = digest.hexdigest()
    return mbdb


def modestr(val):
    """Render the low nine permission bits of val as e.g. 'rwxr-xr-x'."""
    def mode(bits):
        r = 'r' if bits & 0x4 else '-'
        w = 'w' if bits & 0x2 else '-'
        x = 'x' if bits & 0x1 else '-'
        return r + w + x
    return mode(val >> 6) + mode(val >> 3) + mode(val)


def fileinfo_str(f, verbose=False):
    """Format one file-info dict as a single line.

    With verbose false the result is "(fileID)domain::filename"; otherwise
    it is an `ls -l`-like line, followed by the symlink target and the extra
    properties when present. f must already carry a 'fileID' entry.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    kind = _TYPE_CHARS.get(f['mode'] & 0xE000)
    if kind is None:
        print("Unknown file type %04x for %s"
              % (f['mode'], fileinfo_str(f, False)), file=sys.stderr)
        kind = '?'  # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
             f['filelen'], f['mtime'], f['atime'], f['ctime'],
             f['fileID'], f['domain'], f['filename']))
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info


verbose = True
if __name__ == '__main__':
    mbdb = process_mbdb_file(r"Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = ""
            print("No fileID found for %s" % fileinfo_str(fileinfo),
                  file=sys.stderr)
        print(fileinfo_str(fileinfo, verbose))