#! /usr/bin/python
# -*- coding: iso-8859-15 -*-

##
## bindiff.py
## Simple script to compare binary files which creates
## coloured and easily readable HTML side-by-side hex dumps of both
## files with highlighted differences.
##
## Usage: ./bindiff.py file1 file2 > diff.html
## (the HTML report is written to standard output)
##
## I wrote this script in an hour or two to be able to analyse the
## binary database format of the Trekstor Vibez portable music player
## (http://www.vibez.de/) so I could write It'sGotTheVibez
## (http://www.ohrner.net/software/itsgotthevibez_en.php)
## This script is buggy ;), it miscalculates the length of the files
## compared and outputs a few bytes of garbage at the end of the diff...
## Feel free to fix this if it bothers you... ;)
##
## This is GPL-compatible free software under the 3 clause BSD license,
## as it's not possible to place source code in the "public domain" in
## Germany.
##

## 
## Copyright (c) 2006-2007 Gunter Ohrner
## All rights reserved.
## 
## Redistribution and use in source and binary forms, with or without
## modification, are permitted provided that the following conditions
## are met:
## 1. Redistributions of source code must retain the above copyright
##    notice, this list of conditions and the following disclaimer.
## 2. Redistributions in binary form must reproduce the above copyright
##    notice, this list of conditions and the following disclaimer in the
##    documentation and/or other materials provided with the distribution.
## 3. Neither the name of the University nor the names of its contributors
##    may be used to endorse or promote products derived from this software
##    without specific prior written permission.
## 
## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
## ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
## ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
## FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
## LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
## OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
## SUCH DAMAGE.
##



import difflib
import sys

## Mode markers.  The main script stores them inline in the intermediate
## lists, in front of the data items they apply to; writeFormattedRow()
## recognises them by value and uses them as keys into its format tables.
EQUAL, DELETED, INSERTED, REPLACED = range(4)


def hexPrinter(val):
	"""Return the two-digit lowercase hex representation of one character.

	val is a single-character string, or None for a padding cell that has
	no data; padding is rendered as '..'.
	"""
	if val is None:
		return '..'
	return '%02x' % ord(val)


## Characters that are printable ASCII but must not be written verbatim
## into HTML text.
_HTML_ENTITIES = {
	'&': '&amp;',
	'<': '&lt;',
	'>': '&gt;',
	'"': '&quot;',
}


def printablePrinter(val):
	"""Return an HTML-safe one-cell text rendering of one character.

	val is a single-character string, or None for a padding cell.

	- None, NUL and space are rendered as '&nbsp;'.
	- Other printable ASCII characters (codes 33..126) are rendered as
	  themselves, with HTML special characters replaced by entities so
	  file content cannot break or inject markup.
	- Everything else is rendered as '.'.
	"""
	if val is None or val in ('\0', ' '):
		return '&nbsp;'
	if 32 <= ord(val) < 127:
		return _HTML_ENTITIES.get(val, val)
	return '.'
	

def writeFormattedRow(mode, pos, row_len, m, formats, printer, spacer):
	"""Render one row of row_len cells from m, starting at index pos.

	Parameters:
	  mode     -- mode in effect at the start of the row; a key of formats.
	  pos      -- index into m of the first element to look at.
	  row_len  -- number of cells to render.
	  m        -- list of data items with mode markers (EQUAL, INSERTED,
	              DELETED, REPLACED) stored inline.  A marker switches the
	              current mode and does not occupy a cell.
	  formats  -- dict mapping a mode to an (opening, closing) string pair.
	  printer  -- callable turning one data item (or None) into a string.
	  spacer   -- string placed between adjacent cells.

	Cells beyond the end of m are rendered as printer(None).

	Returns a tuple (pos, line, mode): the index following the last
	consumed element, the rendered row, and the mode in effect at the end
	of the row.
	"""
	## open current mode
	parts = [formats[mode][0]]

	for col in range(row_len):
		## The marker constants are only looked at while pos is inside m.
		is_marker = pos < len(m) \
				and m[pos] in (EQUAL, INSERTED, DELETED, REPLACED)

		if is_marker:
			## close the old mode before the spacer ...
			parts.append(formats[mode][1])
			mode = m[pos]

		if col > 0:
			parts.append(spacer)

		if is_marker:
			## ... open the new one after it, then skip the marker itself
			parts.append(formats[mode][0])
			pos += 1

		if pos < len(m):
			parts.append(printer(m[pos]))
		else:
			parts.append(printer(None))
		pos += 1

	## close current mode
	parts.append(formats[mode][1])
	return (pos, ''.join(parts), mode)



def writeRow(mode, pos, row_len, m, formats):
	"""Render one dump row: the hex column, a space, then the text column.

	Both columns are produced by writeFormattedRow() from the same
	starting pos and mode.  Returns (pos, line, mode), where pos and mode
	are the values reported by the text-column pass.
	"""
	hex_column = writeFormattedRow(mode, pos, row_len, m, formats,
			hexPrinter, ' ')[1]
	next_pos, text_column, next_mode = writeFormattedRow(
			mode, pos, row_len, m, formats, printablePrinter, '')
	return (next_pos, hex_column + ' ' + text_column, next_mode)


if __name__ == '__main__':
	if len(sys.argv) != 3:
		print 'Autsch!'
		sys.exit(1)

	f1 = open(sys.argv[1], 'r')
	bin1 = f1.read()
	f1.close()
	f2 = open(sys.argv[2], 'r')
	bin2 = f2.read()
	f2.close()

	matcher = difflib.SequenceMatcher(a = bin1, b = bin2)

	print '<html><body>'

	## intermediate representations
	m1 = []
	m2 = []

	## Zwischenrepräsentation erstellen
	for opcode in matcher.get_opcodes():
		#if opcode[0] == 'equal':
		#	print opcode[0], '(%d)' % (opcode[2] - opcode[1])
		#else:
		op = opcode[0]
		if op == 'equal':
			ident = EQUAL
		elif op == 'delete': 
			ident = DELETED
		elif op == 'insert': 
			ident = INSERTED
		elif op == 'replace': 
			ident = REPLACED

		m1.append(ident)
		m2.append(ident)

		b1_1 = opcode[1]
		b1_2 = opcode[2]
		b2_1 = opcode[3]
		b2_2 = opcode[4]

		m1.extend(bin1[b1_1:b1_2])
		m2.extend(bin2[b2_1:b2_2])

		if b1_2-b1_1 < b2_2-b2_1:
			m1.extend([None] * (b2_2-b2_1-b1_2+b1_1))
		if b2_2-b2_1 < b1_2-b1_1:
			m2.extend([None] * (b1_2-b1_1-b2_2+b2_1))

	font_red = '<span style="color:yellow;background-color:red">'
	font_yellow = '<span style="color:red;background-color:yellow">'
	font_green = '<span style="color:yellow;background-color:green">'
	font_end = '</span>'

	frmt_left = {EQUAL: ('',''),
							 INSERTED: ('', ''),
							 DELETED: (font_red, font_end),
							 REPLACED: (font_yellow, font_end),
							 None: ('', '')}
	frmt_rght = {EQUAL: ('',''),
							 INSERTED: (font_green, font_end),
							 DELETED: ('', ''),
							 REPLACED: (font_yellow, font_end),
							 None: ('', '')}
	frmt_unified = {EQUAL: ('',''),
									INSERTED: (font_green, font_end),
									DELETED: (font_red, font_end),
									REPLACED: (font_yellow, font_end),
									None: ('', '')}

	mode = None

	row_len = 16

	print '<div style="font-size:50%"><tt>'

	pos = 0
	for row in xrange(0, len(m1) / row_len):
		print '<p>'
		(y, line, x) = writeRow(mode, pos, row_len, m1, frmt_unified)
		print line

		print 10 * '&nbsp;'
		
		(pos, line, mode) = writeRow(mode, pos, row_len, m2, frmt_unified)
		print line

		print '</p>'
			
	
	print '</tt></div>'

	print len(m1), len(m2)
	print '</body></html>'
