Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 39 additions & 27 deletions officeparser.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
#!/usr/bin/env python

# CHANGELOG:
# 2020-01-09: - Changes to make it compatible with Python3
# - Add better explanation of error when no OLE document filename is provided
# 2014-08-15: - VBA: fixed incorrect value check in PROJECTHELPFILEPATH Record
# - VBA: fixed infinite loop when output file already exists
# - improved logging output, set default level to INFO

import sys
from struct import unpack
from optparse import OptionParser
from cStringIO import StringIO
# Pick an in-memory binary buffer type that works on both interpreter lines:
# Python 2 provides cStringIO.StringIO, while Python 3 removed that module, so
# io.BytesIO is used instead.  ImportError is caught rather than its subclass
# ModuleNotFoundError because the latter only exists on Python 3.6+; naming it
# in the except clause would raise NameError on Python 3.0-3.5.
try:
    from cStringIO import StringIO as sio
except ImportError:
    from io import BytesIO as sio
import logging
import re
import os
Expand Down Expand Up @@ -193,14 +198,14 @@ def __init__(self, file, parser_options=None):
if len(data) != self.sector_size:
logging.error('broken FAT (invalid sector size {0} != {1})'.format(len(data), self.sector_size))
else:
for value in unpack('<{0}L'.format(self.sector_size / 4), data):
for value in unpack('<{0}L'.format(int(self.sector_size / 4)), data):
self.fat.append(value)

# get the list of directory sectors
self.directory = []
buffer = self.read_chain(self.header._sectDirStart)
directory_index = 0
for chunk in unpack("128s" * (len(buffer) / 128), buffer):
for chunk in unpack("128s" * int(len(buffer) / 128), buffer):
self.directory.append(Directory(chunk, directory_index))
directory_index += 1

Expand All @@ -217,15 +222,15 @@ def __init__(self, file, parser_options=None):
# chain in the Fat, with the beginning of the chain stored in the
# header.

data = StringIO(self.read_chain(self.header._sectMiniFatStart))
data = sio(self.read_chain(self.header._sectMiniFatStart))
while True:
chunk = data.read(self.sector_size)
if chunk == '':
if chunk == b'':
break
if len(chunk) != self.sector_size:
logging.warning("encountered EOF while parsing minifat")
continue
for value in unpack('<{0}L'.format(self.sector_size / 4), chunk):
for value in unpack('<{0}L'.format(int(self.sector_size / 4)), chunk):
self.minifat.append(value)

def read_sector(self, sector):
Expand All @@ -248,7 +253,7 @@ def __impl_read_chain(self, start, read_sector_f, read_fat_f):
"""Returns the entire contents of a chain starting at the given sector."""
sector = start
check = [ sector ] # keep a list of sectors we've already read
buffer = StringIO()
buffer = sio()
while sector != ENDOFCHAIN:
buffer.write(read_sector_f(sector))
next = read_fat_f(sector)
Expand All @@ -268,7 +273,7 @@ def read_chain(self, sector_start):

def print_fat_sectors(self):
    """Print every entry of ``self.fat_sectors``, one per line.

    Each sector number is written to stdout as an 8-digit, zero-padded,
    upper-case hexadecimal value.
    """
    for sector in self.fat_sectors:
        # print() with a single argument behaves the same on Python 2 and 3;
        # the old Python 2 print statement is a syntax error on Python 3.
        print('{0:08X}'.format(sector))

def get_stream(self, index):
d = self.directory[index]
Expand Down Expand Up @@ -315,7 +320,7 @@ def __init__(self, data, parser_options=None):
self._sectFat = self.header[18:] # sects of first 109 FAT sectors

def pretty_print(self):
print """HEADER DUMP
print("""HEADER DUMP
_abSig = {0}
_clid = {1}
_uMinorVersion = {2}
Expand All @@ -334,8 +339,8 @@ def pretty_print(self):
_csectMiniFat = {15}
_sectDifStart = {16}
_csectDif = {17}""".format(
' '.join(['{0:02X}'.format(ord(x)) for x in self._abSig]),
' '.join(['{0:02X}'.format(ord(x)) for x in self._clid]),
' '.join(['{0:02X}'.format(ord(x) if isinstance(x, str) else x) for x in self._abSig]),
' '.join(['{0:02X}'.format(ord(x) if isinstance(x, str) else x) for x in self._clid]),
'{0:04X}'.format(self._uMinorVersion),
'{0}'.format(self._uDllVersion),
'{0:04X}'.format(self._uByteOrder),
Expand All @@ -353,11 +358,11 @@ def pretty_print(self):
'{0:08X}'.format(self._sectMiniFatStart),
'{0:08X}'.format(self._csectMiniFat),
'{0:08X}'.format(self._sectDifStart),
'{0:08X}'.format(self._csectDif))
'{0:08X}'.format(self._csectDif)))

for fat in self._sectFat:
if fat != FREESECT:
print '_sectFat = {0:08X}'.format(fat)
print('_sectFat = {0:08X}'.format(fat))

STGTY_INVALID = 0
STGTY_STORAGE = 1
Expand Down Expand Up @@ -401,7 +406,10 @@ def __init__(self, data, index):
self._ab = self.directory[0]
self._cb = self.directory[1]
# convert wide chars into ASCII
self.name = ''.join([x for x in self._ab[0:self._cb] if ord(x) != 0])
if isinstance(self._ab[0], str):
self.name = ''.join([x for x in self._ab[0:self._cb] if ord(x) != 0])
else:
self.name = ''.join([chr(x) for x in self._ab[0:self._cb] if x != 0])
self._mse = self.directory[2]
self._bflags = self.directory[3]
self._sidLeftSib = self.directory[4]
Expand All @@ -416,7 +424,7 @@ def __init__(self, data, index):
# last two bytes are padding

def pretty_print(self):
print """
print("""
_ab = {0}
_cb = {1}
_mse = {2}
Expand Down Expand Up @@ -446,7 +454,7 @@ def pretty_print(self):
'{0}'.format(self._time[1]),
'{0:08X}'.format(self._sectStart),
'{0:08X} ({0} bytes)'.format(self._ulSize),
'{0:04X}'.format(self._dptPropType))
'{0:04X}'.format(self._dptPropType)))

def _main():

Expand Down Expand Up @@ -542,6 +550,10 @@ def _main():

(options, args) = parser.parse_args()

if len(args) < 1:
print("A path to an OLE MSOffice file must be given. Run with --help for more information.")
return 1

logging.basicConfig(format='%(levelname)s: %(message)s',
level=logging.__dict__[options.log_level])

Expand All @@ -561,31 +573,31 @@ def _main():

if options.print_directory:
for x in xrange(0, len(ofdoc.directory)):
print "Directory Index {0:08X} ({0})".format(x)
print("Directory Index {0:08X} ({0})".format(x))
ofdoc.directory[x].pretty_print()
print
print()

if options.print_fat:
for sector in xrange(0, len(ofdoc.fat)):
print '{0:08X}: {1}'.format(sector, fat_value_to_str(ofdoc.fat[sector]))
print('{0:08X}: {1}'.format(sector, fat_value_to_str(ofdoc.fat[sector])))

if options.print_mini_fat:
for sector in xrange(0, len(ofdoc.minifat)):
print '{0:08X}: {1}'.format(sector, fat_value_to_str(ofdoc.minifat[sector]))
print('{0:08X}: {1}'.format(sector, fat_value_to_str(ofdoc.minifat[sector])))

if options.print_streams:
for d in ofdoc.directory:
if d._mse == STGTY_STREAM:
print '{0}: {1}'.format(d.index, d.name)
print('{0}: {1}'.format(d.index, d.name))

if options.print_expected_file_size:
expected_file_size = (len([x for x in ofdoc.fat if x != FREESECT]) * ofdoc.sector_size) + 512
actual_file_size = os.path.getsize(args[0])
size_diff = abs(expected_file_size - actual_file_size)
percent_diff = (float(size_diff) / float(expected_file_size)) * 100.0

print "expected file size {0} actual {1} difference {2} ({3:0.2f}%)".format(
expected_file_size, actual_file_size, size_diff, percent_diff)
print("expected file size {0} actual {1} difference {2} ({3:0.2f}%)".format(
expected_file_size, actual_file_size, size_diff, percent_diff))

#
# analysis options
Expand Down Expand Up @@ -620,7 +632,7 @@ def _main():
logging.warning('invalid FAT sector reference {0:08X}'.format(value))

if options.print_invalid_fat_count:
print "invalid FAT sector references: {0}".format(invalid_fat_sectors)
print("invalid FAT sector references: {0}".format(invalid_fat_sectors))

invalid_fat_entries = 0
if options.check_fat or options.print_invalid_fat_count:
Expand All @@ -634,7 +646,7 @@ def _main():
logging.warning('invalid FAT sector {0:08X} value {1:08X}'.format(value, ptr))

if options.print_invalid_fat_count:
print "invalid FAT entries: {0}".format(invalid_fat_entries)
print("invalid FAT entries: {0}".format(invalid_fat_entries))

if options.check_orphaned_chains:
buffer = [False for fat in ofdoc.fat]
Expand Down Expand Up @@ -805,7 +817,7 @@ def _main():
break

# parse PROJECT
buffer = StringIO()
buffer = sio()
buffer.write(ofdoc.get_stream(project.index))
buffer.seek(0)
re_keyval = re.compile(r'^([^=]+)=(.*)$')
Expand Down Expand Up @@ -853,7 +865,7 @@ def check_value(name, expected, value):
if expected != value:
logging.error("invalid value for {0} expected {1:04X} got {2:04X}".format(name, expected, value))

dir_stream = StringIO(decompress_stream(ofdoc.get_stream(dir_stream.index)))
dir_stream = sio(decompress_stream(ofdoc.get_stream(dir_stream.index)))

# PROJECTSYSKIND Record
PROJECTSYSKIND_Id = unpack("<H", dir_stream.read(2))[0]
Expand Down