| 1 |
#!/usr/bin/python |
|---|
| 2 |
# (C) Copyright 2003-2007 Nuxeo SAS <http://nuxeo.com> |
|---|
| 3 |
# (C) Copyright 2002 Eric Bellot <ebellot@netcourrier.com> |
|---|
| 4 |
# |
|---|
| 5 |
# Authors: |
|---|
| 6 |
# M.-A. Darche (Nuxeo) |
|---|
| 7 |
# Ruslan Spivak (Nuxeo) |
|---|
| 8 |
# Eric Bellot <ebellot@netcourrier.com> |
|---|
| 9 |
# Laurent Godard (lgodard@indesko.com) |
|---|
| 10 |
# |
|---|
| 11 |
# This program is free software; you can redistribute it and/or modify |
|---|
| 12 |
# it under the terms of the GNU General Public License version 2 as published |
|---|
| 13 |
# by the Free Software Foundation. |
|---|
| 14 |
# |
|---|
| 15 |
# This program is distributed in the hope that it will be useful, |
|---|
| 16 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 17 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 18 |
# GNU General Public License for more details. |
|---|
| 19 |
# |
|---|
| 20 |
# You should have received a copy of the GNU General Public License |
|---|
| 21 |
# along with this program; if not, write to the Free Software |
|---|
| 22 |
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA |
|---|
| 23 |
# 02111-1307, USA. |
|---|
| 24 |
# |
|---|
| 25 |
# See ``COPYING`` for more information |
|---|
| 26 |
# |
|---|
| 27 |
# $Id$ |
|---|
| 28 |
|
|---|
| 29 |
import zipfile |
|---|
| 30 |
import os, os.path, sys |
|---|
| 31 |
from string import join, split, find |
|---|
| 32 |
import codecs |
|---|
| 33 |
from xml.dom import minidom |
|---|
| 34 |
import time, base64 |
|---|
| 35 |
import re |
|---|
| 36 |
import xml.sax |
|---|
| 37 |
import shutil |
|---|
| 38 |
import locale |
|---|
| 39 |
from optparse import OptionParser |
|---|
| 40 |
|
|---|
| 41 |
# Program version, reported by the command line ``--version`` option.
VERSION = '2.0'

# Base name of the XML configuration file; it is looked up in /etc and in
# the directory of the ooo2dbk executable.
CONF_FILE_NAME = 'ooo2dbk.xml'

# OpenOffice.org canonical XML files
OOO_META_FILE_NAME = 'meta.xml'
OOO_STYLES_FILE_NAME = 'styles.xml'
OOO_CONTENT_FILE_NAME = 'content.xml'

# Suffix appended to the Writer document root name to build the default
# DocBook output file name.
DOCBOOK_FILE_SUFFIX = '.docb.xml'

# ZIP entries paths are stored in "code page 437" encoding (cp437).
# One cannot use UTF-8 for the ZIP entries paths.
# Read [ 878120 ] Zipfile archive name can't be unicode
# https://sourceforge.net/tracker/?func=detail&atid=105470&aid=878120&group_id=5470
ZIP_FILE_ENCODING = 'cp437'

# Initialization (attempt to remove some global statements).
# Module-level defaults for the conversion settings.
# NOTE(review): execArgs() binds names such as docbook_top_element only
# locally and hands them to convert(); confirm which functions still rely
# on these module-level values.
oooVersion = 'ooo1'
docbookXSL = None
verbose = True
zipfile_target = None
docbook_top_element = 'book'
process_ole_objects = False
|---|
| 65 |
|
|---|
| 66 |
def execArgs():
    """Parse the command line and start the conversion.

    Prints the help text and exits with status 2 when no argument at all is
    given, reports a usage error unless exactly one positional argument (the
    OpenOffice.org file) remains after option parsing, then forwards the
    parsed options to ``convert``.
    """
    config_help = (
        "Use FILE as the file path for the program configuration file. "
        "Defaults to the global /etc/%s configuration file or to the "
        "%s configuration file in the ooo2dbk executable directory."
        % (CONF_FILE_NAME, CONF_FILE_NAME))

    # One entry per command line option, in the order shown by --help.
    option_table = [
        (('-z', '--zipfile'),
         dict(action='store', dest='zipfile_target', type='string',
              metavar='FILE', default=None,
              help="Use FILE as the filename for "
                   "the generated ZIP archive")),
        (('-d', '--dbkfile'),
         dict(action='store', dest='dbkfile', type='string',
              metavar='FILE', default=None,
              help="Use FILE as the filename for "
                   "the generated DocBook XML file. "
                   "This option has no effect if the "
                   "-z/--zipfile option is used.")),
        (('-b', '--book'),
         dict(action='store_true', dest='book', default=False,
              help="Produce a DocBook XML book. "
                   "This is the default.")),
        (('-a', '--article'),
         dict(action='store_true', dest='article', default=False,
              help="Produce a DocBook XML article.")),
        (('--ole',),
         dict(action='store_true', dest='ole', default=False,
              help="Include potential OLE objects as images in the "
                   "resulting DocBook XML document. This option needs that "
                   "a listening OpenOffice.org instance be running.")),
        (('-c', '--config'),
         dict(action='store', dest='config', type='string',
              metavar='FILE', default=None,
              help=config_help)),
        (('-x', '--xslt'),
         dict(action='store', dest='xslt', type='string',
              metavar='FILE', default=None,
              help="Use FILE as the file path for the XSLT stylesheet. "
                   "Defaults to the ooo2dbk ooo2dbk.xsl stylesheet.")),
        (('-m', '--cmdxslt'),
         dict(action='store', dest='cmdxslt', type='string',
              metavar='NAME', default=None,
              help="Use command NAME as the XSLT processor. "
                   "Available command names are defined in the "
                   "ooo2dbk configuration file. "
                   "Defaults to xsltproc.")),
        # -f stores False: the value is forwarded as ``deltemp``, so giving
        # the option keeps the intermediate file.
        (('-f', '--flatxml'),
         dict(action='store_false', dest='flatxml', default=True,
              help="Preserve the intermediate OpenOffice.org "
                   "XML file (global.xml)")),
        (('-v', '--verbose'),
         dict(action='store_true', dest='verbose', default=False,
              help="Print additional information to stdout "
                   "when running conversion")),
    ]

    parser = OptionParser(usage="usage: %prog [options] openoffice.org-file",
                          version="%prog " + VERSION)
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    if len(sys.argv) < 2:
        parser.print_help()
        # Command line syntax errors return the error code "2"
        sys.exit(2)

    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")
    ooo_file_path = args[0]

    # --book wins over --article; a book is also produced when neither
    # option is given.
    docbook_top_element = 'book'
    if options.article and not options.book:
        docbook_top_element = 'article'
    xslParams = ['topElementName', docbook_top_element]

    convert(ooo_file_path,
            conf_file_path=options.config,
            command=options.cmdxslt,
            xslt_file_path=options.xslt,
            xslParams=xslParams,
            docbook_file_path=options.dbkfile,
            deltemp=options.flatxml,
            verbose=options.verbose,
            zipfile_target=options.zipfile_target,
            docbook_top_element=docbook_top_element,
            process_ole_objects=options.ole,
            docbookXSL=None,
            )
|---|
| 196 |
|
|---|
| 197 |
# --------- |
|---|
| 198 |
# Utilities |
|---|
| 199 |
# --------- |
|---|
| 200 |
|
|---|
| 201 |
def getModulePath():
    """Return the path of the directory in which the ooo2dbk executable resides.

    The directory is taken from the source file name recorded in the code
    object of ``execArgs``.
    """
    source_file = os.path.abspath(execArgs.func_code.co_filename)
    return os.path.dirname(source_file)
|---|
| 207 |
|
|---|
| 208 |
def fileExist(file):
    """Tell whether ``file`` is the path of an existing regular file.

    An empty file name is treated as a fatal error: a message is printed and
    the program exits with status 1.
    """
    if file == '':
        print("Bad filename:  %s" % file)
        sys.exit(1)
    return os.path.isfile(file)
|---|
| 214 |
|
|---|
| 215 |
def writeFile(file, strContent):
    """Write ``strContent`` to the file at path ``file``.

    The file is opened in text write mode, so any previous content is
    replaced. The handle is closed even when the write fails.
    """
    b = open(file, 'w')
    try:
        b.write(strContent)
    finally:
        # Always release the handle, including on a failed write.
        b.close()
|---|
| 219 |
|
|---|
| 220 |
|
|---|
| 221 |
def verifSys():
    """Current system identification.

    Sets two module globals: ``preferred_encoding`` (the locale's preferred
    encoding) and ``currentSys`` ('windows' when ``sys.platform`` is 'win32',
    'unix' otherwise).
    """
    global preferred_encoding
    global currentSys

    preferred_encoding = locale.getpreferredencoding()

    currentSys = 'unix'
    if sys.platform == 'win32':
        currentSys = 'windows'
|---|
| 232 |
|
|---|
| 233 |
|
|---|
| 234 |
def verifPath(path):
    """Syntax path verification.

    Matches ``path`` against the pattern for the platform recorded in the
    ``currentSys`` global and returns the matched text. When the path does
    not match, a message is printed and the program exits with status 1.
    """
    global currentSys
    # One path syntax pattern per supported value of currentSys.
    patterns = {
        'windows': r"^(([a-zA-Z]:\\)?|(\.\.\\)*)([^\?:/\*\"<>\|]+[^\s\?:/\*\"<>\|]\\)*[^\?:/\*\"<>\|]+(\.[a-zA-Z0-9]+)?$",
        'unix': r"^(~|(\.\./)*)?([^\\\?:\*\"<>\|]+[^\\\s\?:\*\"<>\|]/)*[^\\\?:\*\"<>\|]+(\.[a-zA-Z0-9]+)*$",
    }
    if currentSys in patterns:
        found = re.compile(patterns[currentSys]).match(path)
    if found is not None:
        return found.group()
    print("Bad path :\n%s" % path)
    sys.exit(1)
|---|
| 249 |
|
|---|
| 250 |
|
|---|
| 251 |
def createDirectory(path):
    """Create the directory ``path`` together with any missing parents.

    Nothing is done when ``path`` is empty or already names a directory.

    The previous hand-written loop split the path on ``os.sep`` and created
    each prefix in turn; for a POSIX absolute path the first prefix is the
    empty string, so ``os.mkdir('')`` failed. ``os.makedirs`` handles
    absolute paths and Windows drive prefixes.
    """
    if path and not os.path.isdir(path):
        os.makedirs(path)
|---|
| 266 |
|
|---|
| 267 |
# -------- |
|---|
| 268 |
# SETTINGS |
|---|
| 269 |
# -------- |
|---|
| 270 |
|
|---|
| 271 |
def getXSLfile(oooVersion):
    """Return the path of the XSLT stylesheet configured for ``oooVersion``.

    The stylesheet file name is the 'stylesheetPath' attribute of the
    'xslt-stylesheet' configuration element whose name is ``oooVersion``;
    it is joined to the directory returned by ``getModulePath``.
    """
    # Using the XSLT stylesheets specified in the CONF_FILE_NAME file
    configured_name = getConfigValue('xslt-stylesheet', 'stylesheetPath',
                                     name=oooVersion)
    stylesheet_file_name = verifPath(configured_name)
    if stylesheet_file_name is None:
        print("Bad filename %s for 'xslt-stylesheet' %s in '%s'" % (
            stylesheet_file_name,
            oooVersion,
            CONF_FILE_NAME,
            ))

    return os.path.join(getModulePath(), stylesheet_file_name)
|---|
| 288 |
|
|---|
| 289 |
def setConfFileSettings(conf_file_path=None):
    """Load the configuration file into module globals.

    The configuration file is, in order of priority:

    1. ``conf_file_path`` when it is given,
    2. the global /etc configuration file when it exists,
    3. the configuration file in the ooo2dbk executable directory.

    Every element directly under the document element is stored in the
    ``configElts`` global as a ``(element name, attribute dict)`` tuple. The
    image settings are validated (the program exits with status 1 when they
    are not purely alphanumeric) and the OpenOffice.org server, OLE and
    Python settings are read into their globals.
    """
    global configXML
    global configElts, imgRelDir, imgRootName
    global oooserver_host, oooserver_port
    global ole_img_format, ole2img_script_path, ooopython_path

    # Configuration file
    if conf_file_path is None:
        chosen_path = os.path.join('/etc', CONF_FILE_NAME)
        if not os.path.isfile(chosen_path):
            chosen_path = os.path.join(getModulePath(), CONF_FILE_NAME)
    else:
        chosen_path = conf_file_path
    configXML = chosen_path

    root_children = minidom.parse(configXML).documentElement.childNodes
    configElts = []
    for node in root_children:
        if node.nodeType != node.ELEMENT_NODE:
            continue
        attributes = node.attributes
        values = {}
        for index in range(attributes.length):
            attribute = attributes.item(index)
            values[attribute.name] = attribute.value
        configElts.append((node.nodeName, values))

    alphanum = r"^[a-zA-Z0-9]+$"

    # Images relative directory
    imgRelDir = getConfigValue('images', 'imagesRelativeDirectory')
    if re.match(alphanum, imgRelDir) is None:
        msg = ("Only one depth relative directory (no '%s') "
               "and only alphanum chars for 'imagesRelativeDirectory' in '%s'\n"
               "Actual name is : '%s'"
               % (os.sep, CONF_FILE_NAME, imgRelDir))
        print(msg)
        sys.exit(1)

    # Images root name
    imgRootName = getConfigValue('images', 'imageNameRoot')
    if re.match(alphanum, imgRootName) is None:
        print("Only alphanum chars for 'imageNameRoot' in '%s'" % CONF_FILE_NAME)
        print("Actual name is : %s" % imgRootName)
        sys.exit(1)

    oooserver_host = getConfigValue('oooserver', 'host')
    oooserver_port = getConfigValue('oooserver', 'port')
    ole_img_format = getConfigValue('ole', 'imgFormat')
    ole2img_script_path = getConfigValue('ole', 'scriptPath')
    ooopython_path = getConfigValue('ooopython', 'path')
|---|
| 350 |
|
|---|
| 351 |
|
|---|
| 352 |
def getConfigValue(element, attribute, name=''):
    """
    Return from the CONF_FILE_NAME file the value of the specified attribute
    ('command', 'param-syntax', etc.) for the specified element type
    ('xslt-command', 'xslt-stylesheet', etc.).

    With a non-empty ``name`` only elements whose 'name' attribute equals it
    are considered; otherwise every element of that type is. When several
    elements qualify, the one listed first in the configuration wins.

    When no non-empty value is found, the program exits with status 1 if a
    ``name`` was requested, and None is returned otherwise.
    """
    global configElts
    value = ''
    # Walk from the last element to the first so that the earliest matching
    # element is the one assigned last.
    for tag, attributes in reversed(configElts):
        if tag != element:
            continue
        if name == '' or attributes['name'] == name:
            value = attributes[attribute]
    if value != '':
        return value
    if name != '':
        print("There isn't any value for this parameter. "
              "There should be an error in your %s." % CONF_FILE_NAME)
        sys.exit(1)
|---|
| 379 |
|
|---|
| 380 |
|
|---|
| 381 |
def setUserSettings(ooofile, docbook, command, imagesrew, deltemp, dtd,
                    xslt_file_path, xslParams, verbose):
    """Derive the per-run settings and store them in module globals.

    ooofile        -- path of the OpenOffice.org document; the program exits
                      with status 1 when it is not an existing file.
    docbook        -- path of the DocBook file to produce, or None to derive
                      it from the document name in the current directory.
    command        -- name of the 'xslt-command' configuration element to
                      use, or None for the default one.
    imagesrew      -- stored as is in the ``rewriteImg`` global.
    deltemp        -- when equal to 0 the intermediate file is 'global.xml'
                      in the destination directory, otherwise a temporary
                      file name in that directory is used.
    dtd            -- name of the 'dtd' configuration element to use, or
                      None for the default one.
    xslt_file_path -- accepted for interface compatibility; the previous
                      code only bound it to an unused local, so nothing
                      here depends on it.
    xslParams      -- values substituted in the 'param-syntax' template of
                      the XSLT command, or None for no parameters.
    verbose        -- when true, print the XSLT parameter settings.
    """
    global docOOoSXW, docbookXML, globalXML
    global imgRelDir, imgAbsDir, rewriteImg
    global XSLCmdTemplate, dtdPublic, dtdSystem, XSLParams

    # OpenOffice.org filename
    ooofile = verifPath(ooofile)
    if not fileExist(ooofile):
        errorMsg = ("\n>> ERROR : Incorrect OpenOffice.org file : \n>> "
                    + ooofile + "\n")
        print(errorMsg)
        sys.exit(1)
    docOOoSXW = ooofile

    # DocBook filename
    if docbook is not None:
        docbook = verifPath(docbook)
        path = os.path.split(docbook)[0]
        docbookXML = docbook
    else:
        # The sub-objects (images), the OOo XML and the DocBook XML are
        # produced under the directory from which ooo2dbk was launched.
        path = os.getcwd()
        rootName = os.path.splitext(os.path.split(docOOoSXW)[1])[0]
        docbookXML = os.path.join(path, rootName) + DOCBOOK_FILE_SUFFIX
        # Replace spaces in Writer document name with '_'
        # NOTE(review): the substitution runs on the whole path, so
        # whitespace in the current directory name is replaced too.
        docbookXML = re.sub(r'\s', '_', docbookXML)

    # Destination directory
    if path != '' and not os.path.isdir(path):
        createDirectory(path)

    # Temporary files names
    if deltemp == 0:
        globalXML = os.path.join(path, 'global.xml')
    else:
        import tempfile
        # NOTE(review): tempfile.mktemp is deprecated because the returned
        # name can be claimed by another process before the file is created;
        # kept because this also sets tempfile.tempdir, which other code may
        # rely on.
        tempfile.tempdir = path
        globalXML = tempfile.mktemp('g.xml')

    # Images Directory
    imgAbsDir = os.path.join(toUnicode(path), imgRelDir)

    # Force image rewriting (0|1)
    rewriteImg = imagesrew

    # An empty name makes getConfigValue pick the default element.
    command_name = command
    if command_name is None:
        command_name = ''
    dtd_name = dtd
    if dtd_name is None:
        dtd_name = ''

    # XSL processor command
    XSLCmdTemplate = getConfigValue('xslt-command', 'command', command_name)

    # DTD
    dtdPublic = getConfigValue('dtd', 'doctype-public', dtd_name)
    dtdSystem = getConfigValue('dtd', 'doctype-system', dtd_name)

    # XSLT Params
    # Bound up front so that the verbose report below cannot hit an unbound
    # name when no XSLT parameters are given.
    param_syntax = None
    if xslParams is not None:
        param_syntax = getConfigValue('xslt-command', 'param-syntax',
                                      command_name)
        # Retrieve the XSLT params and set them according to the param syntax.
        # This is done because XSLT processors have different command line
        # options.
        XSLParams = ("%s" % param_syntax) % tuple(xslParams)
    else:
        XSLParams = ' '
    if verbose:
        print(" - xslParams = %s" % (xslParams,))
        print(" - param_syntax = %s" % (param_syntax,))
        print(" - XSLParams = %s" % (XSLParams,))
|---|
| 466 |
|
|---|
| 467 |
|
|---|
| 468 |
def initializeSets(ooo_file_path, docbook, command, imagesrew, deltemp, dtd,
                   conf_file_path, xslt_file_path, xslParams, verbose):
    """Initialize every module-level setting for one conversion run.

    Runs, in order, the system identification, the configuration file
    loading and the per-run user settings.
    """
    verifSys()
    setConfFileSettings(conf_file_path)
    setUserSettings(ooofile=ooo_file_path,
                    docbook=docbook,
                    command=command,
                    imagesrew=imagesrew,
                    deltemp=deltemp,
                    dtd=dtd,
                    xslt_file_path=xslt_file_path,
                    xslParams=xslParams,
                    verbose=verbose)
|---|
| 474 |
|
|---|
| 475 |
# -------------------- |
|---|
| 476 |
# Conversion functions |
|---|
| 477 |
# -------------------- |
|---|
| 478 |
|
|---|
| 479 |
def extractOooArchive(docOOoSXW, XMLFile):
    """Generic XML files extraction.

    Return the content of the ``XMLFile`` entry of the ``docOOoSXW`` ZIP
    archive.

    Raises ``zipfile.BadZipfile`` when ``docOOoSXW`` is not a ZIP file and
    ``KeyError`` when the archive has no ``XMLFile`` entry. Both situations
    previously ended in an unbound-name error. The archive is closed in
    every case.
    """
    # Checking that the OOo file is truly of the ZIP format
    if not zipfile.is_zipfile(docOOoSXW):
        raise zipfile.BadZipfile("Not a ZIP archive: %s" % docOOoSXW)

    zip_file = zipfile.ZipFile(docOOoSXW, 'r')
    try:
        # Checking that the requested XML file is truly present
        if XMLFile not in zip_file.namelist():
            raise KeyError("There is no %s entry in %s"
                           % (XMLFile, docOOoSXW))
        return zip_file.read(XMLFile)
    finally:
        zip_file.close()
|---|
| 496 |
|
|---|
| 497 |
def listChildNodes(docOOoSXW, XMLFile, ooo_file_path, verbose):
    """Extract and parse Zip XML files for concat.

    Return the element nodes found directly under the document element of
    the ``XMLFile`` entry of the ``docOOoSXW`` archive.

    For the content file the image globals are reset, the images directory
    is created when missing, the OLE export script is run when
    ``process_ole_objects`` is set (the command is printed first when
    ``verbose`` is true) and the image links are rewritten by
    ``replaceImageNode``.
    """
    global dictImg, myZip, numImg, dictNamespace

    # Extract and parse XML file
    document = minidom.parseString(extractOooArchive(docOOoSXW, XMLFile))
    vChildNodes = document.documentElement.childNodes

    # Images treatment
    if XMLFile == OOO_CONTENT_FILE_NAME:
        numImg = 0
        dictImg = {}
        dictNamespace = {}
        myZip = zipfile.ZipFile(docOOoSXW, 'r')
        # Creating the directory where the images will be dropped.
        # The exported OLE images go in this directory too.
        if not os.path.exists(imgAbsDir) or not os.path.isdir(imgAbsDir):
            os.mkdir(imgAbsDir)
        if process_ole_objects:
            template = ('%s %s --target "%s" '
                        '--oooserverhost %s --oooserverport %s '
                        '--format %s "%s"')
            cmd = template % (ooopython_path,
                              ole2img_script_path,
                              imgAbsDir,
                              oooserver_host, oooserver_port,
                              ole_img_format, ooo_file_path)
            if verbose:
                print(cmd)
            os.system(cmd)
        replaceImageNode(vChildNodes)
        myZip.close()

    # Extract all root element's childs
    return [child for child in vChildNodes
            if child.nodeType == child.ELEMENT_NODE]
|---|
| 538 |
|
|---|
| 539 |
|
|---|
| 540 |
def replaceImageNode(vChildNodes):
    """Replace the incorporated images links by the new images links
    and extract and copy all incorporated images.

    Each 'draw:image' node whose link contains 'Pictures/' gets a link to a
    renamed copy of the image, stored under the images directory. The
    ``dictImg`` global maps the original image names to the new links, so an
    image that is referenced several times is extracted only once. All the
    child nodes are processed recursively.

    XXX: Why renaming images (apart from making their path relative)?
    Please add comment if you know.
    """
    global numImg
    for node in vChildNodes:
        if node.nodeName == 'draw:image':
            link = node.attributes['xlink:href']
            hRefValue = link.value
            if 'Pictures/' in hRefValue:
                nameImgOld = os.path.split(hRefValue)[1]
                if nameImgOld not in dictImg:
                    # First time this image is met: extract it under a new,
                    # numbered name.
                    extImg = os.path.splitext(nameImgOld)[1]
                    numImg += 1
                    nameImgNew = imgRootName + "%03i" % numImg + extImg
                    pathImgNew = os.path.join(imgAbsDir, nameImgNew)
                    # OOo 1 links start with '#', OOo 2 links do not.
                    pathImgZip = hRefValue
                    if pathImgZip.startswith('#'):
                        pathImgZip = pathImgZip[1:]
                    zipImg = myZip.read(pathImgZip)
                    if os.path.isfile(pathImgNew) and rewriteImg:
                        os.remove(pathImgNew)
                    if not os.path.isfile(pathImgNew):
                        imgNew = open(pathImgNew, 'wb')
                        imgNew.write(zipImg)
                        imgNew.close()
                    dictImg[nameImgOld] = os.path.join(imgRelDir, nameImgNew)
                link.value = dictImg[nameImgOld]

        # Descend so that images nested deeper in the tree are handled too.
        if node.hasChildNodes():
            replaceImageNode(node.childNodes)
|---|
| 584 |
|
|---|
| 585 |
|
|---|
| 586 |
def getGlobalRootHead(sourcefile, XMLFile):
    """Return the head and foot of the global XML file and the OOo version.

    The ``XMLFile`` entry of the document is parsed and the value of its
    'xmlns:office' attribute decides between the OpenOffice.org 1.x format
    ('ooo1') and the OpenOffice.org 2.x OpenDocument format ('ooo2').

    Returns a ``(globalRootHead, globalRootFoot, oooVersion)`` tuple in
    which the head is the XML declaration followed by the opening
    'office:document' tag with its namespace declarations, and the foot is
    the matching closing tag.

    Raises ``ValueError`` for any other 'xmlns:office' value; that case
    previously failed on an unbound local variable.

    NOTE(review): ``sourcefile`` is not used, the archive read is the one
    named by the ``docOOoSXW`` global. Kept as is; confirm against callers.
    """
    strXML = extractOooArchive(docOOoSXW, XMLFile)
    rootNode = minidom.parseString(strXML).documentElement

    office_namespace = rootNode.attributes['xmlns:office'].value
    if office_namespace == 'http://openoffice.org/2000/office':
        oooVersion = 'ooo1'
    elif office_namespace == 'urn:oasis:names:tc:opendocument:xmlns:office:1.0':
        oooVersion = 'ooo2'
    else:
        raise ValueError("Unsupported 'xmlns:office' namespace: %s"
                         % office_namespace)

    if oooVersion == 'ooo1':
        # OpenOffice.org 1.x
        globalRootHead = """\
<?xml version="1.0" encoding="UTF-8"?>

<office:document xmlns:office="http://openoffice.org/2000/office"
xmlns:style="http://openoffice.org/2000/style"
xmlns:text="http://openoffice.org/2000/text"
xmlns:table="http://openoffice.org/2000/table"
xmlns:draw="http://openoffice.org/2000/drawing"
xmlns:fo="http://www.w3.org/1999/XSL/Format"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:number="http://openoffice.org/2000/datastyle"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns:chart="http://openoffice.org/2000/chart"
xmlns:dr3d="http://openoffice.org/2000/dr3d"
xmlns:math="http://www.w3.org/1998/Math/MathML"
xmlns:form="http://openoffice.org/2000/form"
xmlns:script="http://openoffice.org/2000/script"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:meta="http://openoffice.org/2000/meta"
office:class="text"
office:version="1.0">
"""
    else:
        # OpenOffice.org 2.x - OpenDocument
        globalRootHead = """\
<?xml version="1.0" encoding="UTF-8"?>

<office:document
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0"
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
xmlns:math="http://www.w3.org/1998/Math/MathML"
xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0"
xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0"
xmlns:ooo="http://openoffice.org/2004/office"
xmlns:ooow="http://openoffice.org/2004/writer"
xmlns:oooc="http://openoffice.org/2004/calc"
xmlns:dom="http://www.w3.org/2001/xml-events"
xmlns:xforms="http://www.w3.org/2002/xforms"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
office:version="1.0">
"""

    globalRootFoot = """\
</office:document>
"""

    return globalRootHead, globalRootFoot, oooVersion
|---|
| 660 |
|
|---|
| 661 |
def createGlobalXML(globalFile, ooo_file_path, verbose):
    """
    Create a global XML file by concatening the different XML files contained
    within a .sxw OOo file (meta.xml, styles.xml, content.xml).

    The result is written to ``globalFile`` encoded in UTF-8 and the detected
    OOo version is returned.
    """
    # First let's delete any previous images directory, because if we don't
    # delete it there might be a previous directory with content in it and we
    # don't want to get this unrequested content in a generated archive.
    if os.path.exists(imgAbsDir):
        shutil.rmtree(imgAbsDir)

    root_parts = getGlobalRootHead(docOOoSXW, OOO_META_FILE_NAME)
    globalRootHead, globalRootFoot, oooVersion = root_parts

    globalRoot = minidom.parseString(
        globalRootHead + globalRootFoot).documentElement

    # Gather the top-level elements of the three files, then move them all
    # under the global root element.
    collected = []
    for part_name in (OOO_META_FILE_NAME,
                      OOO_STYLES_FILE_NAME,
                      OOO_CONTENT_FILE_NAME):
        collected.extend(
            listChildNodes(docOOoSXW, part_name, ooo_file_path, verbose))
    for node in collected:
        globalRoot.appendChild(node)

    # The first line of the serialized tree is dropped and replaced by the
    # head text.
    body_lines = globalRoot.toxml().split('\n')[1:]
    strXMLNS = u"\n".join([globalRootHead, u"\n".join(body_lines)])

    fileXML = codecs.open(globalFile, 'w', 'utf-8')
    fileXML.write(strXMLNS)
    fileXML.close()

    return oooVersion
|---|
| 692 |
|
|---|
| 693 |
|
|---|
| 694 |
def tempFilesDelete(deltemp):
    """Remove the intermediate file named by the ``globalXML`` global.

    Nothing is removed unless ``deltemp`` equals 1.
    """
    if deltemp != 1:
        return
    os.remove(globalXML)
|---|
| 697 |
|
|---|
| 698 |
|
|---|
| 699 |
def getXsltCommand(input_file_path, output_file_path, stylesheet, verbose):
    """Return the actual XSLT processing command.

    The command is built from the ``XSLCmdTemplate`` global by replacing its
    placeholders: %o (output file), %i (input file), %s (stylesheet),
    %p (``dtdPublic``), %y (``dtdSystem``) and %v (``XSLParams``). The three
    file paths are wrapped in double quotes. The command is printed when
    ``verbose`` is true and is returned encoded in ``preferred_encoding``.

    Only the text before and right after the first occurrence of a
    placeholder is kept when it is substituted.
    """
    global XSLCmdTemplate
    cmd = XSLCmdTemplate
    # The placeholders to handle are decided once, from the raw template.
    present = [token for token in ('%o', '%i', '%s', '%p', '%y', '%v')
               if token in cmd]
    for token in present:
        pieces = cmd.split(token)
        # Note that the file paths have to be protected by "" in case they
        # contain special characters such as spaces.
        if token == '%o':
            value = '"%s"' % toUnicode(output_file_path)
        elif token == '%i':
            value = '"%s"' % toUnicode(input_file_path)
        elif token == '%s':
            value = '"%s"' % stylesheet
        elif token == '%p':
            value = dtdPublic
        elif token == '%y':
            value = dtdSystem
        else:
            value = XSLParams
        cmd = '%s%s%s' % (pieces[0], value, pieces[1])
    if verbose:
        print(cmd)
    return cmd.encode(preferred_encoding)
|---|
| 731 |
|
|---|
| 732 |
|
|---|
| 733 |
def toUnicode(s):
    """Return ``s`` as a unicode string.

    Byte strings are decoded with the module-level ``preferred_encoding``.
    A value that is already unicode is returned unchanged, because
    ``unicode(s, encoding)`` raises TypeError when asked to decode text that
    has already been decoded.
    """
    if isinstance(s, unicode):
        return s
    return unicode(s, preferred_encoding)
|---|
| 735 |
|
|---|
| 736 |
|
|---|
| 737 |
def o2dConvert(input, output, stylesheet, verbose):
    """Generic conversion.

    Build the XSLT command for the given input file, output file and
    stylesheet with getXsltCommand() and run it through the system shell.

    Return the exit status reported by ``os.system()`` so that callers can
    detect a failed transformation.
    """
    # ``input`` shadows the builtin of the same name; it is kept because the
    # parameter name is part of the function's interface.
    return os.system(getXsltCommand(input, output, stylesheet, verbose))
|---|
| 744 |
|
|---|
| 745 |
|
|---|
| 746 |
# ------------- |
|---|
| 747 |
# User commands |
|---|
| 748 |
# ------------- |
|---|
| 749 |
|
|---|
| 750 |
def createDocbookArchive(zipfile_target):
    """Bundle the generated DocBook file and its images into a ZIP archive.

    The archive is created in the directory of ``zipfile_target`` and is
    named after its base name, with the extension replaced by ``.zip``.
    Inside the archive everything lives under a single top-level directory
    bearing that same base name: the DocBook file (module-level
    ``docbookXML``) at its root and, if the module-level ``imgAbsDir``
    directory exists, its content under ``images``.

    Once the archive has been written and closed, the DocBook file and the
    images directory are removed from disk. If writing the archive fails,
    the archive is still closed and nothing is removed.
    """
    pjoin = os.path.join

    def decoded(path):
        # Entry paths are decoded before being encoded for the archive;
        # text that is already unicode must not be decoded a second time.
        if isinstance(path, unicode):
            return path
        return toUnicode(path)

    arch_dest_dir, target_name = os.path.split(zipfile_target)
    arch_top_dir = os.path.splitext(target_name)[0]
    arch_path = pjoin(arch_dest_dir, arch_top_dir + '.zip')
    top_dir_in_arch = decoded(arch_top_dir)
    has_images = os.path.exists(imgAbsDir)

    arch = zipfile.ZipFile(arch_path, 'w', zipfile.ZIP_DEFLATED)
    try:
        # ZIP entries paths are stored in "code page 437" encoding (cp437).
        # One cannot use UTF-8 for the ZIP entries paths.
        docbook_fname = decoded(os.path.basename(docbookXML))
        docbook_path_in_arch = pjoin(top_dir_in_arch, docbook_fname)
        arch.write(docbookXML, docbook_path_in_arch.encode(ZIP_FILE_ENCODING))
        # Adding in the arch the images contained in the original OOo arch
        if has_images:
            for img_name in os.listdir(imgAbsDir):
                img_path = pjoin(imgAbsDir, img_name)
                img_path_in_arch = pjoin(top_dir_in_arch, 'images',
                                         decoded(img_name))
                arch.write(img_path,
                           img_path_in_arch.encode(ZIP_FILE_ENCODING))
    finally:
        # Always release the archive handle, even when a write fails.
        arch.close()

    # Remove created DocBook XML and subobjects, if any
    os.remove(docbookXML)
    if has_images:
        shutil.rmtree(imgAbsDir)
|---|
| 780 |
|
|---|
| 781 |
|
|---|
| 782 |
def convert(ooo_file_path, command=None, docbook_file_path=None,
            imagesrew=1, deltemp=1, dtd=None, conf_file_path=None,
            xslt_file_path=None, xslParams=None, verbose=False,
            zipfile_target=False, docbook_top_element='book',
            process_ole_objects=False, docbookXSL=None):
    """Convert OpenOffice.org Writer file to DocBook XML.

    The conversion goes through the following steps:

    1. initializeSets() is called with the given options.
    2. createGlobalXML() builds the ``globalXML`` file from the
       OpenOffice.org file and returns the detected file format.
    3. Unless ``docbookXSL`` is given, the XSLT file is obtained from
       getXSLfile() for the detected format.
    4. o2dConvert() produces the DocBook file, then tempFilesDelete() is
       called with ``deltemp``.

    If ``zipfile_target`` is true, createDocbookArchive() is finally called
    with it. When ``verbose`` is true, the options, the computed settings
    and the duration of the timed steps are printed along the way.
    Within this function ``docbook_top_element`` and ``process_ole_objects``
    only affect the verbose report.
    """
    step_started = time.time()

    if verbose:
        print(" 1 - Command line options")
        for line_format, value in (
                (" - OOo2DBK config file : %s", conf_file_path),
                (" - OpenOffice.org file : %s", ooo_file_path),
                (" - DocBook file : %s", docbook_file_path),
                (" - top element is : %s", docbook_top_element),
                (" - process OLE objects : %s", process_ole_objects)):
            print(line_format % value)

    initializeSets(ooo_file_path, docbook_file_path, command, imagesrew,
                   deltemp, dtd, conf_file_path, xslt_file_path, xslParams,
                   verbose)

    ooo_file_path = toUnicode(ooo_file_path)
    if docbook_file_path is not None:
        docbook_file_path = toUnicode(docbook_file_path)
    if xslt_file_path is not None:
        xslt_file_path = toUnicode(xslt_file_path)

    elapsed = round(time.time() - step_started, 2)
    if verbose:
        print(" ==> %s sec.\n" % elapsed)
        print(" 2 - Unzip and concat OpenOffice.org XML files")

    step_started = time.time()
    oooVersion = createGlobalXML(globalXML, ooo_file_path, verbose)
    elapsed = round(time.time() - step_started, 2)

    if verbose:
        print(" - Detected file format: %s" % oooVersion)
        print(" ==> %s sec.\n" % elapsed)
        print(" 3 - Initialization (configuration file and computed options)")

    if docbookXSL is None:
        # Get XSLT file to use from configuration file
        docbookXSL = getXSLfile(oooVersion)

    if verbose:
        for line_format, value in (
                (" - preferred encoding : %s", preferred_encoding),
                (" - OOo2DBK config file : %s", configXML),
                (" - XSLT file : %s", docbookXSL),
                (" - OpenOffice.org file path : %s", docOOoSXW),
                (" - DocBook file path : %s", docbookXML)):
            print(line_format % value)
        if process_ole_objects:
            print(" - oooserver host : %s" % oooserver_host)
            print(" - oooserver port : %s" % oooserver_port)
            print(" - exported OLE image format : %s" % ole_img_format)
            print(" - OOo Python path: %s" % ooopython_path)
        print("\n 4 - DocBook file creation")

    # The duration reported for this step covers both the XSLT run and the
    # removal of the temporary file.
    step_started = time.time()
    o2dConvert(globalXML, docbookXML, docbookXSL, verbose)
    tempFilesDelete(deltemp)
    elapsed = round(time.time() - step_started, 2)
    if verbose:
        print(" ==> %s sec.\n" % elapsed)
        print("Conversion completed\n")

    if not zipfile_target:
        return
    createDocbookArchive(zipfile_target)
    if verbose:
        print("Zip archive created\n")
|---|
| 870 |
|
|---|
| 871 |
# Shell conversion: when this file is run as a script rather than imported,
# hand over to execArgs().
if __name__ == "__main__":
    execArgs()
|---|