|
Revision 30164, 1.9 kB
(checked in by rspivak, 4 years ago)
|
Added support for word_to_html on the win32 platform (using wvWare). A step
toward fixing #708
|
- Property svn:eol-style set to
native
- Property svn:keywords set to
Author Date Id Revision
|
| Line | |
|---|
| 1 |
import os |
|---|
| 2 |
import sys |
|---|
| 3 |
from Products.PortalTransforms.libtransforms.utils \ |
|---|
| 4 |
import bodyfinder, scrubHTML |
|---|
| 5 |
from Products.PortalTransforms.libtransforms.commandtransform \ |
|---|
| 6 |
import commandtransform |
|---|
| 7 |
|
|---|
| 8 |
# Default value of convert()'s ``output_encoding`` argument; it is passed
# to the wv tool as its ``--charset`` option.
ENCODING = "iso-8859-15"


class document(commandtransform):
    """Convert a Word document to HTML with the wv command-line tools.

    The ``wvHtml`` binary is used on every platform except win32, where
    ``wvWare`` is run with the ``wvHtml.xml`` configuration file instead.
    """

    def __init__(self, name, data):
        """Create the temporary work directory and copy the document into it.

        name -- name handed to ``commandtransform.__init__``; the value
                read back through ``self.name()`` (with ``.doc`` appended
                when missing) becomes the file name of the copy
        data -- content of the document, written by ``initialize_tmpdir``
        """
        binary = 'wvHtml'
        if sys.platform == 'win32':
            binary = 'wvWare'
        # Explicit base-class call kept as in the original: the base class
        # is defined elsewhere and may not support super().
        commandtransform.__init__(self, name, binary=binary)
        name = self.name()
        if not name.endswith('.doc'):
            name = name + ".doc"
        self.tmpdir, self.fullname = self.initialize_tmpdir(data, filename=name)

    def _find_wv_config(self):
        """Return the first ``wvHtml.xml`` found in a PATH directory, or None."""
        # .get() avoids a KeyError when PATH is not set at all.
        for directory in os.environ.get('PATH', '').split(os.pathsep):
            candidate = os.path.join(directory, 'wvHtml.xml')
            if os.path.exists(candidate):
                return candidate
        return None

    def convert(self, output_encoding=ENCODING):
        """Run the wv binary so that ``<__name__>.html`` appears in the tmpdir.

        output_encoding -- value passed to the tool's ``--charset`` option

        The command is executed from an argument list instead of a shell
        string, so quotes or shell metacharacters in the document's file
        name (or spaces in the binary's path) cannot alter the command.
        As before, nothing is returned and a failing conversion is not
        reported here; ``html()`` fails if no output file was produced.
        On win32 nothing is run when ``wvHtml.xml`` is not found on PATH.
        """
        # Imported locally so the module-level imports need no change.
        import subprocess

        html_name = self.__name__ + '.html'
        charset_opt = '--charset=%s' % output_encoding

        try:
            if sys.platform == 'win32':
                config_path = self._find_wv_config()
                if config_path is None:
                    # No configuration file: there is no command to run.
                    return
                # wvWare writes the HTML to stdout; send it to the output
                # file directly instead of using a shell redirection.
                out = open(os.path.join(self.tmpdir, html_name), 'wb')
                try:
                    subprocess.call(
                        [self.binary, charset_opt, '-x', config_path,
                         self.fullname],
                        stdout=out)
                finally:
                    out.close()
            else:
                # cwd= replaces the former 'cd "<tmpdir>" && ...' prefix:
                # the output name is relative to the work directory.
                subprocess.call(
                    [self.binary, charset_opt, self.fullname, html_name],
                    cwd=self.tmpdir)
        except EnvironmentError:
            # os.system() never raised when the command could not be
            # started; keep convert() non-raising for existing callers.
            pass

    def html(self):
        """Return the converted document's HTML body.

        Reads ``<__name__>.html`` from the work directory, passes the text
        through ``scrubHTML`` and returns the result of ``bodyfinder``.
        Opening the file fails if ``convert()`` produced no output.
        """
        htmlfile = open(os.path.join(self.tmpdir, self.__name__ + '.html'))
        try:
            # try/finally guarantees the handle is released even if the
            # read fails.
            html = htmlfile.read()
        finally:
            htmlfile.close()
        html = scrubHTML(html)
        body = bodyfinder(html)
        return body
|---|