| 1 |
""" |
|---|
| 2 |
Uses lynx -dump |
|---|
| 3 |
""" |
|---|
| 4 |
from Products.PortalTransforms.interfaces import itransform |
|---|
| 5 |
from Products.PortalTransforms.libtransforms.utils import bin_search, basename, sansext |
|---|
| 6 |
from Products.PortalTransforms.libtransforms.commandtransform import commandtransform |
|---|
| 7 |
from os import system |
|---|
| 8 |
|
|---|
| 9 |
class lynx_dump(commandtransform):
    """Transform ``text/html`` into ``text/plain`` by running ``lynx -dump``.

    The HTML payload is written to a temporary directory, the external
    ``lynx`` binary is run on it with its standard output captured in a
    ``.txt`` file, and the content of that file becomes the transform result.
    """

    __implements__ = itransform

    __name__ = "lynx_dump"
    inputs = ('text/html',)
    output = 'text/plain'

    # Executable handed to commandtransform.__init__, and the
    # (whitespace-separated) options placed before the input file name.
    binaryName = "lynx"
    binaryArgs = "-dump"

    def __init__(self):
        commandtransform.__init__(self, binary=self.binaryName)

    def convert(self, data, cache, **kwargs):
        """Convert HTML ``data`` to plain text and store the result in ``cache``.

        ``kwargs['filename']`` (``'unknown'`` when missing or empty) is
        reduced to its base name and used to name the temporary input
        (``<name>.html``) and output (``<name>.txt``) files.

        Returns ``cache`` after calling ``cache.setData()`` with the text.
        """
        orig_name = basename(kwargs.get('filename') or 'unknown')
        kwargs['filename'] = orig_name + '.html'
        # initialize_tmpdir / cleanDir are not defined in this class; they
        # are presumably inherited from commandtransform.
        tmpdir, fullname = self.initialize_tmpdir(data, **kwargs)
        try:
            outname = "%s/%s.txt" % (tmpdir, orig_name)
            self.invokeCommand(tmpdir, fullname, outname)
            text = self.astext(outname)
        finally:
            # Remove the temporary directory even when the command or the
            # read fails, so failed conversions do not leave files behind.
            self.cleanDir(tmpdir)
        cache.setData(text)
        return cache

    def invokeCommand(self, tmpdir, inputname, outname):
        """Run the binary on ``inputname`` from ``tmpdir``, writing to ``outname``.

        Standard output is redirected to ``outname`` and standard error is
        discarded. The command is started from an argument list without a
        shell, so characters such as quotes, ``$`` or backticks in the file
        names are never interpreted as shell syntax.

        Returns the exit status of the process, or 127 when the process
        could not be started.
        """
        import os
        import subprocess

        command = [self.binary] + self.binaryArgs.split() + [inputname]
        with open(outname, 'wb') as stdout, open(os.devnull, 'wb') as stderr:
            try:
                return subprocess.call(command, cwd=tmpdir,
                                       stdout=stdout, stderr=stderr)
            except OSError:
                # Best effort, as before: a command that cannot be launched
                # leaves an empty output file instead of raising.
                return 127

    def astext(self, outname):
        """Return the whole content of the file ``outname``, read in text mode."""
        with open(outname, 'r') as txtfile:
            return txtfile.read()
|---|
| 42 |
|
|---|
| 43 |
def register():
    """Return a newly constructed ``lynx_dump`` transform instance."""
    transform = lynx_dump()
    return transform
|---|