plugins:html_indexer
no way to compare when less than two revisions
Differences
This shows you the differences between two versions of the page.
| — | plugins:html_indexer [2009/08/15 13:06] (current) – created daniel | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| + | ====== html_indexer plugin ====== | ||
| + | ===== Purpose ===== | ||
| + | This is an **incoming_mimeprocess** plugin that recursively searches for HTML parts in the message, converts them to a text format, and passes them to the full-text indexer. | ||
| + | |||
| + | ===== Dependencies ===== | ||
| + | Requires | ||
| + | * manitou-mdx 0.9.11 or above | ||
| + | * CPAN [[http:// | ||
| + | |||
| + | ===== Code ===== | ||
| + | [[http:// | ||
| + | |||
| + | <code perl> | ||
| + | # HTML attachments indexer plugin for Manitou-Mail | ||
| + | # Copyright (C) 2009 Daniel Verite | ||
| + | |||
| + | # This file is part of Manitou-Mail (see http:// | ||
| + | # This program is free software; you can redistribute it and/or modify | ||
| + | # it under the terms of the GNU General Public License version 2 as | ||
| + | # published by the Free Software Foundation. | ||
| + | |||
| + | # This program is distributed in the hope that it will be useful, | ||
| + | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| + | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||
| + | # GNU General Public License for more details. | ||
| + | |||
| + | # You should have received a copy of the GNU General Public License | ||
| + | # along with this program; if not, write to the Free Software | ||
| + | # Foundation, Inc., 59 Temple Place - Suite 330, | ||
| + | # Boston, MA 02111-1307, USA. | ||
| + | |||
| + | package Manitou:: | ||
| + | |||
| + | use HTML:: | ||
| + | use HTML:: | ||
| + | use IO::Handle; | ||
| + | |||
| + | sub init { | ||
| + | shift; | ||
| + | my ($args)=@_; | ||
| + | my $self={}; | ||
| + | bless $self; | ||
| + | return $self; | ||
| + | } | ||
| + | |||
| + | sub finish { | ||
| + | # nothing to do | ||
| + | 1; | ||
| + | } | ||
| + | |||
| + | sub index_contents { | ||
| + | my ($fh, $ctxt)=@_; | ||
| + | my $html; | ||
| + | my $text; | ||
| + | { | ||
| + | local $/; | ||
| + | $html = $fh-> | ||
| + | } | ||
| + | | ||
| + | if (defined $html) { | ||
| + | my $tree = HTML:: | ||
| + | $tree-> | ||
| + | my $formatter = HTML:: | ||
| + | $text = $formatter-> | ||
| + | } | ||
| + | if (defined $text) { | ||
| + | Manitou:: | ||
| + | } | ||
| + | } | ||
| + | |||
| + | sub process_parts { | ||
| + | my ($obj, | ||
| + | if ($obj-> | ||
| + | foreach my $subobj ($obj-> | ||
| + | process_parts($subobj, | ||
| + | } | ||
| + | } | ||
| + | else { | ||
| + | my $type=$obj-> | ||
| + | if ($type eq " | ||
| + | my $io = $obj-> | ||
| + | index_contents($io, | ||
| + | $io-> | ||
| + | } | ||
| + | } | ||
| + | } | ||
| + | |||
| + | sub process { | ||
| + | my ($self, | ||
| + | process_parts($ctxt-> | ||
| + | 1; | ||
| + | } | ||
| + | |||
| + | 1; | ||
| + | </ | ||
| + | |||
plugins/html_indexer.txt · Last modified: 2009/08/15 13:06 by daniel
