plugins:html_indexer
no way to compare when less than two revisions
Differences
This shows you the differences between two versions of the page.
— | plugins:html_indexer [2009/08/15 13:06] (current) – created daniel | ||
---|---|---|---|
Line 1: | Line 1: | ||
+ | ====== html_indexer plugin ====== | ||
+ | ===== Purpose ===== | ||
+ | This is an **incoming_mimeprocess** plugin that recursively searches for HTML parts in the message, converts them to a text format, and passes them to the full text indexer. | ||
+ | |||
+ | ===== Dependencies ===== | ||
+ | Requires | ||
+ | * manitou-mdx 0.9.11 or above | ||
+ | * CPAN [[http:// | ||
+ | |||
+ | ===== Code ===== | ||
+ | [[http:// | ||
+ | |||
+ | <code perl> | ||
+ | # HTML attachments indexer plugin for Manitou-Mail | ||
+ | # Copyright (C) 2009 Daniel Verite | ||
+ | |||
+ | # This file is part of Manitou-Mail (see http:// | ||
+ | # This program is free software; you can redistribute it and/or modify | ||
+ | # it under the terms of the GNU General Public License version 2 as | ||
+ | # published by the Free Software Foundation. | ||
+ | |||
+ | # This program is distributed in the hope that it will be useful, | ||
+ | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
+ | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
+ | # See the GNU General Public License for more details. | ||
+ | |||
+ | # You should have received a copy of the GNU General Public License | ||
+ | # along with this program; if not, write to the Free Software | ||
+ | # Foundation, Inc., 59 Temple Place - Suite 330, | ||
+ | # Boston, MA 02111-1307, USA. | ||
+ | |||
+ | package Manitou:: | ||
+ | |||
+ | use HTML:: | ||
+ | use HTML:: | ||
+ | use IO::Handle; | ||
+ | |||
+ | sub init { | ||
+ | shift; | ||
+ | my ($args)=@_; | ||
+ | my $self={}; | ||
+ | bless $self; | ||
+ | return $self; | ||
+ | } | ||
+ | |||
+ | sub finish { | ||
+ | # nothing to do | ||
+ | 1; | ||
+ | } | ||
+ | |||
+ | sub index_contents { | ||
+ | my ($fh, $ctxt)=@_; | ||
+ | my $html; | ||
+ | my $text; | ||
+ | { | ||
+ | local $/; | ||
+ | $html = $fh-> | ||
+ | } | ||
+ | | ||
+ | if (defined $html) { | ||
+ | my $tree = HTML:: | ||
+ | $tree-> | ||
+ | my $formatter = HTML:: | ||
+ | $text = $formatter-> | ||
+ | } | ||
+ | if (defined $text) { | ||
+ | Manitou:: | ||
+ | } | ||
+ | } | ||
+ | |||
+ | sub process_parts { | ||
+ | my ($obj, | ||
+ | if ($obj-> | ||
+ | foreach my $subobj ($obj-> | ||
+ | process_parts($subobj, | ||
+ | } | ||
+ | } | ||
+ | else { | ||
+ | my $type=$obj-> | ||
+ | if ($type eq " | ||
+ | my $io = $obj-> | ||
+ | index_contents($io, | ||
+ | $io-> | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | |||
+ | sub process { | ||
+ | my ($self, | ||
+ | process_parts($ctxt-> | ||
+ | 1; | ||
+ | } | ||
+ | |||
+ | 1; | ||
+ | </ | ||
+ | |||
plugins/html_indexer.txt · Last modified: 2009/08/15 13:06 by daniel