Namazu-devel-ja(旧)
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
DocCat PDF support and filter/pdf.pl
- From: SATOH Fumiyasu <fumiya@xxxxxxxxxxx>
- Date: Sat, 10 Mar 2001 01:23:27 +0900
- X-ml-name: namazu-devel-ja
- X-mail-count: 01329
さとうふみやす@転職しましたです。
DocCat (http://www.dehenken.co.jp/) が PDF をサポートしました。
それに対応するための filter/pdf.pl のパッチを添付します。
filter/msword.pl の filter_doccat() をパクったのですが、
$cfile のファイルを一時ファイル ($tmpfile2) にコピーしているのは
何故なんでしょうか? > 作者
? pdf.pl.dist
Index: pdf.pl
===================================================================
RCS file: /storage/cvsroot/namazu/filter/pdf.pl,v
retrieving revision 1.22
diff -u -r1.22 pdf.pl
--- pdf.pl 2001/01/04 01:57:58 1.22
+++ pdf.pl 2001/03/09 16:12:11
@@ -25,10 +25,12 @@
package pdf;
use strict;
+use File::Copy;
require 'util.pl';
require 'gfilter.pl';
my $pdfconvpath = undef;
+my $pdfconvname = undef;
sub mediatype() {
return ('application/pdf');
@@ -36,7 +38,17 @@
sub status() {
$pdfconvpath = util::checkcmd('pdftotext');
- return 'yes' if (defined $pdfconvpath);
+ if (defined $pdfconvpath) {
+ $pdfconvname = 'pdftotext';
+ return 'yes';
+ }
+
+ $pdfconvpath = util::checkcmd('doccat');
+ if (defined $pdfconvpath) {
+ $pdfconvname = 'doccat';
+ return 'yes';
+ }
+
return 'no';
}
@@ -57,6 +69,19 @@
}
sub filter ($$$$$) {
+ my ($orig_cfile, $cont, $weighted_str, $headings, $fields) = @_;
+ my $err = undef;
+
+ if ($pdfconvname eq 'pdftotext') {
+ $err = filter_wv($orig_cfile, $cont, $weighted_str, $headings,$fields);
+ } else {
+ $err = filter_doccat($orig_cfile, $cont, $weighted_str, $headings,$fields);
+ }
+
+ return $err;
+}
+
+sub filter_wv ($$$$$) {
my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
= @_;
my $cfile = defined $orig_cfile ? $$orig_cfile : '';
@@ -94,6 +119,35 @@
unless $fields->{'title'};
gfilter::show_filter_debug_info($cont, $weighted_str,
$fields, $headings);
+ return undef;
+}
+
+sub filter_doccat ($$$$$) {
+ my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
+ = @_;
+ my $cfile = defined $orig_cfile ? $$orig_cfile : '';
+
+ my $tmpfile = util::tmpnam('NMZ.pdf');
+ my $tmpfile2 = util::tmpnam('NMZ.pdf2');
+ copy("$cfile", "$tmpfile2");
+
+ system("$pdfconvpath -o e $tmpfile2 > $tmpfile");
+
+ {
+ my $fh = util::efopen("< $tmpfile");
+ $$cont = util::readfile($fh);
+ }
+
+ unlink($tmpfile);
+ unlink($tmpfile2);
+
+ gfilter::line_adjust_filter($cont);
+ gfilter::line_adjust_filter($weighted_str);
+ gfilter::white_space_adjust_filter($cont);
+ $fields->{'title'} = gfilter::filename_to_title($cfile, $weighted_str)
+ unless $fields->{'title'};
+ gfilter::show_filter_debug_info($cont, $weighted_str,
+ $fields, $headings);
return undef;
}
--
SATOH Fumiyasu - <fumiya@xxxxxxxxxxxxxx>, <fumiya@xxxxxxxxxxx>
THRUST Co., Ltd. @ Fujisawa, Kanagawa, Japan - http://www.net-thrust.com/
Samba-JP, aka `Samba Users Group Japan' - http://www.samba.gr.jp/
Apache-JP(?), aka `Japan Apache Users Group' - http://www.apache.or.jp/