Namazu-devel-ja(旧)
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
scheme (Re: indexer.pl)
- From: knok@xxxxxxxxxxxxx (NOKUBI Takatsugu)
- Date: Thu, 5 Dec 2002 15:29:38 JST
- X-ml-name: namazu-devel-ja
- X-mail-count: 02709
<200211181039.TAA18404@xxxxxxxxxxxxxxxxxx>の記事において
私は書きました。
>> >> > 次は非ファイルの対応に着手したいところです。
>> >> どんな仕様ですか?
>>
>> ファイル名で受けている個所を拡張して URI として入力を得られるように
>> すると良いかな、と思っています。あとは namazu-devel-ja#2678 にあるよう
>> な感じで scheme ごとに library を用意すれば良いかなと。
とりあえずできました。まずは scheme に対応するための差分をつけます。
--
野首 貴嗣
E-mail: knok@xxxxxxxxxxxxx
knok@xxxxxxxxxx / knok@xxxxxxxxxx
? scheme
Index: Makefile.am
===================================================================
RCS file: /storage/cvsroot/namazu/Makefile.am,v
retrieving revision 1.66
diff -u -r1.66 Makefile.am
--- Makefile.am 25 Apr 2002 08:58:40 -0000 1.66
+++ Makefile.am 5 Dec 2002 06:26:21 -0000
@@ -11,10 +11,10 @@
endif
SUBDIRS = conf doc etc filter intl po lib nmz pl src lisp scripts man \
- tests template contrib $(BUILT_TKNAMAZU)
+ tests template contrib scheme $(BUILT_TKNAMAZU)
DIST_SUBDIRS = conf doc etc filter intl po lib nmz pl src lisp scripts man \
- tests template contrib $(BUILT_TKNAMAZU)
+ tests template contrib scheme $(BUILT_TKNAMAZU)
bin_SCRIPTS = nmz-config
Index: configure.in
===================================================================
RCS file: /storage/cvsroot/namazu/configure.in,v
retrieving revision 1.150
diff -u -r1.150 configure.in
--- configure.in 15 Nov 2002 09:01:48 -0000 1.150
+++ configure.in 5 Dec 2002 06:26:21 -0000
@@ -419,6 +419,7 @@
src/Makefile
template/Makefile
contrib/Makefile
+ scheme/Makefile
tests/Makefile
tests/data/Makefile
tests/data/ja/Makefile
Index: pl/conf.pl.in
===================================================================
RCS file: /storage/cvsroot/namazu/pl/conf.pl.in,v
retrieving revision 1.34
diff -u -r1.34 conf.pl.in
--- pl/conf.pl.in 23 Aug 2001 08:18:49 -0000 1.34
+++ pl/conf.pl.in 5 Dec 2002 06:26:21 -0000
@@ -203,6 +203,7 @@
# $LIBDIR = "@PERLLIBDIR@";
# $FILTERDIR = "@FILTERDIR@";
# $TEMPLATEDIR = "@TEMPLATEDIR@";
+# $SCHEMEDIR = "@SCHEMEDIR@";
1;
Index: pl/util.pl
===================================================================
RCS file: /storage/cvsroot/namazu/pl/util.pl,v
retrieving revision 1.26
diff -u -r1.26 util.pl
--- pl/util.pl 23 Sep 2002 08:52:33 -0000 1.26
+++ pl/util.pl 5 Dec 2002 06:26:21 -0000
@@ -307,4 +307,9 @@
}
}
+# Return true if the argument looks like a URI ("scheme:...") rather than
+# a plain file name.
+#
+# The scheme must be at least two lowercase letters, so that a DOS/Windows
+# drive prefix such as "c:/docs" is still treated as a local path and goes
+# through the normal file/directory checks.
+sub isurl ($) {
+    return $_[0] =~ /^[a-z]{2,}:/;
+}
+
1;
Index: pl/var.pl.in
===================================================================
RCS file: /storage/cvsroot/namazu/pl/var.pl.in,v
retrieving revision 1.12
diff -u -r1.12 var.pl.in
--- pl/var.pl.in 2 Mar 2000 02:39:33 -0000 1.12
+++ pl/var.pl.in 5 Dec 2002 06:26:21 -0000
@@ -142,6 +142,16 @@
'text/plain' => "yes",
);
+# URI schemes known to the indexer, keyed by scheme name.  mknmz only
+# dispatches to a scheme whose value is the string "yes"; scheme handler
+# modules loaded by mknmz may add or overwrite entries.
+%SupportedScheme =
+ (
+ 'file' => "yes",
+ );
+
+# Per-scheme flag, keyed like %SupportedScheme and filled from each
+# handler's recursive() value.
+# NOTE(review): presumably marks schemes whose targets can be traversed
+# recursively -- no reader of this hash is visible in this patch; confirm.
+%RECURSIVE_SCHEME =
+ (
+ 'file' => 1,
+ );
+
# Dummy function for gettextization.
sub N_ {};
Index: scripts/mknmz.in
===================================================================
RCS file: /storage/cvsroot/namazu/scripts/mknmz.in,v
retrieving revision 1.116
diff -u -r1.116 mknmz.in
--- scripts/mknmz.in 16 Nov 2002 09:19:26 -0000 1.116
+++ scripts/mknmz.in 5 Dec 2002 06:26:22 -0000
@@ -49,6 +49,7 @@
my $LIBDIR = $PKGDATADIR . "/pl"; # directory where library etc. are in.
my $FILTERDIR = $PKGDATADIR . "/filter"; # directory where filters are in.
my $TEMPLATEDIR = $PKGDATADIR . "/template"; # directory where templates are in.
+my $SCHEMEDIR = $PKGDATADIR . "/scheme"; # directory where scheme handlers are in.
my $DeletedFilesCount = 0;
my $UpdatedFilesCount = 0;
@@ -175,7 +176,10 @@
$field_indices, $fh_errorsfile, $total_files_num) = @_;
my $processed_num = 0;
- my $file_size = util::filesize($cfile);
+ # Only local paths have an on-disk size; URI targets keep size 0 here.
+ # Use util::isurl() instead of an inline "! $cfile =~ /re/": "!" binds
+ # tighter than "=~", so that form negates $cfile before matching and
+ # the size would never be read.
+ my $file_size = 0;
+ if (!util::isurl($cfile)) {
+ $file_size = util::filesize($cfile);
+ }
if ($var::Opt{'htmlsplit'} && $cfile =~ $conf::HTML_SUFFIX) {
my @parts = htmlsplit::split($cfile, "NMZ.partial");
@@ -186,7 +190,7 @@
"$cfile#$part" =~ /$conf::EXCLUDE_PATH/);
my $fname = util::tmpnam("NMZ.partial.$id");
my $fragment = defined $part ? $part : undef;
- my $uri = generate_uri($cfile, $fragment);
+ my $uri = generate_uri($cfile, $fragment);
my $result = namazu_core($fname,
$docid_count + $processed_num,
$docid_base, $file_count,
@@ -430,6 +434,38 @@
}
}
+# Put the scheme handler directory at the front of the module search path,
+# then hand every "*.pl" file found there to load_schemes().
+sub load_schememodules() {
+    unshift(@INC, $SCHEMEDIR);
+    my @handler_files = glob("$SCHEMEDIR/*.pl");
+    load_schemes(@handler_files);
+}
+
+# Load scheme handler files and register the URI schemes they serve.
+#
+# Arguments: a list of paths to handler files named "<module>.pl".  Each
+# file is require()d by its base name, so its directory must already be in
+# @INC.  The handler is expected to define, in package
+# Namazu::Scheme::<module>:
+#   scheme()    - list of scheme names it handles
+#   status()    - value stored in %var::SupportedScheme ('yes' / 'no')
+#   recursive() - value stored in %var::RECURSIVE_SCHEME
+#   init()      - called once after loading; its errors are not checked
+#
+# Dies if a handler cannot be loaded or if scheme(), status() or
+# recursive() fails.  Entries that do not end in "<module>.pl" are skipped.
+#
+# NOTE(review): the file-name pattern accepts "-" in <module>, but such a
+# name cannot form a valid package name in the eval strings below.
+sub load_schemes(@) {
+    my @schemes = @_;
+    for my $scheme (@schemes) {
+        # Skip non-matching entries; otherwise $1 would be stale or undef.
+        next unless $scheme =~ m!([-\w]+)\.pl$!;
+        my $module = $1;
+
+        # require() dies on failure, so trap it to name the broken handler.
+        # ("require EXPR || die ..." never reaches the die, because "||"
+        # binds to the file-name string.)
+        eval { require "$module.pl"; 1 }
+            or die "unable to require \"$module.pl\": $@";
+
+        my (@protocols, $status, $recursive);
+        eval "\@protocols = Namazu::Scheme::${module}::scheme();";
+        die $@ if $@;
+        eval "\$status = Namazu::Scheme::${module}::status();";
+        die $@ if $@;
+        eval "\$recursive = Namazu::Scheme::${module}::recursive();";
+        die $@ if $@;
+        # Errors from init() are deliberately left unchecked, as before;
+        # presumably init() is optional -- TODO confirm.
+        eval "Namazu::Scheme::${module}::init();";
+
+        for my $sc (@protocols) {
+            # A handler reporting 'no' must not disable a scheme that is
+            # already marked 'yes'.
+            next if (defined $var::SupportedScheme{$sc} &&
+                $var::SupportedScheme{$sc} eq 'yes' && $status eq 'no');
+            $var::SupportedScheme{$sc} = $status;
+            $var::RECURSIVE_SCHEME{$sc} = $recursive;
+        }
+    }
+}
+
# Core routine.
#
# FIXME: Too many parameters. They must be cleared.
@@ -454,6 +490,20 @@
unless ($uri) {
$uri = generate_uri($cfile); # Make a URI from a file name.
}
+ my $scheme;
+ my $mtime;
+ $mtime = (stat($cfile))[9] unless util::isurl($cfile);
+ foreach $scheme (keys %var::SupportedScheme) {
+ if ($var::SupportedScheme{$scheme} eq 'yes') {
+ if ($uri =~ /^$scheme:/) {
+ eval "Namazu::Scheme::${scheme}::fetch(\$uri, \\\$content);";
+ die $@ if $@;
+ eval "\$mtime = Namazu::Scheme::${scheme}::mtime(\$uri);";
+ die $@ if $@;
+ }
+ }
+ }
+ $fields{'mtime'} = $mtime;
my ($cfile_size, $text_size, $kanji, $mtype) =
load_document(\$cfile, \$content, \$weighted_str,
\$headings, \%fields);
@@ -486,7 +536,7 @@
clean_field_index(\%fields);
put_field_index(\%fields, $field_indices);
- put_dateindex($cfile);
+ put_dateindex($cfile, $fields{'mtime'});
$content .= $weighted_str; # add weights
$Indexer->init(\$content, $conf::WORD_LENG_MAX, $var::Opt{'nosymbol'});
$Indexer->noedgesymbol() if ($var::Opt{'noedgesymbol'});
@@ -594,9 +644,10 @@
$fields->{'title'} = gfilter::filename_to_title($cfile, $wsref);
}
unless (defined($fields->{'date'})) {
- my $mtime = (stat($cfile))[9];
+ my $mtime = $fields->{'mtime'};
my $date = util::rfc822time($mtime);
$fields->{'date'} = $date;
+ $fields->{'mtime'} = $mtime;
}
unless (defined($fields->{'uri'})) {
$fields->{'uri'} = $uri;
@@ -732,8 +783,8 @@
# put the date infomation into NMZ.t file
sub put_dateindex ($) {
- my ($cfile) = @_;
- my $mtime = (stat($cfile))[9];
+ my ($cfile, $mtime) = @_;
+ $mtime = (stat($cfile))[9] unless defined $mtime;
my $fh_dataindex = util::efopen(">>$var::NMZ{'_t'}");
print $fh_dataindex pack("N", $mtime);
@@ -746,8 +797,7 @@
= @_;
my $cfile = $$orig_cfile;
- return (0, 0, 0, 0) unless (-f $cfile && -r $cfile);
-
+ return (0, 0, 0, 0) unless (util::isurl($cfile) || (-f $cfile && -r $cfile));
my $file_size;
my $shelter_cfile = "";
@@ -770,6 +820,7 @@
$$contref = util::readfile($cfile);
} else {
$file_size = length($$contref);
+ $fields->{'size'} = $file_size;
}
my ($kanji, $mtype) = apply_filter($orig_cfile, $contref, $weighted_str, $headings, $fields, $shelter_cfile);
@@ -1110,6 +1161,7 @@
util::dprint("Override indexing language: $util::LANG\n");
}
load_filtermodules(); # to make effect $opt_config, $index_lang.
+ load_schememodules();
postload_modules();
if ($opt_help) {
@@ -1273,6 +1325,7 @@
sub absolute_path($$) {
my ($cwd, $path) = @_;
+ return $path if (util::isurl($path));
$path =~ s!^\.$!\./!;
$path =~ s!^\.[/\\]!$cwd/!;
if (($SYSTEM eq "MSWin32") || ($SYSTEM eq "os2")) {
@@ -1321,8 +1374,10 @@
print STDERR "Warning: target contains empty line, skip it\n";
next;
}
-
- if (-f $target) { # target is a file.
+
+ if (util::isurl($target)) {
+ add_target($target, \@flist, \%counts);
+ } elsif (-f $target) { # target is a file.
add_target($target, \@flist, \%counts);
} elsif (-d $target) { # target is a directory.
my @subtargets = ();
@@ -1385,11 +1440,11 @@
return; # skip a file name containing LF/CR/TAB chars.
}
- return unless -f $target; # Only file is targeted.
+ return unless util::isurl($target) || -f $target;
$counts_ref->{'possible'}++;
- unless (-r $target) {
+ unless (util::isurl($target) || -r $target) {
util::vprint(sprintf(_("Unreadable: %s"), $target));
$counts_ref->{'excluded'}++;
return;
@@ -1407,7 +1462,7 @@
#
# Do processing just like find's --mtime option.
#
- if (defined $var::Opt{'mtime'}) {
+ if (defined $var::Opt{'mtime'} && ! util::isurl($_)) {
my $mtime = -M $_;
if ($var::Opt{'mtime'} < 0) {
@@ -1506,9 +1561,9 @@
my ($cfile, $cfile_size, $text_size, $mtype, $uri) = @_;
my $msg = undef;
- if (! -e $cfile) {
+ if (! util::isurl($cfile) && ! -e $cfile) {
$msg = _("does NOT EXIST! skipped.");
- } elsif (! -r $cfile) {
+ } elsif (! util::isurl($cfile) && ! -r $cfile) {
$msg = _("is NOT READABLE! skipped.");
} elsif ($text_size == 0 || $cfile_size == 0) {
$msg = _("is 0 size! skipped.");
@@ -2427,6 +2482,7 @@
$var::RECURSIVE_ACTIONS, $conf::META_TAGS, $var::USE_NKF_MODULE,
$conf::ADDRESS, $var::MAILING_ADDRESS,
$conf::FILE_SIZE_MAX,
+ $var::SupportedScheme, $var::RECURSIVE_SCHEME,
);
sub muda {}