Namazu-users-ja(旧)
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: インデックスのマージ
古川です。
From: Osamu Okano <osamu2001@xxxxxxxxxxxx>
Subject: [namazu-users-ja] Re: pnamazu-2000.10.07 (インデックスのマージ)
Date: Tue, 10 Oct 2000 23:21:36 +0900
osamu2001> かなり前の話ですが
osamu2001> インデックスのマージツールを作る話
osamu2001> があったと思うのですが
osamu2001> 今現在はどうなっているのでしょうか?
おっ、そんな話もありましたね。じゃあ、つくりましょうか。
やっつけですが…
--
Rei FURUKAWA
furukawa@xxxxxxxxxxxx
#! /usr/local/bin/perl5 -w
use strict;
push(@INC, "/usr/local/share/namazu/pl");
require 'nmzidx.pl';
# Entry point: exactly three directory arguments are required
# (destination index, first source index, second source index).
# With any other argument count, show the usage line and stop.
unless (@ARGV == 3){
    print("Usage: nmzmerge.pl dst src1 src2\n");
    exit;
}
nmzmerge(@ARGV);
# Merge the two source indices $dir1 and $dir2 into a new index at $dir0.
#
# Arguments:
#   $dir0 - destination index directory (opened through nmzidx in mode 'w')
#   $dir1 - first source index directory (opened in mode 'r')
#   $dir2 - second source index directory (opened in mode 'r')
#
# The merge runs in three passes: file list, word index, phrase index.
# Every numeric key/entry coming from $dir2 is shifted by $offset, the number
# of file records copied from $dir1, so that it keeps pointing at the same
# record once $dir2's records are appended after $dir1's.
#
# NOTE(review): the exact semantics of the nmzidx handles (open_flist,
# open_word, open_phrase, write_status, replace_db) live in nmzidx.pl and
# are not visible here; comments below describe only what this code does.
sub nmzmerge{
    my ($dir0, $dir1, $dir2) = @_;

    my $nmz0 = nmzidx->new($dir0, 'w');
    my $nmz1 = nmzidx->new($dir1, 'r');
    my $nmz2 = nmzidx->new($dir2, 'r');
    my $offset = 0;

    # Pass 1: file list.
    {
        my $nmz0_file = $nmz0->open_flist;

        # Collect the field names of BOTH sources.  The field names must be
        # read from each source's own handle (the previous code read the
        # first source twice, so fields that exist only in the second source
        # were never opened in the destination).  Duplicates are dropped.
        my %seen;
        my @field;
        for my $nmz ($nmz1, $nmz2){
            my $file = $nmz->open_flist;
            push(@field, grep { !$seen{$_}++ } keys %{$file->{'field'}});
            $file->close;
        }

        # Make sure the destination has every field that any source has.
        for my $field (@field){
            $nmz0_file->{'field'}->open($nmz0, $field)
                unless defined $nmz0_file->{'field'}->{$field};
        }
        @field = sort keys %{$nmz0_file->{'field'}};

        # Copy the records of the first source; the number of records copied
        # becomes the offset applied to everything coming from the second.
        my $nmz1_file = $nmz1->open_flist;
        $offset = nmzfile(\@field, $nmz0_file, $nmz1_file);
        $nmz1_file->close;

        my $nmz2_file = $nmz2->open_flist;
        nmzfile(\@field, $nmz0_file, $nmz2_file);
        $nmz2_file->close;

        $nmz0_file->close;
    }

    # Pass 2: word index.  Both sources are walked in parallel like a
    # two-way merge; wordcmp() returns undef once both are exhausted.
    {
        my $nmz0_word = $nmz0->open_word;
        my $nmz1_word = $nmz1->open_word;
        my $nmz2_word = $nmz2->open_word;

        my ($w1, $w2, %list1, %list2);
        my $word1 = $nmz1_word->read(\$w1, \%list1);
        my $word2 = $nmz2_word->read(\$w2, \%list2);
        my $ndx = 0;

        while (defined(my $c = wordcmp($word1, $word2))){
            my %list;
            my $word;

            # Take the entry of source 1 when its word sorts first or ties.
            if ($c <= 0){
                $word = $word1;
                %list = %list1;
                $word1 = $nmz1_word->read(\$w1, \%list1);
            }
            # Take the entry of source 2 when its word sorts first or ties;
            # on a tie both branches run and the two lists are combined.
            if ($c >= 0){
                $word = $word2;
                for my $key (keys %list2){
                    $list{$key + $offset} = $list2{$key};
                }
                $word2 = $nmz2_word->read(\$w2, \%list2);
            }
            $nmz0_word->write($word, \%list);
            print "word $ndx: $word\n" unless ++$ndx % 100;
        }
    }

    # Pass 3: phrase index.  One list is read from each source per
    # iteration, for 0x10000 iterations; source-2 entries are shifted by
    # $offset and appended to the source-1 entries.
    {
        my $nmz0_phrase = $nmz0->open_phrase;
        my $nmz1_phrase = $nmz1->open_phrase;
        my $nmz2_phrase = $nmz2->open_phrase;

        for (my $ndx = 0; $ndx < 0x10000; $ndx++){
            my (@list, @list2);
            $nmz1_phrase->read(\@list);
            $nmz2_phrase->read(\@list2);
            for my $key (@list2){
                push(@list, $key + $offset);
            }
            $nmz0_phrase->write(\@list);
            printf("phrase %04X\n", $ndx) unless $ndx & 0xff;
        }
    }

    # Finalize: write the status (passing the first source), log the totals
    # if a log can be opened, then call replace_db on the destination.
    $nmz0->write_status($nmz1);
    if (my $log = $nmz0->log_open("[Merge]")){
        $log->printf("%-20s %d\n", "Total Files:", $nmz0->{'flist'}->{'offset'});
        $log->printf("%-20s %d\n", "Total Keywords:", $nmz0->{'word'}->{'offset'});
        $nmz0->log_close;
    }
    $nmz0->replace_db(0);
}
# Copy every record from one file-list handle to another.
#
# Arguments:
#   $ref_field - array ref of field names every written record must carry
#   $nmzo_file - output handle; its write(\%record) is called per record
#   $nmzi_file - input handle; its read(\%record) is called until it returns
#                undef, and its 'dir' and 'size' entries are used only for
#                the progress message
#
# Any listed field that is undefined in a record is set to the empty string
# before the record is written.  A progress line is printed every 100
# records.
#
# Returns the number of records copied.
sub nmzfile{
    my ($ref_field, $nmzo_file, $nmzi_file) = @_;
    my ($dir, $size) = @{$nmzi_file}{'dir', 'size'};

    my $count = 0;
    my %record;
    while (defined $nmzi_file->read(\%record)){
        $count++;
        for my $name (@$ref_field){
            next if defined $record{'field'}{$name};
            $record{'field'}{$name} = '';
        }
        print "$dir: $count/$size\n" if $count % 100 == 0;
        $nmzo_file->write(\%record);
    }
    return $count;
}
# Three-way comparison of two words where either may be undef, meaning
# "this side is exhausted".
#
# Returns:
#   undef          when both words are undef
#   -1             when only the first word is defined
#    1             when only the second word is defined
#   $w1 cmp $w2    (string comparison: -1, 0 or 1) when both are defined
sub wordcmp{
    my ($left, $right) = @_;

    if (defined $left){
        return defined $right ? ($left cmp $right) : -1;
    }
    return defined $right ? 1 : undef;
}