Namazu-users-ja(旧)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: インデックスのマージ

From: Rei FURUKAWA <furukawa@xxxxxxxxxxxx>
Date: Wed, 11 Oct 2000 22:40:39 +0900
X-ml-name: namazu-users-ja
X-mail-count: 01064
References: <20001007055756P.furukawa@tcp-ip.or.jp> <39E325F015C.FBACOSAMU2001@smtp.livedoor.com>

古川です。

From: Osamu Okano <osamu2001@xxxxxxxxxxxx>
Subject: [namazu-users-ja] Re: pnamazu-2000.10.07 （インデックスのマージ）
Date: Tue, 10 Oct 2000 23:21:36 +0900

osamu2001> かなり前の話ですが
osamu2001> インデックスのマージツールを作る話
osamu2001> があったと思うのですが
osamu2001> 今現在はどうなっているのでしょうか?

おっ、そんな話もありましたね。じゃあ、つくりましょうか。

やっつけですが…

-- 
Rei FURUKAWA 
furukawa@xxxxxxxxxxxx

#! /usr/local/bin/perl5 -w

use strict;

# Add the Namazu Perl library directory to the search path so that the
# run-time require below can locate nmzidx.pl.
push @INC, "/usr/local/share/namazu/pl";
require 'nmzidx.pl';

# Exactly three arguments are expected (destination, then two sources);
# anything else prints the usage line and exits.
if (@ARGV != 3){
    print "Usage: nmzmerge.pl dst src1 src2\n";
    exit;
}

nmzmerge(@ARGV);

# Merge two indices into a third one.
#
# Arguments:
#   $dir0 - destination index directory (opened through nmzidx in 'w' mode)
#   $dir1 - first source index directory  (opened in 'r' mode)
#   $dir2 - second source index directory (opened in 'r' mode)
#
# The merge runs in three passes: file list, word index, phrase index.
# Entries coming from $dir2 have their keys shifted by $offset, the number
# of file-list records copied from $dir1, so that they follow the records
# of $dir1 in the destination.
#
# NOTE(review): the semantics of the nmzidx objects (open_flist, open_word,
# open_phrase, write_status, log_open, replace_db) are defined in nmzidx.pl
# and are not visible here.
sub nmzmerge{
    my ($dir0, $dir1, $dir2) = @_;

    my $nmz0 = nmzidx->new($dir0, 'w');

    my $nmz1 = nmzidx->new($dir1, 'r');
    my $nmz2 = nmzidx->new($dir2, 'r');

    my $offset = 0;
    {
        my $nmz0_file = $nmz0->open_flist;

        # Collect the field names known to either source index.
        my $nmz1_file = $nmz1->open_flist;
        my @field = keys %{$nmz1_file->{'field'}};
        $nmz1_file->close;

        # Fixed: this used to read $nmz1_file->{'field'} a second time, so
        # fields that exist only in the second source were never opened in
        # the destination.
        my $nmz2_file = $nmz2->open_flist;
        @field = (@field, (keys %{$nmz2_file->{'field'}}));
        $nmz2_file->close;

        # Open every collected field on the destination; the guard makes
        # duplicate names in @field harmless.
        for my $field (@field){
            $nmz0_file->{'field'}->open($nmz0, $field) unless defined $nmz0_file->{'field'}->{$field};
        }
        @field = sort keys %{$nmz0_file->{'field'}};

        # Copy the records of the first source; its record count becomes
        # the shift applied to keys coming from the second source.
        $nmz1_file = $nmz1->open_flist;
        $offset = nmzfile(\@field, $nmz0_file, $nmz1_file);
        $nmz1_file->close;

        $nmz2_file = $nmz2->open_flist;
        nmzfile(\@field, $nmz0_file, $nmz2_file);
        $nmz2_file->close;

        $nmz0_file->close;
    }

    {
        my $nmz0_word = $nmz0->open_word;
        my $nmz1_word = $nmz1->open_word;
        my $nmz2_word = $nmz2->open_word;

        my ($w1, $w2, %list1, %list2);
        my $word1 = $nmz1_word->read(\$w1, \%list1);
        my $word2 = $nmz2_word->read(\$w2, \%list2);

        my $ndx = 0;

        # Two-way merge of the word streams: wordcmp yields undef once both
        # sides are exhausted, which ends the loop.
        while (defined(my $c = wordcmp($word1, $word2))){
            my %list;
            my $word;

            # Take the entry from source 1 when its word sorts first or ties.
            if ($c <= 0){
                $word = $word1;
                %list = %list1;
                $word1 = $nmz1_word->read(\$w1, \%list1);
            }
            # Take the entry from source 2 when its word sorts first or
            # ties; on a tie both lists end up combined in %list.
            if ($c >= 0){
                $word = $word2;
                for my $key (keys %list2){
                    $list{$key + $offset} = $list2{$key};
                }
                $word2 = $nmz2_word->read(\$w2, \%list2);
            }
            $nmz0_word->write($word, \%list);
            print "word $ndx: $word\n" unless ++$ndx % 100;
        }
    }

    {
        my $nmz0_phrase = $nmz0->open_phrase;
        my $nmz1_phrase = $nmz1->open_phrase;
        my $nmz2_phrase = $nmz2->open_phrase;

        # One read/write per phrase slot, 0x10000 slots in total.
        for (my $ndx = 0; $ndx < 0x10000; $ndx++){
            my (@list, @list2);
            $nmz1_phrase->read(\@list);
            $nmz2_phrase->read(\@list2);

            # Append the second source's entries after shifting them.
            for my $key (@list2){
                push(@list, $key + $offset);
            }

            $nmz0_phrase->write(\@list);
            printf("phrase %04X\n", $ndx) unless $ndx & 0xff;
        }
    }

    $nmz0->write_status($nmz1);

    if (my $log = $nmz0->log_open("[Merge]")){
        $log->printf("%-20s %d\n", "Total Files:", $nmz0->{'flist'}->{'offset'});
        $log->printf("%-20s %d\n", "Total Keywords:", $nmz0->{'word'}->{'offset'});
        $nmz0->log_close;
    }

    $nmz0->replace_db(0);
}

# Copy every record from an input file-list object to an output one.
#
# Arguments:
#   $ref_field - array ref of field names every written record must carry
#   $nmzo_file - output object; its write method receives each record
#   $nmzi_file - input object; its read method fills a hash per record and
#                returns undef when there is nothing more to read
#
# Any listed field that is undefined in a record is set to the empty
# string before the record is written. A progress line is printed every
# 100 records. Returns the number of records copied.
sub nmzfile{
    my ($fields, $out_flist, $in_flist) = @_;

    my ($src_dir, $total) = @{$in_flist}{'dir', 'size'};
    my %entry;
    my $count = 0;

    while (defined $in_flist->read(\%entry)){
        $count++;
        for my $name (@$fields){
            next if defined $entry{'field'}{$name};
            $entry{'field'}{$name} = '';
        }
        print "$src_dir: $count/$total\n" if $count % 100 == 0;
        $out_flist->write(\%entry);
    }
    return $count;
}

# Three-way comparison of two possibly-undefined words.
#
# Returns undef when both are undefined, -1 when only the first is
# defined, 1 when only the second is defined, and the result of a string
# comparison (cmp) when both are defined.
sub wordcmp{
    my ($left, $right) = @_;
    my $has_left  = defined $left;
    my $has_right = defined $right;

    if ($has_left and $has_right){
        return $left cmp $right;
    }
    return -1 if $has_left;
    return 1 if $has_right;
    return undef;
}

References:
- pnamazu-2000.10.07
  - From: Rei FURUKAWA
- Re: pnamazu-2000.10.07 （インデックスのマージ）
  - From: Osamu Okano

Prev by Date: "#" 付のファイルについて
Next by Date: "#" 付のファイルについて
Previous by thread: Re: pnamazu-2000.10.07 （インデックスのマージ）
Next by thread: インデックスの分割 (Re: インデックスのマージ)
Index(es):
- Date
- Thread

Namazu-users-ja(旧)

Re: インデックスのマージ

Namazu-users-ja(旧)

Re: インデックスのマージ