namazu-ml(avocado)


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Namazu v1.2.0.7 released!



Ken-ichi Hirose <hirose@xxxxxxxxxxxxxxxxxxxx> wrote:

>>統計を取ったわけではありませんが、検索語のわかち書きが必要な場合は
>>それほど多くないと思うのでそれなりにいい加減な処理でもいいんじゃな
>>いかと思っています。
>
>結構「この一文が含まれる資料を全部探す」という使い方をするので。。
>#cut & paste で 例えば「検索語のわかち書きが必要な場合」 とか。。

なるほど、そういう利用方法もあるのですね。


>>前述の送り仮名については対応してみたのでパッチをつけておきます。気
>>になる方はお試しください。
>
>了解しました。
>
>#win32 の readme.txt では namazu/doc 以下を indexing して「試してみよう」
>#で検索してみてくださいと書いてあるので、どっちにしろ document は改版する
>#必要がある訳で、それに適用させて頂きます。

どうせパッチをあてるならこっちの方が良いです。

-- Satoru Takabayashi

diff -c namazu-1.2.0.7/src/cgi.c namazu-1.2.0.7.1/src/cgi.c
*** namazu-1.2.0.7/src/cgi.c	Thu Sep 17 19:04:50 1998
--- namazu-1.2.0.7.1/src/cgi.c	Fri Sep 18 14:09:15 1998
***************
*** 62,68 ****
      /* note that CERN HTTPD would add empty PATH_INFO */
      if (getenv("PATH_INFO")) {
          char *path_info = getenv("PATH_INFO");
!         if (strlen(path_info) > 0) {
              sprintf(tmp, "%s%s", DEFAULT_DIR, path_info);
              if ((uchar *) NULL ==
                  (DbNames[DbNumber] = (uchar *) malloc(strlen(tmp) + 1)))
--- 62,68 ----
      /* note that CERN HTTPD would add empty PATH_INFO */
      if (getenv("PATH_INFO")) {
          char *path_info = getenv("PATH_INFO");
!         if (strlen(path_info) > 0 && strlen(path_info) < 128) {
              sprintf(tmp, "%s%s", DEFAULT_DIR, path_info);
              if ((uchar *) NULL ==
                  (DbNames[DbNumber] = (uchar *) malloc(strlen(tmp) + 1)))
diff -c namazu-1.2.0.7/src/messages.c namazu-1.2.0.7.1/src/messages.c
*** namazu-1.2.0.7/src/messages.c	Thu Sep 17 19:04:50 1998
--- namazu-1.2.0.7.1/src/messages.c	Fri Sep 18 14:09:15 1998
***************
*** 35,41 ****
  #endif
  
  /* information about Namazu */
! uchar *VERSION = "  Search Program of Namazu - Version 1.2.0.7\n";
  uchar *COPYRIGHT =
  "  Copyright (C) 1997-1998 Satoru Takabayashi All rights reserved.\n\n";
  
--- 35,41 ----
  #endif
  
  /* information about Namazu */
! uchar *VERSION = "  Search Program of Namazu - Version 1.2.0.7.1\n";
  uchar *COPYRIGHT =
  "  Copyright (C) 1997-1998 Satoru Takabayashi All rights reserved.\n\n";
  
diff -c namazu-1.2.0.7/src/mknmz.pl namazu-1.2.0.7.1/src/mknmz.pl
*** namazu-1.2.0.7/src/mknmz.pl	Thu Sep 17 19:04:50 1998
--- namazu-1.2.0.7.1/src/mknmz.pl	Fri Sep 18 14:09:15 1998
***************
*** 1,7 ****
  #!%OPT_PATH_PERL% -w
  #
  # mknmz.pl - indexer of Namazu
! # Version   1.2.0.7 [09/17/1998]
  #
  # Copyright (C) 1997-1998 Satoru Takabayashi  All rights reserved.
  #     This is free software with ABSOLUTELY NO WARRANTY.
--- 1,7 ----
  #!%OPT_PATH_PERL% -w
  #
  # mknmz.pl - indexer of Namazu
! # Version   1.2.0.7.1 [09/18/1998]
  #
  # Copyright (C) 1997-1998 Satoru Takabayashi  All rights reserved.
  #     This is free software with ABSOLUTELY NO WARRANTY.
***************
*** 61,67 ****
  ##
  ## ソフトウェア情報
  ##
! $VERSION = "1.2.0.7";
  $COPYRIGHT = "Copyright (C) 1997-1998 Satoru Takabayashi  All rights reserved.";
  $NMZ_URL = "http://saturn.aichi-u.ac.jp/%7Eccsatoru/Namazu/";
  
--- 61,67 ----
  ##
  ## ソフトウェア情報
  ##
! $VERSION = "1.2.0.7.1";
  $COPYRIGHT = "Copyright (C) 1997-1998 Satoru Takabayashi  All rights reserved.";
  $NMZ_URL = "http://saturn.aichi-u.ac.jp/%7Eccsatoru/Namazu/";
  
diff -c namazu-1.2.0.7/src/namazu.c namazu-1.2.0.7.1/src/namazu.c
*** namazu-1.2.0.7/src/namazu.c	Thu Sep 17 19:04:50 1998
--- namazu-1.2.0.7.1/src/namazu.c	Fri Sep 18 14:09:15 1998
***************
*** 1,7 ****
  /*
   * 
   * namazu.c - search client of Namazu
!  * Version   1.2.0.7 [09/17/1998]
   *
   * Copyright (C) 1997-1998 Satoru Takabayashi  All rights reserved.
   * This is free software with ABSOLUTELY NO WARRANTY.
--- 1,7 ----
  /*
   * 
   * namazu.c - search client of Namazu
!  * Version   1.2.0.7.1 [09/18/1998]
   *
   * Copyright (C) 1997-1998 Satoru Takabayashi  All rights reserved.
   * This is free software with ABSOLUTELY NO WARRANTY.
diff -c namazu-1.2.0.7/src/search.c namazu-1.2.0.7.1/src/search.c
*** namazu-1.2.0.7/src/search.c	Thu Sep 17 19:04:50 1998
--- namazu-1.2.0.7.1/src/search.c	Fri Sep 18 14:09:15 1998
***************
*** 378,387 ****
      if (!MoreShortFormat) {
          printf(" { ");
      }
      for (i = 0; ;i++) {
          q = strchr(p, '\t');
          if (q) 
!             *q = (uchar)NULL;
          if (strlen(p) > 0) {
              HLIST tmp;
  
--- 378,390 ----
      if (!MoreShortFormat) {
          printf(" { ");
      }
+     while (*p == '\t') {  /* leading tabs are skipped */
+         p++;
+     }
      for (i = 0; ;i++) {
          q = strchr(p, '\t');
          if (q) 
!             *q = '\0';
          if (strlen(p) > 0) {
              HLIST tmp;
  
diff -c namazu-1.2.0.7/src/wakati.c namazu-1.2.0.7.1/src/wakati.c
*** namazu-1.2.0.7/src/wakati.c	Thu Sep 17 19:04:50 1998
--- namazu-1.2.0.7.1/src/wakati.c	Fri Sep 18 14:09:15 1998
***************
*** 32,37 ****
--- 32,39 ----
  
  #define iseuc(c)  ((int)(c) >= 0xa1 && (int)(c) <= 0xfe)
  #define is_kanji(c)  (iseuc(*(c)) && iseuc(*(c + 1)))
+ #define is_choon(c) ((int)*(c) == 0xa1 && (int)*(c + 1) == 0xbc)
+ 
  int is_katakana(uchar *c)
  {
      if ((((int)*c == 0xa5 && 
***************
*** 52,104 ****
      return 0;
  }
  
  void wakati(uchar *key)
  {
!     int i, j, key_leng;
      uchar buf[BUFSIZE * 2] = "";
  
!     for (i = 0; i < strlen(key); i++) {
  	if (iseuc(*(key + i))) {
  	    key_leng = 0;
! 	    for (j = 0; is_kanji(key + i + j) && !is_katakana(key + i + j) 
!                          && !is_hiragana(key + i + j);  j += 2)
              {
  		uchar tmp[BUFSIZE];
  
  		strncpy(tmp, key + i, j + 2);
  		*(tmp + j + 2) = '\0';
  
! 		if (binsearch(tmp) != -1) {
  		    key_leng = j + 2;
  		}
  	    }
  	    if (key_leng > 0) {
- 		if (strlen(buf) != 0 && *(buf + strlen(buf) -1) != '\t') {
- 		    strcat(buf, "\t");
- 		}
  		strncat(buf, key + i, key_leng); 
! 		if (*(key + i + key_leng) != '\0') {
! 		    strcat(buf, "\t");
! 		}
! 		i += key_leng - 1;
  	    } else {
! 		strncat(buf, key + i, 2);
! 		i++;
  	    }
  	} else {
-             if (i > 0 && iseuc(*(key + i - 1))) {
-                 strcat(buf, "\t");
-             }
              while(*(key + i) && !iseuc(*(key + i))) {
                  strncat(buf, key + i, 1);
                  i++;
              }
!             if (*(key + i)) {
!                 strcat(buf, "\t");
!             }
!             i--;
  	}
      }
      if (strlen(buf) <= BUFSIZE) {
  	strcpy(key, buf);
      } else {
--- 54,139 ----
      return 0;
  }
  
+ 
+ #define ASCII 0
+ #define KANJI 1
+ #define KATAKANA 2
+ #define HIRAGANA 3
+ 
+ int detect_code_type(uchar *c)
+ {
+     if (is_katakana(c)) {
+         return KATAKANA;
+     } else if (is_hiragana(c)){
+         return HIRAGANA;
+     } else if (is_kanji(c)) {
+         return KANJI;
+     }
+     return ASCII;
+ }
+ 
  void wakati(uchar *key)
  {
!     int i, j, key_leng, type;
      uchar buf[BUFSIZE * 2] = "";
  
!     for (i = 0; i < strlen(key); ) {
!         type = detect_code_type(key + i);
  	if (iseuc(*(key + i))) {
  	    key_leng = 0;
! 	    for (j = 0; is_kanji(key + i + j) ;  j += 2)
              {
  		uchar tmp[BUFSIZE];
  
+                 if (j == 0 && (is_katakana(key + i + j) ||
+                     is_hiragana(key + i + j))) 
+                 {
+                     /* if the beginning character is Katakana or Hiragana */
+                     break;
+                 }
+ 
  		strncpy(tmp, key + i, j + 2);
  		*(tmp + j + 2) = '\0';
  
! 		if (binsearch(tmp, 0) != -1) {
  		    key_leng = j + 2;
  		}
  	    }
+ 
  	    if (key_leng > 0) {
  		strncat(buf, key + i, key_leng); 
!                 strcat(buf, "\t");
! 		i += key_leng;
  	    } else {
!                 if (type == HIRAGANA || type == KATAKANA) {
!                     for (j =0; ; j += 2) {
!                         if (!((type == HIRAGANA && is_hiragana(key + i + j))
!                             ||(type == KATAKANA && is_katakana(key + i + j)))) 
!                         {
!                             break;
!                         }
!                         strncat(buf, key + i + j, 2);
!                     }
!                     i += j;
!                     strcat(buf, "\t");
!                 } else {
!                     strncat(buf, key + i, 2);
!                     strcat(buf, "\t");
!                     i += 2;
!                 }
  	    }
  	} else {
              while(*(key + i) && !iseuc(*(key + i))) {
+                 /* the first iteration always succeeds here, so i
+                    advances and the outer for loop cannot spin forever */
                  strncat(buf, key + i, 1);
                  i++;
              }
!             strcat(buf, "\t");
  	}
      }
+     chop(buf);
+ 
      if (strlen(buf) <= BUFSIZE) {
  	strcpy(key, buf);
      } else {