Namazu-devel-ja(旧)
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
(patch) MaxHit, MaxMatch
- From: masao@xxxxxxxxxx (Masao Takaku)
- Date: Tue, 2 May 2000 12:41:04 +0900 (JST)
- X-ml-name: namazu-devel-ja
- X-mail-count: 00442
たかく@図書館情報大です。
IGNORE_HIT・IGNORE_MATCHを越えるような検索式を与えると、
検索結果が出ないというのに、
ちょっとフラストレーションを感じたので、
これらの値を namazurc で設定可能にするパッチをざっと作ってみました。
namazurc で、以下のように指定します。
(それぞれ文書・ワード数の上限値を示します。)
--
MaxHit 30000
MaxMatch 10000
--
特に異議が出ないようなら commit しようかと思いますが、
いかがでしょうか。
--
高久 雅生 || Masao Takaku
mailto:masao@xxxxxxxxxx http://cosmo.ulis.ac.jp/~masao/
Index: ChangeLog
===================================================================
RCS file: /storage/cvsroot/namazu/ChangeLog,v
retrieving revision 1.687
diff -u -r1.687 ChangeLog
--- ChangeLog 2000/04/26 02:30:50 1.687
+++ ChangeLog 2000/05/02 02:50:07
@@ -1,3 +1,20 @@
+2000-05-01 Masao Takaku <masao@xxxxxxxxxx>
+
+ * src/rcfile.c (process_rc_maxhit): New function.
+ (process_rc_maxmatch): Likewise.
+
+ * nmz/libnamazu.h (IGNORE_HIT): Abolished.
+ (IGNORE_MATCH): Likewise.
+
+ * nmz/{hlist,re,search}.c: Call nmz_get_maxhit() instead of using
+ IGNORE_HIT.
+ Call nmz_get_maxmatch() instead of using IGNORE_MATCH.
+
+ * nmz/libnamazu.c (nmz_set_maxhit): New function.
+ (nmz_get_maxhit): Likewise.
+ (nmz_set_maxmatch): Likewise.
+ (nmz_get_maxmatch): Likewise.
+
2000-04-26 Satoru Takabayashi <satoru-t@xxxxxxxxxxxxxxxxxx>
* scripts/mknmz.in (load_registry): Simplified. Completely rewritten.
Index: nmz/hlist.c
===================================================================
RCS file: /storage/cvsroot/namazu/nmz/hlist.c,v
retrieving revision 1.45
diff -u -r1.45 hlist.c
--- nmz/hlist.c 2000/03/12 02:07:06 1.45
+++ nmz/hlist.c 2000/05/02 02:50:07
@@ -552,7 +552,7 @@
nmz_debug_printf("idf: %f (N:%d, n:%d)\n", idf, document_number, n/2);
}
- if (n >= IGNORE_HIT * 2) {
+ if (n >= nmz_get_maxhit() * 2) {
/* '* 2' means NMZ.i contains a file-ID and a score. */
hlist.stat = ERR_TOO_MUCH_HIT;
} else {
Index: nmz/libnamazu.c
===================================================================
RCS file: /storage/cvsroot/namazu/nmz/libnamazu.c,v
retrieving revision 1.33
diff -u -r1.33 libnamazu.c
--- nmz/libnamazu.c 2000/02/20 06:35:02 1.33
+++ nmz/libnamazu.c 2000/05/02 02:50:07
@@ -61,6 +61,8 @@
static enum nmz_sortmethod sortmethod = SORT_BY_SCORE;
static enum nmz_sortorder sortorder = DESCENDING;
+static int maxhit = 10000; /* Ignore if pages matched more than this. */
+static int maxmatch = 1000; /* Ignore if words matched more than this. */
static int debugmode = 0;
static int loggingmode = 1; /* do logging with NMZ.slog */
static char dyingmsg[BUFSIZE] = "";
@@ -106,6 +108,30 @@
nmz_get_sortorder(void)
{
return sortorder;
+}
+
+void
+nmz_set_maxhit(int max)
+{
+ maxhit = max;
+}
+
+int
+nmz_get_maxhit(void)
+{
+ return maxhit;
+}
+
+void
+nmz_set_maxmatch(int max)
+{
+ maxmatch = max;
+}
+
+int
+nmz_get_maxmatch(void)
+{
+ return maxmatch;
}
void
Index: nmz/libnamazu.h
===================================================================
RCS file: /storage/cvsroot/namazu/nmz/libnamazu.h,v
retrieving revision 1.43
diff -u -r1.43 libnamazu.h
--- nmz/libnamazu.h 2000/03/03 03:36:20 1.43
+++ nmz/libnamazu.h 2000/05/02 02:50:07
@@ -59,9 +59,6 @@
QUERY_TOKEN_MAX = 32, /* Max number of tokens in the query. */
QUERY_MAX = 256, /* Max length of the query. */
- IGNORE_HIT = 10000, /* Ignore if pages matched more than this. */
- IGNORE_MATCH = 1000, /* Ignore if words matched more than this. */
-
INDEX_MAX = 64 /* Max number of databases */
};
@@ -234,6 +231,10 @@
extern enum nmz_sortmethod nmz_get_sortmethod(void);
extern void nmz_set_sortorder ( enum nmz_sortorder order );
extern enum nmz_sortorder nmz_get_sortorder(void);
+extern void nmz_set_maxhit ( int max );
+extern int nmz_get_maxhit ( void );
+extern void nmz_set_maxmatch ( int max );
+extern int nmz_get_maxmatch ( void );
extern void nmz_set_debugmode ( int mode );
extern int nmz_is_debugmode ( void );
extern void nmz_set_loggingmode ( int mode );
Index: nmz/re.c
===================================================================
RCS file: /storage/cvsroot/namazu/nmz/re.c,v
retrieving revision 1.29
diff -u -r1.29 re.c
--- nmz/re.c 2000/02/13 17:34:53 1.29
+++ nmz/re.c 2000/05/02 02:50:07
@@ -79,12 +79,12 @@
if (val.stat == ERR_FATAL)
return val;
val.num = 0; /* set 0 for no matching case */
- max = IGNORE_HIT;
+ max = nmz_get_maxhit();
if (strcmp(field, "uri") == 0) {
uri_mode = 1;
}
} else {
- max = IGNORE_MATCH;
+ max = nmz_get_maxmatch();
}
nmz_re_compile_pattern(tmpexpr, strlen(tmpexpr), rp);
@@ -115,7 +115,7 @@
tmp = nmz_get_hlist(i);
if (tmp.stat == ERR_FATAL)
return tmp;
- if (tmp.num > IGNORE_HIT) {
+ if (tmp.num > nmz_get_maxhit()) {
nmz_free_hlist(val);
val.stat = ERR_TOO_MUCH_HIT;
val.num = 0;
@@ -137,7 +137,7 @@
if (val.stat == ERR_FATAL)
return val;
}
- if (val.num > IGNORE_HIT) {
+ if (val.num > nmz_get_maxhit()) {
nmz_free_hlist(val);
val.num = -1;
break;
Index: nmz/search.c
===================================================================
RCS file: /storage/cvsroot/namazu/nmz/search.c,v
retrieving revision 1.74
diff -u -r1.74 search.c
--- nmz/search.c 2000/04/05 07:07:54 1.74
+++ nmz/search.c 2000/05/02 02:50:07
@@ -178,7 +178,7 @@
* Return if too much word would be hit
* because treat 'a*' completely is too consuming
*/
- if (j > IGNORE_MATCH) {
+ if (j > nmz_get_maxmatch()) {
nmz_free_hlist(val);
val.stat = ERR_TOO_MUCH_MATCH;
break;
@@ -192,7 +192,7 @@
tmp = nmz_get_hlist(i);
if (tmp.stat == ERR_FATAL)
return tmp;
- if (tmp.num > IGNORE_HIT) {
+ if (tmp.num > nmz_get_maxhit()) {
nmz_free_hlist(val);
val.stat = ERR_TOO_MUCH_MATCH;
break;
@@ -200,7 +200,7 @@
val = nmz_ormerge(val, tmp);
if (val.stat == ERR_FATAL)
return val;
- if (val.num > IGNORE_HIT) {
+ if (val.num > nmz_get_maxhit()) {
nmz_free_hlist(val);
val.stat = ERR_TOO_MUCH_MATCH;
break;
Index: po/namazu.pot
===================================================================
RCS file: /storage/cvsroot/namazu/po/namazu.pot,v
retrieving revision 1.237
diff -u -r1.237 namazu.pot
--- po/namazu.pot 2000/04/26 03:18:05 1.237
+++ po/namazu.pot 2000/05/02 02:50:07
@@ -6,7 +6,7 @@
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
-"POT-Creation-Date: 2000-04-26 12:12+0900\n"
+"POT-Creation-Date: 2000-05-02 11:11+0900\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@xxxxxx>\n"
@@ -132,41 +132,41 @@
msgstr ""
#. terminator not matched
-#: src/rcfile.c:315
+#: src/rcfile.c:343
msgid "can't find string terminator"
msgstr ""
-#: src/rcfile.c:431
+#: src/rcfile.c:459
msgid "invalid directive name"
msgstr ""
-#: src/rcfile.c:444
+#: src/rcfile.c:472
msgid "can't find arguments"
msgstr ""
-#: src/rcfile.c:547
+#: src/rcfile.c:575
msgid "too few arguments"
msgstr ""
-#: src/rcfile.c:550
+#: src/rcfile.c:578
msgid "too many arguments"
msgstr ""
-#: src/rcfile.c:560
+#: src/rcfile.c:588
msgid "unknown directive"
msgstr ""
-#: src/rcfile.c:611
+#: src/rcfile.c:639
#, c-format
msgid "%s:%d: syntax error: %s"
msgstr ""
-#: src/rcfile.c:716
+#: src/rcfile.c:744
#, c-format
msgid "Loaded rcfile: %s\n"
msgstr ""
-#: src/rcfile.c:721
+#: src/rcfile.c:749
#, c-format
msgid ""
"Index: %s\n"
@@ -174,15 +174,17 @@
"Lang: %s\n"
"Scoring: %s\n"
"Template: %s\n"
+"MaxHit: %d\n"
+"MaxMatch: %d\n"
"EmphasisTags: %s\t%s\n"
msgstr ""
-#: src/rcfile.c:738
+#: src/rcfile.c:769
#, c-format
msgid "Alias: %-20s\t%s\n"
msgstr ""
-#: src/rcfile.c:748
+#: src/rcfile.c:779
#, c-format
msgid "Replace: %-20s\t%s\n"
msgstr ""
Index: src/rcfile.c
===================================================================
RCS file: /storage/cvsroot/namazu/src/rcfile.c,v
retrieving revision 1.23
diff -u -r1.23 rcfile.c
--- src/rcfile.c 2000/02/23 08:26:03 1.23
+++ src/rcfile.c 2000/05/02 02:50:07
@@ -114,6 +114,8 @@
static enum nmz_stat process_rc_lang ( const char *directive, const StrList *args );
static enum nmz_stat process_rc_emphasistags ( const char *directive, const StrList *args );
static enum nmz_stat process_rc_template ( const char *directive, const StrList *args );
+static enum nmz_stat process_rc_maxhit ( const char *directive, const StrList *args );
+static enum nmz_stat process_rc_maxmatch ( const char *directive, const StrList *args );
struct conf_directive {
char *name;
@@ -134,6 +136,8 @@
{ "LANG", 1, 0, process_rc_lang },
{ "EMPHASISTAGS", 2, 0, process_rc_emphasistags },
{ "TEMPLATE", 1, 0, process_rc_template },
+ { "MAXHIT", 1, 0, process_rc_maxhit },
+ { "MAXMATCH", 1, 0, process_rc_maxmatch },
{ NULL, 0, 0, NULL }
};
@@ -262,6 +266,30 @@
return SUCCESS;
}
+static enum nmz_stat
+process_rc_maxhit(const char *directive, const StrList *args)
+{
+ int arg1 = atoi(args->value);
+
+ if (arg1 <= 0) {
+ return FAILURE;
+ }
+ nmz_set_maxhit(arg1);
+ return SUCCESS;
+}
+
+static enum nmz_stat
+process_rc_maxmatch(const char *directive, const StrList *args)
+{
+ int arg1 = atoi(args->value);
+
+ if (arg1 <= 0) {
+ return FAILURE;
+ }
+ nmz_set_maxmatch(arg1);
+ return SUCCESS;
+}
+
/*
* Get the environment variable of NAMAZURC, NAMAZUCONF or
* NAMAZUCONFPATH. and return it. Original of this code is
@@ -724,10 +752,13 @@
Lang: %s\n\
Scoring: %s\n\
Template: %s\n\
+MaxHit: %d\n\
+MaxMatch: %d\n\
EmphasisTags: %s\t%s\n\
"), nmz_get_defaultidx(), nmz_is_loggingmode() ? "on" : "off",
nmz_get_lang(), nmz_is_tfidfmode() ? "tfidf" : "simple",
get_templatedir(),
+ nmz_get_maxhit(), nmz_get_maxmatch(),
get_emphasis_tag_start(), get_emphasis_tag_end()
);