namazu-ml(ring)
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: namazu.confの REPLACEが無視されてしまう
Yu Imai <s98106yi@xxxxxxxxxxxxxx> wrote:
>/a/fs0603a/s98106yi/public_html/index.html
>
>というファイルを、
>
>http://www.sfc.keio.ac.jp/~s98106yi/index.html
>
>に置換しようと思い、namazu.confに
>
>REPLACE /a/fs0603a/ http://www.sfc.keio.ac.jp/~
>REPLACE /public_html /
>
>と記述しているのですが、検索結果には
2重に置換することはできません。
REPLACE /a/fs0603a/s98106yi/public_html/ http://www.sfc.keio.ac.jp/~s98106yi/
と指定してくださいませ。
ちなみに、開発中のヴァージョンなら
REPLACE /a/fs0603a/(.*)/public_html/ http://www.sfc.keio.ac.jp/~\1/
と正規表現で置換できます。
あ、そういえば (正規表現での置換を実現する) 1.3.0.8 用のパッ
チをもらっていたのだ (忘れていてごめんなさい)。メイル末尾に
添付しておきます。うまく 1.3.0.10 に適用できるといいんだけど。
# パッチの作者は Junio Hamano <junio@xxxxxxxxxxx> さんです
-- Satoru Takabayashi
rcsdiff -r1.3.0.8 -r1.3.0.8.0 -u *,v
===================================================================
RCS file: hlist.c,v
retrieving revision 1.3.0.8
retrieving revision 1.3.0.8.0.1
diff -u -r1.3.0.8 -r1.3.0.8.0.1
--- hlist.c 1999/07/12 07:38:35 1.3.0.8
+++ hlist.c 1999/07/12 07:41:11 1.3.0.8.0.1
@@ -30,6 +30,7 @@
#include <math.h>
#include "namazu.h"
#include "util.h"
+#include "regex.h"
/* merge the left and right with AND rule */
HLIST andmerge(HLIST left, HLIST right, int *ignore)
@@ -448,6 +449,105 @@
}
}
+/* FROM_STRING_PTR and TO_STRING_PTR point to the original
+ * (replace-from, replace-to) pair.
+ * If they are for regexp substitution and if the target string
+ * contained in TMP matches the replace-from pattern at the
+ * beginning, fill REPLACE_FROM_BUF and REPLACE_TO_BUF with
+ * a (replace-from, replace-to) pair that is suitable to be used
+ * for string substitution. That is, when:
+ * tmp = "ab/cd", *from_string_ptr = "(.*)/(.*)", *to_string_ptr = "\1\2",
+ * store "ab/cd" in replace_from_buf[], "abcd" in replace_to_buf[],
+ * and update *from_string_ptr and *to_string_ptr to point to them.
+ */
+void replace_using_regexp (uchar *tmp,
+ uchar **from_string_ptr,
+ uchar **to_string_ptr,
+ uchar *replace_from_buf,
+ uchar *replace_to_buf)
+{
+ uchar *replace_from = *from_string_ptr;
+ uchar *replace_to = *to_string_ptr;
+ int i, j;
+
+ if (strpbrk (replace_from, ".*")) {
+ struct re_registers regs;
+ struct re_pattern_buffer *re;
+ int mlen;
+ int is_a_regexp_match = 0;
+
+ regs.allocated = 0;
+ re = malloc(sizeof (*re));
+ memset (re, 0, sizeof (*re));
+ re->buffer = 0;
+ re->allocated = 0;
+ if (re_compile_pattern (replace_from, strlen (replace_from), re))
+ /* re_comp fails; maybe it was not a regexp substitution
+ * after all. Fall back to string substitution for backward
+ * compatibility.
+ */
+ is_a_regexp_match = 0;
+ else if (0 < (mlen = re_match (re, tmp, strlen (tmp), 0, &regs))) {
+ /* We got a match. Try to replace the string. */
+ uchar *subst = replace_to;
+ /* Assume we are doing regexp match for now; if any of the
+ * substitution fails, we will switch back to the straight
+ * string substitution.
+ */
+ is_a_regexp_match = 1;
+ for (i = j = 0; subst[i]; i++) {
+ /* i scans through RHS of sed-style substitution.
+ * j points at the string being built.
+ */
+ if ((subst[i] == '\\') &&
+ ('0' <= subst[++i]) &&
+ (subst[i] <= '9')) {
+ /* A backslash followed by a digit---regexp substitution.
+ * Note that a backslash followed by anything else is
+ * silently dropped (including a \\ sequence) and is
+ * passed on to the else clause.
+ */
+ int regno = subst[i] - '0';
+ int ct;
+ if (re->re_nsub <= regno) {
+ /* Oops; this is a bad substitution. Just give up
+ * and use straight string substitution for backward
+ * compatibility.
+ */
+ is_a_regexp_match = 0;
+ break;
+ }
+ for (ct = regs.beg[regno]; ct < regs.end[regno]; ct++)
+ replace_to_buf[j++] = tmp[ct];
+ }
+ else {
+ /* Either ordinary character, or an unrecognized \ sequence.
+ * Just copy it.
+ */
+ replace_to_buf[j++] = subst[i];
+ }
+ }
+ if (is_a_regexp_match) {
+ /* Good. Regexp substitution worked and we now have a good
+ * string in replace_to_buf. Fake replace_from and replace_to
+ * as if these matched string pairs were specified in the
+ * replacement list as literal substitutions.
+ */
+ replace_to_buf[j] = 0;
+ *to_string_ptr = replace_to = replace_to_buf;
+ strcpy (replace_from_buf, tmp);
+ replace_from_buf[mlen] = 0;
+ *from_string_ptr = replace_from_buf;
+ }
+ re_free_registers (&regs);
+ }
+ re_free_pattern (re);
+ /* We behave as if replace_from and replace_to specified the
+ * literal string pairs from the beginning.
+ */
+ }
+}
+
/* replace a URL */
void replace_url(uchar * s, int opt)
{
@@ -458,16 +558,27 @@
strcpy(tmp, s);
for(n=0;n<url_no;n++) {
- n_from = strlen(URL_REPLACE_FROM[n]);
- n_to = strlen(URL_REPLACE_TO[n]);
+ uchar *replace_from = URL_REPLACE_FROM[n];
+ uchar *replace_to = URL_REPLACE_TO[n];
+ uchar replace_from_buf[BUFSIZE];
+ uchar replace_to_buf[BUFSIZE];
+
+ replace_using_regexp (tmp,
+ &replace_from,
+ &replace_to,
+ replace_from_buf,
+ replace_to_buf);
+
+ n_from = strlen(replace_from);
+ n_to = strlen(replace_to);
- if (!strncmp(URL_REPLACE_FROM[n], tmp, n_from)) {
- strcpy(s, URL_REPLACE_TO[n]);
+ if (!strncmp(replace_from, tmp, n_from)) {
+ strcpy(s, replace_to);
for (i = n_from, j = n_to; tmp[i] != '>'; i++, j++)
s[j] = tmp[i];
s[j++] = tmp[i++];
- if (opt && !strncmp(URL_REPLACE_FROM[n], tmp + i, n_from)) {
- strcpy(s + j, URL_REPLACE_TO[n]);
+ if (opt && !strncmp(replace_from, tmp + i, n_from)) {
+ strcpy(s + j, replace_to);
i += n_from;
j += n_to;
}
===================================================================
RCS file: re_match.c,v
retrieving revision 1.3.0.8
retrieving revision 1.3.0.8.0.1
diff -u -r1.3.0.8 -r1.3.0.8.0.1
--- re_match.c 1999/07/12 07:38:35 1.3.0.8
+++ re_match.c 1999/07/12 07:41:11 1.3.0.8.0.1
@@ -38,6 +38,12 @@
#define STEP 256
+void replace_using_regexp (uchar *tmp,
+ uchar **from_string_ptr,
+ uchar **to_string_ptr,
+ uchar *replace_from_buf,
+ uchar *replace_to_buf);
+
void replace(uchar *s)
{
int n;
@@ -45,16 +51,26 @@
uchar tmp[BUFSIZE];
strcpy(tmp, s);
- for(n=0;n<url_no;n++) {
- n_from = strlen(URL_REPLACE_FROM[n]);
- n_to = strlen(URL_REPLACE_TO[n]);
-
- if (!strncmp(URL_REPLACE_FROM[n], tmp, n_from)) {
- strcpy(s, URL_REPLACE_TO[n]);
- for (i = n_from, j = n_to; tmp[i] != '\0'; i++, j++)
- s[j] = tmp[i];
- s[j] = '\0';
- }
+ for(n=0;n<url_no;n++) {
+ uchar *replace_from = URL_REPLACE_FROM[n];
+ uchar *replace_to = URL_REPLACE_TO[n];
+ uchar replace_from_buf[BUFSIZE];
+ uchar replace_to_buf[BUFSIZE];
+ replace_using_regexp (tmp,
+ &replace_from,
+ &replace_to,
+ replace_from_buf,
+ replace_to_buf);
+
+ n_from = strlen(replace_from);
+ n_to = strlen(replace_to);
+
+ if (!strncmp(replace_from, tmp, n_from)) {
+ strcpy(s, replace_to);
+ for (i = n_from, j = n_to; tmp[i] != '\0'; i++, j++)
+ s[j] = tmp[i];
+ s[j] = '\0';
+ }
}
}