namazu-dev(ring)
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Using s/pat/subst/ in REPLACE
- From: Junio Hamano <junio@xxxxxxxxxxx>
- Date: Thu, 8 Jul 1999 23:24:26 -0700 (PDT)
以下は、.namazurc の REPLACE の左辺に regexp 、右辺に pattern を
書けるようにするパッチです。
REPLACE /repository/(.*)%2Cv http://h/cvsweb/\1
REPLACE /repository/(.*)/Attic/(.*)%2Cv http://h/cvsweb/\1/\2
と .namazurc に書くと、namazu で CVS レポジトリの昔の版にある文字
列まで全体に索引をつけておいて、検索した結果を、cvsweb で表示する、
なんてことができるようになります。
本当なら、struct replace を全部書換えて、
struct replace_elem {
struct replace_elem *next;
uchar *src;
uchar *dst;
/* The following fields are NULL if this is a traditional
* string substitution
*/
struct re_pattern_buffer *src_exp;
struct subst_elem {
enum { literal, regex_regno } subst_type;
union {
uchar *literal_string;
int regex_regno;
} u;
};
};
なんてやって、conf.c で REPLACE を読み込むところで regexp のコン
パイルと、置き換え文字列の準備まで先に全部やってしまうのがよいの
ですが、かなり大規模な書き換えになるので、TODO-namazu-2.0 にある
「Replace と Alias をもっときれいに書く」までのつなぎとして、変更
個所が最小限になるような形でパッチにしてあります。
まずは使ってみてください。
RCS file: /circus/cvsroot/namazu/src/re_match.c,v
retrieving revision 1.4
diff -u -r1.4 re_match.c
--- src/re_match.c 1999/06/12 14:29:31 1.4
+++ src/re_match.c 1999/07/09 05:41:03
@@ -50,6 +50,82 @@
strcpy(tmp, s);
while (list.src) {
+ if (strpbrk (list.src->str, ".*")) {
+ struct re_registers regs;
+ Regexp *re;
+ int mlen;
+ int is_a_regexp_match = 0;
+
+ regs.allocated = 0;
+ re = ALLOC(Regexp);
+ MEMZERO((char *)re, Regexp, 1);
+ re->buffer = 0;
+ re->allocated = 0;
+ if (re_compile_pattern (list.src->str, strlen (list.src->str), re))
+ /* re_comp fails; maybe it was not a regexp substitution
+ * after all. Fall back to string substitution for backward
+ * compatibility.
+ */
+ is_a_regexp_match = 0;
+ else if (0 < (mlen = re_match (re, tmp, strlen (tmp), 0, &regs))) {
+ /* We got a match. Try to replace the string. */
+ uchar repl[BUFSIZE];
+ uchar *subst = list.dst->str;
+ /* Assume we are doing regexp match for now; if any of the
+ * substitution fails, we will switch back to the straight
+ * string substitution.
+ */
+ is_a_regexp_match = 1;
+ for (i = j = 0; subst[i]; i++) {
+ /* i scans through RHS of sed-style substitution.
+ * j points at the string being built.
+ */
+ if ((subst[i] == '\\') &&
+ ('0' <= subst[++i]) &&
+ (subst[i] <= '9')) {
+ /* A backslash followed by a digit---regexp substitution.
+ * Note that a backslash followed by anything else is
+ * silently dropped (including a \\ sequence) and is
+ * passed on to the else clause.
+ */
+ int regno = subst[i] - '0';
+ int ct;
+ if (re->re_nsub <= regno) {
+ /* Oops; this is a bad substitution. Just give up
+ * and use straight string substitution for backward
+ * compatibility.
+ */
+ is_a_regexp_match = 0;
+ break;
+ }
+ for (ct = regs.beg[regno]; ct < regs.end[regno]; ct++)
+ repl[j++] = tmp[ct];
+ }
+ else {
+ /* Either ordinary character, or an unrecognized \ sequence.
+ * Just copy it.
+ */
+ repl[j++] = subst[i];
+ }
+ }
+ if (is_a_regexp_match) {
+ /* Good. Regexp substitution worked and we now have a good
+ * string in repl.
+ * The part that matched and being replaced is 0 to mlen-1
+ * in tmp; tmp[mlen] through the end of it should be
+ * concatenated to the end of the resulting string.
+ */
+ repl[j] = 0;
+ strcpy (s, repl);
+ strcpy (s + j, tmp + mlen);
+ }
+ re_free_registers (&regs);
+ }
+ re_free_pattern (re);
+ if (is_a_regexp_match)
+ return 0;
+ /* Otherwise, we fall back to string substitution */
+ }
n_src = strlen(list.src->str);
n_dst = strlen(list.dst->str);