diff -u2rN -x awktab.c gawk-3.0.6/ChangeLog.MB gawk-3.0.6+mb1.15/ChangeLog.MB --- gawk-3.0.6/ChangeLog.MB Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/ChangeLog.MB Wed May 16 01:17:50 2001 @@ -0,0 +1,331 @@ +2001-05-16 okabe katsuyuki + + * Version 3.0.6 + multi-byte extension 1.15. + +2001-02-16 okabe katsuyuki + + * Version 3.0.4 + multi-byte extension 1.14. + +2000-12-01 okabe katsuyuki + + * regex.c: 以下の点を修正。 + "あ*[あ]" が "あ","ああ" などにマッチしない。 + "[あ]*[あ]" が "あ","ああ" などにマッチしない。 + "[^あ]*[い]" が "い","ない" などにマッチしない。 + (Thanks to 河村雅夫さん ) + +2000-06-18 okabe katsuyuki + + * Version 3.0.4 + multi-byte extension 1.13. + +2000-06-15 okabe katsuyuki + + * main.c (main): gawkにて実行時には通常のgawkとして動作するように変 + 更。 + + * mbc.h (MBCTYPE_DEFAULT): jgawkにて実行したときの使用文字コードマク + ロ追加。 + + * mbc.c (ASCII,EUC,SJIS,UTF8): GAWKマクロが定義されているときには + undefするようにした。 + +2000-03-06 okabe katsuyuki + + * awk.y (yylex): 新出@奈良女子大学さんのパッチを適用。 + +2000-02-08 okabe katsuyuki + + * eval.c (func_name;): 新出@奈良女子大学さん + がfj.comp.lang.awkに投稿された Message-ID: + <87gf09$199$1@narans.cc.nara-wu.ac.jp> のパッチを適用。 + +2000-01-29 okabe katsuyuki + + * pc/xsystem.c (isk1): 引数をunsigned charでcastしていなかったため第 + 7bitの立っている文字を調べる際にdjgpp版ではSIGSEGVが起きていたのを修 + 正。(Thanks to 畑浩一さん ) + +2000-01-27 okabe katsuyuki + + * dfa.c (lex): 文字クラスがマルチバイト文字+ハイフンで終了するとき, + 最後のマルチバイト文字がマッチングの対象になっていなかったのを修正. + 否定文字クラス中にマルチバイト文字の範囲指定があるとき必要以上の文字 + がマッチングの対象となっていたのを修正. (Thanks to 畑浩一さん + ) + +1999-09-12 okabe katsuyuki + + * pc/xsystem.c (is_unixy_shell): 拡張子.EXEの省略対応のためのバッファ + shellexeのサイズが小さかったのを直した. (Thanks to しろおかさん + , Fe2+さん ) + +1999-08-29 okabe katsuyuki + + * Version 3.0.4 + multi-byte extension 1.12. + +1999-07-18 okabe katsuyuki + + * ベースをgawk-3.0.4へ. pc 版には関数 os_system が追加されているが + multi-byte extension では mingw32 版を除き今まで通り xsystem を使用. + + * pc/xsystem.c (is_unixy_shell): shellの拡張子.EXEを省略してもよいよ + うに変更. + +1998-08-08 okabe katsuyuki + + * Version 3.0.3 + multi-byte extension 1.11. 
+ +1998-08-05 okabe katsuyuki + + * io.c, pc/Makefile, pc/gawkmisc.pc, pc/getid.c, pc/igawk.c, + pc/popen.c, pc/xsystem.c: Visual C++ 5.0 でコンパイルできるように + 修正. + + +1998-07-28 okabe katsuyuki + + * builtin.c (sub_common): //がマルチバイト文字に対応していなかっ + たのを修正. (Thanks to 渡辺博文さん , + BEVERLEYBOY さん ) + +Sat Aug 23 18:05:45 1997 Katsuyuki Okabe (wills at x68k) + + * Version 3.0.3 + multi-byte extension 1.10. + +Tue Aug 12 11:16:45 1997 Katsuyuki Okabe (wills at x68k) + + * pc/xsystem.c (pars1c): "file2>file" といったリダイレクトが + "file 2>file" と解釈されるのを直した. (Thanks to Fe2+ さん + ) + + * pc/xsystem.c (is_unixy_shell): djgpp v2.01 以降ではライブラリ + の _is_unixy_shell() を使用していたのを is_unixy_shell を使用す + るように直した. 変数 shells から "TCSH.EXE" を削除し "KSH.EXE", + "ZSH.EXE" を追加した. + + * pc/xsystem.c (xsystem): 環境変数 SHELL より環境変数 AWKSHELL + を優先するようにした. + +Mon Jun 16 13:27:41 1997 Katsuyuki Okabe (wills at x68k) + + * Version 3.0.3 + multi-byte extension 1.09. + +Sat Jun 14 20:44:47 1997 Katsuyuki Okabe (wills at x68k) + + * builtin.c (format_tree): マルチバイト文字を読み飛ばすときにポ + インタ s1 への加算が 1 少なかったのを直した. (Thanks to 印牧秀育 + さん ) + +Sat Jun 07 13:07:16 1997 Katsuyuki Okabe (wills at x68k) + + * Version 3.0.3 + multi-byte extension 1.08. + +Fri Jun 06 19:49:11 1997 Katsuyuki Okabe (wills at x68k) + + * io.c (mmap_get_record): grRS を grRSp に変更するのを忘れていた + のを直した. (Thanks to 渡辺博文さん ) + +Sat May 31 09:21:24 1997 Katsuyuki Okabe (wills at x68k) + + * regex.c (is_in_list): isutf8 の設定中 current_mbctype に代入を + 行なっていたため以後の動作が異常なことがあったのを直した. + (Thanks to しろおかさん ) + +Thu May 29 00:55:28 1997 Katsuyuki Okabe (wills at x68k) + + * builtin.c (do_jsubstr): do_substr に合わせて書き換えた. + + * ベースを gawk-3.0.3 へ移行. + +Tue May 27 23:51:08 1997 Katsuyuki Okabe (wills at x68k) + + * Version 3.0.2 + multi-byte extension 1.07. + +Mon May 26 17:41:14 1997 Katsuyuki Okabe (wills at x68k) + + * regex.c [MAX_BUF_SIZE]: Borland C の時の値が (1L << 16) になっ + ていたのを直した. + + * awk.h: 新規マクロ SIGNED_SIZE_T. size_t を符号付き整数にキャス + トする. 
+ + * builtin.c (do_substr, do_jsubstr, sub_common): MSDOS の 16bit + Cコンパイラで問題となる size_t の long へのキャストを + SIGNED_SIZE_T マクロを通して long にキャストするようにした. + (Thanks to 伊藤秋彦さん ) + + * pc/config.h [HAVE_LIMITS_H]: MS-C, Borland C でも定義するよう + にした. + + * node.c (make_str_node): MSDOS の 16bit モデルで生成する文字列 + 長が INT_MAX を越えるようならば終了するようにした. + + * builtin.c: 新規マクロ allocate_space. MSDOS の 16bit モデルで + は生成する文字列が INT_MAX を越えるようならば終了する. + + * builtin.c [bchunk, bchunk_one, chksize]: allocate_space() を使 + うように書き換えた. (Thanks to Bruce. さん + ) + +Sat May 24 23:12:50 1997 Katsuyuki Okabe (wills at x68k) + + * regex.c: UTF8 に対応してみた. + + * re.c (make_regexp): dfa.[ch] が UTF8 未対応のためとりあえず + current_mbctype が MBCTYPE_UTF8 の時には DFA を生成しないように + しておく. + + * main.c (main): 拡張オプション -Wctype=utf8, ロングオプション + --ctype=utf8 を追加. + +Fri May 23 01:26:09 1997 Katsuyuki Okabe (wills at x68k) + + * io.c (get_a_record, mmap_get_record): 3 バイト以上からなるマル + チバイト文字に対応できるようにした. + +Thu May 22 10:51:33 1997 Katsuyuki Okabe (wills at x68k) + + * builtin.c (do_index, do_jindex, format_tree, do_jsubstr, + do_tolower, sub_common), field.c (re_parse_field, + null_parse_field, sc_parse_field), main.c (arg_assign), node.c + (make_str_node), re.c (make_regexp): 3 バイト以上からなるマルチ + バイト文字に対応できるようにした. + +Thu May 22 03:37:09 1997 Katsuyuki Okabe (wills at x68k) + + * awk.y (yylex): 3 バイト以上からなるマルチバイト文字に対応でき + るようにした. + +Thu May 22 00:07:02 1997 Katsuyuki Okabe (wills at x68k) + + * field.c (null_parse_field): split("あいうえお", ary, "") のよ + うな空文字での分割でマルチバイト文字を考慮してなかったのを修正. + (Thanks to Bruce. さん ) + + * node.c (make_str_node): "\xa0" のようなマルチバイト文字 1 バイ + ト目と見なせるエスケープ文字の次の文字を無条件にマルチバイト文字 + 2 バイト目として扱っていたのを扱わないようにした. + + * mbc.h: mbcfetch() のプロトタイプの追加. マルチバイト文字のバイ + ト数を返すマクロ mbclen() の追加. ワイド文字の型 widechar_t の追 + 加. + + * mbc.c: 新規関数 mbcfetch(). マルチバイト文字をワイド文字に変換 + して返す. + + * mbfuncs.c (mbstrncasecmp, mbmemcmp, mblength, mbbyte): 3 バイ + ト以上からなるマルチバイト文字に対応できるようにした. 
+ +Wed May 21 03:38:48 1997 Katsuyuki Okabe (wills at x68k) + + * awk.h: mbbyte() のプロトタイプを追加. + + * mbfuncs.c: 新規関数 mbbyte(). n バイトの文字列中のマルチバイト + 文字とみなせるバイト数を返す. + + * builtin.c (do_substr), node.c (make_str_node): mbbyte() を使用 + するようにした. + +Sat May 03 19:04:10 1997 Katsuyuki Okabe (wills at x68k) + + * Version 3.0.2 + multi-byte extension 1.06a. + + * pc/gawkmisc.pc (os_open): 環境が LFN をサポートしているか調べ + るようにした. + + * pc/xargs.c (lfn_supported): -1 での初期化を忘れていたため, LFN + の処理が働かないようになっていたのを直した. + +Sun Apr 27 17:16:18 1997 Katsuyuki Okabe (wills at x68k) + + * Version 3.0.2 + multi-byte extension 1.06. + + * pc/igawk.c: igawk.sh の C バージョン. + +Wed Apr 02 21:08:41 1997 Katsuyuki Okabe (wills at x68k) + + * io.c, pc/gawkmisc.pc (os_open), pc/xargs.[ch]: LFN に対応して + みた. + +Tue Feb 04 05:25:03 1997 Katsuyuki Okabe (wills at x68k) + + * io.c (do_pathopen): AWKPATH のパスセパレータを ':', ';' のどち + らでもよくした. x68k 版からの引用. + +Wed Jan 29 01:30:40 1997 Katsuyuki Okabe (wills at x68k) + + * pc/xargs.[ch]: serow 氏作 jgawk-2.15.2+1.1 から引用. sjis に依 + 存している部分を DBCS テーブルを参照するように変更した. + + * pc/xsystem.c: serow 氏作 jgawk-2.15.2+1.1 から引用. sjis に依 + 存している部分を DBCS テーブルを参照するように変更した. + djgpp v2.01 へ対応. + + * awk.h, main.c, re.c: serow 氏作 jgawk-2.15.2+1.1 から -Wmemory + を引用. + +Sun Jan 26 18:57:25 1997 Katsuyuki Okabe (wills at x68k) + + * builtin.c (do_substr, do_jsubstr): length が 0 以下のときの + warning 表示フォーマットの `%d' を length の long へのキャストに + 合わせて `%ld' に変更. + + * eval.c (op_assign): %= 演算子で 0 で割った時の fatal 表示の + `%=' はフォーマット指定と間違われることがあるので `%%=' に直した. + +Sat Jan 25 04:15:44 1997 Katsuyuki Okabe (wills at x68k) + + * io.c (get_a_record): RS がマルチバイト文字の 2 バイト目になれ + るようなシングルバイト文字のときレコードへの分割を失敗することが + あったのを直した. ファイルがマルチバイト文字の 1 バイト目で終了 + しているときはその 1 バイト目を削除するようになっていなかったの + を直した. + + * io.c (mmap_a_record): マルチバイト文字対応化. RS がシングルバ + イト文字 1 文字のときにレコードが RS で終了していない場合でも戻 + り値から RS の文字数である 1 を引いていたのを直した. + +Fri Jan 24 01:03:04 1997 Katsuyuki Okabe (wills at x68k) + + * awk.h: mblength() のプロトタイプを追加. 
+ + * mbfuncs.c (mblength): 追加. n バイトの文字列の文字数を返す. + + * builtin.c (do_jlength): mblength() を使用するようにした. + + * builtin.c (do_jsubstr): do_substr() の変更に合わせ変更. + + * ベースを gawk-3.0.2 へ移行. + +Tue Aug 13 14:49:26 1996 Katsuyuki Okabe (wills at x68k) + + * Version 3.0.0 + multi-byte extension 1.05. + + * builtin.c (do_index): IGNORECASE が真の時の検索でマルチバイト + 文字でも1バイトしかスキップしていなかったのを修正. + + * builtin.c (do_gensub): target が tmp_string() で作成していたた + め REGEXP にマッチしないとき sub_common() で解放されていたオリジ + ナルのバグを修正. + + * dfa.c (lex): syntax に RE_CONTEXT_INDEP_ANCHORS がセットされて + いないにもかかわらず ``^'' が正規表現の先頭にないときでもメタ文 + 字として働くことがあったオリジナルのバグを修正. (Thanks to + Bruce. さん , 21M さん + , BEVERLEYBOY さん + ) + + * main.c (gawk_option): -Wposix において do_traditional と + do_posix が共にセットされるというオリジナルのバグを修正. + + * ベースを gawk-3.0.0 へ移行. + + +Local Variables: +mode: indented-text +left-margin: 8 +fill-column: 72 +fill-prefix: " " +version-control: never +End: + diff -u2rN -x awktab.c gawk-3.0.6/Makefile.in gawk-3.0.6+mb1.15/Makefile.in --- gawk-3.0.6/Makefile.in Sun Jun 18 21:14:08 2000 +++ gawk-3.0.6+mb1.15/Makefile.in Wed May 16 01:25:15 2001 @@ -20,4 +20,7 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA +# Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) +# Last change: May. 16, 2001 by okabe katsuyuki + @SET_MAKE@ @@ -61,9 +64,18 @@ CPPFLAGS = @CPPFLAGS@ CFLAGS = @CFLAGS@ -COMPFLAGS = -DGAWK -I. -I$(srcdir) $(CPPFLAGS) @DEFS@ $(CFLAGS) +COMPFLAGS = -DGAWK -I. -I$(srcdir) $(CPPFLAGS) @DEFS@ $(CFLAGS) $(MBCTYPE_DEF) + +# Things you might set to MBCTYPE_DEF to spec. default multi-byte char type. +# -DEUC will make default multi-byte char type EUC and +# -DSJIS SJIS and +# -DUTF8 UTF8. +# If you do not set EUC/SJIS/UTF8, gawk assumes no +# multi-byte char as default. 
+MBCTYPE_DEF = @MBCTYPE_DEF@ # object files AWKOBJS = array.o builtin.o eval.o field.o gawkmisc.o io.o main.o \ - missing.o msg.o node.o re.o version.o + missing.o msg.o node.o re.o version.o \ + mbc.o mbfuncs.o ALLOBJS = $(AWKOBJS) awktab.o @@ -75,13 +87,15 @@ # source and documentation files SRC = array.c builtin.c eval.c field.c gawkmisc.c io.c main.c \ - missing.c msg.c node.c re.c version.c + missing.c msg.c node.c re.c version.c \ + mbc.c mbfuncs.c ALLSRC= $(SRC) awktab.c -AWKSRC= awk.h awk.y custom.h $(ALLSRC) patchlevel.h protos.h random.h +AWKSRC= awk.h awk.y custom.h $(ALLSRC) patchlevel.h protos.h random.h mbc.h LIBSRC = alloca.c dfa.c dfa.h regex.c regex.h getopt.h getopt.c getopt1.c random.c COPIES = missing/system.c missing/tzset.c \ + missing/memmove.c \ missing/memcmp.c missing/memcpy.c missing/memset.c \ missing/strncasecmp.c missing/strchr.c \ @@ -95,7 +109,8 @@ doc/gawk.tp doc/gawk.tps doc/gawk.vr doc/gawk.vrs -MISC = NEWS COPYING FUTURES Makefile.in PROBLEMS README PORTS POSIX.STD \ +MISC = NEWS COPYING FUTURES Makefile.in PROBLEMS README* PORTS POSIX.STD \ configure configure.in acconfig.h configh.in ACKNOWLEDGMENT \ - ChangeLog INSTALL LIMITATIONS install-sh mkinstalldirs aclocal.m4 \ + ChangeLog* INSTALL LIMITATIONS install-sh mkinstalldirs aclocal.m4 \ + OChangeLog.MB OREADME.MB \ stamp-h.in @@ -108,4 +123,6 @@ # Release of gawk. There can be no leading or trailing white space here! REL=3.0 +# Release of gawk+mb. +MBREL=1.15 # clear out suffixes list @@ -144,7 +161,7 @@ $(CC) -o gawk $(CFLAGS) $(LDFLAGS) $(ALLOBJS) $(LIBOBJS) $(REOBJS) $(LIBS) -$(ALLOBJS): awk.h dfa.h regex.h config.h custom.h +$(ALLOBJS): awk.h dfa.h regex.h config.h custom.h mbc.h -$(LIBOBJS): config.h custom.h +$(LIBOBJS): config.h custom.h mbc.h # SunOS make's (at least) VPATH doesn't do subdirectories... 
@@ -152,5 +169,5 @@ gawkmisc.o: $(srcdir)/atari/gawkmisc.atr \ $(srcdir)/pc/gawkmisc.pc $(srcdir)/posix/gawkmisc.c \ - $(srcdir)/vms/gawkmisc.vms + $(srcdir)/vms/gawkmisc.vms $(srcdir)/human/gawkmisc.x68 $(CC) -c $(COMPFLAGS) -DDEFPATH='$(DEFPATH)' $(srcdir)/gawkmisc.c @@ -170,12 +187,14 @@ # VMS POSIX make won't apply the default .c.o rule to awktab.o for some reason -awktab.o: awktab.c awk.h +awktab.o: awktab.c awk.h mbc.h $(CC) -c $(COMPFLAGS) $(srcdir)/awktab.c alloca.o: alloca.c +mbc.o: Makefile + install: gawk info installdirs -rm -f $(bindir)/gawk - fullname=gawk-$(REL).`./gawk '{print $$3}' $(srcdir)/patchlevel.h` ; \ + fullname=gawk-$(REL).`./gawk '{print $$3}' $(srcdir)/patchlevel.h`+mb$(MBREL) ; \ $(INSTALL_PROGRAM) gawk $(bindir)/$$fullname ; \ (cd $(bindir); $(LN) $$fullname gawk) @@ -197,5 +216,5 @@ (cd $(bindir); \ if [ -f awk ] && cmp awk gawk > /dev/null; then rm -f awk; fi) - (fullname=gawk-$(REL).`gawk '{print $$3}' $(srcdir)/patchlevel.h` ; \ + (fullname=gawk-$(REL).`gawk '{print $$3}' $(srcdir)/patchlevel.h`+mb$(MBREL) ; \ cd $(bindir); \ if cmp gawk $$fullname; then rm -f gawk; fi ; \ @@ -262,5 +281,5 @@ dist: $(AWKSRC) $(LIBSRC) $(DOCS) $(MISC) $(COPIES) awklib/stamp-eg info distclean -rm -rf gawk-$(REL)* - dir=gawk-$(REL).`gawk '{print $$3}' patchlevel.h` && \ + dir=gawk-$(REL).`gawk '{print $$3}' patchlevel.h`+mb$(MBREL) && \ mkdir $$dir && \ cp -pr $(AWKSRC) $(LIBSRC) $(MISC) $$dir && \ diff -u2rN -x awktab.c gawk-3.0.6/OChangeLog.MB gawk-3.0.6+mb1.15/OChangeLog.MB --- gawk-3.0.6/OChangeLog.MB Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/OChangeLog.MB Fri May 4 21:20:14 2001 @@ -0,0 +1,201 @@ +Wed May 3 01:14:56 1995 Takahiro Tanimoto (tt@isaac) + + * Version 2.15.6 + multi-byte extension 1.04 released. + +Tue May 2 14:47:58 1995 Takahiro Tanimoto (tt@isaac) + + * field.c (sc_parse_field): MB 対応の洩れがあり, SJIS のとき, フィー + ルド分割に失敗することがあった. (Thanks to "Yasushi Suzudo" + ) + + * ベースを gawk-2.15.6 へ移行. 
+ +Fri Oct 7 15:23:18 1994 Takahiro Tanimoto (tt@isaac) + + * eval.c (r_tree_eval): 10個 (マクロ STACKSIZE) を越える文字列 + の連接の評価で正しく評価されないことがあったオリジナルのバグを修 + 正. (Thanks to 大西力さん ) + + * ベースを gawk-2.15.5 へ移行. + +Tue Aug 30 09:03:45 1994 Takahiro Tanimoto (tt@isaac) + + * Version 2.15.4 + multi-byte extension 1.03 released. + + * main.c (arg_assign): コマンドライン上でも, マルチバイト文字を + 含む変数名を使用できるように修正. + +Mon Aug 29 05:43:17 1994 Takahiro Tanimoto (tt@isaac) + + * test/Makefile.dos, test/badargs.dos: DOS 用に追加. + + * test/badargs.good: +mb 用に修正. + + * regex.c (re_match_2): 前日の修正ミスを訂正. + + * test/Makefile, test/argarray.good: オリジナルのバグ(?)を修正. + 環境変数のチェック部分がおかしかった. + +Mon Aug 29 04:04:18 1994 Takahiro Tanimoto (tt at pc98) + + * Makefile.emx, Makefile.os2: サポートの追加. (ただし未確認) + + * Makefile.go32, config.go32: DJGPP サポートの追加. + +Sun Aug 28 03:05:30 1994 Takahiro Tanimoto (tt@isaac) + + * node.c (make_str_node): `\' の直後のマルチバイト文字の処理を改 + 善. + + * main.c, msg.c, ...: NR, FNR, sourceline を int から long へ変 + 更. + + * dfa.c (setcodeset): 複数の dfa を同時使用した場合に問題があっ + た部分を修正. + + * builtin.c (sub_common): オリジナルのバグを修正. realloc() が必 + 要か否かの判断で, 条件式が間違っていた. 2.15.4 では, 32 ビットマ + シンで正しく動くようにキャストが追加されていたが, 16 ビットマシ + ンでは依然として間違っていた. (Thanks to 大西力さん + ) + + * builtin.c (do_lower, do_upper): islower(), isupper() への引数 + を (unsigned char) へキャスト. + + * builtin.c (do_jsubstr): 2.15.4 に従い, warning の出力を追加し + た. + + * builtin.c (do_substr): オリジナルの妙な部分を修正. position + + length が元の文字列の長さを超えるとき, メッセージを出力してから + length を修正する. + + * builtin.c (do_sprintf): 書式指定文字列中で `%' の後にマルチバ + イト文字が現れたとき, 正しく無視するようにした. + +Sat Aug 26 16:19:56 1994 Takahiro Tanimoto (tt@isaac) + + * ベースを gawk-2.15.4 へ移行開始. + +Sat Mar 5 16:07:45 1994 Takahiro Tanimoto (tt@isaac) + + * pc/README.MSC: PC-9800 シリーズ用 MS-C 6.00A の, ワイルドカー + ド展開ルーチンのバグに対処した. 以前の stdargv.diff をこれに統 + 合し, 削除した. 
(Thanks to 福浩邦さん + ) + +Thu Aug 19 04:11:27 1993 Takahiro Tanimoto (tt@isaac) + + * regex.c (re_compile_fastmap): charset_not の fastmap の作成処 + 理が間違っていて, regex の fastmap を使用する場合 (gawk では, 例 + えば IGNORECASE がセットされているとき), 正規表現の先頭の [^A] + や [^a] に例えば B がマッチしなかった. (Thanks to 小屋良祐さん + ) + +Tue Aug 10 01:11:24 1993 Takahiro Tanimoto (tt@isaac) + + * regex.c (set_list_bits): 文字クラス中のマルチバイト文字の最適 + 化で, 区間終点の更新処理部分にバグがあり, [A-CE-GB-D] を最 + 適化すると [A-G] だが, これが [A-E] となってしまっていた. + ただし, regex ではなく dfa が使用される場合にはこのバグは表面に + は現れない. + +Fri Jul 23 00:33:11 1993 Takahiro Tanimoto (tt@isaac) + + * Version 2.15.2 + multi-byte extension 1.02 released. + +Thu Jul 22 00:17:18 1993 Takahiro Tanimoto (tt@albert) + + * pc/stdargv.diff: 追加. + + * regex.c (re_match_2): charset(_not)? の処理で一箇所キャストが + 間違っていた. + + * main.c (gawk_option): オプション -Wctype=SJIS が -Wctype=EUC + と同じ効果となるバグを修正. + + * grep-2.0+mb1.01 から mbc.[ch] を輸入. 多くのファイルをそれに + 従って書き換えた. + +Tue Jul 20 20:35:42 1993 Takahiro Tanimoto (tt@albert) + + * awk.y (snode): jlength の引数が省略されたときの処理が抜けてい + た. (Thanks to 高橋衛さん . バグ + フィックスも高橋さんによる. 同様の指摘, およびバグフィックスを + 村上敬一郎さん からも頂いた.) + +Fri Jul 9 01:01:19 1993 Takahiro Tanimoto (tt@isaac) + + * Version 2.15.2 + multi-byte extension 1.01 released. + + * dfa.c (setcodeset): マルチバイト文字の 2 バイト目から '\0' を + 除外する処理が抜けていた. + +Thu Jul 8 23:59:31 1993 Takahiro Tanimoto (tt@isaac) + + * main.c (main, gawk_option): 拡張オプション -N, -E, -S を + -Wctype=ascii, -Wctype=euc, -Wctype=sjis へ変更した. また, これ + に合わせて, ロングオプションも, --ctype=... とした. 当初からこ + うした方がいいのではという意見もあったのと, GNU grep-2.0 で -E + が別の意味に使用されたため, 変更に踏み切った. + +Sat Jul 3 06:58:54 1993 Takahiro Tanimoto (tt@isaac) + + * Version 2.15.2 + multi-byte extension 1.00 released. + + * dfa.c (syntax_bits): syntax_bits が long になっていないという + オリジナルのバグがあった. + + * regex.c (re_compile_pattern): 文字クラス中の範囲指定の終りのマ + ルチバイト文字の判定が間違っていた. + + * regex.c (re_match_2): 文字クラスの処理中の 16 ビット int で正 + 常動作しない部分を修正. 
+ +Fri Jul 2 12:59:36 1993 Takahiro Tanimoto (tt@isaac) + + * regex.c (set_list_bits), mbfuncs.c (mbmemcmp): 使用していなかっ + た変数を削除した. + +Sun Jun 27 16:58:25 1993 Takahiro Tanimoto (tt@isaac) + + * Makefile.bsd44: サポートの追加. (ただし未確認.) + +Sun Jun 27 16:19:43 1993 Takahiro Tanimoto (tt at pc98) + + * config/msc60 pc/Makefile pc/make.bat pc/names.lnk: MS-DOS のサ + ポートを追加. (ただし, MS-C 6.0 のみ.) + +Tue Jun 1 21:17:27 1993 Takahiro Tanimoto (tt@isaac) + + * Version 2.15.2 + multi-byte extension 0.03 released. + + * main.c (version): multi-byte extension 0.03. + +Sun May 30 05:50:22 1993 Takahiro Tanimoto (tt@isaac) + + * main.c: mbversion_string の変数宣言を削除した. (使用していな + かった.) + +Tue May 25 00:35:28 1993 Takahiro Tanimoto (tt@isaac) + + * awk.h: mbstrncasecmp(), mbmemcmp() のプロトタイプを追加. + + * mbfuncs.c (mbmemcmp): 追加. バイト毎の比較ではなく, 文字毎に + 比較する. 実際のビットパターンによらず, シングルバイト文字はマ + ルチバイト文字よりも小さいとみなす. これにより, 正規表現の文字 + クラスの扱いと同様, (ASCII 文字) < (半角カナ文字) < (全角文字) + の関係が成り立つ. + + * eval.c (cmp_nodes): 文字列比較に memcmp() を使用するのをやめた. + mbmemcmp() を作成することにした. + + * Version 2.15.2 + multi-byte extension 0.02 released. + + +Local Variables: +mode: indented-text +left-margin: 8 +fill-column: 72 +fill-prefix: " " +version-control: never +End: diff -u2rN -x awktab.c gawk-3.0.6/OREADME.MB gawk-3.0.6+mb1.15/OREADME.MB --- gawk-3.0.6/OREADME.MB Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/OREADME.MB Fri May 4 21:20:26 2001 @@ -0,0 +1,387 @@ +●● Gnu Awk (gawk) 2.15, patchlevel 6 + multi-byte extension 1.04 ●● +●● May 3, 1995 by t^2 ●● + + gawk-2.15.6+mb1.04 -- マルチバイト文字対応版 Gnu Awk + +●概要 + + GNU プロジェクトによる awk (以下 gawk) をマルチバイト文字対応化した + ものです. + +●使用法 + + gawk からの拡張部分だけを説明します. + + 増えたオプションは以下の通りです. + + -Wctype=ASCII または --ctype=ASCII + マルチバイト文字を考慮しません. このオプションを使用した場合, + オリジナルの gawk を単に8ビットクリーンとした動作となります. + + -Wctype=EUC または --ctype=EUC + マルチバイト文字として EUC を認識します. + + -Wctype=SJIS または --ctype=SJIS + マルチバイト文字として Shift-JIS を認識します. + + MS-DOS 以外のシステムで, Makefile(.in)? を書き換えずにインストー + ルした場合, デフォルトでは EUC を認識します. 
MS-DOS ではデフォ + ルトで Shift-JIS を認識します. + + 以下, 組み込み関数などの変更点を挙げます. + + substr() マルチバイト文字を分断させるような指定は自動的に修正 + します. 例えば, + substr("aあiいuうeえoお", 3, 4) + は, + substr("aあiいuうeえoお", 2, 3) ==> "あi" + という結果になります. どちらの引数も小さくなる方向 + へ丸めます. + + jindex() これは, 以前からよくある index() の変形です. + index() の結果をバイト位置ではなく文字位置で返します. + + jlength() 同じく length() の変形です. バイト数ではなく文字数 + を返します. + + jsubstr() substr() の変形です. バイト位置, バイト数の指定の代 + わりに文字位置, 文字数を指定します. + + RS マルチバイト文字を使用することができます. + + その他 ・識別子にマルチバイト文字を使用できます. + + ・パス名に含まれるマルチバイト文字はについては全く考 + 慮していません. 注意してください. + + ・文字列の大小比較は, 正規表現中の文字クラスの範囲指 + 定と同様, + + 1バイト ASCII 文字 < 半角カナ文字 < 全角文字 + + という関係に基づいて比較します. + +● MS-DOS 版実行形式を含むアーカイブについて (それ以外の形態で入手された + 方は無視してください) + + 1. アーカイブに含まれているファイル + + オリジナルから全く手を加えていないファイル + + ACKNOWLE オリジナルのソースに含まれている ACKNOWLEDGMENT + COPYING オリジナルのソースに含まれている COPYING + FUTURES オリジナルのソースに含まれている FUTURES + LIMITATI オリジナルのソースに含まれている LIMITATIONS + NEWS オリジナルのソースに含まれている NEWS + POSIX オリジナルのソースに含まれている POSIX + PROBLEMS オリジナルのソースに含まれている PROBLEMS + README オリジナルのソースに含まれている README + README.1ST オリジナルのソースに含まれている README.FIRST + + gawk+mb 用のファイル + + CHANGELO.MB gawk+mb の変更履歴 + README.MB このファイル + + MS-DOS 版 gawk+mb 用のファイル + + GAWK MS-DOS + GO32 版 gawk-2.15.6+mb1.04 の coff + GAWK.CAT オリジナルのソースに含まれているマニュアルページ + gawk.1 を GNU roff でフォーマットしたもの. + GAWK.EXE MS-DOS (リアルモード) 版 gawk-2.15.6+mb1.04 の実 + 行形式 + READMAN.SED sed を持っている人へおまけ + (sed -f readman.sed gawk.cat) + + 2. GAWK と GAWK.EXE について + + GAWK および GAWK.EXE は, gawk-2.15.6+mb1.04 をそれぞれ GNU CC + 2.5.7, MS-C 6.00A でコンパイルしたものです. + + デフォルトで Shift-JIS 漢字コードを含むプログラムやファイルを処 + 理できますが, オプションによって変更することもできます. + + GAWK (GO32 版) はインストールする際, お使いになっている GO32 に + 付属の coff2exe ユーティリティなどで EXE 形式にして, 使用してく + ださい. 詳しくはご使用の GO32 のドキュメントをご覧ください. なお, + 都合上, 動作確認は DJGPP1.11 でのみ行っています. それ以外のバー + ジョンの GO32 では動作しない可能性もないわけではありません. 不具 + 合などは遠慮なくご報告ください. (対処できるとは限りませんが.) + + GAWK.EXE (リアルモード版) は, そのまま PATH の通ったディレクトリ + にコピーして使用してください. + + GO32 版とリアルモード版は, 単に容量的な違いだけです. 通常はリア + ルモード版で十分と思われます. 
リアルモード版がメモリ不足となるよ + うな巨大なプログラム/データを処理したい方は, GO32 版を使用して + みてください. 速度的にはリアルモード版の方が高速ですので, そうい + う巨大なプログラム/データに縁のない方は GO32 版はインストールす + る必要はないでしょう. + + 3. コマンドライン引数について + + GO32 版のコマンドラインに関しては, ご使用の GO32 のドキュメント + を参照してください. + + GAWK.EXE (リアルモード版) には, MS-C の setargv.obj を組み込んで + ありますので, MS-DOS でポピュラーなタイプのワイルドカードが使用 + できます. UNIX の csh ライクなワイルドカード展開ルーチンを用意 + しようかとも思ったのですが, MS-DOS の他のコマンドとの整合性が取 + れないし, オリジナルをなるべく尊重したかったので断念しました. + + 1つ1つの引数は空白で区切ります. 引数に空白, ", \, <, >, | を + 含むときはクォーティングが必要です. その方法は COMMAND.COM のバ + グ臭い仕様と, さらに setargv.obj にも問題があり, かなり難しいの + でここでは説明を省きます. 各自研究してください. 一番簡単なのは, + gawk のプログラムをファイルにして + + gawk -f ファイル名 + + とすることです. + + 4. 環境 + + 環境変数 AWKPATH がセットされていれば, その値をディレクトリのリ + ストとみなし, -f で指定されたファイルをそのディレクトリから探し + ます. 環境変数 PATH などと同様に, それぞれのディレクトリはセミ + コロン (;) で区切ります. + + 5. マニュアル + + roff 系のフォーマッタを使えない人のために GNU roff でフォーマッ + ト済みのマニュアルを用意しました. ボールドフェース, アンダーラ + イン対応の less などでお読みください. エディタなどでは ^H が入っ + ていて読みにくいと思います. + + s/.^H//g + + という sed のプログラムに通せば, 通常のテキストファイルが得られ + ます. (^H というのはコントロールコードを直接埋めこむという意味 + です.) + +●インストール (MS-DOS 以外) + + デフォルトのマルチバイト文字の設定は, Makefile.in の中で指定します. + デフォルトを Shift-JIS とする場合と, デフォルトでマルチバイト文字を + 使用しない場合は Makefile.in の MBCTYPE_DEF マクロの定義をそれぞれ以 + 下のように変えてください. + + MBCTYPE_DEF = -DSJIS (デフォルトで Shift-JIS の場合) + MBCTYPE_DEF = (デフォルトで使用しない場合) + + いずれの場合でも起動時のオプションによりマルチバイト文字コードの選択 + が可能です. + + memmove 関数がライブラリにないシステムではリンク時にエラーが出ること + があります. そのときは config/* の対応するファイルに + + MEMMOVE_MISSING 1 + + という行を追加して configure をやり直してください. + + その他の作業は, オリジナルの gawk と同様です. + +●インストール (MS-DOS 版. ここでいうインストールとは, ソースからのイン + ストールのことです) + + まず, pc/* を, このディレクトリにコピーしてください. + + A>copy pc\*.* . + + MS-C 6.00A を使用して, デフォルトで Shift-JIS を認識する gawk を作成 + する場合は, README.MSC に目を通して, 必要ならライブラリにパッチを当 + て, + + A>nmake -f Makefile.msc DEFS= + + とするだけでOKです. オプティマイズは -Ox まで大丈夫なようです. コ + ンパイルが無事終了したら, + + A>nmake -f Makefile.msc test + + と入力して, 動作チェックをしてみてください. + + その他の処理系を使用する場合や, デフォルトを Shift-JIS 以外にする場 + 合は Makefile.msc を書き換えて使用してください. 
+ + また, GO32 版をインストールするには以下のようにしてください. + + A>copy pc\*.* . + A>del config.h + A>ren config.go3 config.h + A>make -f Makefile.go3 + + ただし, 確認しているのは DJGPP 1.11 のみです. 動作チェックの際に + DJGPP 版 GNU make を使用した場合, + + A>make -f Makefile.go3 test + + とすると何故かこけます. (単に私の設定が悪いだけかも知れませんが.) そ + のときは test/Makefile.dos のこけた部分を見て, 手動でやってみてくだ + さい. + + なお, この動作チェックを行う場合は, rm, wc などのツールが必要です. + 持っていない方は test/Makefile.dos を書き換えるなり, rm, wc, etc. を + 作成するなりしてしのいでください. redir という見慣れないプログラムも + 使用していますが, これは, 標準エラー出力をリダイレクトしたりするため + に即興で作ったプログラムです. これのソースはおまけとして pc/ ディレ + クトリに入れておきました. しょうもないプログラムですので決してコード + を覗き見したりしないように(笑). 簡易マニュアルを pc/redir.txt に書い + ています. + +●バグ + + 1. いわゆる JIS には対応していません. 将来対応する予定もありません. + + 2. マルチバイト文字コードはあまり厳格には考えていません. + + EUC 1バイト目 ... 0x80 - 0xff + EUC 2バイト目 ... 0x01 - 0xff (0x0a を除く) + + Shift-JIS 1バイト目 ... 0x80 - 0x9f, 0xe0 - 0xff + Shift-JIS 2バイト目 ... 0x01 - 0xff (0x0a を除く) + + として処理しています. 半角カナも使えるはずです. EUC の SS3 + (0x8f) に始まる3バイトコードは使えません. (私はこれをサポートし + ているシステムを見たことがない...) + +●アルゴリズム (dfa.[ch] のマルチバイト文字対応化) + + 以前は漠然と, DFA を直接 EUC や Shift-JIS のような文字種の多いコード + セットに対応させるのは, 非常に難しいと思っていました. ところがある + 日, 自作ライブラリのテスト用に, 正規表現を DFA へ変換する簡単なプロ + グラムを書いたときに, 突然うまいアイディアが閃いたのです. マルチバ + イト文字といえども結局はバイトの並びです. マルチバイト文字を, すべ + てバイト単位に分解して, 正規表現を作ってしまえばよかったのです. + + 言葉ではうまく表現できないので, 以下の記号を使用し, どういうふうにバ + イト単位に分解しているのか, 例を挙げます. + + a, b, c ... シングルバイト文字. + x, y, z ... マルチバイト文字の1文字目. + + . (任意の1文字) + ==> [a-c]|[x-z][a-z] + + (シングルバイト文字か, またはマルチバイト文字の1文字目と + 任意の1文字の連接.) + + [xb-zx] (xb から zx の範囲のマルチバイト文字 + ==> x[b-z]|y[a-z]|z[a-x] + + yb* + ==> (yb)* + + 実際には正規表現を作り出すのではなく, 正規表現を分解したトークンを直 + 接生成しています. この辺, 興味がある方はソースを見たほうが早いと思 + います. (あまりエレガントではありませんのでソースをじっくり見られる + のは恥ずかしい気もしますが...) + + これだけでは, 例えばあるテキストから xy という文字を探そうとすると, + xxyy のような文字の並びにまで反応してしまいます. そこで, マルチバイ + トモードのときには必ず "^.*(" + ユーザパターン + ")" として処理しま + す. '.*' により, '.' はマルチバイト文字の一部にはマッチしませんから, + 頭出しできるわけです. + +● dfa.[ch], regex.[ch] の拡張仕様 (他のアプリケーションへ応用したい方へ) + + dfa.[ch], regex.[ch] モジュールは mbc.[ch] モジュールに依存していま + す. 
また, これはオリジナルの仕様ですが, dfa.[ch] を使用する場合は + regex.h の定義が必要です. + + マルチバイト文字のタイプは, mbc.[ch] の mbcinit() で設定します. + mbc.h に定義されているマクロ MBCTYPE_ASCII, MBCTYPE_EUC, + MBCTYPE_SJIS のいずれかを mbcinit() に渡してください. + + dfa.[ch] は, パターンのコンパイル時にだけ, この mbc.[ch] の設定を参 + 照します. パターンマッチングの際は, コンパイル時に設定されていた, + マルチバイト文字のタイプを検索します. + + 一方, regex.[ch] は, パターンコンパイル時, マッチング時の両方で + mbc.[ch] の設定を参照します. が, この両者で mbc.[ch] の設定を変更す + ることはできません. つまり, Shift-JIS で記述されたパターンを, EUC + テキストから検索するといった動作はできません. 注意してください. + + マルチバイト文字対応に伴って注意すべき正規表現を以下に記します. + + . 任意の1バイト文字, 正当なマルチバイト文字にマッチします. + 「正当なマルチバイト文字」とは, マルチバイト文字の1文字 + 目に, '\0' または '\n' 以外が続く文字のことです. + + [x-y] 文字コード (内部表現) が x から y の範囲にある任意の1文 + 字にマッチします. これも . と同じく, 正当でない文字には + マッチしません. + + [^x-y] 文字コード (内部表現) が x から y の範囲にない任意の1文 + 字にマッチします. 正当でない文字にもマッチします. + + マルチバイト文字の内部表現は単に1バイト目を上位バイト, 2バイト目を + 下位バイトとした16ビット符号なし整数です. Shift-JIS でも EUC でも + + 1バイト ASCII 文字 < 半角カナ文字 < 全角文字 + + という大小関係が成り立っています. + +●条件など + + 1. オリジナルの GNU awk の著作権は Free Software Foundation, Inc. が + 有しています. パッチ部分 (gawk-mb.diff) の著作権は私 (t^2) が有し + ています. + + 2. GNU awk のソースコードは各所の ftp サイト, もしくは Nifty-serve + の FUNIX のデータライブラリから入手可能です. GNU awk から gawk+mb + への差分 gawk-mb.diff は, Nifty-serve の FUNIX LIB 8, および + fj.sources へポストしています. + + 3. 差分 gawk-mb.diff の再配布は自由です. これに関しては FSF の規定に + 従う必要もありません. しかし差分を適用した結果のソースコード, お + よび実行形式での再配布の際は GNU GENERAL PUBLIC LICENSE (COPYING + 参照) に従ってください. + + gawk+mb に何らかの改変を加えたものを再配布する際も, GNU GENERAL + PUBLIC LICENSE に従うように注意してください. また gawk+mb に含ま + れるコード (dfa.[ch] や regex.[ch] など) を利用したプログラムを配 + 布する際も GNU GENERAL PUBLIC LICENSE の該当部分に従ってください. + + また義務ではありませんが再配布される方は事後にでも連絡をください. + そして可能な限り, 新しいバージョンへのアップデートに努め, 利用者 + からの連絡が私に届くように配慮してください. + + 4. このプログラムは無保証です. + + 5. gawk+mb に何らかの不具合が発生した場合, (FSF や, オリジナルの作者 + ではなく) 私に連絡してください. 配布した人が希望している場合は, + その人に連絡してください. + + 6. ご質問/ご要望/お叱り, その他も大歓迎です. できるかぎりサポートし + ます. + +●謝辞 + + 原作者および FSF に感謝します. + + このドキュメント作成に関して多くの助言をくださった堂園和郎氏 + に感謝します. + + これまで転載/バグ報告をくださった方々に感謝します. (バグ報告に関して + は ChangeLog.MB を参照.) 
+ + 最後に, 貴重なディスクスペースを gawk+mb のために割いてご使用頂いて + いるすべての利用者の方々に感謝いたします. + +●「私」の連絡先 + + 〒810 福岡市中央区梅光園団地 7-207 + TEL/FAX: 092-731-4025 (TEL/FAX 自動切替え) + 092-724-6342 (TEL のみ) + E-mail: NBC02362@niftyserve.or.jp 谷本孝浩 + +# Local variables: +# mode: indented-text +# indent-tabs-mode: nil +# tab-stop-list: (4 8 16 24 32 40 48 56 64 72 80) +# left-margin: 4 +# fill-column: 72 +# fill-prefix: " " +# version-control: never +# End: diff -u2rN -x awktab.c gawk-3.0.6/README.MB gawk-3.0.6+mb1.15/README.MB --- gawk-3.0.6/README.MB Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/README.MB Wed May 16 01:27:27 2001 @@ -0,0 +1,290 @@ +======================================================================== + GNU Awk 3.0.6 + multi-byte extension 1.15 + Feb. 21, 2001 by okabe katsuyuki +======================================================================== + + gawk-3.0.6+mb1.15 -- マルチバイト文字対応版 Gnu Awk + +○概要 + t^2 氏がマルチバイト文字対応化された gawk-2.15.6+mb1.04 を元に + gawk-3.0.6 をマルチバイト文字対応化したものです. + +○使用法 + gawk からの拡張は基本的に gawk-2.15.6+mb1.04 と同じですが以下のオプ + ションを追加しています. + + -Wctype=UTF8 または --ctype=UTF8 + マルチバイト文字として UTF8 を認識します. 文字セットは + UCS-2(unicode), UCS-4 共に使用できます. + + -Wmemory + 正規表現の DFA 生成を抑制することによりメモリ消費量を節約し + ます. MSDOS のリアルモード版で頻繁にするようならば djgpp 版 + や WIN32 版を使用するようにした方がよいでしょう. + + 起動時のプログラム名によりマルチバイト文字として認識する文字セットのデ + フォルトを変更します。その規則は以下の通りです. + + 1. jgawk などプログラム名の先頭が j のとき + gawk コンパイルに指定した文字セットをマルチバイト文字として認 + 識する. + 2. eucgawk などプログラム名の先頭が euc のとき + マルチバイト文字として EUC を認識する. + 3. sjisgawk などプログラム名の先頭が sjis のとき + マルチバイト文字として SJIS を認識する. + 4. utf8gawk などプログラム名の先頭が utf8 のとき + マルチバイト文字として UTF8 を認識する. + 5. 環境変数 GAWKMB_ENABLE が設定されているとき + gawk コンパイルに指定した文字セットをマルチバイト文字として認 + 識する. + 6. 1.〜5. のいづれでもないとき + マルチバイト文字を認識しない. + + その他の拡張については OREADME.MB の「●使用法」を参照してください. + +○Windows 版実行形式を含むアーカイブについて + 1. 
アーカイブに含まれているファイル + オリジナルソースからのファイル + ANKNOWLEDGMENT ANKNOWLEDGMENT + COPYING COPYING + ChangeLog ChangeLog + FUTURES FUTURES + LIMITATIONS LIMITATIONS + NEWS NEWS + POSIX.STD POSIX.STD + PROBLEMS PROBLEMS + README README + README_d/README.FIRST README_d/README.FIRST + README_d/README.pc README_d/README.pc + awklib/assert.awk awklib/eg/lib/assert.awk + awklib/ctime.awk awklib/eg/lib/ctime.awk + awklib/ftrans.awk awklib/eg/lib/ftrans.awk + awklib/getopt.awk awklib/eg/lib/getopt.awk + awklib/gettiem.awk awklib/eg/lib/gettime.awk + awklib/join.awk awklib/eg/lib/join.awk + awklib/mktime.awk awklib/eg/lib/mktime.awk + awklib/nextfile.awk awklib/eg/lib/nextfile.awk + awklib/ord.awk awklib/eg/lib/ord.awk + awklib/round.awk awklib/eg/lib/round.awk + doc/gawk.info doc/gawk.info + + gawk+mb 用のファイル + ChangeLog.MB 変更履歴 + README.MB このファイル + OChangeLog.MB gawk-2.15.6+mb1.04 までの変更履歴 + OREADME.MB gawk-2.15.6+mb1.04 までの README.MB + + MSDOS 版 gawk+mb 用のファイル + doc/gawk.cat オリジナルソースに含まれているマニュアル + ページ doc/gawk.1 を GNU roff でフォーマッ + トしたもの. + doc/igawk.cat オリジナルソースに含まれているマニュアル + ページ doc/igawk.1 を GNU roff でフォー + マットしたもの. + gawk.exe WIN32版 gawk-3.0.6+mb1.15 の実行ファイル + igawk.exe igawk 実行ファイル. + readman.awk おまけ + + 2. gawk.exe について + gawk.exe は, gawk-3.0.6+mb1.15 を Visual C++ 6.0 + SP5にてコンパ + イルしたものです. + + gawk.exe のインストールは, そのまま PATH の通ったディレクトリに + コピーしてください. + + デフォルトで Shift-JIS 漢字コードを含むプログラムやファイルを処 + 理するためにはファイル名を jgawk.exe などに変更して下さい. また、 + オプションにより変更することもできます. + + 3. igawk.exe について + gawk に include 機能を追加するプログラムです. スクリプト中に + + @include FILENAME + + と書くことで環境変数 AWKPATH 内にあるファイル名 ``FILENAME'' の + ファイルをスクリプト中に取り込むことができます. 環境変数 AWKPATH + が設定されていなければその値として ``.;c:/gnu/lib/awk'' が用いら + れます. + gawk を子プロセスで呼び出すため gawk を実行できるだけのメモリが + 空いている必要があります. + PATH が通っていない gawk を使用したいときやファイル名が gawk.exe + から変更してある gawk を使用したいときには環境変数 GAWK_PATH に + 使用する gawk のフルパスを設定してください. + + 4. コマンドライン引数について + djgpp 版については djgpp の仕様に準じます. + + リアルモード版についてはそれぞれ serow 氏作の xargs.[ch], WIN32版 + では Bruce. 
氏対応 xargs32.[ch] を使用することにより次のような拡張 + がなされています. + + - シングルクオート '' で引数を括ることができます. + ただし >, |, < のような command.com に解釈されてしまうような + 文字を使用するさいには従来のダブルクオート "" を用いてくだ + さい. + + - csh-like なワイルドカード展開ができます. + + 5. system() について + serow 氏作の xsystem.c を使用することにより次のような拡張がなさ + れています. + + - 戻り値が取得できます. ただし, command.com の内部コマンドの + 場合には常に 0 となります. また実行できなかった場合には + 255 を返します. + + - 出力ハンドルの切り替えができます. [FD1]>&[FD2] のような書 + 式によりファイルハンドル FD1 に出力される内容がファイルハ + ンドル FD2 にリダイレクトされます. 例えば, + + system("gawk 2>&1") + + とすることで, 通常は標準エラー出力(2)へ出力される gawk の + usage が標準出力(1)に出力されます. またこの機能は getline + へのパイプにも有効, + + "gawk 2>&1" | getline + + とすることもできます. + また標準出力および標準エラー出力は, [FD]>[FILENAME] の書式 + によりファイルハンドル FD に出力される内容がファイル + FILENAME に出力されます. [FD]>>[FILENAME] の書式のときには + ファイルハンドル FD に出力される内容がファイル FILENAME に + 追加されます. + + ただし以上の拡張は環境変数 AWKSHELL, SHELL もしくは COMSPEC に + unix の Bourne shell 系シェルが設定してある際には機能せず, そのま + まシェルを利用します. + + 6. 環境変数 + + - AWKPATH + 環境変数 AWKPATH がセットされていれば, その値をディレクトリ + のリストとみなし, -f で指定されたファイルをそのディレクトリ + から探します. それぞれのディレクトリはコロン `:', セミコロン + `;' のどちらで区切っても構いません. + + - LFN + Windows95 において環境変数 LFN に `N' または `n' 以外の文字 + が設定されているならばロングファイルネームが使用できるように + なります. + またリアルモード版では環境変数 XARGS に ``LFN'' を設定するこ + とでもロングファイルネームが使用できるようになります. + + 7. マニュアル + roff 系のフォーマッタが使えない人のために GNU roff でフォーマッ + ト済みのマニュアルを用意しました. less などでお読みください. エ + ディタなどで見る方は, おまけの readman.awk を用い次のようにする + ことで通常のテキストファイルが得られます. + + A>gawk -f readman.awk gawk.cat >gawk.txt + +○インストール(コンパイル) + 1. unix or cygwin の場合 + デフォルトのマルチバイト文字の設定は, ./configure 実行時に指定し + ます. デフォルトは EUC となっていますので, デフォルトを + Shift-JIS, ASCII に変更するには ./configure 実行時のオプションに + それぞれ --enable-SJIS, --enable-ASCII を加えてください. + + 例) + ./configure --enable-SJIS (デフォルトで Shift-JIS の場合) + ./configure --enable-ASCII (デフォルトで ASCII の場合) + + インストールに関するその他の事項は INSTALL を参照してください. + + 2. MSDOS or OS/2 の場合 + ディレクトリ pc にあるファイルをこのディレクトリにコピーし, 必要 + があれば Makefile を編集し, make でコンパイル, make install でイ + ンストールして下さい. + + Makefile 中にある prefix の定義は gawk をインストールするディレ + クトリに影響します. 
例えば prefix の値が a:/gnu に設定されている + ときには gawk.exe は a:/gnu/bin にインストールされます. また + gawk のライブラリは a:/gnu/lib/awk にインストールされます. + + make をする際には使用するOSやコンパイラによってターゲットを与 + える必要があります. ターゲットには以下のものが用意されています. + + ターゲット 使用OS コンパイラ + djgpp MSDOS DJGPP v2 + djgppv1 MSDOS DJGPP v1 + emx OS/2 emx/gcc + emxnt Win32 emx/gcc + emxbnd OS/2 and MSDOS emx/gcc + msc MSDOS Microsoft C 7 & 8 + msc6 MSDOS Microsoft C 6.00a + msc6os2 OS/2 Microsoft C 6.00a + msc6bnd OS/2 and MSDOS Microsoft C 6.00a + vcWin32 Win32 Microsoft Visual C + bcc31 MSDOS Borland C 3.1 + + 当方では vcWin32 でコンパイルできることを確認しています. + + 例) Borland C 3.1 でコンパイルする場合 + A>copy pc/*.* . + A>make bcc31 + + 3. HUMAN68K の場合 + autoconf や configure が実行できる環境が整っていないとインストー + ルできません. 環境が整っている場合は最初に autoconf で HUMAN68K + 用の configure を作成し unix でのインストール方法に従ってください. + +○バグなどその他 + OREADME.MB を参照してください. + +○条件 + 1. オリジナルの GNU awk の著作権は Free Software Foundation, Inc. が + 有しています. パッチ部分の著作権は 岡部勝幸 が有しています. ただ + し gawk-2.15.6+mb1.04 から引き継いでいる部分に関しては t^2 氏が著 + 作権を有しています. + + 2. GNU awk のソースコードは各所の ftp サイトから入手可能です. + + 3. パッチファイルの再配布は自由です. これに関しては FSF の規定に従う + 必要はありません. しかし差分を適用した結果のソースコード, および + 実行形式での再配布の際は GNU GENERAL PUBLIC LICENSE (COPYING 参照) + に従ってください. + + gawk+mb に何らかの改変を加えたものを再配布する際も, GNU GENERAL + PUBLIC LICENSE に従うようにしてください. また gawk+mb に含まれる + コード (dfa.[ch] や regex.[ch] など) を利用したプログラムを配布す + る際も GNU GENERAL PUBLIC LICENSE の該当部分に従ってください. + + 4. このプログラムは無保証です. + + 5. この gawk+mb に何らかの不具合が発生した場合, 私に連絡をしてくだ + さい. + +○謝辞 + 原作者および FSF に感謝します. + + gawk+mb の原作者 t^2 氏に感謝します. + + このドキュメント(README.MB) は, OREADME.MB を参考にさせていただきま + した. + + MSDOS 版には serow 氏作 xargs.[ch] を使用させていただきました. + WIN32 版には Bruce. 氏対応 xargs32.[ch] を使用させていただきました. + djgpp 版, MSDOS 版, WIN32 版には serow 氏作 xsystem.c を使用させていた + だきました. + + MSDOS リアルモード版における Windows95 ロングファイルネームの対応に + は, Bruce. 氏作 mawk for MBCS のソースを参考にさせていただきました. + + 優れたソフトウェアを開発し, そのソースコードを公開してくださっている + 多くの方々に感謝します.
+ +○連絡先 + e-mail: HGC02147@nifty.ne.jp + + ういるす / WILLs / 岡部 勝幸 + +# Local variables: +# mode: indented-text +# indent-tabs-mode: nil +# tab-stop-list: (4 8 16 24 32 40 48 56 64 72 80) +# left-margin: 4 +# fill-column: 72 +# fill-prefix: " " +# version-control: never +# End: diff -u2rN -x awktab.c gawk-3.0.6/acconfig.h gawk-3.0.6+mb1.15/acconfig.h --- gawk-3.0.6/acconfig.h Wed Jun 7 17:47:25 2000 +++ gawk-3.0.6+mb1.15/acconfig.h Fri May 4 21:23:28 2001 @@ -34,4 +34,6 @@ #undef _LARGEFILE_SOURCE /* makes fseeko etc. visible on some hosts */ #undef _LARGE_FILES /* emables large files on AIX-style hosts */ +#undef DTOS18_MISSING /* _dtos18() bug fix for human68k libc-1.1.32 */ +#undef FCONVERT_MISSING /* fconvert() and _round() bug fix for human68k libc-1.1.32 */ @BOTTOM@ diff -u2rN -x awktab.c gawk-3.0.6/array.c gawk-3.0.6+mb1.15/array.c --- gawk-3.0.6/array.c Fri Aug 4 02:08:01 2000 +++ gawk-3.0.6+mb1.15/array.c Fri May 4 21:32:14 2001 @@ -23,4 +23,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: Oct.
7, 1994 by t^2 */ /* @@ -404,5 +406,5 @@ if (symbol->table_size <= 0) { memset(symbol->var_array, '\0', - sizeof(NODE *) * symbol->array_size); + sizeof(NODE *) * (size_t) symbol->array_size); symbol->table_size = symbol->array_size = 0; symbol->flags &= ~ARRAYMAXED; @@ -560,5 +562,5 @@ /* allocate new table */ emalloc(new, NODE **, newsize * sizeof(NODE *), "grow_table"); - memset(new, '\0', newsize * sizeof(NODE *)); + memset(new, '\0', (size_t) newsize * sizeof(NODE *)); /* brand new hash table, set things up and return */ diff -u2rN -x awktab.c gawk-3.0.6/awk.h gawk-3.0.6+mb1.15/awk.h --- gawk-3.0.6/awk.h Sun Jul 16 22:52:37 2000 +++ gawk-3.0.6+mb1.15/awk.h Sat May 5 18:12:08 2001 @@ -23,4 +23,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: May 28, 1997 by okabe katsuyuki */ /* ------------------------------ Includes ------------------------------ */ @@ -52,5 +54,5 @@ #include #endif /* HAVE_LOCALE_H */ -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +#if (defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__) || defined(MSDOS) #include #else @@ -101,5 +103,5 @@ -#ifdef __STDC__ +#if defined(__STDC__) || defined(MSDOS) #define P(s) s #define MALLOC_ARG_T size_t @@ -156,5 +158,9 @@ #if HAVE_UNISTD_H #include -#endif /* HAVE_UNISTD_H */ +#else /* !HAVE_UNISTD_H */ +#ifdef MSDOS +#include +#endif +#endif /* !HAVE_UNISTD_H */ #ifndef HAVE_VPRINTF @@ -196,4 +202,24 @@ #endif /* GNU_REGEX */ +#include "mbc.h" + +#ifdef MSDOS +#if defined(_MSC_VER) && (_MSC_VER >= 600) && !defined(_PCODE) +#pragma intrinsic(memset,memcmp,memcpy,strcat,strcmp,strcpy,strlen) +#endif +#if defined(__BORLANDC__) && (__BORLANDC__ >= 0x400) +#pragma intrinsic memcmp +#pragma intrinsic memcpy +#pragma intrinsic memset +#pragma intrinsic strcat +#pragma intrinsic strchr +#pragma intrinsic strcmp +#pragma intrinsic strcpy +#pragma intrinsic strlen +#pragma 
intrinsic strrchr +#pragma intrinsic -strncmp /* for Borland C 3.1 bug */ +#endif +#endif + /* ------------------ Constants, Structures, Typedefs ------------------ */ @@ -209,5 +235,5 @@ /* Figure out what '\a' really is. */ -#ifdef __STDC__ +#if defined(__STDC__) || defined(MSDOS) #define BELL '\a' /* sure makes life easy, don't it? */ #else @@ -569,5 +595,5 @@ extern NODE *Nnull_string; extern NODE **fields_arr; -extern int sourceline; +extern long sourceline; extern char *source; extern NODE *expression_value; @@ -584,4 +610,5 @@ extern int do_lint_old; extern int do_intervals; +extern int save_memory; extern int in_begin_rule; extern int in_end_rule; @@ -690,5 +717,5 @@ _ts : r_force_string(_ts);}) #else -#ifdef MSDOS +#if defined(MSDOS) && (_MSC_VER == 510) extern double _msc51bug; #define force_number(n) (_msc51bug=(_t = (n),\ @@ -711,4 +738,10 @@ #define fatal set_loc(__FILE__, __LINE__), r_fatal +#if (defined (_MSC_VER) || defined (__TURBOC__)) && !defined (WIN32) +#define SIGNED_SIZE_T(n) ((int) (n)) +#else +#define SIGNED_SIZE_T(n) ((long) (n)) +#endif + /* ------------- Function prototypes or defs (as appropriate) ------------- */ @@ -738,4 +771,7 @@ extern NODE *do_fflush P((NODE *tree)); extern NODE *do_index P((NODE *tree)); +extern NODE *do_jindex P((NODE *tree)); +extern NODE *do_jlength P((NODE *tree)); +extern NODE *do_jsubstr P((NODE *tree)); extern NODE *do_int P((NODE *tree)); extern NODE *do_length P((NODE *tree)); @@ -830,5 +866,5 @@ extern void r_fatal P((va_list va_alist, ...)); #else -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +#if (defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__) || defined(MSDOS) extern void msg (char *mesg, ...); extern void error (char *mesg, ...); @@ -872,4 +908,10 @@ extern int strncasecmp P((const char *s1, const char *s2, register size_t n)); +/* mbfuncs.c */ +extern int mbstrncasecmp P((const char *s1, const char *s2, size_t n)); +extern int mbmemcmp P((const void *s1, size_t n1, const 
void *s2, size_t n2)); +extern size_t mblength P((const char *str, size_t n)); +extern size_t mbbyte P((const char *str, size_t n)); + #if defined(atarist) #if defined(PIPES_SIMULATED) @@ -885,4 +927,14 @@ #define INVALID_HANDLE (-1) #endif /* atarist */ + +#if defined(__TURBOC__) +#if !__STDC__ +#undef __STDC__ +#define __STDC__ 1 +#endif +#define strcasecmp stricmp +#define strncasecmp strnicmp +#undef random +#endif /* __TURBOC__ */ #ifndef STATIC diff -u2rN -x awktab.c gawk-3.0.6/awk.y gawk-3.0.6+mb1.15/awk.y --- gawk-3.0.6/awk.y Mon Jul 17 00:29:50 2000 +++ gawk-3.0.6+mb1.15/awk.y Sat May 5 02:50:31 2001 @@ -23,4 +23,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: May 5, 2001 by okabe katsuyuki */ %{ @@ -34,5 +36,5 @@ #define DONT_FREE FALSE -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +#if (defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__) || defined(MSDOS) static void yyerror(const char *m, ...) 
; #else @@ -79,5 +81,5 @@ extern char *source; -extern int sourceline; +extern long sourceline; extern struct src *srcfiles; extern int numfiles; @@ -902,4 +904,5 @@ # define GAWKX 0x0400 /* gawk extension */ # define RESX 0x0800 /* Bell Labs Research extension */ +# define GAWKMBX 0x1000 /* gawk+mb extension */ NODE *(*ptr)(); /* function that implements this keyword */ }; @@ -942,4 +945,7 @@ {"index", Node_builtin, LEX_BUILTIN, A(2), do_index}, {"int", Node_builtin, LEX_BUILTIN, A(1), do_int}, +{"jindex", Node_builtin, LEX_BUILTIN, GAWKMBX|A(2), do_jindex}, +{"jlength", Node_builtin, LEX_LENGTH, GAWKMBX|A(0)|A(1), do_jlength}, +{"jsubstr", Node_builtin, LEX_BUILTIN, GAWKMBX|A(2)|A(3), do_jsubstr}, {"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length}, {"log", Node_builtin, LEX_BUILTIN, A(1), do_log}, @@ -986,5 +992,5 @@ /* yyerror --- print a syntax error message, show where */ -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +#if (defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__) || defined(MSDOS) static void yyerror(const char *m, ...) @@ -1039,5 +1045,5 @@ *bp++ = ' '; } -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +#if (defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__) || defined(MSDOS) va_start(args, m); if (mesg == NULL) @@ -1391,4 +1397,18 @@ } tokadd(c); + if (ismbchar(c)) { + long n = (long) mbclen(c) - 1; /* signed so the padding loop below terminates */ + while (n-- > 0) { + if ((c = nextc()) == '\0' || c == '\n') { + if (c == '\n') + pushback(); + yyerror("incomplete multi-byte char"); + while (n-- >= 0) + tokadd(' '); + break; + } + tokadd(c); + } + } } } @@ -1652,4 +1672,18 @@ } tokadd(c); + if (ismbchar(c)) { + long n = (long) mbclen(c) - 1; /* signed so the padding loop below terminates */ + while (n-- > 0) { + if ((c = nextc()) == '\0' || c == '\n') { + if (c == '\n') + pushback(); + yyerror("incomplete multi-byte char"); + while (n-- >= 0) + tokadd(' '); + break; + } + tokadd(c); + } + } } yylval.nodeval = make_str_node(tokstart, @@ -1793,5 +1827,5 @@ } - if (c != '_' && ! isalpha(c)) { + if (c != '_' && !
isalpha(c) && ! ismbchar(c)) { yyerror("Invalid char '%c' in expression\n", c); exit(1); @@ -1802,4 +1836,18 @@ while (is_identchar(c)) { tokadd(c); + if (ismbchar(c)) { + long n = (long) mbclen(c) - 1; /* signed so the padding loop below terminates */ + while (n-- > 0) { + if ((c = nextc()) == '\0' || c == '\n') { + if (c == '\n') + pushback(); + yyerror("incomplete multi-byte char"); + while (n-- >= 0) + tokadd(' '); + break; + } + tokadd(c); + } + } c = nextc(); } @@ -1830,4 +1878,7 @@ else { if (do_lint) { + if (tokentab[mid].flags & GAWKMBX) + warning("%s() is a gawk+mb extension", + tokentab[mid].operator); if (tokentab[mid].flags & GAWKX) warning("%s() is a gawk extension", @@ -1844,4 +1895,5 @@ tokentab[mid].operator); if ((do_traditional && (tokentab[mid].flags & GAWKX)) + || (do_traditional && (tokentab[mid].flags & GAWKMBX)) || (do_posix && (tokentab[mid].flags & NOT_POSIX))) break; @@ -1940,5 +1992,5 @@ * the corresponding routine. */ - if (nexp == 0 && r->proc == do_length) { + if (nexp == 0 && (r->proc == do_length || r->proc == do_jlength)) { subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL), Node_expression_list, diff -u2rN -x awktab.c gawk-3.0.6/awklib/Makefile.in gawk-3.0.6+mb1.15/awklib/Makefile.in --- gawk-3.0.6/awklib/Makefile.in Thu Oct 8 05:33:20 1998 +++ gawk-3.0.6+mb1.15/awklib/Makefile.in Sat May 5 02:49:37 2001 @@ -47,4 +47,5 @@ AUXPROGS = pwcat grcat AUXAWK = passwd.awk group.awk +IGAWKSH = @IGAWKSH@ all: stamp-eg $(AUXPROGS) igawk $(AUXAWK) @@ -62,6 +63,6 @@ $(CC) $(CFLAGS) $(srcdir)/eg/lib/grcat.c $(LDFLAGS) -o $@ -igawk: $(srcdir)/eg/prog/igawk.sh - cp $(srcdir)/eg/prog/igawk.sh $@ ; chmod 755 $@ +igawk: $(srcdir)/$(IGAWKSH) + cp $(srcdir)/$(IGAWKSH) $@ ; chmod 755 $@ passwd.awk: $(srcdir)/eg/lib/passwdawk.in diff -u2rN -x awktab.c gawk-3.0.6/builtin.c gawk-3.0.6+mb1.15/builtin.c --- gawk-3.0.6/builtin.c Sun Jul 16 12:13:57 2000 +++ gawk-3.0.6+mb1.15/builtin.c Sat May 5 18:08:31 2001 @@ -23,5 +23,9 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA */ - +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: Jun 15, 1997 by okabe katsuyuki */ + +/* Modified for Human68k by K.Murakami 1993.06.01 + Last change: Feb. 3, 1997 by okabe katsuyuki */ #include "awk.h" @@ -32,4 +36,13 @@ #include "random.h" +#ifdef __human68k__ +#define system(cmd) ({ int _rc = system((cmd)); _rc < 0 ? 0xff00 : (_rc << 8) & 0xff00 ; }) +#else +#if defined(MSDOS) && !defined(__EMX__) +extern int xsystem P((char *)); +#define system(cmd) xsystem((cmd)) +#endif +#endif + /* can declare these, since we always use the random shipped with gawk */ extern char *initstate P((unsigned seed, char *state, int n)); @@ -212,4 +225,6 @@ register size_t l1, l2; long ret; + int mbf; + size_t skip; @@ -229,9 +244,19 @@ if (l2 > l1) break; - if (casetable[(int)*p1] == casetable[(int)*p2] - && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) { + if (((mbf = ismbchar(*p1)) != 0 + ? *p1 == *p2 + : (casetable[(unsigned char) *p1] + == casetable[(unsigned char) *p2])) + && (l2 == 1 || mbstrncasecmp(p1, p2, l2) == 0)) { ret = 1 + s1->stlen - l1; break; } + if (mbf) { + skip = mbclen(*p1) - 1; + if (l1 <= skip) + break; + l1 -= skip; + p1 += skip; + } l1--; p1++; @@ -246,8 +271,86 @@ break; } + if (ismbchar(*p1)) { + skip = mbclen(*p1) - 1; + if (l1 <= skip) + break; + l1 -= skip; + p1 += skip; + } + l1--; + p1++; + } + } + free_temp(s1); + free_temp(s2); + return tmp_number((AWKNUM) ret); +} + +/* do_jindex --- find index of a multi-byte string */ + +NODE * +do_jindex(tree) +NODE *tree; +{ + NODE *s1, *s2; + register char *p1, *p2; + register size_t l1, l2; + long ret; + int mbf; + size_t skip; + + s1 = tree_eval(tree->lnode); + s2 = tree_eval(tree->rnode->lnode); + force_string(s1); + force_string(s2); + p1 = s1->stptr; + p2 = s2->stptr; + l1 = s1->stlen; + l2 = s2->stlen; + ret = 1; + if (IGNORECASE) { + while (l1 > 0) { + if (l2 > l1) + break; + if (((mbf = ismbchar(*p1)) != 0 + ? 
*p1 == *p2 + : (casetable[(unsigned char) *p1] + == casetable[(unsigned char) *p2])) + && (l2 == 1 || mbstrncasecmp(p1, p2, l2) == 0)) { + goto found; + } + if (mbf) { + skip = mbclen(*p1) - 1; + if (l1 <= skip) + break; + l1 -= skip; + p1 += skip; + } + l1--; + p1++; + ret++; + } + } else { + while (l1 > 0) { + if (l2 > l1) + break; + if (*p1 == *p2 + && (l2 == 1 || STREQN(p1, p2, l2))) { + goto found; + } + if (ismbchar(*p1)) { + skip = mbclen(*p1) - 1; + if (l1 <= skip) + break; + l1 -= skip; + p1 += skip; + } l1--; p1++; + ret++; } } + ret = 0; +found: free_temp(s1); free_temp(s2); @@ -299,4 +402,20 @@ } +/* do_jlength --- length of a multi-byte string or $0 */ + +NODE * +do_jlength(tree) +NODE *tree; +{ + NODE *tmp; + size_t len; + + tmp = tree_eval(tree->lnode); + (void) force_string(tmp); + len = mblength(tmp->stptr, tmp->stlen); + free_temp(tmp); + return tmp_number((AWKNUM) len); +} + /* do_log --- the log function */ @@ -333,4 +452,24 @@ register NODE *carg; { +#if (defined (_MSC_VER) || defined (__TURBOC__)) && !defined (WIN32) +#define allocate_space(minsiz) do { \ + size_t nsiz = osiz * 2; \ + if (nsiz > INT_MAX) { \ + nsiz = INT_MAX; \ + if (nsiz - osiz < (minsiz)) \ + fatal("format_tree: string too long"); \ + } \ + erealloc(obuf, char *, nsiz, "format_tree"); \ + ofre += nsiz - osiz; \ + osiz += nsiz - osiz; \ +} while (0) +#else +#define allocate_space(minsiz) do { \ + erealloc(obuf, char *, osiz * 2, "format_tree"); \ + ofre += osiz; \ + osiz *= 2; \ +} while (0) +#endif + /* copy 'l' bytes from 's' to 'obufout' checking for space in the process */ /* difference of pointers should be of ptrdiff_t type, but let us be kind */ @@ -338,7 +477,5 @@ while ((l) > ofre) { \ long olen = obufout - obuf; \ - erealloc(obuf, char *, osiz * 2, "format_tree"); \ - ofre += osiz; \ - osiz *= 2; \ + allocate_space((l)); \ obufout = obuf + olen; \ } \ @@ -352,7 +489,5 @@ if (ofre <= 0) { \ long olen = obufout - obuf; \ - erealloc(obuf, char *, osiz * 2, 
"format_tree"); \ - ofre += osiz; \ - osiz *= 2; \ + allocate_space(1); \ obufout = obuf + olen; \ } \ @@ -364,8 +499,6 @@ #define chksize(l) if ((l) > ofre) { \ long olen = obufout - obuf; \ - erealloc(obuf, char *, osiz * 2, "format_tree"); \ + allocate_space((l)); \ obufout = obuf + olen; \ - ofre += osiz; \ - osiz *= 2; \ } @@ -408,4 +541,5 @@ char signchar = FALSE; size_t len; + size_t skip; int zero_flag = FALSE; static char sp[] = " "; @@ -424,4 +558,14 @@ s0 = s1 = fmt_string; while (n0-- > 0) { +#if 1 /* EUC や Shift-JIS では必要ないと思うが念のため. */ + if (ismbchar(*s1)) { + skip = mbclen(*s1) - 1; + if (n0 < skip) + break; + n0 -= skip; + s1 += skip + 1; + continue; + } +#endif if (*s1 != '%') { s1++; @@ -447,5 +591,5 @@ break; - switch (cs1 = *s1++) { + switch (cs1 = (unsigned char) *s1++) { case (-1): /* dummy case to allow for checking */ check_pos: @@ -833,4 +977,10 @@ break; default: +#if 1 /* EUC や Shift-JIS では必要ないと思うが念のため. */ + if (ismbchar(cs1) && n0 >= (skip = mbclen(cs1) - 1)) { + n0 -= skip; + s1 += skip; + } +#endif break; } @@ -950,4 +1100,7 @@ indx = d_index - 1; /* awk indices are from 1, C's are from 0 */ + if (indx < t1->stlen) { + indx = mbbyte(t1->stptr, indx); + } if (tree->rnode->rnode == NULL) { /* third arg. 
missing */ @@ -996,4 +1149,85 @@ } +/* do_jsubstr --- do the jsubstr function */ + +NODE * +do_jsubstr(tree) +NODE *tree; +{ + NODE *t1, *t2, *t3; + NODE *r; + register size_t indx; + size_t length; + double d_index, d_length; + const char *p; + int past_eos = 0; + int exceeds_length = 0; + + t1 = force_string(tree_eval(tree->lnode)); + t2 = tree_eval(tree->rnode->lnode); + d_index = force_number(t2); + free_temp(t2); + + if (d_index < 1.0) { + if (do_lint) + warning("jsubstr: start index %g invalid, using 1", + d_index); + d_index = 1; + } + if (do_lint && double_to_int(d_index) != d_index) + warning("jsubstr: non-integer start index %g will be truncated", + d_index); + + indx = d_index - 1; /* awk indices are from 1, C's are from 0 */ + for (p = t1->stptr; indx && p < (t1->stptr + t1->stlen); indx--) + p += mbclen(*p); + past_eos = indx; + indx = p - t1->stptr; + + if (tree->rnode->rnode == NULL) { /* third arg. missing */ + /* use remainder of string */ + length = t1->stlen - indx; + } else { + t3 = tree_eval(tree->rnode->rnode->lnode); + d_length = force_number(t3); + free_temp(t3); + if (d_length <= 0.0) { + if (do_lint) + warning("jsubstr: length %g is <= 0", d_length); + free_temp(t1); + return Nnull_string; + } + if (do_lint && double_to_int(d_length) != d_length) + warning( + "jsubstr: non-integer length %g will be truncated", + d_length); + for (length = d_length; length && p < (t1->stptr + t1->stlen); length--) + p += mbclen(*p); + exceeds_length = length; + length = p - t1->stptr - indx; + } + + if (t1->stlen == 0) { + if (do_lint) + warning("jsubstr: source string is zero length"); + free_temp(t1); + return Nnull_string; + } + if (do_lint && exceeds_length) + warning( + "jsubstr: length %g at position %g exceeds length of first argument (%d)", + d_length, d_index, mblength(t1->stptr, t1->stlen)); + if (past_eos) { + if (do_lint) + warning("jsubstr: start index %g is past end of string", + d_index); + free_temp(t1); + return Nnull_string; + } + r = 
tmp_string(t1->stptr + indx, length); + free_temp(t1); + return r; +} + /* do_strftime --- format a time stamp */ @@ -1034,4 +1268,7 @@ t2 = tree_eval(tree->rnode->lnode); fclock = (time_t) force_number(t2); +#if defined(_MSC_VER) && (_MSC_VER == 700) + fclock += (1UL + 70UL * 365UL + (70 >> 2)) * 24UL * 60UL * 60UL; +#endif free_temp(t2); } @@ -1078,4 +1315,7 @@ (void) time(&lclock); +#if defined(_MSC_VER) && (_MSC_VER == 700) + lclock -= (1UL + 70UL * 365UL + (70 >> 2)) * 24UL * 60UL * 60UL; +#endif return tmp_number((AWKNUM) lclock); } @@ -1214,5 +1454,7 @@ for (cp = (unsigned char *)t2->stptr, cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++) - if (ISUPPER(*cp)) + if (ismbchar(*cp)) + cp += mbclen(*cp) - 1; + else if (ISUPPER(*cp)) *cp = tolower(*cp); free_temp(t1); @@ -1234,5 +1476,7 @@ for (cp = (unsigned char *)t2->stptr, cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++) - if (ISLOWER(*cp)) + if (ismbchar(*cp)) + cp += mbclen(*cp) - 1; + else if (ISLOWER(*cp)) *cp = toupper(*cp); free_temp(t1); @@ -1323,7 +1567,13 @@ (void) setstate(state); - if (tree == NULL) + if (tree == NULL) { +#if defined(_MSC_VER) && (_MSC_VER == 700) + save_seed = time((time_t *) 0) + - (1UL + 70UL * 365UL + (70 >> 2)) * 24UL * 60UL * 60UL; /* 70-year (+1 day) offset in seconds, only for _MSC_VER == 700 */ + srandom((int) save_seed); +#else srandom((unsigned int) (save_seed = (long) time((time_t *) 0))); - else { +#endif + } else { tmp = tree_eval(tree->lnode); srandom((unsigned int) (save_seed = (long) force_number(tmp))); @@ -1513,5 +1763,5 @@ } else if (*scan == '\\') { if (backdigs) { /* gensub, behave sanely */ - if (ISDIGIT(scan[1])) { + if (ISDIGIT((unsigned char)scan[1])) { ampersands++; scan++; @@ -1538,4 +1788,6 @@ } } + else if (ismbchar(*scan)) + scan += mbclen(*scan) - 1; } @@ -1583,5 +1835,5 @@ else if (*scan == '\\') { if (backdigs) { /* gensub, behave sanely */ - if (ISDIGIT(scan[1])) { + if (ISDIGIT((unsigned char) scan[1])) { int dig = scan[1] - '0'; char *start, *end; @@ -1616,4 +1868,9 @@ *bp++ =
*scan; } + } else if (ismbchar(*scan)) { + size_t n = mbclen(*scan) - 1; + while (n-- > 0) + *bp++ = *scan++; + *bp++ = *scan; } else *bp++ = *scan; @@ -1631,4 +1888,7 @@ /* catch the case of gsub(//, "blah", whatever), i.e. empty regexp */ if (matchstart == matchend && matchend < text + textlen) { + size_t n = mbclen(*matchend) - 1; + while (n-- > 0) + *bp++ = *matchend++; *bp++ = *matchend; matchend++; @@ -1638,5 +1898,5 @@ if ((current >= how_many && !global) - || ((long) textlen <= 0 && matchstart == matchend) + || ((long) SIGNED_SIZE_T (textlen) <= 0 && matchstart == matchend) || research(rp, t->stptr, text - t->stptr, textlen, TRUE) == -1) break; diff -u2rN -x awktab.c gawk-3.0.6/configh.in gawk-3.0.6+mb1.15/configh.in --- gawk-3.0.6/configh.in Mon Jun 12 21:56:11 2000 +++ gawk-3.0.6+mb1.15/configh.in Sat May 5 14:12:07 2001 @@ -137,4 +137,6 @@ #undef _LARGEFILE_SOURCE /* makes fseeko etc. visible on some hosts */ #undef _LARGE_FILES /* emables large files on AIX-style hosts */ +#undef DTOS18_MISSING /* _dtos18() buf fix for human68k libc-1.1.32 */ +#undef FCONVERT_MISSING /* fconvert() and _round() bug fix for human68k libc-1.1.32 */ /* Define if you have the fmod function. */ @@ -153,4 +155,7 @@ #undef HAVE_MEMCPY +/* Define if you have the memmove function. */ +#undef HAVE_MEMMOVE + /* Define if you have the memset function. */ #undef HAVE_MEMSET @@ -206,4 +211,7 @@ /* Define if you have the header file. */ #undef HAVE_UNISTD_H + +/* Define if you have the hmem library (-lhmem). */ +#undef HAVE_LIBHMEM /* Define if you have the m library (-lm). 
*/ diff -u2rN -x awktab.c gawk-3.0.6/configure gawk-3.0.6+mb1.15/configure --- gawk-3.0.6/configure Mon Jun 12 21:56:04 2000 +++ gawk-3.0.6+mb1.15/configure Wed May 16 01:28:18 2001 @@ -17,4 +17,12 @@ --enable-non-decimal-data Enable Octal and Hex constants as valid input data" ac_help="$ac_help + --enable-ASCII set default multi-byte code to ASCII" +ac_help="$ac_help + --enable-EUC set default multi-byte code to EUC (default)" +ac_help="$ac_help + --enable-SJIS set defailt multi-byte code to SJIS" +ac_help="$ac_help + --enable-UTF8 set default multi-byte code to UTF8" +ac_help="$ac_help --disable-largefile omit support for large files" @@ -550,4 +558,36 @@ +# Check whether --enable-ASCII or --disable-ASCII was given. +if test "${enable_ASCII+set}" = set; then + enableval="$enable_ASCII" + test "$enableval" = "yes" && mbctype="ascii" +fi + +# Check whether --enable-EUC or --disable-EUC was given. +if test "${enable_EUC+set}" = set; then + enableval="$enable_EUC" + test "$enableval" = "yes" && mbctype="euc" +fi + +# Check whether --enable-SJIS or --disable-SJIS was given. +if test "${enable_SJIS+set}" = set; then + enableval="$enable_SJIS" + test "$enableval" = "yes" && mbctype="sjis" +fi + +# Check whether --enable-UTF8 or --disable-UTF8 was given. +if test "${enable_UTF8+set}" = set; then + enableval="$enable_UTF8" + test "$enableval" = "yes" && mbctype="utf8" +fi + +case "$mbctype" in +ascii) MBCTYPE_DEF="-DASCII" ;; +sjis) MBCTYPE_DEF="-DSJIS" ;; +utf8) MBCTYPE_DEF="-DUTF8" ;; +*) MBCTYPE_DEF="-DEUC" ;; +esac + + for ac_prog in 'bison -y' byacc do @@ -555,5 +595,5 @@ set dummy $ac_prog; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:558: checking for $ac_word" >&5 +echo "configure:598: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_YACC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -586,5 +626,5 @@ echo $ac_n "checking whether ln -s works""... 
$ac_c" 1>&6 -echo "configure:589: checking whether ln -s works" >&5 +echo "configure:629: checking whether ln -s works" >&5 if eval "test \"`echo '$''{'ac_cv_prog_LN_S'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -609,5 +649,5 @@ set dummy gcc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:612: checking for $ac_word" >&5 +echo "configure:652: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -639,5 +679,5 @@ set dummy cc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:642: checking for $ac_word" >&5 +echo "configure:682: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -690,5 +730,5 @@ set dummy cl; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:693: checking for $ac_word" >&5 +echo "configure:733: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -722,5 +762,5 @@ echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 -echo "configure:725: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 +echo "configure:765: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 ac_ext=c @@ -733,10 +773,10 @@ cat > conftest.$ac_ext << EOF -#line 736 "configure" +#line 776 "configure" #include "confdefs.h" main(){return(0);} EOF -if { (eval echo configure:741: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:781: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then ac_cv_prog_cc_works=yes # If we can't run a trivial program, we are probably using a cross compiler. @@ -764,10 +804,10 @@ fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... 
$ac_c" 1>&6 -echo "configure:767: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 +echo "configure:807: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 cross_compiling=$ac_cv_prog_cc_cross echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 -echo "configure:772: checking whether we are using GNU C" >&5 +echo "configure:812: checking whether we are using GNU C" >&5 if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -778,5 +818,5 @@ #endif EOF -if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:781: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then +if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:821: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then ac_cv_prog_gcc=yes else @@ -797,5 +837,5 @@ CFLAGS= echo $ac_n "checking whether ${CC-cc} accepts -g""... $ac_c" 1>&6 -echo "configure:800: checking whether ${CC-cc} accepts -g" >&5 +echo "configure:840: checking whether ${CC-cc} accepts -g" >&5 if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -829,5 +869,5 @@ echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 -echo "configure:832: checking how to run the C preprocessor" >&5 +echo "configure:872: checking how to run the C preprocessor" >&5 # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then @@ -844,5 +884,5 @@ # not just through cpp. 
cat > conftest.$ac_ext < @@ -850,5 +890,5 @@ EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:853: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:893: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then @@ -861,5 +901,5 @@ CPP="${CC-cc} -E -traditional-cpp" cat > conftest.$ac_ext < @@ -867,5 +907,5 @@ EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:870: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:910: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then @@ -878,5 +918,5 @@ CPP="${CC-cc} -nologo -E" cat > conftest.$ac_ext < @@ -884,5 +924,5 @@ EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:887: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:927: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then @@ -943,5 +983,5 @@ # ./install, which can be erroneously created by make from ./install.sh. echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 -echo "configure:946: checking for a BSD compatible install" >&5 +echo "configure:986: checking for a BSD compatible install" >&5 if test -z "$INSTALL"; then if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then @@ -997,5 +1037,5 @@ echo $ac_n "checking whether ${MAKE-make} sets \${MAKE}""... $ac_c" 1>&6 -echo "configure:1000: checking whether ${MAKE-make} sets \${MAKE}" >&5 +echo "configure:1040: checking whether ${MAKE-make} sets \${MAKE}" >&5 set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_prog_make_${ac_make}_set'+set}'`\" = set"; then @@ -1052,7 +1092,7 @@ echo $ac_n "checking for AIX""... 
$ac_c" 1>&6 -echo "configure:1055: checking for AIX" >&5 +echo "configure:1095: checking for AIX" >&5 cat > conftest.$ac_ext <&6 -echo "configure:1079: checking for POSIXized ISC" >&5 +echo "configure:1119: checking for POSIXized ISC" >&5 if test -d /etc/conf/kconfig.d && grep _POSIX_VERSION /usr/include/sys/unistd.h >/dev/null 2>&1 @@ -1098,15 +1138,15 @@ ac_safe=`echo "minix/config.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for minix/config.h""... $ac_c" 1>&6 -echo "configure:1101: checking for minix/config.h" >&5 +echo "configure:1141: checking for minix/config.h" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1111: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1151: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then @@ -1154,5 +1194,5 @@ echo $ac_n "checking for special C compiler options needed for large files""... $ac_c" 1>&6 -echo "configure:1157: checking for special C compiler options needed for large files" >&5 +echo "configure:1197: checking for special C compiler options needed for large files" >&5 if eval "test \"`echo '$''{'gawk_cv_sys_largefile_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1163,5 +1203,5 @@ # so use the C compiler's -n32 option if that helps. 
cat > conftest.$ac_ext < @@ -1172,5 +1212,5 @@ ; return 0; } EOF -if { (eval echo configure:1175: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1215: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then : else @@ -1181,5 +1221,5 @@ CC="$CC -n32" cat > conftest.$ac_ext < @@ -1190,5 +1230,5 @@ ; return 0; } EOF -if { (eval echo configure:1193: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1233: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* gawk_cv_sys_largefile_CC=' -n32' @@ -1210,5 +1250,5 @@ echo $ac_n "checking for _FILE_OFFSET_BITS value needed for large files""... $ac_c" 1>&6 -echo "configure:1213: checking for _FILE_OFFSET_BITS value needed for large files" >&5 +echo "configure:1253: checking for _FILE_OFFSET_BITS value needed for large files" >&5 if eval "test \"`echo '$''{'gawk_cv_sys_file_offset_bits'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1216,5 +1256,5 @@ gawk_cv_sys_file_offset_bits=no cat > conftest.$ac_ext < @@ -1227,5 +1267,5 @@ ; return 0; } EOF -if { (eval echo configure:1230: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1270: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then : else @@ -1234,5 +1274,5 @@ rm -rf conftest* cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1289: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* gawk_cv_sys_file_offset_bits=64 @@ -1266,5 +1306,5 @@ fi echo $ac_n "checking for _LARGEFILE_SOURCE value needed for large files""... 
$ac_c" 1>&6 -echo "configure:1269: checking for _LARGEFILE_SOURCE value needed for large files" >&5 +echo "configure:1309: checking for _LARGEFILE_SOURCE value needed for large files" >&5 if eval "test \"`echo '$''{'gawk_cv_sys_largefile_source'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1272,5 +1312,5 @@ gawk_cv_sys_largefile_source=no cat > conftest.$ac_ext < @@ -1283,5 +1323,5 @@ ; return 0; } EOF -if { (eval echo configure:1286: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1326: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then : else @@ -1290,5 +1330,5 @@ rm -rf conftest* cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1345: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* gawk_cv_sys_largefile_source=1 @@ -1322,5 +1362,5 @@ fi echo $ac_n "checking for _LARGE_FILES value needed for large files""... $ac_c" 1>&6 -echo "configure:1325: checking for _LARGE_FILES value needed for large files" >&5 +echo "configure:1365: checking for _LARGE_FILES value needed for large files" >&5 if eval "test \"`echo '$''{'gawk_cv_sys_large_files'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1328,5 +1368,5 @@ gawk_cv_sys_large_files=no cat > conftest.$ac_ext < @@ -1339,5 +1379,5 @@ ; return 0; } EOF -if { (eval echo configure:1342: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1382: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then : else @@ -1346,5 +1386,5 @@ rm -rf conftest* cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1401: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* gawk_cv_sys_large_files=1 @@ -1378,5 +1418,5 @@ fi echo $ac_n "checking for _XOPEN_SOURCE value needed for large files""... 
$ac_c" 1>&6 -echo "configure:1381: checking for _XOPEN_SOURCE value needed for large files" >&5 +echo "configure:1421: checking for _XOPEN_SOURCE value needed for large files" >&5 if eval "test \"`echo '$''{'gawk_cv_sys_xopen_source'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1384,5 +1424,5 @@ gawk_cv_sys_xopen_source=no cat > conftest.$ac_ext < @@ -1395,5 +1435,5 @@ ; return 0; } EOF -if { (eval echo configure:1398: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1438: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then : else @@ -1402,5 +1442,5 @@ rm -rf conftest* cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1457: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* gawk_cv_sys_xopen_source=500 @@ -1437,5 +1477,5 @@ echo $ac_n "checking for AIX compilation hacks""... $ac_c" 1>&6 -echo "configure:1440: checking for AIX compilation hacks" >&5 +echo "configure:1480: checking for AIX compilation hacks" >&5 if eval "test \"`echo '$''{'gawk_cv_aix_hack'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1460,10 +1500,10 @@ echo $ac_n "checking for ANSI C header files""... $ac_c" 1>&6 -echo "configure:1463: checking for ANSI C header files" >&5 +echo "configure:1503: checking for ANSI C header files" >&5 if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < @@ -1473,5 +1513,5 @@ EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1476: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1516: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then @@ -1490,5 +1530,5 @@ # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat > conftest.$ac_ext < @@ -1508,5 +1548,5 @@ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. 
cat > conftest.$ac_ext < @@ -1529,5 +1569,5 @@ else cat > conftest.$ac_ext < @@ -1540,5 +1580,5 @@ EOF -if { (eval echo configure:1543: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:1583: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then : @@ -1564,10 +1604,10 @@ echo $ac_n "checking for sys/wait.h that is POSIX.1 compatible""... $ac_c" 1>&6 -echo "configure:1567: checking for sys/wait.h that is POSIX.1 compatible" >&5 +echo "configure:1607: checking for sys/wait.h that is POSIX.1 compatible" >&5 if eval "test \"`echo '$''{'ac_cv_header_sys_wait_h'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < @@ -1585,5 +1625,5 @@ ; return 0; } EOF -if { (eval echo configure:1588: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1628: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_header_sys_wait_h=yes @@ -1609,15 +1649,15 @@ ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 -echo "configure:1612: checking for $ac_hdr" >&5 +echo "configure:1652: checking for $ac_hdr" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1622: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1662: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then @@ -1651,15 +1691,15 @@ ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` echo $ac_n "checking for $ac_hdr""... 
$ac_c" 1>&6 -echo "configure:1654: checking for $ac_hdr" >&5 +echo "configure:1694: checking for $ac_hdr" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1664: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1704: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then @@ -1692,15 +1732,15 @@ ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 -echo "configure:1695: checking for $ac_hdr" >&5 +echo "configure:1735: checking for $ac_hdr" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1705: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1745: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then @@ -1731,10 +1771,10 @@ echo $ac_n "checking for pid_t""... $ac_c" 1>&6 -echo "configure:1734: checking for pid_t" >&5 +echo "configure:1774: checking for pid_t" >&5 if eval "test \"`echo '$''{'ac_cv_type_pid_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < @@ -1764,10 +1804,10 @@ echo $ac_n "checking return type of signal handlers""... 
$ac_c" 1>&6 -echo "configure:1767: checking return type of signal handlers" >&5 +echo "configure:1807: checking return type of signal handlers" >&5 if eval "test \"`echo '$''{'ac_cv_type_signal'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < @@ -1786,5 +1826,5 @@ ; return 0; } EOF -if { (eval echo configure:1789: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1829: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_type_signal=void @@ -1805,10 +1845,10 @@ echo $ac_n "checking for size_t""... $ac_c" 1>&6 -echo "configure:1808: checking for size_t" >&5 +echo "configure:1848: checking for size_t" >&5 if eval "test \"`echo '$''{'ac_cv_type_size_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < @@ -1838,10 +1878,10 @@ echo $ac_n "checking for uid_t in sys/types.h""... $ac_c" 1>&6 -echo "configure:1841: checking for uid_t in sys/types.h" >&5 +echo "configure:1881: checking for uid_t in sys/types.h" >&5 if eval "test \"`echo '$''{'ac_cv_type_uid_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < @@ -1872,5 +1912,5 @@ echo $ac_n "checking type of array argument to getgroups""... 
$ac_c" 1>&6 -echo "configure:1875: checking type of array argument to getgroups" >&5 +echo "configure:1915: checking type of array argument to getgroups" >&5 if eval "test \"`echo '$''{'ac_cv_type_getgroups'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1880,5 +1920,5 @@ else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:1948: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_type_getgroups=gid_t @@ -1919,5 +1959,5 @@ if test $ac_cv_type_getgroups = cross; then cat > conftest.$ac_ext < @@ -1943,5 +1983,5 @@ cat > conftest.$ac_ext < @@ -1970,10 +2010,10 @@ # for constant arguments. Useless! echo $ac_n "checking for working alloca.h""... $ac_c" 1>&6 -echo "configure:1973: checking for working alloca.h" >&5 +echo "configure:2013: checking for working alloca.h" >&5 if eval "test \"`echo '$''{'ac_cv_header_alloca_h'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < @@ -1982,5 +2022,5 @@ ; return 0; } EOF -if { (eval echo configure:1985: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:2025: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* ac_cv_header_alloca_h=yes @@ -2003,10 +2043,10 @@ echo $ac_n "checking for alloca""... 
$ac_c" 1>&6 -echo "configure:2006: checking for alloca" >&5 +echo "configure:2046: checking for alloca" >&5 if eval "test \"`echo '$''{'ac_cv_func_alloca_works'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:2079: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* ac_cv_func_alloca_works=yes @@ -2068,10 +2108,10 @@ echo $ac_n "checking whether alloca needs Cray hooks""... $ac_c" 1>&6 -echo "configure:2071: checking whether alloca needs Cray hooks" >&5 +echo "configure:2111: checking whether alloca needs Cray hooks" >&5 if eval "test \"`echo '$''{'ac_cv_os_cray'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&6 -echo "configure:2101: checking for $ac_func" >&5 +echo "configure:2141: checking for $ac_func" >&5 if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:2169: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_func_$ac_func=yes" @@ -2153,5 +2193,5 @@ echo $ac_n "checking stack direction for C alloca""... 
$ac_c" 1>&6 -echo "configure:2156: checking stack direction for C alloca" >&5 +echo "configure:2196: checking stack direction for C alloca" >&5 if eval "test \"`echo '$''{'ac_cv_c_stack_direction'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -2161,5 +2201,5 @@ else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:2223: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_c_stack_direction=1 @@ -2207,10 +2247,10 @@ echo $ac_n "checking for vprintf""... $ac_c" 1>&6 -echo "configure:2210: checking for vprintf" >&5 +echo "configure:2250: checking for vprintf" >&5 if eval "test \"`echo '$''{'ac_cv_func_vprintf'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:2278: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_func_vprintf=yes" @@ -2259,10 +2299,10 @@ if test "$ac_cv_func_vprintf" != yes; then echo $ac_n "checking for _doprnt""... $ac_c" 1>&6 -echo "configure:2262: checking for _doprnt" >&5 +echo "configure:2302: checking for _doprnt" >&5 if eval "test \"`echo '$''{'ac_cv_func__doprnt'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:2330: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_func__doprnt=yes" @@ -2313,5 +2353,5 @@ echo $ac_n "checking for fmod in -lm""... 
$ac_c" 1>&6 -echo "configure:2316: checking for fmod in -lm" >&5 +echo "configure:2356: checking for fmod in -lm" >&5 ac_lib_var=`echo m'_'fmod | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then @@ -2321,5 +2361,5 @@ LIBS="-lm $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:2375: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" @@ -2359,14 +2399,14 @@ fi -for ac_func in madvise memset memcpy memcmp fmod setlocale strchr strerror \ +for ac_func in madvise memset memmove memcpy memcmp fmod setlocale strchr strerror \ strftime strncasecmp strtod system tzset do echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:2366: checking for $ac_func" >&5 +echo "configure:2406: checking for $ac_func" >&5 if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:2434: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_func_$ac_func=yes" @@ -2420,15 +2460,15 @@ ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` echo $ac_n "checking for $ac_hdr""... 
$ac_c" 1>&6 -echo "configure:2423: checking for $ac_hdr" >&5 +echo "configure:2463: checking for $ac_hdr" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:2433: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:2473: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then @@ -2459,10 +2499,10 @@ do echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:2462: checking for $ac_func" >&5 +echo "configure:2502: checking for $ac_func" >&5 if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:2530: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_func_$ac_func=yes" @@ -2512,5 +2552,5 @@ echo $ac_n "checking for working mmap""... $ac_c" 1>&6 -echo "configure:2515: checking for working mmap" >&5 +echo "configure:2555: checking for working mmap" >&5 if eval "test \"`echo '$''{'ac_cv_func_mmap_fixed_mapped'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -2520,5 +2560,5 @@ else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:2703: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_func_mmap_fixed_mapped=yes @@ -2707,5 +2747,5 @@ # fall into the default echo $ac_n "checking whether getpgrp takes no argument""... 
$ac_c" 1>&6 -echo "configure:2710: checking whether getpgrp takes no argument" >&5 +echo "configure:2750: checking whether getpgrp takes no argument" >&5 if eval "test \"`echo '$''{'ac_cv_func_getpgrp_void'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -2715,5 +2755,5 @@ else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:2813: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_func_getpgrp_void=yes @@ -2795,5 +2835,5 @@ ;; *) echo $ac_n "checking whether getpgrp takes no argument""... $ac_c" 1>&6 -echo "configure:2798: checking whether getpgrp takes no argument" >&5 +echo "configure:2838: checking whether getpgrp takes no argument" >&5 if eval "test \"`echo '$''{'ac_cv_func_getpgrp_void'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -2803,5 +2843,5 @@ else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:2901: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_func_getpgrp_void=yes @@ -2885,5 +2925,5 @@ else echo $ac_n "checking whether getpgrp takes no argument""... 
$ac_c" 1>&6 -echo "configure:2888: checking whether getpgrp takes no argument" >&5 +echo "configure:2928: checking whether getpgrp takes no argument" >&5 if eval "test \"`echo '$''{'ac_cv_func_getpgrp_void'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -2893,5 +2933,5 @@ else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:2991: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_func_getpgrp_void=yes @@ -2974,10 +3014,10 @@ echo $ac_n "checking for st_blksize in struct stat""... $ac_c" 1>&6 -echo "configure:2977: checking for st_blksize in struct stat" >&5 +echo "configure:3017: checking for st_blksize in struct stat" >&5 if eval "test \"`echo '$''{'ac_cv_struct_st_blksize'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < @@ -2987,5 +3027,5 @@ ; return 0; } EOF -if { (eval echo configure:2990: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:3030: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_struct_st_blksize=yes @@ -3008,10 +3048,10 @@ echo $ac_n "checking whether time.h and sys/time.h may both be included""... $ac_c" 1>&6 -echo "configure:3011: checking whether time.h and sys/time.h may both be included" >&5 +echo "configure:3051: checking whether time.h and sys/time.h may both be included" >&5 if eval "test \"`echo '$''{'ac_cv_header_time'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < @@ -3022,5 +3062,5 @@ ; return 0; } EOF -if { (eval echo configure:3025: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:3065: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_header_time=yes @@ -3043,10 +3083,10 @@ echo $ac_n "checking whether struct tm is in sys/time.h or time.h""... 
$ac_c" 1>&6 -echo "configure:3046: checking whether struct tm is in sys/time.h or time.h" >&5 +echo "configure:3086: checking whether struct tm is in sys/time.h or time.h" >&5 if eval "test \"`echo '$''{'ac_cv_struct_tm'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < @@ -3056,5 +3096,5 @@ ; return 0; } EOF -if { (eval echo configure:3059: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:3099: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_struct_tm=time.h @@ -3077,10 +3117,10 @@ echo $ac_n "checking for tm_zone in struct tm""... $ac_c" 1>&6 -echo "configure:3080: checking for tm_zone in struct tm" >&5 +echo "configure:3120: checking for tm_zone in struct tm" >&5 if eval "test \"`echo '$''{'ac_cv_struct_tm_zone'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < @@ -3090,5 +3130,5 @@ ; return 0; } EOF -if { (eval echo configure:3093: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:3133: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_struct_tm_zone=yes @@ -3110,10 +3150,10 @@ else echo $ac_n "checking for tzname""... $ac_c" 1>&6 -echo "configure:3113: checking for tzname" >&5 +echo "configure:3153: checking for tzname" >&5 if eval "test \"`echo '$''{'ac_cv_var_tzname'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < @@ -3125,5 +3165,5 @@ ; return 0; } EOF -if { (eval echo configure:3128: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then +if { (eval echo configure:3168: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* ac_cv_var_tzname=yes @@ -3148,5 +3188,5 @@ echo $ac_n "checking whether char is unsigned""... 
$ac_c" 1>&6 -echo "configure:3151: checking whether char is unsigned" >&5 +echo "configure:3191: checking whether char is unsigned" >&5 if eval "test \"`echo '$''{'ac_cv_c_char_unsigned'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -3155,5 +3195,5 @@ # GCC predefines this symbol on systems where it applies. cat > conftest.$ac_ext < conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +if { (eval echo configure:3230: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then ac_cv_c_char_unsigned=yes @@ -3211,10 +3251,10 @@ echo $ac_n "checking for working const""... $ac_c" 1>&6 -echo "configure:3214: checking for working const" >&5 +echo "configure:3254: checking for working const" >&5 if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:3308: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_c_const=yes @@ -3288,10 +3328,10 @@ echo $ac_n "checking for ANSI stringizing capability""... 
$ac_c" 1>&6 -echo "configure:3291: checking for ANSI stringizing capability" >&5 +echo "configure:3331: checking for ANSI stringizing capability" >&5 if eval "test \"`echo '$''{'gawk_cv_c_stringize'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&6 +echo "configure:3372: checking for hmemset in -lhmem" >&5 +ac_lib_var=`echo hmem'_'hmemset | sed 'y%./+-%__p_%'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-lhmem $LIBS" +cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then + rm -rf conftest* + eval "ac_cv_lib_$ac_lib_var=yes" +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 + rm -rf conftest* + eval "ac_cv_lib_$ac_lib_var=no" +fi +rm -f conftest* +LIBS="$ac_save_LIBS" + +fi +if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then + echo "$ac_t""yes" 1>&6 + ac_tr_lib=HAVE_LIB`echo hmem | sed -e 's/[^a-zA-Z0-9_]/_/g' \ + -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` + cat >> confdefs.h <&6 +fi + + if test "$cross_compiling" = yes; then + { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; } +else + cat > conftest.$ac_ext < +main () +{ + char buf[256]; + sprintf (buf, "%g", 1e+300); + exit (strcmp (buf, "1e+300") ? 0 : 1); +} + +EOF +if { (eval echo configure:3434: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +then + cat >> confdefs.h <<\EOF +#define DTOS18_MISSING 1 +EOF + +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 +fi +rm -fr conftest* +fi + + if test "$cross_compiling" = yes; then + { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; } +else + cat > conftest.$ac_ext < +#include +main () +{ + char buf[256]; + sprintf (buf, "%f", log(exp(1.0))); + exit (strcmp (buf, "1.000000") ? 
0 : 1); +} + +EOF +if { (eval echo configure:3464: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null +then + cat >> confdefs.h <<\EOF +#define FCONVERT_MISSING 1 +EOF + +else + echo "configure: failed program was:" >&5 + cat conftest.$ac_ext >&5 +fi +rm -fr conftest* +fi + + ;; +*) + exec_suffix= + IGAWKSH=eg/prog/igawk.sh + ;; +esac + + + trap '' 1 2 15 cat > confcache <<\EOF @@ -3456,4 +3616,5 @@ s%@infodir@%$infodir%g s%@mandir@%$mandir%g +s%@MBCTYPE_DEF@%$MBCTYPE_DEF%g s%@YACC@%$YACC%g s%@LN_S@%$LN_S%g @@ -3465,4 +3626,6 @@ s%@SET_MAKE@%$SET_MAKE%g s%@ALLOCA@%$ALLOCA%g +s%@exec_suffix@%$exec_suffix%g +s%@IGAWKSH@%$IGAWKSH%g CEOF diff -u2rN -x awktab.c gawk-3.0.6/configure.in gawk-3.0.6+mb1.15/configure.in --- gawk-3.0.6/configure.in Mon Jun 12 21:55:50 2000 +++ gawk-3.0.6+mb1.15/configure.in Wed May 16 01:27:48 2001 @@ -22,4 +22,7 @@ dnl +dnl Multi-byte extension added May, 1996 by okabe katsuyuki +dnl Last change: May 27, 1997 by okabe katsuyuki + dnl Process this file with autoconf to produce a configure script. 
@@ -36,4 +39,25 @@ AC_ARG_ENABLE(non-decimal-data, [ --enable-non-decimal-data Enable Octal and Hex constants as valid input data], AC_DEFINE(NONDECDATA)) +dnl check default multi-byte code +AC_ARG_ENABLE(ASCII, + [ --enable-ASCII set default multi-byte code to ASCII], + [test "$enableval" = "yes" && mbctype="ascii"]) +AC_ARG_ENABLE(EUC, + [ --enable-EUC set default multi-byte code to EUC (default)], + [test "$enableval" = "yes" && mbctype="euc"]) +AC_ARG_ENABLE(SJIS, + [ --enable-SJIS set default multi-byte code to SJIS], + [test "$enableval" = "yes" && mbctype="sjis"]) +AC_ARG_ENABLE(UTF8, + [ --enable-UTF8 set default multi-byte code to UTF8], + [test "$enableval" = "yes" && mbctype="utf8"]) +case "$mbctype" in +ascii) MBCTYPE_DEF="-DASCII" ;; +sjis) MBCTYPE_DEF="-DSJIS" ;; +utf8) MBCTYPE_DEF="-DUTF8" ;; +*) MBCTYPE_DEF="-DEUC" ;; +esac +AC_SUBST(MBCTYPE_DEF) + dnl checks for programs AC_PROG_YACC @@ -115,5 +139,5 @@ AC_CHECK_LIB(m, fmod) -AC_CHECK_FUNCS(madvise memset memcpy memcmp fmod setlocale strchr strerror \ +AC_CHECK_FUNCS(madvise memset memmove memcpy memcmp fmod setlocale strchr strerror \ strftime strncasecmp strtod system tzset) @@ -162,4 +186,47 @@ AC_C_CONST GAWK_AC_C_STRINGIZE + +dnl checks for X680x0 +case "$KSH_VERSION" in +*X6*) + exec_suffix=.x + CFLAGS="$CFLAGS -cc1-stack=262144" + IGAWKSH=../human/awklib/igawk.sh + AC_CHECK_LIB(hmem, hmemset) + AC_TRY_RUN( +changequote(<<, >>)dnl +<< +#include +main () +{ + char buf[256]; + sprintf (buf, "%g", 1e+300); + exit (strcmp (buf, "1e+300") ? 0 : 1); +} +>>, +changequote([, ])dnl +AC_DEFINE(DTOS18_MISSING)) + AC_TRY_RUN( +changequote(<<, >>)dnl +<< +#include +#include +main () +{ + char buf[256]; + sprintf (buf, "%f", log(exp(1.0))); + exit (strcmp (buf, "1.000000") ?
0 : 1); +} +>>, +changequote([, ])dnl +AC_DEFINE(FCONVERT_MISSING)) + ;; +*) + exec_suffix= + IGAWKSH=eg/prog/igawk.sh + ;; +esac +AC_SUBST(exec_suffix) +AC_SUBST(IGAWKSH) AC_OUTPUT(Makefile doc/Makefile test/Makefile awklib/Makefile, [date > stamp-h]) diff -u2rN -x awktab.c gawk-3.0.6/custom.h gawk-3.0.6+mb1.15/custom.h --- gawk-3.0.6/custom.h Wed Jun 7 17:45:28 2000 +++ gawk-3.0.6+mb1.15/custom.h Sat May 5 15:56:19 2001 @@ -44,4 +44,9 @@ #endif +/* for HUMAN68K, from Katsuyuki Okabe, hgc02147@niftyserve.or.jp */ +#ifdef __human68k__ +#define GETPGRP_VOID 1 +#endif + /* For QNX, based on submission from Michael Hunter, mphunter@qnx.com */ #ifdef __QNX__ diff -u2rN -x awktab.c gawk-3.0.6/dfa.c gawk-3.0.6+mb1.15/dfa.c --- gawk-3.0.6/dfa.c Fri May 2 10:36:18 1997 +++ gawk-3.0.6+mb1.15/dfa.c Sat May 5 18:08:43 2001 @@ -18,4 +18,6 @@ /* Written June, 1988 by Mike Haertel Modified July, 1988 by Arthur David Olson to assist BMG speedups */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: January 27, 2000 by okabe katsuyuki */ #ifdef HAVE_CONFIG_H @@ -39,4 +41,8 @@ #undef index #define index strchr +#undef bcopy +#define bcopy(s, d, n) memcpy(d, s, n) +#undef bzero +#define bzero(d, n) memset(d, 0, n) #else #include @@ -80,6 +86,7 @@ #include "regex.h" #include "dfa.h" +#include "mbc.h" -#ifdef __STDC__ +#if defined(__STDC__) || defined(MSDOS) typedef void *ptr_t; #else @@ -194,4 +201,7 @@ fprintf(stderr, "END"); else if (t < NOTCHAR) + if (t & 0x80) + fprintf(stderr, "0x%02x", (unsigned char)t); + else fprintf(stderr, "%c", t); else @@ -292,4 +302,16 @@ } +static int +isemptyset(s) + charclass s; +{ + int i; + + for (i = 0; i < CHARCLASS_INTS; i++) + if (s[i]) + return 0; + return 1; +} + /* A pointer to the current dfa is kept here during parsing. */ static struct dfa *dfa; @@ -342,4 +364,66 @@ static int minrep, maxrep; /* Repeat counts for {m,n}. 
*/ +static charclass cs_cset[8]; +static unsigned char cs_ready[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + +static enum { + MBEXTTOK_NONE = -1, + MBEXTTOK_NOTCHAR = 256, + MBEXTTOK_ORMBC = MBEXTTOK_NOTCHAR, + MBEXTTOK_ORMBC_NL, + MBEXTTOK_CLASS, + MBEXTTOK_INVCLASS, +} mbexttok = MBEXTTOK_NONE; + +static charclass mbcset_set; +static charclass mbcset_all; +static charclass mbcset[128]; /* 128*256/8 = 4 Kbytes */ + +/* Return, as a CSET token, one of the (presumably) frequently used character sets; each set is built on first use and cached in cs_cset[n]. + n = 0 ... the set of all single-byte characters. + 1 ... the set of all first bytes of two-byte characters. + 2 ... the set of all second bytes of two-byte characters. + +4 ... do not remove '\n' (or '\0') from the set. */ +static token +setcodeset(n) + int n; +{ + token c; + + if (!cs_ready[n]) { + zeroset(cs_cset[n]); + switch (n) { + case 0: + case 4: + /* All single-byte characters: the complement of the bytes for which ismbchar() is true. */ + for (c = 0; c < NOTCHAR; c++) + if (ismbchar(c)) + setbit(c, cs_cset[n]); + notset(cs_cset[n]); + break; + case 1: + case 5: + /* All first bytes of two-byte characters. */ + for (c = 0; c < NOTCHAR; c++) + if (ismbchar(c)) + setbit(c, cs_cset[n]); + break; + case 2: + case 6: + /* All second bytes of two-byte characters: every byte value. */ + notset(cs_cset[n]); + break; + } + if (!(n & 4)) { + if (syntax_bits & RE_DOT_NOT_NULL || n != 0) + clrbit('\0', cs_cset[n]); + if (!(syntax_bits & RE_DOT_NEWLINE) || n != 0) + clrbit('\n', cs_cset[n]); + } + cs_ready[n] = 1; + } + return CSET + charclass_index(cs_cset[n]); +} + /* Note that characters become unsigned here. */ #define FETCH(c, eoferr) \ @@ -426,4 +510,5 @@ it means that just about every case begins with "if (backslash) ...".
*/ + mbexttok = MBEXTTOK_NONE; for (i = 0; i < 2; ++i) { @@ -617,14 +702,19 @@ if (backslash) goto normal_char; + if (current_mbctype != MBCTYPE_ASCII) + mbexttok = MBEXTTOK_ORMBC; + laststart = 0; + return lasttok = setcodeset(0); + + case 'w': + if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) + goto normal_char; zeroset(ccl); - notset(ccl); - if (!(syntax_bits & RE_DOT_NEWLINE)) - clrbit('\n', ccl); - if (syntax_bits & RE_DOT_NOT_NULL) - clrbit('\0', ccl); + for (c2 = 0; c2 < NOTCHAR; ++c2) + if (ISALNUM(c2) || c2 == '_') /* '_' is a word constituent, so \w must match it */ + setbit(c2, ccl); laststart = 0; return lasttok = CSET + charclass_index(ccl); - case 'w': case 'W': if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) @@ -632,9 +722,8 @@ zeroset(ccl); for (c2 = 0; c2 < NOTCHAR; ++c2) - if (ISALNUM(c2)) + if (!ISALNUM(c2) && !ismbchar(c2)) setbit(c2, ccl); - setbit('_', ccl); + clrbit('_', ccl); /* '_' is a word constituent, so \W must not match it */ - if (c == 'W') - notset(ccl); + mbexttok = MBEXTTOK_ORMBC_NL; laststart = 0; return lasttok = CSET + charclass_index(ccl); @@ -654,4 +743,6 @@ do { + unsigned char ch = 0, c2h = 0; + /* Nobody ever said this had to be fast.
:-) Note that if we're looking at some other [:...:] @@ -679,4 +770,8 @@ if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) FETCH(c, "Unbalanced ["); + if (ismbchar(c)) { + ch = (unsigned char)c; + FETCH(c, "Multi-byte char incomplete"); + } FETCH(c1, "Unbalanced ["); if (c1 == '-') @@ -689,4 +784,5 @@ --lexptr; ++lexleft; + c2h = ch; c2 = c; } @@ -696,19 +792,83 @@ && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) FETCH(c2, "Unbalanced ["); + if (ismbchar(c2)) { + c2h = (unsigned char)c2; + FETCH(c2, "Multi-byte char incomplete"); + } FETCH(c1, "Unbalanced ["); } } - else + else { + c2h = ch; c2 = c; - while (c <= c2) - { - setbit(c, ccl); - if (case_fold) - if (ISUPPER(c)) - setbit(tolower(c), ccl); - else if (ISLOWER(c)) - setbit(toupper(c), ccl); - ++c; + } + if (ch < c2h || (ch == c2h && c <= c2)) { + if (ch == 0) { + ch = (unsigned char)c2; + if (c2h > 0) + ch = NOTCHAR - 1; + for (; c <= (token)ch; c++) { /* compare as token: (unsigned char)c <= ch is always true when ch == 0xff, so the loop would never end */ + setbit(c, ccl); + if (case_fold) { + if (ISUPPER(c)) + setbit(tolower(c), ccl); + else if (ISLOWER(c)) + setbit(toupper(c), ccl); + } + } + ch = 0x80; + c = 0x00; } + if (ch <= c2h) { + if (mbexttok < 0) { + mbexttok = MBEXTTOK_CLASS; + zeroset(mbcset_set); + zeroset(mbcset_all); + } + if (ch < c2h && c != 0x00) { /* leading partial first byte: second bytes from c up to NOTCHAR - 1 */ + int t; + + if (ismbchar(ch) + && ((t = tstbit(ch, mbcset_set)) + || !tstbit(ch, mbcset_all))) { + if (!t) { + setbit(ch, mbcset_set); + zeroset(mbcset[ch & 0177]); + } + for (; c < NOTCHAR; c++) + setbit(c, mbcset[ch & 0177]); + } + ch++; + c = 0x00; + } + if (ch < c2h || (ch == c2h && c == 0x00 && c2 == 0xff)) { + if (c == 0x00 && c2 == 0xff) + c2h++; + for (; ch < c2h; ch++) + if (ismbchar(ch)) { + clrbit(ch, mbcset_set); + setbit(ch, mbcset_all); + } + if (c == 0x00 && c2 == 0xff) + c2h--; + c = 0x00; + } + if (ch <= c2h) { + int t; + + /* c <= c2 is always expected to hold here.
*/ + if (ismbchar(ch) + && ((t = tstbit(ch, mbcset_set)) + || !tstbit(ch, mbcset_all))) { + if (!t) { + setbit(ch, mbcset_set); + zeroset(mbcset[ch & 0177]); + } + for (; c <= c2; c++) + setbit(c, mbcset[ch & 0177]); + } + } + } + } skip: ; @@ -720,5 +880,23 @@ if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) clrbit('\n', ccl); + if (mbexttok == MBEXTTOK_CLASS) { + mbexttok = MBEXTTOK_INVCLASS; + if (!isemptyset(mbcset_set)) { + for (c = 0x80; c <= 0xff; c++) + if (tstbit(c, mbcset_set)) + notset(mbcset[c & 0177]); + } + notset(mbcset_all); + for (c = 0; c <= 0xff; c++) + if (!ismbchar(c)) + clrbit(c, mbcset_all); + } + else + mbexttok = MBEXTTOK_ORMBC_NL; } + if (current_mbctype != MBCTYPE_ASCII) + for (c = 0x80; c <= 0xff; c++) + if (ismbchar(c)) + clrbit(c, ccl); laststart = 0; return lasttok = CSET + charclass_index(ccl); @@ -727,4 +905,8 @@ normal_char: laststart = 0; + if (ismbchar(c)) { + FETCH(mbexttok, "Multi-byte char incomplete"); + return lasttok = c; + } if (case_fold && ISALPHA(c)) { @@ -737,5 +919,5 @@ return lasttok = CSET + charclass_index(ccl); } - return c; + return lasttok = c; } } @@ -821,4 +1003,66 @@ atom() { + if (mbexttok >= 0) { + if (mbexttok < MBEXTTOK_NOTCHAR) { + addtok(tok); + addtok(mbexttok); + addtok(CAT); + } + else + switch (mbexttok) { + case MBEXTTOK_ORMBC: + case MBEXTTOK_ORMBC_NL: + addtok(tok); + if (mbexttok == MBEXTTOK_ORMBC) { + addtok(setcodeset(1)); + addtok(setcodeset(2)); + } + else { + addtok(setcodeset(5)); + addtok(setcodeset(6)); + } + addtok(CAT); + addtok(OR); + break; + case MBEXTTOK_CLASS: + case MBEXTTOK_INVCLASS: + { + token c; + + addtok(tok); + if (!isemptyset(mbcset_set)) + for (c = 0x80; c <= 0xff; c++) + if (tstbit(c, mbcset_set)) { + /* Make sure all bits in mbcset_all valid. 
*/ + clrbit(c, mbcset_all); + addtok(c); + if (mbexttok == MBEXTTOK_CLASS) { + clrbit('\n', mbcset[c & 0177]); + clrbit('\0', mbcset[c & 0177]); + } + else { + setbit('\n', mbcset[c & 0177]); + setbit('\0', mbcset[c & 0177]); + } + addtok(CSET + charclass_index(mbcset[c & 0177])); + addtok(CAT); + addtok(OR); + } + if (!isemptyset(mbcset_all)) { + addtok(CSET + charclass_index(mbcset_all)); + if (mbexttok == MBEXTTOK_CLASS) + addtok(setcodeset(2)); + else + addtok(setcodeset(6)); + addtok(CAT); + addtok(OR); + } + } + break; + default: + break; + } + tok = lex(); + } else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD @@ -1994,8 +2238,8 @@ if (case_fold) /* dummy folding in service of dfamust() */ { - char *lcopy; + char *lcopy, *p; int i; - lcopy = malloc(len); + p = lcopy = malloc(len + 7); if (!lcopy) dfaerror("out of memory"); @@ -2003,17 +2247,38 @@ /* This is a kludge. */ case_fold = 0; + if (current_mbctype != MBCTYPE_ASCII && searchflag) { + *p++ = '^'; + *p++ = '.'; + *p++ = '*'; + if (!(syntax_bits & RE_NO_BK_PARENS)) + *p++ = '\\'; + *p++ = '('; + } for (i = 0; i < len; ++i) - if (ISUPPER(s[i])) - lcopy[i] = tolower(s[i]); + if (ISUPPER((unsigned char)s[i])) + *p++ = tolower((unsigned char)s[i]); else - lcopy[i] = s[i]; + if (ismbchar(*p++ = s[i]) && ++i < len) + *p++ = s[i]; + if (current_mbctype != MBCTYPE_ASCII && searchflag) { + if (!(syntax_bits & RE_NO_BK_PARENS)) + *p++ = '\\'; + *p++ = ')'; + } dfainit(d); - dfaparse(lcopy, len, d); - free(lcopy); + dfaparse(lcopy, p - lcopy, d); dfamust(d); d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0; case_fold = 1; - dfaparse(s, len, d); + if (current_mbctype != MBCTYPE_ASCII && searchflag) { + bcopy(s, lcopy + (syntax_bits & RE_NO_BK_PARENS ? 
4 : 5), len); + dfaparse(lcopy, p - lcopy, d); + free(lcopy); + } + else { + free(lcopy); + dfaparse(s, len, d); + } dfaanalyze(d, searchflag); } @@ -2021,5 +2286,24 @@ { dfainit(d); - dfaparse(s, len, d); + if (current_mbctype != MBCTYPE_ASCII && searchflag) { + char *lcopy, *p; + + p = lcopy = malloc(len + 7); + *p++ = '^'; + *p++ = '.'; + *p++ = '*'; + if (!(syntax_bits & RE_NO_BK_PARENS)) + *p++ = '\\'; + *p++ = '('; + bcopy(s, p, len); + p += len; + if (!(syntax_bits & RE_NO_BK_PARENS)) + *p++ = '\\'; + *p++ = ')'; + dfaparse(lcopy, p - lcopy, d); + free(lcopy); + } + else + dfaparse(s, len, d); dfamust(d); dfaanalyze(d, searchflag); diff -u2rN -x awktab.c gawk-3.0.6/dfa.h gawk-3.0.6+mb1.15/dfa.h --- gawk-3.0.6/dfa.h Thu Aug 17 00:19:31 1995 +++ gawk-3.0.6+mb1.15/dfa.h Sat May 5 15:58:55 2001 @@ -306,5 +306,5 @@ /* Entry points. */ -#ifdef __STDC__ +#if defined(__STDC__) || defined(MSDOS) /* dfasyntax() takes two arguments; the first sets the syntax bits described diff -u2rN -x awktab.c gawk-3.0.6/eval.c gawk-3.0.6+mb1.15/eval.c --- gawk-3.0.6/eval.c Sun Jul 16 22:54:12 2000 +++ gawk-3.0.6+mb1.15/eval.c Sat May 5 16:06:10 2001 @@ -23,4 +23,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: May. 5, 2001 by okabe katsuyuki */ #include "awk.h" @@ -44,5 +46,5 @@ NODE *_t; /* used as a temporary in macros */ #endif -#ifdef MSDOS +#if defined(MSDOS) && (_MSC_VER == 510) double _msc51bug; /* to get around a bug in MSC 5.1 */ #endif @@ -366,4 +368,13 @@ int volatile traverse = TRUE; /* True => loop thru tree (Node_rule_list) */ +#if defined(MSDOS) && (defined(_MSC_VER) || defined(__TURBOC__)) + extern void test_signal P((void)); + static int thin = 0; + + if ((thin++ & 0xff) == 0) { + test_signal(); + } +#endif + /* avoid false source indications */ source = NULL; @@ -1069,12 +1080,8 @@ l = (ldiff <= 0 ? 
len1 : len2); if (IGNORECASE) { - register unsigned char *cp1 = (unsigned char *) t1->stptr; - register unsigned char *cp2 = (unsigned char *) t2->stptr; - - for (ret = 0; l-- > 0 && ret == 0; cp1++, cp2++) - ret = casetable[*cp1] - casetable[*cp2]; + ret = mbstrncasecmp(t1->stptr, t2->stptr, l); + return (ret == 0 ? ldiff : ret); } else - ret = memcmp(t1->stptr, t2->stptr, l); - return (ret == 0 ? ldiff : ret); + return mbmemcmp(t1->stptr, len1, t2->stptr, len2); } @@ -1730,15 +1737,15 @@ if (*p++ != '%') return 0; - while (*p && strchr(" +-#", *p) != NULL) /* flags */ + while (*p && strchr(" +-#", (unsigned char) *p) != NULL) /* flags */ p++; - while (*p && isdigit(*p)) /* width - %*.*g is NOT allowed */ + while (*p && isdigit((unsigned char) *p)) /* width - %*.*g is NOT allowed */ p++; - if (*p == '\0' || (*p != '.' && ! isdigit(*p))) + if (*p == '\0' || (*p != '.' && ! isdigit((unsigned char) *p))) return 0; if (*p == '.') p++; - while (*p && isdigit(*p)) /* precision */ + while (*p && isdigit((unsigned char) *p)) /* precision */ p++; - if (*p == '\0' || strchr("efgEG", *p) == NULL) + if (*p == '\0' || strchr("efgEG", (unsigned char) *p) == NULL) return 0; if (*++p != '\0') diff -u2rN -x awktab.c gawk-3.0.6/field.c gawk-3.0.6+mb1.15/field.c --- gawk-3.0.6/field.c Wed Jun 14 19:00:10 2000 +++ gawk-3.0.6+mb1.15/field.c Sat May 5 16:18:16 2001 @@ -23,4 +23,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: May 22, 1997 by okabe katsuyuki */ #include "awk.h" @@ -366,4 +368,8 @@ && nf < up_to) { if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */ + if (ismbchar(*scan) && (scan += mbclen(*scan) - 1) >= end) { + scan = end; + break; + } scan++; if (scan == end) { @@ -543,4 +549,5 @@ register long nf = parse_high_water; register char *end = scan + len; + long skip; if (up_to == HUGE) @@ -549,6 +556,9 @@ return nf; - for (; nf < up_to && scan < 
end; scan++) - (*set)(++nf, scan, 1L, n); + for (; nf < up_to && scan < end;) { + skip = mbclen(*scan); + (*set)(++nf, scan, skip, n); + scan += skip; + } *buf = scan; @@ -591,7 +601,7 @@ fschar = fs->stptr[0]; - onecase = (IGNORECASE && isalpha(fschar)); + onecase = (IGNORECASE && isalpha((unsigned char) fschar) && !ismbchar(fschar)); if (onecase) - fschar = casetable[(int) fschar]; + fschar = casetable[(unsigned char) fschar]; /* before doing anything save the char at *end */ @@ -603,9 +613,19 @@ field = scan; if (onecase) { - while (casetable[(int) *scan] != fschar) + while (casetable[(unsigned char) *scan] != fschar) { + if (ismbchar(*scan) && (scan += mbclen(*scan) - 1) >= end) { + scan = end; + break; + } scan++; + } } else { - while (*scan != fschar) + while (*scan != fschar) { + if (ismbchar(*scan) && (scan += mbclen(*scan) - 1) >= end) { + scan = end; + break; + } scan++; + } } (*set)(++nf, field, (long)(scan - field), n); diff -u2rN -x awktab.c gawk-3.0.6/gawkmisc.c gawk-3.0.6+mb1.15/gawkmisc.c --- gawk-3.0.6/gawkmisc.c Wed Jun 7 17:45:44 2000 +++ gawk-3.0.6+mb1.15/gawkmisc.c Sat May 5 16:20:02 2001 @@ -24,4 +24,6 @@ */ +/* Modified for Human68k April 8, 1996 by okabe katsuyuki */ + #include "awk.h" @@ -37,12 +39,16 @@ #include "atari/gawkmisc.atr" #else +#if defined(__human68k__) +#include "human/gawkmisc.x68" +#else #include "posix/gawkmisc.c" #endif #endif #endif +#endif /* xmalloc --- provide this so that other GNU library routines work */ -#if __STDC__ +#if __STDC__ || defined(MSDOS) typedef void *pointer; #else diff -u2rN -x awktab.c gawk-3.0.6/getopt.c gawk-3.0.6+mb1.15/getopt.c --- gawk-3.0.6/getopt.c Mon Jan 27 12:56:50 1997 +++ gawk-3.0.6+mb1.15/getopt.c Sat May 5 16:22:36 2001 @@ -38,8 +38,10 @@ /* This is a separate conditional since some stdc systems reject `defined (const)'. */ +#ifndef MSDOS #ifndef const #define const #endif #endif +#endif #include @@ -73,4 +75,9 @@ #endif /* GNU C library. 
*/ +#ifdef MSDOS +#include +#include +#endif + #ifdef VMS #include @@ -284,5 +291,5 @@ the new indices of the non-options in ARGV after they are moved. */ -#if defined (__STDC__) && __STDC__ +#if (defined (__STDC__) && __STDC__) || defined (MSDOS) static void exchange (char **); #endif @@ -346,5 +353,5 @@ /* Initialize the internal data when the first call is made. */ -#if defined (__STDC__) && __STDC__ +#if (defined (__STDC__) && __STDC__) || defined (MSDOS) static const char *_getopt_initialize (int, char *const *, const char *); #endif diff -u2rN -x awktab.c gawk-3.0.6/getopt.h gawk-3.0.6+mb1.15/getopt.h --- gawk-3.0.6/getopt.h Mon Jan 27 12:56:53 1997 +++ gawk-3.0.6+mb1.15/getopt.h Sat May 5 16:23:45 2001 @@ -81,5 +81,5 @@ struct option { -#if defined (__STDC__) && __STDC__ +#if (defined (__STDC__) && __STDC__) || defined(MSDOS) const char *name; #else @@ -99,5 +99,5 @@ #define optional_argument 2 -#if defined (__STDC__) && __STDC__ +#if (defined (__STDC__) && __STDC__) || defined(MSDOS) #ifdef __GNU_LIBRARY__ /* Many other libraries have conflicting prototypes for getopt, with diff -u2rN -x awktab.c gawk-3.0.6/getopt1.c gawk-3.0.6+mb1.15/getopt1.c --- gawk-3.0.6/getopt1.c Mon Jan 27 12:56:52 1997 +++ gawk-3.0.6+mb1.15/getopt1.c Sat May 5 16:25:49 2001 @@ -29,6 +29,8 @@ /* This is a separate conditional since some stdc systems reject `defined (const)'. */ +#ifndef MSDOS #ifndef const #define const +#endif #endif #endif diff -u2rN -x awktab.c gawk-3.0.6/human/ChangeLog gawk-3.0.6+mb1.15/human/ChangeLog --- gawk-3.0.6/human/ChangeLog Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/human/ChangeLog Sat May 5 16:27:24 2001 @@ -0,0 +1,21 @@ +Wed Jul 16 16:07:00 1997 Katsuyuki Okabe (wills at x68k) + + * configure.in: DTOS18_MISSING および FCONVERT_MISSING の定義条件 + が逆だったのを直した. + +Tue Feb 04 05:28:13 1997 Katsuyuki Okabe (wills at x68k) + + * libc-1.1.32 の _round.c, fconvert.c のバグを直した. これでテスト + の math をクリアするようになった. 
+ +Sun Jan 26 19:13:12 1997 Katsuyuki Okabe (wills at x68k) + + * 出力先が端末でなければバイナリモードにするようにした. + + * io.c (gawk_popen): パイプ処理がうまくいかないことがあったのを直 + した. + +Sun Apr 14 20:48:35 1996 Katsuyuki Okabe (wills at x68k) + + * ChangeLog created. + diff -u2rN -x awktab.c gawk-3.0.6/human/_dtos18.c gawk-3.0.6+mb1.15/human/_dtos18.c --- gawk-3.0.6/human/_dtos18.c Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/human/_dtos18.c Sat May 5 16:27:24 2001 @@ -0,0 +1,250 @@ +/* + * PROJECT C Library, X68000 PROGRAMMING INTERFACE DEFINITION + * -------------------------------------------------------------------- + * This file is written by the Project C Library Group, and completely + * in public domain. You can freely use, copy, modify, and redistribute + * the whole contents, without this notice. + * -------------------------------------------------------------------- + * $Id: _dtos18.c,v 1.2 1994/11/27 13:05:20 mura Exp $ + */ + +/* System headers */ +#include +#include + +/* +** 本関数は浮動小数点を倍精度整数に変換してから文字列にするため、精度的には +** 倍精度整数に格納できる桁数までしか扱うことができない。したがって最高精度 +** は18桁である。 +*/ + +/* File scope variables */ +static double _pos1[32] = { + 1.0e+17, /* + 0 */ + 1.0e+18, /* + 1 */ + 1.0e+19, /* + 2 */ + 1.0e+20, /* + 3 */ + 1.0e+21, /* + 4 */ + 1.0e+22, /* + 5 */ + 1.0e+23, /* + 6 */ + 1.0e+24, /* + 7 */ + 1.0e+25, /* + 8 */ + 1.0e+26, /* + 9 */ + 1.0e+27, /* +10 */ + 1.0e+28, /* +11 */ + 1.0e+29, /* +12 */ + 1.0e+30, /* +13 */ + 1.0e+31, /* +14 */ + 1.0e+32, /* +15 */ + 1.0e+33, /* +16 */ + 1.0e+34, /* +17 */ + 1.0e+35, /* +18 */ + 1.0e+36, /* +19 */ + 1.0e+37, /* +20 */ + 1.0e+38, /* +21 */ + 1.0e+39, /* +22 */ + 1.0e+40, /* +23 */ + 1.0e+41, /* +24 */ + 1.0e+42, /* +25 */ + 1.0e+43, /* +26 */ + 1.0e+44, /* +27 */ + 1.0e+45, /* +28 */ + 1.0e+46, /* +29 */ + 1.0e+47, /* +30 */ + 1.0e+48, /* +31 */ +}; + +/* File scope variables */ +static double _neg1[32] = { + 1.0e+17, /* - 0 */ + 1.0e+16, /* - 1 */ + 1.0e+15, /* - 2 */ + 1.0e+14, /* - 3 */ + 1.0e+13, /* - 4 */ + 1.0e+12, /* - 5 */ + 1.0e+11, 
/* - 6 */ + 1.0e+10, /* - 7 */ + 1.0e+9, /* - 8 */ + 1.0e+8, /* - 9 */ + 1.0e+7, /* -10 */ + 1.0e+6, /* -11 */ + 1.0e+5, /* -12 */ + 1.0e+4, /* -13 */ + 1.0e+3, /* -14 */ + 1.0e+2, /* -15 */ + 1.0e+1, /* -16 */ + 1.0e+0, /* -17 */ + 1.0e-1, /* -18 */ + 1.0e-2, /* -19 */ + 1.0e-3, /* -20 */ + 1.0e-4, /* -21 */ + 1.0e-5, /* -22 */ + 1.0e-6, /* -23 */ + 1.0e-7, /* -24 */ + 1.0e-8, /* -25 */ + 1.0e-9, /* -26 */ + 1.0e-10, /* -27 */ + 1.0e-11, /* -28 */ + 1.0e-12, /* -29 */ + 1.0e-13, /* -30 */ + 1.0e-14, /* -31 */ +}; + +/* File scope variables */ +static double _pos2[10] = { + 1.0e+0, /* 000 */ + 1.0e+32, /* 001 */ + 1.0e+64, /* 010 */ + 1.0e+96, /* 011 */ + 1.0e+128, /* 100 */ + 1.0e+160, /* 101 */ + 1.0e+192, /* 110 */ + 1.0e+224, /* 111 */ + 1.0e+256, /* 1000 */ + 1.0e+288, /* 1001 */ +}; + +/* File scope variables */ +static double _neg2[10] = { + 1.0e-0, /* 000 */ + 1.0e-32, /* 001 */ + 1.0e-64, /* 010 */ + 1.0e-96, /* 011 */ + 1.0e-128, /* 100 */ + 1.0e-160, /* 101 */ + 1.0e-192, /* 110 */ + 1.0e-224, /* 111 */ + 1.0e-256, /* 1000 */ + 1.0e-288, /* 1001 */ +}; + +/* File scope functions: _cmpd orders two doubles by their raw bit fields, sign bit masked off; it treats word 0 as the exponent + top mantissa bits and word 1 as the low mantissa word, and returns the unsigned difference y - x of the first field that differs, converted to int */ +static int _cmpd (double x, double y) +{ + unsigned long vx, vy, rc; + unsigned long *x_ptr = (unsigned long *) &x; + unsigned long *y_ptr = (unsigned long *) &y; + + /* extract the exponent bits of x */ + vx = x_ptr[0] & 0x7FF00000; + + /* extract the exponent bits of y */ + vy = y_ptr[0] & 0x7FF00000; + + /* decide on the exponent bits alone */ + if ((rc = vy - vx) != 0) + return rc; + + /* extract the high-order mantissa bits of x */ + vx = x_ptr[0] & 0x000FFFFF; + + /* extract the high-order mantissa bits of y */ + vy = y_ptr[0] & 0x000FFFFF; + + /* decide on the high-order bits alone */ + if ((rc = vy - vx) != 0) + return rc; + + /* extract the low-order mantissa bits of x */ + vx = x_ptr[1]; + + /* extract the low-order mantissa bits of y */ + vy = y_ptr[1]; + + /* final decision; NOTE(review): the unsigned difference is returned as int, so when the low words differ by 2^31 or more the sign of the result looks wrong -- presumably unsigned long is 32 bits here; confirm */ + return vy - vx; +} + +/* Functions */ +void _dtos18 (double x, int *decpt, int *sign, char *buffer) +{ + short e2; + int e, n; + + /* extract the raw base-2 exponent field (bias not yet removed) */ + e2 = (((unsigned short *) &x)[0] & 0x7FF0U) >> 4; + + /* when the exponent field is 0, check for +/-0.0 */ + if (e2 == 0) { + + unsigned
long hi = ((unsigned long *) &x)[0] & 0xFFFFF; + unsigned long lo = ((unsigned long *) &x)[1]; + + /* are all mantissa bits zero? */ + if (hi == 0 && lo == 0) { + + /* set the digit string */ + buffer[0] = '0'; + + /* terminate with NUL */ + buffer[1] = '\0'; + + /* compute the decimal point position */ + *decpt = 1; + + /* compute the sign (zero is always reported as non-negative) */ + /* *sign = hi & 0x80000000UL; */ + *sign = 0; + + /* done */ + return; + + } + + } + + /* remove the bias from the base-2 exponent, then estimate the base-10 exponent (77/256 approximates log10(2)) */ + e = ((int) ((e2 - 1023) * 77)) >> 8; + + /* exponent is zero or positive */ + if (e >= 0) { + + /* below 32: scale using table 1 only */ + if (e < 32) + x *= _neg1[e]; + + /* 32 or more: scale using tables 1 and 2 */ + else + x *= _neg1[e & 31] * _neg2[e >> 5]; + + } + + /* exponent is negative */ + else { + + /* take the absolute value */ + n = -e; + + /* below 32: scale using table 1 only */ + if (n < 32) + x *= _pos1[n]; + + /* 32 or more: scale using tables 1 and 2 */ + else { + x *= _pos1[n & 31]; + x *= _pos2[n >> 5]; + } + + } + + /* scaled too far: back off by one decimal digit */ + if (_cmpd (1.0e+18, x) >= 0) { + e++; + x *= 1.0e-1; + } + + /* not scaled far enough: add one more decimal digit */ + else if (_cmpd (1.0e+17, x) < 0) { + e--; + x *= 1.0e+1; + } + + /* compute the decimal point position */ + *decpt = e + 1; + + /* compute the sign; NOTE(review): reads byte 0 of the double -- presumably assumes the sign byte is stored first (big-endian); confirm */ + *sign = ((unsigned char *) &x)[0] & 0x80U; + + /* convert to a string */ + _ulltoa ((unsigned long long) x, buffer); +} diff -u2rN -x awktab.c gawk-3.0.6/human/_round.c gawk-3.0.6+mb1.15/human/_round.c --- gawk-3.0.6/human/_round.c Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/human/_round.c Sat May 5 16:27:24 2001 @@ -0,0 +1,45 @@ +/* + * PROJECT C Library, X68000 PROGRAMMING INTERFACE DEFINITION + * -------------------------------------------------------------------- + * This file is written by the Project C Library Group, and completely + * in public domain. You can freely use, copy, modify, and redistribute + * the whole contents, without this notice.
+ * -------------------------------------------------------------------- + * $Id: _round.c,v 1.1 1994/11/27 13:05:36 mura Exp $ + */ +/* changed 1997.2.2 by K.Okabe */ + +/* System headers */ +#include +#include + +/* Functions: _round rounds the decimal digit string [top, cur) in place according to the dropped digit undig; returns 1 when the carry ran past the first digit, otherwise 0 */ +int _round (char *top, char *cur, int undig) +{ + char *ptr; + + /* no rounding needed when the dropped digit is below 5 */ + if (undig < '5') + return 0; + + /* point at the last kept digit */ + ptr = cur - 1; + + /* propagate the carry back toward the first digit */ + while (ptr >= top) { + + /* finished as soon as a digit does not overflow */ + if (++(*ptr) <= '9') + return 0; + + /* reset this digit to 0 */ + *ptr-- = '0'; + + } + + /* set the first digit to 1 */ + *++ptr = '1'; + + /* report the carry out */ + return 1; +} diff -u2rN -x awktab.c gawk-3.0.6/human/awklib/igawk.sh gawk-3.0.6+mb1.15/human/awklib/igawk.sh --- gawk-3.0.6/human/awklib/igawk.sh Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/human/awklib/igawk.sh Sat May 5 16:27:24 2001 @@ -0,0 +1,141 @@ +#! /bin/sh + +# igawk --- like gawk but do @include processing +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# July 1993 + +# Modified for HUMAN68K April 9, 1996 by WILLs (Katsuyuki Okabe) + +: ${TMPDIR=`test -n "$temp" -a -d "$temp" && echo "$temp" || + test -n "$TMP" -a -d "$TMP" && echo "$TMP" || + echo '.'`} +igs=${TMPDIR}/igs$$ +ige=${TMPDIR}/ige$$ + +if [ "$1" = debug ] +then + set -x + shift +else + # cleanup on exit, hangup, interrupt, quit, termination + trap 'rm -f $igs $ige' 0 1 2 3 15 +fi + +while [ $# -ne 0 ] # loop over arguments +do + case $1 in + --) shift; break;; + + -W) shift + set -- -W"$@" + continue;; + + -[vF]) opts="$opts $1 '$2'" + shift;; + + -[vF]*) opts="$opts '$1'" ;; + + -f) echo @include "$2" >> $igs + shift;; + + -f*) f=`echo "$1" | sed 's/-f//'` + echo @include "$f" >> $igs ;; + + -?file=*) # -Wfile or --file + f=`echo "$1" | sed 's/-.file=//'` + echo @include "$f" >> $igs ;; + + -?file) # get arg, $2 + echo @include "$2" >> $igs + shift;; + + -?source=*) # -Wsource or --source + t=`echo "$1" | sed 's/-.source=//'` + echo "$t" >> $igs ;; + + -?source) # get arg, $2 + echo "$2" >> $igs +
shift;; + + -?version) + echo igawk: version 1.0 1>&2 + gawk --version + exit 0 ;; + + -[W-]*) opts="$opts '$1'" ;; + + *) break;; + esac + shift +done + +if [ ! -s $igs ] +then + if [ -z "$1" ] + then + echo igawk: no program! 1>&2 + exit 1 + else + echo "$1" > $igs + shift + fi +fi + +# at this point, $igs has the program +gawk -- ' +# process @include directives + +function pathto(file, i, t, junk) +{ + if (index(file, "/") != 0) + return file + + for (i = 1; i <= ndirs; i++) { + t = (pathlist[i] "/" file) + if ((getline junk < t) > 0) { + # found it + close(t) + return t + } + } + return "" +} +BEGIN { + path = ENVIRON["AWKPATH"] + gsub(/\\/, "/", path) + gsub(/:/, ";", path) + path = gensub(/((^|;)[A-Za-z]);/, "\\1:", "g", path) + ndirs = split(path, pathlist, ";") + for (i = 1; i <= ndirs; i++) { + if (pathlist[i] == "") + pathlist[i] = "." + } + stackptr = 0 + input[stackptr] = ARGV[1] # ARGV[1] is first file + + for (; stackptr >= 0; stackptr--) { + while ((getline < input[stackptr]) > 0) { + if (tolower($1) != "@include") { + print + continue + } + fpath = pathto($2) + if (fpath == "") { + printf("igawk:%s:%d: cannot find %s\n", \ + input[stackptr], FNR, $2) > "/dev/stderr" + continue + } + if (! (fpath in processed)) { + processed[fpath] = input[stackptr] + input[++stackptr] = fpath + } else + print $2, "included in", input[stackptr], \ + "already included in", \ + processed[fpath] > "/dev/stderr" + } + close(input[stackptr]) + } +}' $igs > $ige +eval gawk -f $ige $opts -- "$@" + +exit $? diff -u2rN -x awktab.c gawk-3.0.6/human/fconvert.c gawk-3.0.6+mb1.15/human/fconvert.c --- gawk-3.0.6/human/fconvert.c Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/human/fconvert.c Sat May 5 16:27:24 2001 @@ -0,0 +1,81 @@ +/* + * PROJECT C Library, X68000 PROGRAMMING INTERFACE DEFINITION + * -------------------------------------------------------------------- + * This file is written by the Project C Library Group, and completely + * in public domain. 
You can freely use, copy, modify, and redistribute + * the whole contents, without this notice. + * -------------------------------------------------------------------- + * $Id: fconvert.c,v 1.2 1994/11/27 13:06:19 mura Exp $ + */ +/* changed 1997.2.3 by K.Okabe */ + +/* System headers */ +#include +#include + +/* Functions: fconvert writes the decimal digits of x into buffer, updates *decpt and *sign, and returns buffer */ +char *fconvert (double x, int ndigit, int *decpt, int *sign, char *buffer) +{ + int pos, n; + char *src, *dst; + char string[24]; + int figup; + + /* convert to an 18-digit string */ + _dtos18 (x, decpt, sign, string); + + /* set the copy source */ + src = string; + + /* set the copy destination */ + dst = buffer; + + /* fetch the decimal point position */ + pos = *decpt; + + /* when the decimal point position is negative */ + if (pos < 0) { + + /* compute how many digits to pad */ + n = min (-pos, ndigit); + + /* pad the front with zeros */ + while (n-- > 0) + *dst++ = '0'; + + /* the decimal point position becomes 0 */ + *decpt = 0; + + } + + /* number of digits left to copy */ + n = ndigit + pos; + + /* copy into the destination */ + while (n-- > 0) { + + /* pad with zeros where the source runs out */ + if (*src == '\0') { + while (n-- >= 0) + *dst++ = '0'; + break; + } + + /* copy from the converted string */ + *dst++ = *src++; + + } + + /* round, using the first digit that was not copied */ + *decpt += (figup = _round (buffer, dst, *src)); + + /* on carry out, append a 0 at the end */ + if (figup) + *dst++ = '0'; + + /* write the terminating NUL */ + *dst = '\0'; + + /* return the buffer address */ + return buffer; +} diff -u2rN -x awktab.c gawk-3.0.6/human/gawkmisc.x68 gawk-3.0.6+mb1.15/human/gawkmisc.x68 --- gawk-3.0.6/human/gawkmisc.x68 Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/human/gawkmisc.x68 Sat May 5 16:27:24 2001 @@ -0,0 +1,209 @@ +/* + * gawkmisc.c --- miscellaneous gawk routines that are OS specific. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991 - 95 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version.
+ * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#ifndef O_RDONLY +#include +#endif +#ifndef O_ACCMODE +#define O_ACCMODE (O_RDONLY | O_WRONLY | O_RDWR) +#endif + +#ifndef __LIBC__ +#include +#endif + +extern char *_toslash P((char *)); + +char quote = '\''; +#ifdef DEFPATH +char *defpath = DEFPATH; +#else +char *defpath = ".;/usr/lib/awk;/usr/local/lib/awk"; +#endif +char envsep = ';'; + +/* gawk_name --- pull out the "gawk" part from how the OS called us */ + +char * +gawk_name(filespec) +const char *filespec; +{ + char *p, *q; + + p = (char *) filespec; /* Sloppy... */ + + _toslash(p); + if ((q = strrchr(p, '/')) != NULL) + p = q + 1; + if ((q = strrchr(p, '.')) != NULL && strcasecmp(q, ".x") == 0) + *q = '\0'; + return strlwr(p); +} + +/* os_arg_fixup --- fixup the command line */ + +void +os_arg_fixup(argcp, argvp) +int *argcp; +char ***argvp; +{ + /* no-op */ + return; +} + +/* os_devopen --- open special per-OS devices */ + +int +os_devopen(name, flag) +const char *name; +int flag; +{ + int openfd = -1; + + if (STREQ(name, "/dev/null")) + openfd = open("nul", flag, 0666); + else if (STREQ(name, "/dev/tty")) + openfd = open("con", flag, 0666); + + if (openfd >= 0 && (flag & O_ACCMODE) == O_WRONLY && !isatty(openfd)) + setmode(openfd, O_BINARY); + + return openfd; +} + +/* optimal_bufsize --- determine optimal buffer size */ + +int +optimal_bufsize(fd, stb) +int fd; +struct stat *stb; +{ + /* force all members to zero in case OS doesn't use all of them. 
*/ + memset(stb, '\0', sizeof(struct stat)); + + /* + * System V.n, n < 4, doesn't have the file system block size in the + * stat structure. So we have to make some sort of reasonable + * guess. We use stdio's BUFSIZ, since that is what it was + * meant for in the first place. + */ +#ifdef HAVE_ST_BLKSIZE +#define DEFBLKSIZE (stb->st_blksize ? stb->st_blksize : BUFSIZ) +#else +#define DEFBLKSIZE BUFSIZ +#endif + + if (isatty(fd)) + return BUFSIZ; + if (fstat(fd, stb) == -1) + fatal("can't stat fd %d (%s)", fd, strerror(errno)); + if (lseek(fd, (off_t)0, 0) == -1) /* not a regular file */ + return DEFBLKSIZE; + if (stb->st_size > 0 && stb->st_size < DEFBLKSIZE) /* small file */ + return stb->st_size; + return DEFBLKSIZE; +} + +/* ispath --- return true if path has directory components */ + +int +ispath(file) +const char *file; +{ + _toslash((char *) file); /* Sloppy... */ + for (; *file; file++) { + switch (*file) { + case '/': + case ':': + return 1; + } + } + return 0; +} + +/* isdirpunct --- return true if char is a directory separator */ + +int +isdirpunct(c) +int c; +{ + return (strchr(":\\/", c) != NULL); +} + +#ifndef __LIBC__ +char * +_toslash (path) +char *path; +{ + char *p; + + p = path; + while (*p != '\0') { + if (iskanji ((unsigned char) *p)) + p++; + else if (*p == '\\') + *p = '/'; + p++; + } + + return path; +} + +int +getegid () +{ + return 0; +} + +int +geteuid () +{ + return 0; +} + +int +getgid () +{ + return 0; +} + +int +getuid () +{ + return 0; +} + +int +getpgrp () +{ + return 0; +} + +int +getppid () +{ + return 0; +} +#endif diff -u2rN -x awktab.c gawk-3.0.6/io.c gawk-3.0.6+mb1.15/io.c --- gawk-3.0.6/io.c Sun Jul 16 12:13:59 2000 +++ gawk-3.0.6+mb1.15/io.c Sat May 5 18:08:56 2001 @@ -23,4 +23,9 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: Aug 5, 1998 by okabe katsuyuki */ + +/* Modified for Human68k by K.Murakami 
1993.06.01 */ +/* Last modified: May 23, 1997 by okabe katsuyuki */ #include "awk.h" @@ -66,8 +71,22 @@ #endif -#if defined(MSDOS) || defined(OS2) || defined(WIN32) +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__human68k__) #define PIPES_SIMULATED #endif +#if defined (MSDOS) && !defined (__GO32__) +#ifndef __EMX__ +extern int os_open P((const char *, int, ...)); +#define open os_open +#endif +extern int getegid P((void)); +extern int geteuid P((void)); +extern int getgid P((void)); +extern int getuid P((void)); +extern int getpid P((void)); +extern int getpgrp P((void)); +extern int getppid P((void)); +#endif + static IOBUF *nextfile P((int skipping)); static int inrec P((IOBUF *iop)); @@ -84,7 +103,7 @@ static int gawk_pclose P((struct redirect *rp)); static int do_pathopen P((const char *file)); -static int get_a_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode)); +static int get_a_record P((char **out, IOBUF *iop, const char *rsp, Regexp *RSre, int *errcode)); #ifdef HAVE_MMAP -static int mmap_get_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode)); +static int mmap_get_record P((char **out, IOBUF *iop, const char *rsp, Regexp *RSre, int *errcode)); #endif /* HAVE_MMAP */ static int str2mode P((const char *mode)); @@ -229,5 +248,5 @@ if ((cnt = iop->cnt) != EOF) cnt = (*(iop->getrec)) - (&begin, iop, RS->stptr[0], RS_regexp, NULL); + (&begin, iop, RS->stptr, RS_regexp, NULL); if (cnt == EOF) { cnt = 0; @@ -471,4 +490,12 @@ rp->fp = stderr; else { +#ifdef __human68k__ + if (fcntl(fd, F_GETFL) & O_BINARY) { + char binmode[4]; + strcpy(binmode, mode); + strcat(binmode, "b"); + rp->fp = fdopen(fd, binmode); + } else +#endif rp->fp = fdopen(fd, (char *) mode); /* don't leak file descriptors */ @@ -827,4 +854,8 @@ if (S_ISDIR(buf.st_mode)) fatal("file `%s' is a directory", name); +#ifdef __human68k__ + if (openfd != INVALID_HANDLE && (flag & O_ACCMODE) == O_WRONLY && !isatty(openfd)) + setmode(openfd, 
O_BINARY); +#endif return openfd; } @@ -1023,4 +1054,8 @@ if (S_ISDIR(buf.st_mode)) fatal("file `%s' is a directory", name); +#ifdef __human68k__ + if (openfd != INVALID_HANDLE && (flag & O_ACCMODE) == O_WRONLY && !isatty(openfd)) + setmode(openfd, O_BINARY); +#endif return iop_alloc(openfd, name, iop); } @@ -1172,4 +1207,14 @@ #else /* not (VMS || OS2 || MSDOS) */ +#ifndef _NFILE +#ifdef __human68k__ +#define _NFILE OPEN_MAX +#else /* !__human68k__ */ +#ifdef MSDOS +#define _NFILE 20 +#endif +#endif /* !__human68k__ */ +#endif /* !_NFILE */ + static struct pipeinfo { char *command; @@ -1187,11 +1232,38 @@ int current; char *name; +#if !defined(__human68k__) && !defined(MSDOS) static char cmdbuf[256]; +#endif +#if defined(MSDOS) && !defined(__EMX__) + extern int xsystem(char *); +#define system(cmd) xsystem((cmd)) +#endif /* get a name to use */ if ((name = tempnam(".", "pip")) == NULL) return NULL; +#if defined(__human68k__) || defined(MSDOS) +#ifndef STDOUT_FILENO +#define STDOUT_FILENO fileno(stdout) +#endif + { + int save_stdout; + + if ((save_stdout = dup(STDOUT_FILENO)) == -1) + return NULL; + if ((current = open(name, O_WRONLY | O_CREAT | O_TRUNC, 0666)) == -1) { + close(save_stdout); + return NULL; + } + dup2(current, STDOUT_FILENO); + system(cmd); + dup2(save_stdout, STDOUT_FILENO); + close(current); + close(save_stdout); + } +#else sprintf(cmdbuf, "%s > %s", cmd, name); system(cmdbuf); +#endif if ((current = open(name, O_RDONLY)) == INVALID_HANDLE) return NULL; @@ -1265,5 +1337,5 @@ } errcode = 0; - cnt = (*(iop->getrec))(&s, iop, RS->stptr[0], RS_regexp, &errcode); + cnt = (*(iop->getrec))(&s, iop, RS->stptr, RS_regexp, &errcode); if (errcode != 0) { if (! 
do_traditional) { @@ -1381,6 +1453,16 @@ trypath[0] = '\0'; /* this should take into account limits on size of trypath */ +#if defined(__human68k__) || defined(MSDOS) + cp = trypath; + if (isalpha((unsigned char) awkpath[0]) && awkpath[1] == ':') { + *cp++ = *awkpath++; + *cp++ = *awkpath++; + } + while (*awkpath && *awkpath != ':' && *awkpath != ';') + *cp++ = *awkpath++; +#else for (cp = trypath; *awkpath && *awkpath != envsep; ) *cp++ = *awkpath++; +#endif if (cp != trypath) { /* nun-null element in path */ @@ -1396,6 +1478,11 @@ /* no luck, keep going */ +#if defined(__human68k__) || defined(MSDOS) + if ((*awkpath == ':' || *awkpath == ';') && awkpath[1] != '\0') + awkpath++; /* skip colon and semicolon */ +#else if(*awkpath == envsep && awkpath[1] != '\0') awkpath++; /* skip colon */ +#endif } while (*awkpath != '\0'); /* @@ -1523,8 +1610,8 @@ static int -get_a_record(out, iop, grRS, RSre, errcode) +get_a_record(out, iop, grRSp, RSre, errcode) char **out; /* pointer to pointer to data */ IOBUF *iop; /* input IOP */ -register int grRS; /* first char in RS->stptr */ +const char *grRSp; /* pointer to RS->stptr */ Regexp *RSre; /* regexp for RS */ int *errcode; /* pointer to error variable */ @@ -1538,4 +1625,6 @@ int continuing = FALSE, continued = FALSE; /* used for re matching */ int onecase; + int mbc_state = 0; + size_t skip; /* first time through */ @@ -1556,7 +1645,7 @@ rs = '\n'; else - rs = (char) grRS; + rs = (unsigned char) grRSp[0]; - onecase = (IGNORECASE && isalpha(rs)); + onecase = (IGNORECASE && isalpha(rs) && !ismbchar(rs)); if (onecase) rs = casetable[rs]; @@ -1606,5 +1695,5 @@ oldsplit, bp - oldsplit); } else - memcpy(newsplit - len, start, len); + memcpy(newsplit - len, start, (size_t)len); } bp = iop->end = iop->off = iop->buf + iop->secsiz; @@ -1737,12 +1826,78 @@ } /* search for RS, #2, RS = */ + if (mbc_state) { + /* RS を見つけた負数がくることはない. */ + assert(mbc_state > 0); + /* 前回の最後に半端なバイトが残っている. 
*/ + bp -= mbc_state; + mbc_state = 0; + } if (onecase) { - while (casetable[(int) *bp++] != rs) - continue; + for (;;) { + int c = (unsigned char) *bp++; + if (ismbchar(c)) { + skip = mbclen(c) - 1; + if ((bp += skip) > iop->end) { + /* 今回, 半端なバイトを残した. */ + mbc_state = iop->end - bp + skip + 1; + break; + } + continue; + } + if (casetable[c] == rs) + break; + } + } else if (!ismbchar(rs)) { + for (;;) { + int c = (unsigned char) *bp++; + if (ismbchar(c)) { + skip = mbclen(c) - 1; + if ((bp += skip) > iop->end) { + /* 今回, 半端なバイトを残した. */ + mbc_state = iop->end - bp + skip + 1; + break; + } + continue; + } + if (c == rs) + break; + } } else { - while (*bp++ != rs) - continue; + for (;;) { + int c = (unsigned char) *bp++; + if (ismbchar(c)) { + skip = mbclen(c) - 1; + if ((bp + skip) > iop->end) { + /* 今回, 半端なバイトを残した. */ + bp += skip; + mbc_state = iop->end - bp + skip + 1; + break; + } + if (c != rs) { + bp += skip; + continue; + } + if (bp > iop->end) { + mbc_state = 0; + break; + } + if (memcmp(bp, grRSp + 1, skip) == 0) { + /* RS を見つけた. */ + mbc_state = -skip - 1; + bp += skip; + break; + } + bp += skip; + } + } + } + if (ismbchar(rs)) { + if (mbc_state < 0) + /* do_traditional が真のときのみ実行されるが + 実際に RT がセットされることはない. */ + set_RT(bp + mbc_state, -mbc_state); + } else { + set_RT(bp - 1, 1); } - set_RT(bp - 1, 1); if (bp <= iop->end) @@ -1763,13 +1918,22 @@ if (do_traditional || rsre == NULL) { - char *bstart; - - bstart = iop->off = bp; + iop->off = bp; + if (!ismbchar(rs)) { bp--; - if (onecase ? casetable[(int) *bp] != rs : *bp != rs) { + /* mbc_state > 0 なら最後に半端なバイトがある. */ + if (!mbc_state + && (onecase ? casetable[(unsigned char) *bp] != rs : (unsigned char) *bp != rs)) { bp++; - bstart = bp; } *bp = '\0'; + } else { + /* mbc_state > 0 なら最後に半端なバイトがある. + mbc_state < 0 なら最後に RS がある. 
*/ + if (mbc_state > 0) + bp -= mbc_state; + else + bp += mbc_state; + *bp = '\0'; + } } else if (RS_is_null && iop->cnt == EOF) { /* @@ -1780,4 +1944,11 @@ bp--; *bp = '\0'; + } else if (iop->cnt == EOF && bp > start) { + size_t n; + + /* ファイルの末尾が半端なバイトなら削除する. */ + n = mbbyte(start, bp - start); + if (n != bp - start) + *(bp = start + n) = '\0'; } @@ -1795,13 +1966,14 @@ char *out; int cnt; - char rs[2]; + char *rs; - rs[0] = '\0'; + rs = ""; if (argc > 1) bufsize = atoi(argv[1]); if (argc > 2) - rs[0] = *argv[2]; + rs = argv[2]; + RS_is_null = strlen(rs) == 0; iop = iop_alloc(0, "stdin", NULL); - while ((cnt = get_a_record(&out, iop, rs[0], NULL, NULL)) > 0) { + while ((cnt = get_a_record(&out, iop, rs, NULL, NULL)) > 0) { fwrite(out, 1, cnt, stdout); fwrite(rs, 1, 1, stdout); @@ -1815,8 +1987,8 @@ static int -mmap_get_record(out, iop, grRS, RSre, errcode) +mmap_get_record(out, iop, grRSp, RSre, errcode) char **out; /* pointer to pointer to data */ IOBUF *iop; /* input IOP */ -register int grRS; /* first char in RS->stptr */ +const char *grRSp; /* pointer to RS->stptr */ Regexp *RSre; /* regexp for RS */ int *errcode; /* pointer to error variable */ @@ -1828,6 +2000,8 @@ Regexp *rsre = NULL; int onecase; + int mbc_state = 0; + size_t skip; register char *end = iop->end; - int cnt; + size_t cnt; /* first time through */ @@ -1849,7 +2023,7 @@ rs = '\n'; else - rs = (char) grRS; + rs = (unsigned char) grRSp[0]; - onecase = (IGNORECASE && isalpha(rs)); + onecase = (IGNORECASE && isalpha(rs) && !ismbchar(rs)); if (onecase) rs = casetable[rs]; @@ -1896,4 +2070,13 @@ iop->off = iop->end; /* all done with the record */ set_RT_to_null(); + { + size_t n; + + /* ファイルの末尾が半端なバイトであれば + そのバイトを削除した分の長さを返す. 
*/ + n = mbbyte(start, end - start); + if (n != end - start) + return n; + } /* special case, don't allow trailing newlines */ if (RS_is_null && *(iop->end - 1) == '\n') @@ -1926,14 +2109,69 @@ /* search for RS, #2, RS = */ if (onecase) { - while (bp < end && casetable[*bp++] != rs) - continue; + while (bp < end) { + int c = (unsigned char) *bp++; + if (ismbchar(c)) { + skip = mbclen(c) - 1; + if ((bp += skip) > end) { + mbc_state = skip; + break; + } + continue; + } + if (casetable[c] == rs) + break; + } + } else if (!ismbchar(rs)) { + while (bp < end) { + int c = (unsigned char) *bp++; + if (ismbchar(c)) { + skip = mbclen(c) - 1; + if ((bp += skip) > end) { + mbc_state = skip; + break; + } + continue; + } + if (c == rs) + break; + } } else { - while (bp < end && *bp++ != rs) - continue; + while (bp < end) { + int c = (unsigned char) *bp++; + if (ismbchar(c)) { + skip = mbclen(c) - 1; + if ((bp + skip) > iop->end) { + bp += skip; + mbc_state = skip; + break; + } + if (c != rs) { + bp += skip; + continue; + } + if (memcmp(bp, grRSp + 1, skip) == 0) { + mbc_state = -skip - 1; + bp += skip; + break; + } + bp += skip; + } + } } cnt = (bp - start) - 1; + if (mbc_state) { + if (mbc_state < 0) { + /* do_traditional が真のときのみ実行されるが + 実際に RT がセットされることはない. */ + set_RT(bp + mbc_state, -mbc_state); + cnt += mbc_state + 1; + } else { + set_RT_to_null(); + cnt -= mbc_state; + } + } else if (bp >= iop->end) { /* at end, may have actually seen rs, or may not */ - if (*(bp-1) == rs) + if (!ismbchar(rs) && (unsigned char) *(bp-1) == (unsigned char) rs) set_RT(bp - 1, 1); /* real RS seen */ else { diff -u2rN -x awktab.c gawk-3.0.6/main.c gawk-3.0.6+mb1.15/main.c --- gawk-3.0.6/main.c Wed Jun 7 18:44:09 2000 +++ gawk-3.0.6+mb1.15/main.c Sat May 5 16:45:27 2001 @@ -23,4 +23,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: Feb. 
16, 2001 by okabe katsuyuki */ #include "awk.h" @@ -90,4 +92,6 @@ int do_intervals = FALSE; /* allow {...,...} in regexps */ +int save_memory = FALSE; + int in_begin_rule = FALSE; /* we're in a BEGIN rule */ int in_end_rule = FALSE; /* we're in a END rule */ @@ -95,4 +99,8 @@ int output_is_tty = FALSE; /* control flushing of output */ +#ifdef __TURBOC__ +int _stklen = 0x8000; +#endif + extern char *version_string; /* current version, for printing */ @@ -117,4 +125,6 @@ { "usage", no_argument, NULL, 'u' }, { "help", no_argument, NULL, 'u' }, + { "ctype", required_argument, NULL, 'T' }, + { "memory", no_argument, & save_memory, 1 }, #ifdef DEBUG { "parsedebug", no_argument, NULL, 'D' }, @@ -148,8 +158,28 @@ (void) signal(SIGBUS, (RETSIGTYPE (*) P((int))) catchsig); #endif +#if defined(MSDOS) && !defined(DJGPP) + (void) signal(SIGINT, (RETSIGTYPE (*) P((int))) catchsig); +#endif +#ifdef __human68k__ + if (!isatty(fileno(stdout))) + fmode(stdout, _IOBIN); + if (!isatty(fileno(stderr))) + fmode(stderr, _IOBIN); +#endif myname = gawk_name(argv[0]); argv[0] = (char *) myname; os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */ + if (myname[0] == 'j' || myname[0] == 'J') { + mbcinit(MBCTYPE_DEFAULT); + } else if (!strncasecmp(myname, "euc", 3)) { + mbcinit(MBCTYPE_EUC); + } else if (!strncasecmp(myname, "sjis", 4)) { + mbcinit(MBCTYPE_SJIS); + } else if (!strncasecmp(myname, "utf8", 4)) { + mbcinit(MBCTYPE_UTF8); + } else if (getenv("GAWKMB_ENABLE")) { + mbcinit(MBCTYPE_DEFAULT); + } /* remove sccs gunk */ @@ -216,5 +246,5 @@ */ scan = optarg; - while (ISSPACE(*scan)) + while (ISSPACE((unsigned char) *scan)) scan++; @@ -286,4 +316,17 @@ break; + case 'T': + if (strcasecmp(optarg, "ASCII") == 0) + mbcinit(MBCTYPE_ASCII); + else if (strcasecmp(optarg, "EUC") == 0) + mbcinit(MBCTYPE_EUC); + else if (strcasecmp(optarg, "SJIS") == 0) + mbcinit(MBCTYPE_SJIS); + else if (strcasecmp(optarg, "UTF8") == 0) + mbcinit(MBCTYPE_UTF8); + else + warning("unknown 
argument to --ctype ignored"); + break; + case '?': default: @@ -444,7 +487,13 @@ fputs("\t-W usage\t\t--usage\n", fp); fputs("\t-W version\t\t--version\n", fp); + fputs("\t-W ctype=ASCII\t\t--ctype=ASCII\n", stderr); + fputs("\t-W ctype=EUC\t\t--ctype=EUC\n", stderr); + fputs("\t-W ctype=SJIS\t\t--ctype=SJIS\n", stderr); + fputs("\t-W ctype=UTF8\t\t--ctype=UTF8\n", stderr); + fputs("\t-W memory\t\t--memory\n", stderr); fputs("\nTo report bugs, see node `Bugs' in `gawk.info', which\n", fp); fputs("is section `Reporting Problems and Bugs' in the\n", fp); fputs("printed version.\n", fp); + fputs("\nReport multi-byte extension version bugs to HGC02147@nifty.ne.jp\n", fp); exit(exitval); } @@ -595,5 +644,5 @@ load_environ() { -#if ! (defined(MSDOS) && !defined(DJGPP)) && ! defined(OS2) && ! (defined(VMS) && defined(__DECC)) +#if ! (defined(MSDOS) && !defined(__TURBOC__) && !defined(__GO32__)) && ! defined(OS2) && ! (defined(VMS) && defined(__DECC)) extern char **environ; #endif @@ -650,13 +699,25 @@ /* first check that the variable name has valid syntax */ badvar = FALSE; - if (! isalpha(arg[0]) && arg[0] != '_') + if (! isalpha((unsigned char)arg[0]) && arg[0] != '_' + && !ismbchar(arg[0])) badvar = TRUE; else - for (cp2 = arg+1; *cp2; cp2++) - if (! isalnum(*cp2) && *cp2 != '_') { + for (cp2 = arg; *cp2; cp2++) { + if (ismbchar(*cp2)) { + size_t n = mbclen(*cp2) - 1; + while (n-- > 0) + if (! *++cp2) { + badvar = TRUE; + break; + } + if (badvar == TRUE) + break; + continue; + } + if (! 
isalnum((unsigned char)*cp2) && *cp2 != '_') { badvar = TRUE; break; } - + } if (badvar) { if (do_lint) @@ -706,5 +767,13 @@ code = 0; sig = code; code = sig; #endif +#if defined(MSDOS) && !defined(DJGPP) + if (sig == SIGINT) { + exit(1); + } +#endif if (sig == SIGFPE) { +#if defined(_MSC_VER) && (_MSC_VER >= 700) + _fpreset(); +#endif fatal("floating point exception"); } else if (sig == SIGSEGV @@ -736,5 +805,5 @@ version() { - printf("%s.%d\n", version_string, PATCHLEVEL); + printf("%s.%d + multi-byte extension 1.15\n", version_string, PATCHLEVEL); /* * Per GNU coding standards, print copyright info, diff -u2rN -x awktab.c gawk-3.0.6/mbc.c gawk-3.0.6+mb1.15/mbc.c --- gawk-3.0.6/mbc.c Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/mbc.c Sat May 5 16:48:26 2001 @@ -0,0 +1,220 @@ +/* Functions for multi-byte support. + Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto) + Last change: Jun. 15, 2000 by okabe katsuyuki */ +#ifdef MSDOS +#ifndef __STDC__ +#define __STDC__ 1 +#endif +#endif +#include "mbc.h" + +#ifdef GAWK +#undef EUC +#undef SJIS +#undef UTF8 +#undef ASCII +#endif + +static const unsigned char mbctab_ascii[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static const unsigned char 
mbctab_euc[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +static const unsigned char mbctab_sjis[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +static const unsigned char mbctab_utf8[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 +}; + +#ifdef EUC +const unsigned char *mbctab = mbctab_euc; +int current_mbctype = MBCTYPE_EUC; +#else +#ifdef SJIS +const unsigned char *mbctab = mbctab_sjis; +int current_mbctype = MBCTYPE_SJIS; +#else +#ifdef UTF8 +const unsigned char *mbctab = mbctab_utf8; +int current_mbctype = MBCTYPE_UTF8; +#else +const unsigned char *mbctab = mbctab_ascii; +int current_mbctype = MBCTYPE_ASCII; +#endif +#endif +#endif + +void +#ifdef __STDC__ +mbcinit(int mbctype) +#else +mbcinit(mbctype) + int mbctype; +#endif +{ + switch (mbctype) { + case MBCTYPE_ASCII: + mbctab = mbctab_ascii; + current_mbctype = MBCTYPE_ASCII; + break; + case MBCTYPE_EUC: + mbctab = mbctab_euc; + current_mbctype = MBCTYPE_EUC; + break; + case MBCTYPE_SJIS: + mbctab = mbctab_sjis; + current_mbctype = MBCTYPE_SJIS; + break; + case MBCTYPE_UTF8: + mbctab = mbctab_utf8; + current_mbctype = MBCTYPE_UTF8; + break; + } +} + +mbchar_t +#ifdef __STDC__ +mbcfetch(const char *s, char **endptr) +#else +mbcfetch(s, endptr) + const char *s; + char **endptr; +#endif +{ + mbchar_t wc; + int n; + + if (current_mbctype == MBCTYPE_UTF8) { + wc = (unsigned char) *s++; + if ((n = mbclen(wc) - 1) > 0) { + wc &= 0x3f >> n; + while (n-- > 0) + wc = (wc << 6) | (*s++ & 0x3f); + } + } else { + wc = (unsigned char) *s++; + for (n = mbclen(wc) - 1; n > 0; n--) + wc = (wc << 8) | (unsigned char) *s++; + } + + if (endptr) + *endptr = (char *) s; + + return wc; +} + +char * +#ifdef __STDC__ +mbcstore(char *s, mbchar_t wc) 
+#else +mbcstore(s, wc) + char *s; + mbchar_t wc; +#endif +{ + mbchar_t mask; + int n; + char *ret; + + if (current_mbctype == MBCTYPE_UTF8) { + if (wc < 0x80) { + *s++ = (char) wc; + return s; + } + + mask = ~0x7ffL; + for (n = 2; n < 6; n++) { + if ((wc & mask) == 0) + break; + mask <<= 5; + } + + ret = s + n; + s[0] = (char) (~0xff >> n); + while (--n > 0) { + s[n] = 0x80 | (wc & 0x3f); + wc >>= 6; + } + s[0] |= wc; + } else { + mask = ~0xffL; + for (n = 1; n < 4; n++) { + if ((wc & mask) == 0) + break; + mask <<= 8; + } + + ret = s + n; + while (--n >= 0) { + s[n] = (char) wc; + wc >>= 8; + } + } + + return ret; +} diff -u2rN -x awktab.c gawk-3.0.6/mbc.h gawk-3.0.6+mb1.15/mbc.h --- gawk-3.0.6/mbc.h Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/mbc.h Sat May 5 16:48:26 2001 @@ -0,0 +1,60 @@ +#ifndef MBC_H +#define MBC_H 1 +/* Definitions for multi-byte support. + Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto) + Last change: Jun. 15, 2000 by okabe katsuyuki */ + +#ifndef const +#ifndef __STDC__ +#ifdef __GNUC__ +#define const __const__ +#define volatile __volatile__ +#else +#ifndef MSDOS +#define const +#define volatile +#endif +#endif +#endif +#endif + +#ifndef _ +#if defined(__STDC__) || defined(MSDOS) +#define _(x) x +#else +#define _(x) () +#endif +#endif + +typedef unsigned long mbchar_t; + +#define MBCTYPE_ASCII 0 +#define MBCTYPE_EUC 1 +#define MBCTYPE_SJIS 2 +#define MBCTYPE_UTF8 3 + +#ifdef EUC +#define MBCTYPE_DEFAULT MBCTYPE_EUC +#else +#ifdef SJIS +#define MBCTYPE_DEFAULT MBCTYPE_SJIS +#else +#ifdef UTF8 +#define MBCTYPE_DEFAULT MBCTYPE_UTF8 +#else +#define MBCTYPE_DEFAULT MBCTYPE_ASCII +#endif +#endif +#endif + +extern const unsigned char *mbctab; +extern int current_mbctype; + +void mbcinit _((int)); +mbchar_t mbcfetch _((const char *, char **)); +char *mbcstore _((char *, mbchar_t)); + +#define ismbchar(c) mbctab[(unsigned char)(c)] +#define mbclen(c) (mbctab[(unsigned char)(c)] + 1) + +#endif /* !MBC_H */ diff -u2rN -x 
awktab.c gawk-3.0.6/mbfuncs.c gawk-3.0.6+mb1.15/mbfuncs.c --- gawk-3.0.6/mbfuncs.c Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/mbfuncs.c Sat May 5 16:48:26 2001 @@ -0,0 +1,129 @@ +/* Functions for multi-byte support. + Created for gawk multi-byte extension May, 1993 by t^2 (Takahiro Tanimoto) + Last change: May 21, 1997 by K.Okabe */ +#include "awk.h" + +int +#ifdef __STDC__ +mbstrncasecmp(const char *s1, const char *s2, size_t n) +#else +mbstrncasecmp(s1, s2, n) + const char *s1, *s2; + size_t n; +#endif +{ + int c1; + + while (n--) { + if ((c1 = (unsigned char)*s1++) == 0) + return -(unsigned char)*s2; + if (!ismbchar(c1)) { + if ((c1 = ((unsigned char)casetable[c1] + - (unsigned char)casetable[(unsigned char)*s2++])) != 0) + return c1; + } + else { + size_t len = mbclen(c1) - 1; + + if ((c1 -= (unsigned char)*s2++) != 0) + return c1; + + while (len--) { + if (!n--) + return 0; + if ((c1 = (unsigned char)*s1++) == 0) + return -(unsigned char)*s2; + if ((c1 -= (unsigned char)*s2++) != 0) + return c1; + } + } + } + return 0; +} + +int +#ifdef __STDC__ +mbmemcmp(const void *p1, size_t n1, const void *p2, size_t n2) +#else +mbmemcmp(p1, n1, p2, n2) + const void *p1, *p2; + size_t n1, n2; +#endif +{ + const unsigned char *s1 = p1, *s2 = p2; + unsigned char c1; + size_t n; + + for (;;) { + if (!n1--) + return n2 ? -1 : 0; + if (!n2--) + return 1; + if ((c1 = *s1++) != *s2++) { + s2--; + return ismbchar(c1) ? (ismbchar(*s2) ? (int)c1 - *s2 : 1): + (ismbchar(*s2) ? -1 : (int)c1 - *s2); + } + + if (!ismbchar(c1)) + continue; + for (n = mbclen(c1) - 1; n > 0; n--) { + if (!n1--) + return n2 ? 
-1 : 0; + if (!n2--) + return 1; + if (*s1++ != *s2++) + return (int)*--s1 - *--s2; + } + } +} + +size_t +#ifdef __STDC__ +mblength(const char *s, size_t len) +#else +mblength(s, len) + const char *s; + size_t len; +#endif +{ + const char *send; + + send = s + len; + len = 0; + while ((s += mbclen(*s)) <= send) + len++; + return len; +} + +size_t +#ifdef __STDC__ +mbbyte(const char *s, size_t len) +#else +mbbyte(s, len) + const char *s; + size_t len; +#endif +{ + const char *s0, *s1; + + if (current_mbctype == MBCTYPE_UTF8) { + if (len == 0) + return 0; + + s0 = s; + s1 = s + len; + s = s1 - 1; + while (s > s0 && ! ismbchar (*s) && ! ((unsigned char) *s < 0x80)) + s--; + return (s + mbclen(*s) == s1) ? len : s - s0 ; + } else { + s0 = s; + s1 = s + len; + for (s = s1; s-- > s0 && ismbchar (*s);) + ; + if (! ((s1 - s) & 1)) + len--; + return len; + } +} diff -u2rN -x awktab.c gawk-3.0.6/missing/memmove.c gawk-3.0.6+mb1.15/missing/memmove.c --- gawk-3.0.6/missing/memmove.c Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/missing/memmove.c Sat May 5 16:49:14 2001 @@ -0,0 +1,24 @@ +/* + * memmove --- move memories. + * + * We supply this routine for those systems that aren't standard yet. 
+ */ + +char * +memmove (dst, src, n) + char *dst, *src; + int n; +{ + char *ret = dst; + + if (src < dst) { + src += n; + dst += n; + while (n--) + *--dst = *--src; + } + else if (dst < src) + while (n--) + *dst++ = *src++; + return ret; +} diff -u2rN -x awktab.c gawk-3.0.6/missing/strftime.c gawk-3.0.6+mb1.15/missing/strftime.c --- gawk-3.0.6/missing/strftime.c Fri Oct 22 22:43:29 1999 +++ gawk-3.0.6+mb1.15/missing/strftime.c Sat May 5 16:51:27 2001 @@ -96,5 +96,5 @@ #undef strchr /* avoid AIX weirdness */ -#ifndef __STDC__ +#if !defined(__STDC__) && !defined(MSDOS) #define const /**/ extern void tzset(); @@ -125,5 +125,5 @@ #define range(low, item, hi) max(low, min(item, hi)) -#if !defined(OS2) && !defined(MSDOS) && defined(HAVE_TZNAME) +#if !defined(OS2) && !defined(MSDOS) && !defined(WIN32) && defined(HAVE_TZNAME) extern char *tzname[2]; extern int daylight; @@ -139,5 +139,5 @@ /* min --- return minimum of two numbers */ -#ifndef __STDC__ +#if !defined(__STDC__) && !defined(MSDOS) static inline int min(a, b) @@ -155,5 +155,5 @@ /* max --- return maximum of two numbers */ -#ifndef __STDC__ +#if !defined(__STDC__) && !defined(MSDOS) static inline int max(a, b) @@ -169,5 +169,5 @@ /* strftime --- produce formatted time */ -#ifndef __STDC__ +#if !defined(__STDC__) && !defined(MSDOS) size_t strftime(s, maxsize, format, timeptr) @@ -754,5 +754,5 @@ /* With thanks and tip of the hatlo to ado@elsie.nci.nih.gov */ -#ifndef __STDC__ +#if !defined(__STDC__) && !defined(MSDOS) static int weeknumber(timeptr, firstweekday) diff -u2rN -x awktab.c gawk-3.0.6/missing.c gawk-3.0.6+mb1.15/missing.c --- gawk-3.0.6/missing.c Fri Oct 25 23:10:52 1996 +++ gawk-3.0.6+mb1.15/missing.c Sat May 5 18:12:31 2001 @@ -3,4 +3,8 @@ * overlapping includes in the files in missing.d. */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: Aug. 29, 1994 by t^2 */ +/* Modified for Human68k Feb. 
3, 1997 by okabe katsuyuki */ + #include "awk.h" @@ -15,4 +19,21 @@ #endif /* atarist */ +#ifdef __human68k__ +/* + * libc-1.1.32 の double から文字列への変換処理のバグを取り除きます. + */ +#ifdef DTOS18_MISSING +#include "human/_dtos18.c" +#endif +#ifdef FCONVERT_MISSING +#include "human/_round.c" +#include "human/fconvert.c" +#endif +#endif /* __human68k__ */ + +#if defined(MSDOS) && defined(_MSC_VER) && (_MSC_VER == 700) +#include "pc/atof.c" +#endif + #ifndef HAVE_SYSTEM #ifdef atarist @@ -34,4 +55,8 @@ #include "missing/memset.c" #endif /* HAVE_MEMSET */ + +#ifndef HAVE_MEMMOVE +#include "missing/memmove.c" +#endif /* HAVE_MEMMOVE */ #ifndef HAVE_STRNCASECMP diff -u2rN -x awktab.c gawk-3.0.6/msg.c gawk-3.0.6+mb1.15/msg.c --- gawk-3.0.6/msg.c Wed Jun 7 17:45:53 2000 +++ gawk-3.0.6+mb1.15/msg.c Sat May 5 16:58:38 2001 @@ -23,8 +23,10 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ +/* Multi-byte extension added Aug., 1994 by t^2 (Takahiro Tanimoto) + Last change: May 1, 1995 by t^2 */ #include "awk.h" -int sourceline = 0; +long sourceline = 0; char *source = NULL; @@ -60,5 +62,5 @@ (void) fprintf(stderr, "cmd. 
line:"); - (void) fprintf(stderr, "%d: ", sourceline); + (void) fprintf(stderr, "%ld: ", sourceline); } if (FNR > 0) { diff -u2rN -x awktab.c gawk-3.0.6/node.c gawk-3.0.6+mb1.15/node.c --- gawk-3.0.6/node.c Wed Jun 7 17:45:55 2000 +++ gawk-3.0.6+mb1.15/node.c Sat May 5 18:07:50 2001 @@ -23,4 +23,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: Jun 7, 1997 by okabe katsuyuki */ #include "awk.h" @@ -59,11 +61,11 @@ cp = n->stptr; - if (ISALPHA(*cp)) + if (ISALPHA((unsigned char) *cp)) return 0.0; cpend = cp + n->stlen; - while (cp < cpend && isspace(*cp)) + while (cp < cpend && isspace((unsigned char) *cp)) cp++; - if (cp == cpend || isalpha(*cp)) + if (cp == cpend || isalpha((unsigned char) *cp)) return 0.0; @@ -74,5 +76,5 @@ newflags = 0; if (cpend - cp == 1) { - if (ISDIGIT(*cp)) { + if (ISDIGIT((unsigned char) *cp)) { n->numbr = (AWKNUM)(*cp - '0'); n->flags |= newflags; @@ -95,5 +97,5 @@ /* POSIX says trailing space is OK for NUMBER */ - while (ISSPACE(*ptr)) + while (ISSPACE((unsigned char) *ptr)) ptr++; *cpend = save; @@ -284,4 +286,5 @@ register NODE *r; + len = mbbyte(s, len); getnode(r); r->type = Node_val; @@ -303,6 +306,8 @@ end = &(r->stptr[len]); for (pf = ptm = r->stptr; pf < end;) { - c = *pf++; + c = (unsigned char)*pf++; if (c == '\\') { + if (ismbchar(*pf)) /* case of multi-byte char after '\\' */ + continue; /* skip the '\\' */ c = parse_escape(&pf); if (c < 0) { @@ -311,7 +316,36 @@ c = '\\'; } - *ptm++ = c; - } else - *ptm++ = c; + if (ismbchar(*ptm++ = c)) { + size_t n = mbclen(c) - 1; + while (n > 0 && pf < end && *pf == '\\') { + pf++; + c = parse_escape(&pf); + if (c < 0) { + if (do_lint) + warning( + "backslash at end of string"); + c = '\\'; + } + *ptm++ = c; + if (c == '\n' || c == '\0') + break; + n--; + } + if (do_lint && n > 0) + warning( + "incomplete multi-byte char to use escape sequence"); + } + } + else if 
(ismbchar(*ptm++ = c)) { + size_t n = mbclen(c) - 1; + if (pf + n > end) { + if (do_lint) + warning("incomplete multi-byte char"); + ptm--; + break; + } + while (n-- > 0) + *ptm++ = *pf++; + } } len = ptm - r->stptr; @@ -320,4 +354,8 @@ r->flags |= PERM; } +#if (defined (_MSC_VER) || defined (__TURBOC__)) && !defined (WIN32) + if (len > INT_MAX) + fatal("make_str_node: string too long"); +#endif r->stlen = len; r->stref = 1; @@ -436,5 +474,5 @@ char **string_ptr; { - register int c = *(*string_ptr)++; + register int c = (unsigned char)*(*string_ptr)++; register int i; register int count; @@ -471,5 +509,5 @@ count = 0; while (++count < 3) { - if ((c = *(*string_ptr)++) >= '0' && c <= '7') { + if ((c = (unsigned char) *(*string_ptr)++) >= '0' && c <= '7') { i *= 8; i += c - '0'; @@ -491,5 +529,5 @@ if (do_posix) return ('x'); - if (! isxdigit((*string_ptr)[0])) { + if (! isxdigit((unsigned char) (*string_ptr)[0])) { warning("no hex digits in \\x escape sequence"); return ('x'); @@ -498,5 +536,5 @@ for (;;) { /* do outside test to avoid multiple side effects */ - c = *(*string_ptr)++; + c = (unsigned char) *(*string_ptr)++; if (ISXDIGIT(c)) { i *= 16; diff -u2rN -x awktab.c gawk-3.0.6/pc/Makefile gawk-3.0.6+mb1.15/pc/Makefile --- gawk-3.0.6/pc/Makefile Thu May 18 20:07:02 2000 +++ gawk-3.0.6+mb1.15/pc/Makefile Sat May 5 17:09:45 2001 @@ -7,4 +7,5 @@ # - for Microsoft C 7 [16bit ececutable for DOS] # - for Microsoft C 6.00A [16bit executable for OS/2 or DOS] +# - for Borland C 3.1 [16bit executable for DOS] # Tested with GNU make and dmake-3.8 under OS/2 and DOS, and ndmake and @@ -13,4 +14,7 @@ # few edits in the configuration section below. +# Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) +# Last change: Feb. 20, 1997 by WILLs (Katsuyuki Okabe) + default: @echo "Enter $(MAK) target " @@ -24,4 +28,5 @@ @echo " msc ..... DOS exe [Microsoft C 7 & 8 (AKA 1.52)] " @echo " msc6 .... DOS exe [Microsoft C 6.00a] " + @echo " bcc31 ... 
DOS exe [Borland C 3.1] " @echo " msc6os2 . OS/2 exe [Microsoft C 6.00a] " @echo " msc6bnd . OS/2 and DOS exe [Microsoft C 6.00a] " @@ -95,16 +100,20 @@ LDJG = $(CC) $(LF) -o gawk.exe $(LDRSP) $(LF2) BDJG = stubify -g awk.exe | stubedit awk.exe runfile=gawk +ODJG = xsystem.o + +ILDJ = $(CC) $(LF) -o igawk $(IGAWKOBJS) +IBDJ = coff2exe igawk djgpp: $(MAK) all \ CC=gcc O=.o CF=-O2 \ - LNK=LDJG LF=-s LF2=-lm \ - BIND=BDJG + LNK=LDJG LF=-s LF2=-lm OBJ=ODJG BIND=BDJG \ + ILK=ILDJ IBND=IBDJ IOBJS= djgpp-debug: $(MAK) all \ - CC=gcc O=.o CF='-O2 -g' \ - LNK=LDJG LF2=-lm \ - BIND=BDJG + CC=gcc O=.o "CF=-g -O2" \ + LNK=LDJG LF2=-lm OBJ=ODJG BIND=BDJG \ + ILK=ILDJ IBND=IBDJ IOBJS= LDJGv1 = $(CC) $(LF) -o gawk $(LDRSP) $(LF2) @@ -115,6 +124,6 @@ $(MAK) all \ CC=gcc O=.o CF=-O \ - LNK=LDJGv1 LF=-s LF2=-lm \ - BIND=BDJGv1 + LNK=LDJGv1 LF=-s LF2=-lm OBJ=ODJG BIND=BDJGv1 \ + ILK=ILDJ IBND=IBDJ IOBJS= #======================================================================== @@ -135,26 +144,32 @@ #BEMX = emxbind -bs /emx/bin/emx.exe a.out $@ -p BEMXD = emxbind -b -o $@ a.out -p +OEMX = popen.o + +ILEX = $(CC) $(LF) -o $@ $(IGAWKOBJS) igawk.def $(LF2) +ILEXBND = $(CC) $(LF) -o a.out $(IGAWKOBJS) igawk.def $(LF2) emx: $(MAK) all \ - "CC=gcc -Zomf" O=.obj "CF=-O -DOS2" \ - LNK=LEMX "LF=-s -Zcrtdll -Zstack 512" RSP= + "CC=gcc -Zomf" O=.obj "CF=-O -DOS2" OBJ=EMPTY \ + LNK=LEMX "LF=-s -Zcrtdll -Zstack 512" RSP= \ + ILK=ILEX IOBJS= emxnt: $(MAK) all \ - "CC=gcc -Zwin32 -Zcrtdll=rsxntcs" O=.o "CF=-O -DOS2" \ - LNK=LEMX "LF=-s -Zstack 512" RSP= + "CC=gcc -Zwin32 -Zcrtdll=rsxntcs" O=.o "CF=-O -DOS2" OBJ=EMPTY \ + LNK=LEMX "LF=-s -Zstack 512" RSP= \ + ILK=ILEX IOBJS= emxbnd: $(MAK) all \ - CC=gcc O=.o "CF=-O -DOS2 -DMSDOS" OBJ=popen.o \ - LNK=LEMXBND \ - BIND=BEMX "P=|tr \" \" \"\n\"" + CC=gcc O=.o "CF=-O -DOS2 -DMSDOS" OBJ=OEMX \ + LNK=LEMXBND BIND=BEMX "P=|tr \" \" \"\n\"" \ + ILK=ILEXBND IBND=BEMX IOBJS= emxbnd-debug: $(MAK) all \ - CC=gcc O=.o "CF=-g -DOS2 -DMSDOS" OBJ=popen.o \ - 
LNK=LEMXBND \ - BIND=BEMXD "P=|tr \" \" \"\n\"" + CC=gcc O=.o CF="-g -DOS2 -DMSDOS" OBJ=OEMX \ + LNK=LEMXBND BIND=BEMXD "P=|tr \" \" \"\n\"" \ + ILK=ILEXBND IBND=BEMXD IOBJS= #======================================================================== @@ -165,9 +180,11 @@ # The following might work around command-line length limitations: #LMINGW32 = $(CC) $(LF) -o $@ *.o $(LF2) +ILMINGW32 = $(CC) $(LF) -o $@ $(IGAWKOBJS) $(LF2) mingw32: $(MAK) all \ CC=gcc O=.o CF=-O OBJ=popen.o \ - LNK=LMINGW32 LF=-s RSP= + LNK=LMINGW32 LF=-s RSP= \ + ILK=ILMINGW32 IOBJS= #======================================================================== @@ -193,8 +210,9 @@ #MSCCL = -FPc -LMSC = link $(LF) $(LNKRSP) $(STDARGV)/NOE,$@,,/NOD:llibce $(MSCLIB)$(LF2)/STACK:0x4990,nul +LMSC = link $(LF) $(LNKRSP) $(STDARGV)/NOE,$@,,/NOD:llibce $(MSCLIB)$(LF2)/STACK:0x8000,nul +ILMC = $(CC) -o $@ $(LF) $(IGAWKOBJS) $(ILF) # CLMSC-linking works when building under OS/2 -CLMSC = $(CC) -o $@ $(LF) $(GAWKOBJS) $(STDARGV) $(LF2) -link /NOE/NOI/STACK:0x6f00 +CLMSC = $(CC) -o $@ $(LF) $(GAWKOBJS) $(STDARGV) $(LF2) -link /NOE/NOI/STACK:0x8000 BMSC = bind $@ /n DOSMAKEPIPE DOSCWAIT @@ -214,6 +232,6 @@ $(MAK) all \ "CC=cl -nologo $(MSCCL)" O=.obj "CF=-AL -Ze -Ipc/include $(MSCOPT)" \ - OBJ=popen.obj \ - LNK=LMSC P=+ + LNK=LMSC P=+ STDARGV= \ + ILK=ILMC Lmsc = $(LMSC) # for broken makes (nmake) which cannot expand $($X) Bmsc = @@ -222,6 +240,6 @@ $(MAK) all \ "CC=cl $(MSCCL)" O=.obj "CF=-AL -Ze -Ipc/include -W2 -Zi -Od" \ - OBJ=popen.obj \ - LNK=LMSC LF2=/CO P=+ + LNK=LMSC LF2=/CO P=+ STDARGV= \ + ILK=ILMC msc6: @@ -239,6 +257,7 @@ "CC=cl -nologo $(MSCCL)" O=.obj "CF=-AL -DOS2 -UMSDOS $(MSCOPT) -Od" $(MAK) all \ - "CC=cl $(MSCCL)" O=.obj "CF=-AL -DOS2 -UMSDOS $(MSCOPT)" \ - LNK=LMSC "LF2=p,gawk.def" P=+ + "CC=cl $(MSCCL)" O=.obj "CF=-AL -DOS2 -UMSDOS $(MSCOPT)" OBJ=EMPTY \ + LNK=LMSC "LF2=p,gawk.def" P=+ \ + ILK=ILMC "ILF=p,igawk.def" IOBJS= msc6bnd: @@ -247,7 +266,6 @@ $(MAK) all \ "CC=cl $(MSCCL)" O=.obj 
"CF=-AL -DOS2 $(MSCOPT)" \ - OBJ=popen.obj \ - LNK=LMSC "LF2=p,gawk.def" P=+ \ - BIND=BMSC + LNK=LMSC "LF2=p,gawk.def" P=+ BIND=BMSC \ + ILK=ILMC "ILF=p,igawk.def" IBND=BMSC Lmsc6bnd = $(LMSC) # for broken makes (nmake) which cannot expand $($X) Bmsc6bnd = $(BMSC) @@ -256,10 +274,11 @@ # Windows '9x / NT LvcWin32 = link -nologo -subsystem:console -release -out:$@ $(LNKRSP) +OBJvc = popen.obj xargs32.obj xsystem.obj +ILvcWin32 = link -nologo -subsystem:console -release -out:$@ $(IGAWKOBJS) vcWin32: $(MAK) all \ - "CC=cl -nologo" O=.obj "CF=-o2 -DWIN32 -D__STDC__=0" \ - OBJ=popen.obj \ - LNK=LvcWin32 + "CC=cl -nologo" O=.obj "CF=-o2 -DWIN32 -D__STDC__=0 -Dalloca=_alloca" \ + OBJ=OBJvc LNK=LvcWin32 ILK=ILvcWin32 IOBJS=xargs32.obj @@ -278,4 +297,33 @@ #======================================================================== +#========================== BCC ========================================= +#======================================================================== + +# Optimization and library options: +# O1-i == optimize for size without expanding builtin functions. +# O2-i == optitimize for speed without expanding builtin functions. +# N == check stack overflow. +# 2 == 286 or better. +# d == merge duplicate strings. +#BCCOPT = -O1-i -N -d -2 +BCCOPT = -O2-i -N -d # -2 +# Emulator lib, uses math coprocessor if present. +BCCCL = -ml -f +#BCCCL = $(BCCML) -f287 + +CLBCC = $(CC) -e$@ $(LF) $(LDRSP) $(LF2) +ICLBC = $(CC) -f- -e$@ $(LF) $(IGAWKOBJS) $(LF2) + +bcc31: + $(MAK) all \ + "CC=bcc $(BCCCL)" O=.obj "CF=-A- -DMSDOS $(BCCOPT) -w-" \ + LNK=CLBCC ILK=ICLBC + +bcc31-debug: + $(MAK) all \ + "CC=bcc $(BCCCL)" O=.obj "CF=-A- -DMSDOS -Od -N -w -v" \ + LNK=CLBCC ILK=ICLBC + +#======================================================================== # Define BIND for BINDless compiles, otherwise $($(BIND)) may break. @@ -285,12 +333,24 @@ # bitwise operations (-DBITOPS) and non-decimal input data (-DNONDECDATA) are # undocumented in 3.0.3. 
They may be enabled in config.h, or added to CFLAGS. -CFLAGS = $(CF) -DGAWK -I. -DHAVE_CONFIG_H +ODOS = popen$O xargs$O xsystem$O +OBJ = ODOS +OBJS = $($(OBJ)) + +CFLAGS = $(CF) -DGAWK -I. -DHAVE_CONFIG_H $(MBCTYPE_DEF) + +# Things you might set to MBCTYPE_DEF to spec. default multi-byte char type. +# -DEUC will make default multi-byte char type EUC and +# -DSJIS SJIS. +# If you do not set EUC/SJIS, gawk assumes no multi-byte +# char as default. +MBCTYPE_DEF = -DSJIS # object files AWKOBJS1 = array$O builtin$O eval$O field$O gawkmisc$O io$O main$O AWKOBJS2 = missing$O msg$O node$O re$O version$O -AWKOBJS = $(AWKOBJS1) $(AWKOBJS2) +AWKOBJS3 = mbc$O mbfuncs$O +AWKOBJS = $(AWKOBJS1) $(AWKOBJS2) $(AWKOBJS3) -ALLOBJS = $(AWKOBJS) awktab$O getid$O $(OBJ) +ALLOBJS = $(AWKOBJS) awktab$O getid$O $(OBJS) # LIBOBJS @@ -308,7 +368,7 @@ # rules to build gawk -all : gawk.exe +all : gawk.exe igawk.exe -gawk.exe:: $(GAWKOBJS) $(RSP) +gawk.exe: $(GAWKOBJS) $(RSP) $(DO_LNK) $(DO_BIND) @@ -317,9 +377,10 @@ echo $(AWKOBJS1)$P > $@ echo $(AWKOBJS2)$P >> $@ - echo awktab$O getid$O $(OBJ) $(LIBOBJS)$P >> $@ + echo $(AWKOBJS3)$P >> $@ + echo awktab$O getid$O $(OBJS) $(LIBOBJS)$P >> $@ -$(ALLOBJS): awk.h dfa.h regex.h config.h +$(ALLOBJS): awk.h dfa.h regex.h config.h mbc.h -gawkmisc$O: pc/gawkmisc.pc +gawkmisc$O: pc/gawkmisc.pc xargs.h defpath.h getopt$O: getopt.h @@ -327,8 +388,12 @@ getopt1$O: getopt.h +dfa$O: mbc.h + +regex$O: mbc.h + main$O: patchlevel.h # A bug in ndmake requires the following rule -awktab$O: awk.h awktab.c +awktab$O: awk.h awktab.c mbc.h $(CC) -c $(CFLAGS) awktab.c @@ -338,4 +403,20 @@ alloca$O: alloca.c +defpath.h: Makefile + echo '#define DEFPATH ".;$(prefix)/lib/awk"' | sed "s/'//g" > $@ + +IBND = EMPTY +IOBJS = xargs$O +IGAWKOBJS = igawk$O $(IOBJS) + +igawk.exe: $(IGAWKOBJS) + $($(ILK)) + $($(IBND)) + +igawk$O: igawk.c igawk.d + +igawk.d: pc/awklib/igawk.awk mkigawk.awk gawk.exe + ./gawk -f mkigawk.awk $< > $@ + install: install$(install) @@ -358,5 +439,5 @@ 
clean: - rm -rf gawk gawk.exe gawk.map *.o *.obj core a.out $(RSPFILE) + rm -rf gawk gawk.exe igawk igawk.exe *.o *.obj core a.out $(RSPFILE) # cd doc && $(MAKE) clean # cd test && $(MAKE) clean diff -u2rN -x awktab.c gawk-3.0.6/pc/Makefile.tst gawk-3.0.6+mb1.15/pc/Makefile.tst --- gawk-3.0.6/pc/Makefile.tst Fri Jul 14 10:01:26 2000 +++ gawk-3.0.6+mb1.15/pc/Makefile.tst Sat May 5 18:05:51 2001 @@ -150,7 +150,7 @@ # lack it. # @cp $(AWK) /tmp/gawk && $(srcdir)/poundbang $(srcdir)/poundbang >_`basename $@` - $(CP) $(AWK) /tmp/gawk.exe && $(srcdir)/poundbang $(srcdir)/poundbang >_$@ -# @rm -f /tmp/gawk - rm -f /tmp/gawk.exe + $(CP) $(AWK) $(TMP)/gawk.exe && $(srcdir)/poundbang $(srcdir)/poundbang >_$@ +# @rm -f $(TMP)/gawk + rm -f $(TMP)gawk.exe # $(CMP) $(srcdir)/poundbang.ok _`basename $@` && rm -f _`basename $@` $(CMP) $(srcdir)/poundbang.ok _$@ && rm -f _$@ @@ -183,5 +183,5 @@ *) cp $(srcdir)/argarray.in . ;; \ esac - @TEST=test echo just a test | $(AWK) -f $(srcdir)/argarray.awk ./argarray.in - >_$@ + @echo TEST=test just a test | $(AWK) -f $(srcdir)/argarray.awk ./argarray.in - >_$@ @echo 'If argarray fails, set try setting LFN=n in your environment' @echo "before running make. If that still doesn't work, read the" @@ -533,5 +533,5 @@ # TZ=UTC0; export TZ; $(AWK) -f $(srcdir)/strftlng.awk >_$@ ; \ # fi - @if $(CMP) -s $(srcdir)/strftlng.ok _$@ ; then : ; else \ + @if $(CMP) -s $(srcdir)/strftlng.ok _$@ >NUL ; then : ; else \ env TZ=UTC0; $(AWK) -f $(srcdir)/strftlng.awk >_$@ ; \ fi diff -u2rN -x awktab.c gawk-3.0.6/pc/README.MSC gawk-3.0.6+mb1.15/pc/README.MSC --- gawk-3.0.6/pc/README.MSC Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/README.MSC Sat May 5 17:21:52 2001 @@ -0,0 +1,99 @@ +PC-9801 用 MS-C version 6.00A の引数のセットアップルーチンにはバグがあり +ます. 
+ +#include <stdio.h> + +int +main(int argc, char **argv) +{ + int i; + + for (i = 0; i <= argc; i++) + printf("argv[%d] == %s\n", i, argv[i]); + return 0; +} + +をコンパイル, リンクした FOO.EXE に + + A>foo "\\" abc + +などの引数を渡して実行すると, バグが確認できます. また, ワイルドカード +展開ルーチンにもバグがあり, 上記のプログラムを SETARGV.OBJ とともにリン +クして + + A>foo \DOS\*.com + +などの引数で実行すると, おかしな展開の仕方をしてしまいます. + +このバグは SOURCE/STARTUP 下の DOS/STDARGV.ASM および WILD.C に以下のパッ +チを当てると修正できるようです. パッチを当てて STARTUP.BAT でコンパイル +してください. その後, 例えばラージモデル用のライブラリを修正する場合, +L/DOS/STDARGV.OBJ, L/DOS/_SETARGV.OBJ, L/WILD.OBJ をそれぞれ +KSTDARGV.OBJ, _KSTARGV.OBJ, KWILD.OBJ とリネームし, + + lib \msc6\lib\llibce.lib-+dos\kstdargv.obj-+dos\_kstargv.obj-+kwild.obj; + +などとしてモジュールを更新してください. 念のためこの作業を行う前に, + + lib \msc6\lib\llibce.lib*kstdargv.obj*_kstargv.obj*kwild.obj; + +などで, kstdargv.obj, _kstargv.obj, kwild.obj のバックアップをとって置く +といいでしょう. + +なお, このパッチは当然のことながら無保証です. + +Mar. 5, 1994 t^2 + +*** stdargv.org Mon Oct 8 19:50:46 1990 +--- stdargv.asm Thu Jul 22 17:50:44 1993 +*************** +*** 409,415 **** + shr cx,1 + adc dx,cx ; add 1 for every pair of backslashes + test al,1 ; plus 1 for the " if odd number of \ +! jz arg310 ; [J1] + jmp arg210 ; [J1] + ; + ; Command line is fully parsed - compute number of bytes needed +--- 409,415 ---- + shr cx,1 + adc dx,cx ; add 1 for every pair of backslashes + test al,1 ; plus 1 for the " if odd number of \ +! jnz arg310 ; ! Jul.21.93 t^2 + jmp arg210 ; [J1] + ; + ; Command line is fully parsed - compute number of bytes needed + +*** wild.org Mon Oct 8 19:49:48 1990 +--- wild.c Sat Mar 5 00:42:12 1994 +*************** +*** 186,197 **** + char *ptr2 = arg; // [J1] + + if(ptr != arg) { // [J1] +! while(ptr2 + 1 != ptr && *ptr2 != SLASHCHAR && *ptr2 != FWDSLASHCHAR +! && *ptr2 != ':') { // [J1] + if(iskanji(*ptr2)) ptr2++; // [J1] + ptr2++; // [J1] + } // [J1] +! 
ptr = ptr2; // [J1] + } // [J1] + + if (*ptr == ':' && ptr != arg+1) /* weird name, just add it as is */ +--- 186,201 ---- + char *ptr2 = arg; // [J1] + + if(ptr != arg) { // [J1] +! char *ptr3 = arg; +! +! while (ptr2 < ptr) { +! if (*ptr2 == SLASHCHAR || *ptr2 == FWDSLASHCHAR +! || *ptr2 == ':') +! ptr3 = ptr2; + if(iskanji(*ptr2)) ptr2++; // [J1] + ptr2++; // [J1] + } // [J1] +! ptr = ptr3; + } // [J1] + + if (*ptr == ':' && ptr != arg+1) /* weird name, just add it as is */ diff -u2rN -x awktab.c gawk-3.0.6/pc/atof.c gawk-3.0.6+mb1.15/pc/atof.c --- gawk-3.0.6/pc/atof.c Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/atof.c Sat May 5 17:22:15 2001 @@ -0,0 +1,70 @@ +#include +#include +#define E_MAX 308 +#define E_MIN (-308) + +double atof(s) +char *s; +{ + double d = 0.0; + int sign = 0; + int esign = 0; + int exp = 0; + int eexp = 0; + int prec = 0; + + errno = 0; + while (isspace(*s)) { + s++; + } + if (*s == '-') { + s++; + sign = 1; + } + while (isdigit(*s)) { + if (prec || *s != '0') + prec++; + d = 10.0 * d + (double) (*s++ - '0'); + } + if (*s == '.') { + while (isdigit(*++s)) { + exp--; + d = 10.0 * d + (double) (*s - '0'); + } + } + if (prec == 0) { + prec = exp; + } + if (*s == 'e' || *s == 'E') { + if (*++s == '-') { + esign = 1; + s++; + } + if (*s == '+') { + s++; + } + while (isdigit(*s)) { + eexp = eexp * 10 + *s++ - '0'; + } + if (esign) { + eexp = -eexp; + } + } + if (eexp <= E_MIN - prec || E_MAX + prec <= eexp) { + errno = ERANGE; + return 0.0; + } + exp += eexp; + while (exp < 0) { + d = d / 10.0; + exp++; + } + while (exp > 0) { + d = d * 10.0; + exp--; + } + if (sign) { + return -d; + } + return d; +} diff -u2rN -x awktab.c gawk-3.0.6/pc/awklib/igawk.awk gawk-3.0.6+mb1.15/pc/awklib/igawk.awk --- gawk-3.0.6/pc/awklib/igawk.awk Sat Dec 16 03:09:45 1995 +++ gawk-3.0.6+mb1.15/pc/awklib/igawk.awk Sat May 5 17:38:29 2001 @@ -19,4 +19,7 @@ BEGIN { path = ENVIRON["AWKPATH"] + gsub(/\\/, "/", path) + gsub(/:/, ";", path) + path = 
gensub(/((^|;)[A-Za-z]);/, "\\1:", "g", path) ndirs = split(path, pathlist, ";") for (i = 1; i <= ndirs; i++) { diff -u2rN -x awktab.c gawk-3.0.6/pc/config.h gawk-3.0.6+mb1.15/pc/config.h --- gawk-3.0.6/pc/config.h Sun Jun 18 20:55:33 2000 +++ gawk-3.0.6+mb1.15/pc/config.h Sat May 5 17:42:11 2001 @@ -141,4 +141,7 @@ #define HAVE_MEMCPY 1 +/* Define if you have the memmove function. */ +#define HAVE_MEMMOVE 1 + /* Define if you have the memset function. */ #define HAVE_MEMSET 1 @@ -188,8 +191,10 @@ /* Library search path */ +#if 0 #define DEFPATH ".;c:/lib/awk;c:/gnu/lib/awk" +#endif -#if defined (_MSC_VER) -#if !defined(__STDC__) +#if defined (_MSC_VER) || defined(__TURBOC__) +#if !defined(__STDC__) && !defined(__TURBOC__) # define __STDC__ 1 #endif @@ -218,6 +223,12 @@ #endif -#if defined(DJGPP) +#if defined(__TURBOC__) +#define off_t long +#endif + +#if defined(DJGPP) || defined (_MSC_VER) || defined (__TURBOC__) # define HAVE_LIMITS_H 1 +#endif +#if defined(DJGPP) # undef HAVE_POPEN_H #endif @@ -230,5 +241,7 @@ #if defined(WIN32) && defined(_MSC_VER) #define alloca _alloca +#if 0 #define system(s) os_system(s) +#endif #endif diff -u2rN -x awktab.c gawk-3.0.6/pc/gawkmisc.pc gawk-3.0.6+mb1.15/pc/gawkmisc.pc --- gawk-3.0.6/pc/gawkmisc.pc Mon Oct 21 10:13:45 1996 +++ gawk-3.0.6+mb1.15/pc/gawkmisc.pc Sat May 5 17:44:14 2001 @@ -24,4 +24,26 @@ */ +#include +#if defined(MSDOS) && !defined(__GO32__) && !defined(__EMX__) +#include +#include +#include "xargs.h" +#elif defined(_WIN32) && !defined(__CYGWIN32__) +#include "xargs32.h" +#endif +#include "defpath.h" + +#if defined(MSDOS) && !defined(__GO32__) && !defined(__EMX__) +#ifndef FP_OFF +#ifdef _FP_OFF +#define FP_OFF(p) _FP_OFF(p) +#define FP_SEG(p) _FP_SEG(p) +#else +#define FP_OFF(p) ((unsigned short)((unsigned long)((void far *)(p)))) +#define FP_SEG(p) ((unsigned short)((unsigned long)((void far *)(p)) >> 16)) +#endif +#endif +#endif + char quote = '"'; char envsep = ';'; @@ -63,4 +85,12 @@ _wildcard(argcp, 
argvp); setvbuf(stdout, NULL, _IOLBF, BUFSIZ); +#else +#if (defined(MSDOS) && !defined(__GO32__)) || (defined(_WIN32) && !defined(__CYGWIN32__)) +#ifndef XARGS_USELFN +#define XARGS_USELFN 0 +#endif + xargs_modify_rule(XARGS_DOTBREAK | XARGS_USELFN, XARGS_DOTBREAK | XARGS_USELFN); + xargs(argcp, argvp); +#endif #endif return; @@ -74,8 +104,133 @@ int flag; { - /* no-op */ +#ifdef MSDOS + if (STREQ(name, "/dev/null")) + return open("nul", flag, 0666); + else if (STREQ(name, "/dev/tty")) + return open("con", flag, 0666); +#endif + return -1; } +/* os_open --- support long file name */ + +#if defined(MSDOS) && !defined(__GO32__) && !defined(__EMX__) + +static int get_drive P((void)); +static int check_lfn P((const char *)); + +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +#define VA_START(args,last) va_start(args,last) +int +os_open(const char *name, int flag, ...) +#else +#define VA_START(args,last) va_start(args) +int +os_open(name, flag, va_alist) +const char *name; +int flag; +va_dcl +#endif +{ + union REGS ireg, oreg; + struct SREGS sreg; + char *sname; + va_list ap; + int mode; + int fd; + + VA_START(ap, flag); + mode = va_arg(ap, int); + va_end(ap); + + if (check_lfn(name) == 0) { + fd = open(name, flag, mode); + } else { + sname = (char *) alloca(strlen(name) + 1); + + ireg.x.ax = 0x7160; + ireg.h.cl = 1; + ireg.h.ch = 0x80; + ireg.x.si = FP_OFF(name); + sreg.ds = FP_SEG(name); + ireg.x.di = FP_OFF(sname); + sreg.es = FP_SEG(sname); + + intdosx(&ireg, &oreg, &sreg); + + fd = open((oreg.x.cflag ? 
name : sname), flag, mode); + } + + return fd; +} + +static int +check_lfn(path) +const char *path; +{ + static int lfn_supported = -1; + static char drv_cache['Z' - 'A' + 1]; + union REGS ireg, oreg; + struct SREGS sreg; + char buf[32]; + char *root; + char *env; + int lfn_ok; + int drv; + + if (lfn_supported == -1) { + lfn_supported = ((env = getenv("XARGS")) != 0 && strstr(env, "LFN") != 0) + || ((env = getenv("LFN")) != 0 && tolower(env[0]) != 'n'); + memset(drv_cache, (char) -1, sizeof (drv_cache)); + } + + if (lfn_supported == 0) + return 0; + + root = (char *) alloca(strlen(path) + 4); + if (path[0] == '\\' && path[1] == '\\') { + drv = -1; + strcpy(root, path); + } else { + drv = isalpha(path[0]) && path[1] == ':' + ? toupper(path[0]) - 'A' : get_drive() ; + + if (drv_cache[drv] >= 0) + return drv_cache[drv]; + + root[0] = drv + 'A'; + root[1] = ':'; + root[2] = '\\'; + root[3] = '\0'; + } + + ireg.x.ax = 0x71a0; + ireg.x.di = FP_OFF(buf); + sreg.es = FP_SEG(buf); + ireg.x.dx = FP_OFF(root); + sreg.ds = FP_SEG(root); + + intdosx(&ireg, &oreg, &sreg); + + lfn_ok = oreg.x.cflag ? 0 : 1 ; + + if (drv >= 0) + drv_cache[drv] = lfn_ok; + + return lfn_ok; +} + +static int +get_drive() +{ + union REGS ireg, oreg; + + ireg.h.ah = 0x19; + intdos(&ireg, &oreg); + return (unsigned char) oreg.h.al; +} +#endif + /* optimal_bufsize --- determine optimal buffer size */ @@ -133,2 +288,12 @@ } +#if defined(MSDOS) && (defined(_MSC_VER) || defined(__TURBOC__)) +void +test_signal() +{ + union REGS reg; + + reg.h.ah = 0x0b; /* check stdin stat, for ^C check */ + intdos(&reg, &reg); +} +#endif diff -u2rN -x awktab.c gawk-3.0.6/pc/getid.c gawk-3.0.6+mb1.15/pc/getid.c --- gawk-3.0.6/pc/getid.c Mon Nov 16 04:04:36 1998 +++ gawk-3.0.6+mb1.15/pc/getid.c Sat May 5 17:45:12 2001 @@ -1,3 +1,6 @@ -#if defined(_MSC_VER) || defined(__MINGW32__) +/* modified for Borland C 3.1 and djgpp v2.01 Jan. 
31, 1997 by K.Okabe */ +/* Last modified: Aug 5, 1998 by WILLs (Katsuyuki Okabe) */ + +#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__TURBOC__) #ifdef OS2 @@ -8,14 +11,21 @@ # endif #else +# include # include +# include +# undef getpid #endif -#ifdef OS2 +#ifndef WIN32 int getpid(void) { +#ifdef OS2 PIDINFO PidInfo; DosGetPID(&PidInfo); return(PidInfo.pid); +#else + return _psp; +#endif } #endif @@ -48,5 +58,24 @@ } -#if defined(_MSC_VER) || defined(__GO32__) || defined(__MINGW32__) +#if defined(DJGPP) && DJGPP >= 2 + +#include +#include + +int +getpid(void) +{ + return (int) (_go32_info_block.linear_address_of_original_psp >> 16); +} + +int +getppid(void) +{ + return (int) _farpeekw(_dos_ds, _go32_info_block.linear_address_of_original_psp + 0x16); +} + +#else /* ! (DJGPP >= 2) */ + +#if defined(_MSC_VER) || defined(__GO32__) || defined(__MINGW32__) || defined(__TURBOC__) int getppid(void) { @@ -57,6 +86,11 @@ return(PidInfo.pidParent); #else - return(0); +#ifndef WIN32 + return *(int *)(((unsigned long)_psp << 16) + 0x16); +#else + return 0; +#endif #endif } #endif +#endif /* ! 
(DJGPP >= 2) */ diff -u2rN -x awktab.c gawk-3.0.6/pc/igawk.c gawk-3.0.6+mb1.15/pc/igawk.c --- gawk-3.0.6/pc/igawk.c Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/igawk.c Sat May 5 17:26:12 2001 @@ -0,0 +1,444 @@ +/* igawk C version + * Katsuyuki Okabe + * April 26, 1997 + * Last change: August 5, 1998 + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef __GO32__ +#ifdef WIN32 +#include "xargs32.h" +#else +#include "xargs.h" +#endif +#endif + +static const char *version_string = "1.0"; + + +#if (defined (_MSC_VER) || defined (__TURBOC__)) && !defined(WIN32) +#define NEAR _near +#else +#define NEAR +#endif + +#define STREQ(s1,s2) (strcmp ((s1), (s2)) == 0) +#define STRNEQ(s1,s2,n) (strncmp ((s1), (s2), (n)) == 0) +#define tmpfile_open(f) file_open ((f), tmpnam ((char *) 0)) + +struct args { + int size; + int argc; + char **argv; +}; + +struct file { + FILE *fp; + char *name; +}; + +static const char igawk_awk[] = +#include "igawk.d" +; + +static const char *program_name; +static const char *gawk_path; +static struct file igs, ige; + +static void NEAR error (int, const char *, ...) 
+#if defined (__GNUC__) && __GNUC__ >= 2 +__attribute__ ((noreturn, format (printf, 2, 3))) +#endif +; +static void * NEAR xmalloc (size_t); +static void * NEAR xrealloc (void *, size_t); +static char * NEAR xstrdup (const char *); +static int NEAR strempty (const char *); +static struct file * NEAR file_open (struct file *, const char *); +static int NEAR file_close (struct file *); +static int NEAR file_remove (struct file *); +static void NEAR args_init (struct args *); +static void NEAR args_add (struct args *, const char *); +static void NEAR args_cat (struct args *, const char *); +static void NEAR parse_arguments (struct args *, struct file *, int, char **); +static void NEAR filter_program (const char *, int); +static int NEAR run_program (struct args *); +static void remove_files (void); +static void sigint (int); + +static void NEAR +error (int errnum, const char *message, ...) +{ + va_list ap; + + fprintf (stderr, "%s: ", program_name); + + va_start (ap, message); + vfprintf (stderr, message, ap); + va_end (ap); + + if (errnum) + fprintf (stderr, ": %s", strerror (errnum)); + putc ('\n', stderr); + fflush (stderr); + + exit (EXIT_FAILURE); +} + +static void * NEAR +xmalloc (size_t size) +{ + void *ptr; + + ptr = (void *) malloc (size ? size : 1); + if (ptr == (void *) 0) + error (0, "Memory exhausted"); + return ptr; +} + +static void * NEAR +xrealloc (void *ptr, size_t size) +{ + if (ptr == (void *) 0) + return xmalloc (size); + + ptr = (void *) realloc (ptr, size ? 
size : 1); + if (ptr == (void *) 0) + error (0, "Memory exhausted"); + return ptr; +} + +static char * NEAR +xstrdup (const char *str) +{ + return strcpy (xmalloc (strlen (str) + 1), str); +} + +static int NEAR +strempty (const char *str) +{ + while (isspace ((unsigned char) *str)) + str++; + return *str == '\0'; +} + +static struct file * NEAR +file_open (struct file *f, const char *path) +{ + f->name = xstrdup (path); + if ((f->fp = fopen (path, "w")) == (FILE *) 0) + error (errno, "Opening %s", path); + + return f; +} + +static int NEAR +file_close (struct file *f) +{ + if (f == 0) + return -1; + + if (f->fp) + fclose (f->fp); + f->fp = (FILE *) 0; + + return 0; +} + +static int NEAR +file_remove (struct file *f) +{ + if (file_close (f) < 0) + return -1; + + if (f->name) + { + remove (f->name); + free (f->name); + } + f->name = (char *) 0; + + return 0; +} + +static void NEAR +args_init (struct args *args) +{ + args->size = 0; + args->argc = 0; + args->argv = (char **) 0; +} + +static void NEAR +args_add (struct args *args, const char *arg) +{ + if (args->argc >= args->size) + { + args->size += 16; + args->argv = (char **) xrealloc (args->argv, sizeof (char *) * args->size); + } + + if (arg) + args->argv[args->argc++] = xstrdup (arg); + else + args->argv[args->argc] = (char *) 0; +} + +static void NEAR +args_cat (struct args *args, const char *arg) +{ + int idx; + char *p; + + if (args->argc == 0) + { + args_add (args, arg); + return; + } + + if (arg == (char *) 0) + return; + + idx = args->argc - 1; + p = args->argv[idx]; + args->argv[idx] = strcat (xrealloc (p, strlen (p) + strlen (arg) + 1), arg); +} + +static void NEAR +parse_arguments (struct args *args_p, struct file *f, int argc, char **argv) +{ + int program_in_f = 0; + int i; + + for (i = 1; i < argc && argv[i][0] == '-'; i++) + { + char *arg = argv[i] + 1; + + if (STREQ (arg, "-")) + { + i++; + break; + } + else if (STREQ (arg, "W")) + { + i++; + if (i < argc) + { + arg = (char *) alloca (strlen 
(argv[i]) + 3); + strcat (strcpy (arg, "-W"), argv[i]); + arg++; + } + else + { + args_add (args_p, "-W"); + break; + } + } + + if (arg[0] == 'v' || arg[0] == 'F') + { + args_add (args_p, arg - 1); + if (arg[1] == '\0' && ++i < argc) + args_cat (args_p, argv[i]); + } + else if (arg[0] == 'f') + { + if (arg[1]) + fprintf (f->fp, "@include %s\n", arg + 1); + else + fprintf (f->fp, "@include %s\n", ++i < argc ? argv[i] : ""); + program_in_f = 1; + } + else if (arg[0] == '-' || arg[0] == 'W') + { + arg++; + if (STRNEQ (arg, "file=", 5)) + { + fprintf (f->fp, "@include %s\n", arg + 5); + program_in_f = 1; + } + else if (STREQ (arg, "file")) + { + fprintf (f->fp, "@include %s\n", ++i < argc ? argv[i] : ""); + program_in_f = 1; + } + else if (STRNEQ (arg, "source=", 7)) + { + fputs (arg + 7, f->fp); + putc ('\n', f->fp); + program_in_f = 1; + } + else if (STREQ (arg, "source")) + { + if (++i < argc) + { + fputs (argv[i], f->fp); + putc ('\n', f->fp); + program_in_f = 1; + } + } + else if (STREQ (arg, "version")) + { + fprintf (stderr, "igawk: C version %s\n", version_string); + if (spawnlp (P_WAIT, gawk_path, "gawk", "--version", (char *) 0) < 0) + error (errno, "Cannot exec gawk"); + exit (EXIT_SUCCESS); + } + else if (STREQ (arg, "help")) + { + if (spawnlp (P_WAIT, gawk_path, "igawk", "--help", (char *) 0) < 0) + error (errno, "Cannot exec gawk"); + exit (EXIT_SUCCESS); + } + else + args_add (args_p, arg - 2); + } + else + break; + } + + if (! 
program_in_f) + if (i >= argc || strempty (argv[i])) + error (0, "no program!"); + else + { + fputs (argv[i++], igs.fp); + putc ('\n', igs.fp); + } + + file_close (f); + + if (i < argc) + { + args_add (args_p, "--"); + while (i < argc) + args_add (args_p, argv[i++]); + } +} + +static void NEAR +filter_program (const char *in_path, int out_fd) +{ + struct file awk; + int fd1; + int status; + int err; + + tmpfile_open (&awk); + fputs (igawk_awk, awk.fp); + file_close (&awk); + + if ((fd1 = dup (1)) < 0) + error (errno, "Cannot duplicate STDOUT"); + dup2 (out_fd, 1); + status = spawnlp (P_WAIT, gawk_path, "gawk", "-f", awk.name, in_path, (char *) 0); + err = errno; + dup2 (fd1, 1); + close (fd1); + file_remove (&awk); + if (status < 0) + error (err, "Cannot exec gawk"); +} + +static int NEAR +run_program (struct args *args_p) +{ +#if defined (__DJGPP__) && (__DJGPP__ > 2 || (__DJGPP__ == 2 && __DJGPP_MINOR__ >= 1)) + return spawnvp (P_WAIT, gawk_path, args_p->argv); +#else + struct file res; + char *p; + int status; + int i; + int c; + + tmpfile_open (&res); + for (i = 1; i < args_p->argc; i++) + if (strpbrk (args_p->argv[i], " \t\n")) + { + putc ('"', res.fp); + p = args_p->argv[i]; + while ((c = (unsigned char) *p++) != '\0') + { + if (c == '"') + putc ('\\', res.fp); + putc (c, res.fp); + } + fputs ("\"\n", res.fp); + } + else + { + fputs (args_p->argv[i], res.fp); + putc ('\n', res.fp); + } + file_close (&res); + + p = (char *) alloca (strlen (res.name) + 2); + strcat (strcpy (p, "@"), res.name); + status = spawnlp (P_WAIT, gawk_path, "gawk", p, (char *) 0); + file_remove (&res); + + return status; +#endif +} + +static void +remove_files (void) +{ + file_remove (&igs); + file_remove (&ige); +} + +static void +sigint (int sig) +{ + exit (EXIT_FAILURE); +} + +int +main (int argc, char **argv) +{ + struct args opts; + +#ifdef __EMX__ + _response (&argc, &argv); +#else +#ifndef __GO32__ +#ifndef XARGS_USELFN +#define XARGS_USELFN 0 +#endif + xargs_modify_rule 
(XARGS_DOTBREAK | XARGS_USELFN, XARGS_DOTBREAK | XARGS_USELFN); + xargs (&argc, &argv); +#endif +#endif + + program_name = argv[0]; + + gawk_path = getenv ("GAWK_PATH"); + if (gawk_path == (char *) 0) + gawk_path = "gawk.exe"; + + atexit (remove_files); + signal (SIGINT, sigint); + + tmpfile_open (&igs); + tmpfile_open (&ige); + + args_init (&opts); + args_add (&opts, "gawk"); + args_add (&opts, "-f"); + args_add (&opts, ige.name); + parse_arguments (&opts, &igs, argc, argv); + args_add (&opts, (char *) 0); + filter_program (igs.name, fileno (ige.fp)); + return run_program (&opts); +} diff -u2rN -x awktab.c gawk-3.0.6/pc/igawk.def gawk-3.0.6+mb1.15/pc/igawk.def --- gawk-3.0.6/pc/igawk.def Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/igawk.def Sat May 5 17:26:12 2001 @@ -0,0 +1,2 @@ +NAME igawk WINDOWCOMPAT NEWFILES +DESCRIPTION 'GNU iawk for OS/2' diff -u2rN -x awktab.c gawk-3.0.6/pc/mkigawk.awk gawk-3.0.6+mb1.15/pc/mkigawk.awk --- gawk-3.0.6/pc/mkigawk.awk Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/mkigawk.awk Sat May 5 17:26:12 2001 @@ -0,0 +1,4 @@ +{ + gsub(/[\\"]/, "\\\\&") + print "\"" $0 "\\n\"" +} diff -u2rN -x awktab.c gawk-3.0.6/pc/popen.c gawk-3.0.6+mb1.15/pc/popen.c --- gawk-3.0.6/pc/popen.c Tue Jun 1 07:02:54 1999 +++ gawk-3.0.6+mb1.15/pc/popen.c Sat May 5 17:46:29 2001 @@ -12,13 +12,13 @@ #endif -#if defined(WIN32) && defined(_MSC_VER) -#define popen _popen -#define pclose _pclose -#endif - #ifndef _NFILE #define _NFILE 40 #endif +#if defined(GAWK) && ((defined(MSDOS) && !defined(__EMX__)) || defined(WIN32)) /* patch 1997.2.20 by K.Okabe */ +extern int xsystem(char *); +#define system(cmd) xsystem((cmd)) +#endif + static char template[] = "piXXXXXX"; typedef enum { unopened = 0, reading, writing } pipemode; @@ -39,5 +39,5 @@ */ -#if defined(_MSC_VER) || defined(__MINGW32__) +#if (defined(OS2) && defined(_MSC_VER)) || defined(__MINGW32__) static int @@ -167,5 +167,5 @@ return NULL; -#if defined(__MINGW32__) || (defined(_MSC_VER) && 
defined(WIN32)) +#if defined(__MINGW32__) current = popen(command = scriptify(command), mode); cur = fileno(current); @@ -216,5 +216,5 @@ #endif -#if defined(__MINGW32__) || (defined(_MSC_VER) && defined(WIN32)) +#if defined(__MINGW32__) rval = pclose(current); pipes[cur].pmode = unopened; diff -u2rN -x awktab.c gawk-3.0.6/pc/readman.awk gawk-3.0.6+mb1.15/pc/readman.awk --- gawk-3.0.6/pc/readman.awk Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/readman.awk Sat May 5 17:26:12 2001 @@ -0,0 +1 @@ +{ gsub(/./, ""); print } diff -u2rN -x awktab.c gawk-3.0.6/pc/redir.c gawk-3.0.6+mb1.15/pc/redir.c --- gawk-3.0.6/pc/redir.c Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/redir.c Sat May 5 17:26:12 2001 @@ -0,0 +1,81 @@ +/* $Id: redir.c,v 1.1 1994/08/29 04:30:54 tt Rel $ + * + * redir -- Command Redirector version 1.0 + * + * Copyright (c) 1994 t^2 + * All rights reserved. + */ +#include +#include +#include + +int +main(int argc, char **argv) +{ + int i; + const char *p, *q; + int fd, nfd; + enum {OUT, IN, APPEND, ERR} mode; + + for (i = 1; i < argc; i++) { + p = argv[i]; + if (*p++ != '-') + break; + if (p[0] == '-' || p[1] == '\0') + break; + /* リダイレクト指示 */ + mode = OUT; + fd = (int)strtoul(p, (char **)&q, 0); /* ディスクリプタ */ + if (p == q) + mode = ERR; + p = q; + if (*p == '\0') + p = argv[++i]; + else { + if (*p == '+') { + p++; + if (mode != ERR) mode = APPEND; + } + else if (*p == '-') { + p++; + if (mode != ERR) mode = IN; + } + else if (*p == ':') { + p++; + nfd = (int)strtoul(p, (char **)&q, 0); + if (p != q && *q == '\0') + if (mode != ERR) dup2(nfd, fd); + continue; + } + if (*p == '=') + p++; + } + if (*p == '\0') + mode = ERR; + switch (mode) { + case OUT: + nfd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0666); + break; + case IN: + nfd = open(p, O_RDONLY); + break; + case APPEND: + nfd = open(p, O_WRONLY | O_CREAT | O_APPEND, 0666); + if (nfd >= 0) lseek(nfd, 0L, 2); + break; + default: + nfd = -1; + break; + } + if (nfd >= 0) { + dup2(nfd, fd); + close(nfd); 
+ } + } + if (i < argc) { + argv += i; + execvp(argv[0], argv); + return 255; + } + return 0; +} diff -u2rN -x awktab.c gawk-3.0.6/pc/redir.txt gawk-3.0.6+mb1.15/pc/redir.txt --- gawk-3.0.6/pc/redir.txt Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/redir.txt Sat May 5 17:26:12 2001 @@ -0,0 +1,89 @@ +# $Id: redir.txt,v 1.1 1994/08/29 04:31:20 tt Rel $ +●●●●●●●●● Command Redirector version 1.0 ●●●●●●●●● +●●●●●●●●● Aug. 29, 1994 by t^2 ●●●●●●●●● + +NAME + redir - 入出力をリダイレクトしてコマンドを実行する + +SYNOPSIS + redir [ -fd-[=]file [ -fd[=| ]file ] [ -fd+[=]file ] [ -fd1:fd2 ] + [ -- ] command ... + +DESCRIPTION + redir は COMMAND.COM では不可能な柔軟性のあるリダイレクトを行い, + 与えられた command ... を実行します. + + redir -fd-file command ... + + は, ファイルディスクリプタ fd の入力として file を使用します. + (sh(1) の fdfile に相当) + + redir -fd+file command ... + + は, 出力として file を使用しますが, file が存在する場合, file の + 最後に追加出力します. 存在しない場合は, 新たに作成されます. + (sh(1) の fd>>file に相当) + + redir -fd1:fd2 command ... + + は ファイルディスクリプタ fd1 の入出力として, その時点で fd2 が対 + 応しているファイルを使用します. (sh(1) の fd1>&fd2, fd1<&fd2 に相 + 当) + + リダイレクトの指定順序は重要です. redir は左から順番に引数を解釈 + し, リダイレクトを実行して行きます. (See EXAMPLES below.) + +EXAMPLES + + (1) redir -2=foo command ... + + command ... の標準エラー出力を foo というファイルに書き出します. + + (2) redir -2:1 command ... + + 標準エラー出力を, 標準出力へくっつけます. + + (3) redir -1=foo -2:1 command ... + + 標準出力と標準エラー出力の両方を foo にリダイレクトします. この例 + は, 最初の -f=foo の動作を COMMAND.COM に指定した + + redir -2:1 command ... > foo + + と全く等価です. + + (4) redir -2:1 -1=foo command ... + + (3) の例と順序が逆になっただけですが, まず標準エラー出力がその時 + 点で標準出力が対応しているファイル (通常はコンソール画面) へリダ + イレクトされます. その後, 標準出力のみが foo へリダイレクトされま + す. + +SEE ALSO + sh(1), open(2), dup2(2) + +BUGS + エラーが発生しても, 何もメッセージを出力しません. 期待通りに動か + なかったときは, エラーの原因を自力で突き止めてください. (超手抜き) + + 標準出力や標準エラー出力を入力に, 標準入力を出力に使用することが + 簡単にできてしまい, 混乱を招きます. + + こんなツールを使うくらいなら, 素直に DOS 上で動く sh ライクなシェ + ルを準備した方が楽です. 
+ +# Local variables: +# mode: indented-text +# indent-tabs-mode: nil +# tab-stop-list: (4 8 16 24 32 40 48 56 64 72 80) +# left-margin: 4 +# fill-column: 72 +# fill-prefix: " " +# version-control: never +# End: diff -u2rN -x awktab.c gawk-3.0.6/pc/xargs.c gawk-3.0.6+mb1.15/pc/xargs.c --- gawk-3.0.6/pc/xargs.c Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/xargs.c Sat May 5 17:26:12 2001 @@ -0,0 +1,949 @@ +/* $Id: xargs.c 2.3 1993/09/04 17:09:31 serow Exp serow $ + * + * wild card expand routine for BORLAND C, MS C, LSI C.. + * This code is in the public domain. + * + * $Log: xargs.c $ + * Revision 2.3 1993/09/04 17:09:31 serow + * _heapmin (MS-C) + * + * Revision 2.2 1993/05/22 04:33:02 serow + * 自前でargv[0]をつくる + * + * Revision 2.1 1993/01/20 10:43:04 serow + * v2 + * + */ +/* Last modified 1997.4.1 by K.Okabe */ + +#include +#include +#include +#include +#include +#include "xargs.h" + +#ifdef TEST +#define static +#endif + +static unsigned long xargs_rule = XARGS_CSH_LIKE_RULE; + +#define OnlySubDir (0x100) +#define AllFindAttr (0x17) +#define INVERTCHAR '^' +#define LBRACE '[' +#define RBRACE ']' +#define LCURLY '{' +#define RCURLY '}' +#define Isend(c) ((c) == '\0' || ((xargs_rule & XARGS_DOTBREAK) && (c) == '.')) +#define Iswild(c) ((c) == '*' || (c) == '?' 
|| (c) == LBRACE || (c) == LCURLY) +#define Isalpha(c) (Isupper(c) || Islower(c)) +#define Isupper(c) ('A' <= (c) && (c) <= 'Z') +#define Islower(c) ('a' <= (c) && (c) <= 'z') +#define Isspace(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r') +/* ascii only */ +#define Tolower(c) ((c) | 0x20) +/* Shift-JIS only */ +#if 1 /* patch 1997.1.29 by K.Okabe */ +#define isk1(c) dbcs_table[(unsigned char) (c)] +#else +#define isk1(c) ((0x81 <= (unsigned char)(c) && (unsigned char)(c) <= 0x9F)\ + || (0xE0 <= (unsigned char)(c) && (unsigned char)(c) <= 0xFC)) +#endif + +#define REALLOC_UNIT 128 + +#ifdef __TURBOC__ +#if defined(__COMPACT__) || defined(__LARGE__) || defined(__HUGE__) /* patch 1997.1.27 by K.Okabe */ +#define LARGE_DATA 1 +#endif +#else /* may be MS-C */ +#if defined(M_I86LM) || defined(M_I86CM) || defined(M_I86HM) +#define LARGE_DATA 1 +#endif +#endif + +#if defined(LSI_C) +/* union REGS does not have '.cflag' but '.flags' */ +#define CFLAG(reg) ((reg).x.flags & 1) +#else +#define CFLAG(reg) ((reg).x.cflag) +#endif + +#if defined(_MSC_VER) || defined(__TURBOC__) +/* MS-C */ +#define NEAR _near +#else +#define NEAR +#endif + +#undef FP_OFF +#undef FP_SEG +#undef MK_FP +#define FP_OFF(fp) ((unsigned int)((unsigned long)((void far *)(fp)) & 0xFFFFL)) +#define FP_SEG(fp) ((unsigned int)((unsigned long)((void far *)(fp)) >> 16)) +#define MK_FP(sg, of) ((void far *)(((unsigned long)(sg) << 16)|(unsigned)(of))) + +typedef struct { + char d_buf[21]; + char d_attribute; + unsigned short d_time; + unsigned short d_date; + long d_size; + char d_name[13]; +} SFNDTA_BUF; + +typedef struct { + unsigned long d_attribute; + unsigned long d_ctime[2]; + unsigned long d_atime[2]; + unsigned long d_mtime[2]; + unsigned long d_size[2]; + unsigned long d_reserve[2]; + char d_longname[260]; + char d_name[14]; +} LFNDTA_BUF; + +typedef union { + SFNDTA_BUF sfn; + LFNDTA_BUF lfn; +} DTA_BUF; + +static DTA_BUF dta_buf; +#define dtabuf (dta_buf.sfn) +#define lfnbuf 
(dta_buf.lfn) + +static union REGS reg, nreg; +#ifdef LARGE_DATA +static struct SREGS sreg; +#endif + +static char ERNOMEM[] = "not enought memory.\r\n"; + +/* patch 1997.1.29 by K.Okabe */ +#define DBCS_TABLE_SIZE 256 +static unsigned char *dbcs_table = 0; +/* end of patch */ + +/* patch 1997.4.1 by K.Okabe */ +static int fhandle; +static int lfn_supported = -1; +static char * NEAR find_file_sfn(char *, int); +static char * NEAR find_file_lfn(char *, int); +static char *(NEAR *find_file)(char *, int) = find_file_sfn; +#ifdef LARGE_DATA +#define INTDOS(in,out,seg) intdosx((in),(out),(seg)) +#else +#define INTDOS(in,out,seg) intdos((in),(out)) +#endif +/* end of patch */ + +/***************************************************************/ + +static void NEAR +error(char * s) +{ + write(2, "xargs: ", 7); + write(2, s, strlen(s)); + exit(1); +} + +static char * NEAR +find_file_lfn(char *dir, int Attr) +{ + if (dir != (char *) 0) { /* get first entry */ + reg.x.ax = 0x714e; + reg.x.cx = (unsigned char) Attr; + reg.x.si = 1; +#ifdef LARGE_DATA + reg.x.di = FP_OFF(&lfnbuf); + sreg.es = FP_SEG(&lfnbuf); + reg.x.dx = FP_OFF(dir); + sreg.ds = FP_SEG(dir); +#else + reg.x.di = (unsigned) &lfnbuf; + reg.x.dx = (unsigned) dir; +#endif + INTDOS(&reg, &nreg, &sreg); + + if (CFLAG(nreg)) + return (char *) 0; + fhandle = nreg.x.ax; + } else { /* get next entry */ + retry: + reg.x.ax = 0x714f; + reg.x.bx = fhandle; + reg.x.si = 1; +#ifdef LARGE_DATA + reg.x.di = FP_OFF(&lfnbuf); + sreg.es = FP_SEG(&lfnbuf); +#else + reg.x.di = (unsigned) &lfnbuf; +#endif + INTDOS(&reg, &nreg, &sreg); + + if (CFLAG(nreg)) { + reg.x.ax = 0x71a1; + intdos(&reg, &nreg); + return (char *) 0; + } + } + + if ((lfnbuf.d_longname[0] == '.' && lfnbuf.d_longname[1] == '\0') + || (lfnbuf.d_longname[0] == '.' && lfnbuf.d_longname[1] == '.' && lfnbuf.d_longname[2] == '\0')) + goto retry; + + if ((Attr & OnlySubDir) && (lfnbuf.d_attribute & 0x10) == 0) + goto retry; + + if (lfnbuf.d_longname[0] == '.' 
&& (xargs_rule & XARGS_HIDDENDOT) && (Attr & 2) == 0) + goto retry; + + return lfnbuf.d_longname; +} + +static void NEAR +set_dta(void) /* issues the AH=1Ah DOS call with the address of dtabuf */ +{ + reg.h.ah = 0x1A; +#ifdef LARGE_DATA + reg.x.dx = FP_OFF(&dtabuf); + sreg.ds = FP_SEG(&dtabuf); +#else + reg.x.dx = (int) &dtabuf; +#endif + INTDOS(&reg, &nreg, &sreg); +} + +static char * NEAR +find_file_sfn(char *dir, int Attr) /* short-name search: dir != 0 starts it, dir == 0 continues it; returns dtabuf.d_name or 0 */ +{ + if (dir != (char *) 0) { /* get first entry */ + set_dta(); + reg.h.ah = 0x4E; + reg.h.cl = (char) Attr; +#ifdef LARGE_DATA + reg.x.dx = FP_OFF(dir); + sreg.ds = FP_SEG(dir); +#else + reg.x.dx = (unsigned) dir; +#endif + } else { /* get next entry */ + retry:; + set_dta(); + reg.h.ah = 0x4F; +#ifdef LARGE_DATA + reg.x.dx = FP_OFF(&dtabuf); + sreg.ds = FP_SEG(&dtabuf); +#else + reg.x.dx = (unsigned) &dtabuf; +#endif + } + INTDOS(&reg, &nreg, &sreg); + + if (CFLAG(nreg)) + return (char *) 0; + + if (dtabuf.d_name[0] == 5) /* a leading 0x05 is stored back as 0xE5 */ + dtabuf.d_name[0] = (char) 0xE5; + + if (Attr & OnlySubDir) { + if ((dtabuf.d_attribute & 0x10) == 0) + goto retry; + if (dtabuf.d_name[0] == '.') + goto retry; + } + if (dtabuf.d_name[0] == '.' 
&& (xargs_rule & XARGS_HIDDENDOT) && (Attr & 2) == 0) + goto retry; + + return dtabuf.d_name; +} + +static char * NEAR +jstrtolwr(char *p) /* lower-cases 'A'..'Z' in place, skipping both bytes of each double-byte character; returns p */ +{ + char *op = p; + + while (*p) { + if (isk1(*p) && p[1]) + p++; + else if (Isupper(*p)) + *p = (char) Tolower(*p); + p++; + } + return op; +} + +static char * NEAR +jrindex(char *p, int c) /* last occurrence of byte c outside double-byte characters, or 0 */ +{ + char *oldp; + + c &= 0xFF; + for (oldp = 0; *p; p++) { + if (isk1(*p) && p[1]) { + p++; + } else if (((*p) & 0xFF) == c) { + oldp = p; + } + } + return oldp; +} + +static int +ustrcmp(void const *s1, void const *s2) /* qsort comparator over char* elements, comparing bytes as unsigned */ +{ + unsigned char *p1 = *(unsigned char **)s1; + unsigned char *p2 = *(unsigned char **)s2; + + while ((*p1 != '\0') && (*p1 == *p2)) { + p1++; + p2++; + } + return *p1 - *p2; +} + +static char * NEAR +xmalloc(size_t n) /* zero-filled allocation; calls error() on failure */ +{ + char *bp; + + if ((bp = calloc(1, n)) == (char *) 0) + error(ERNOMEM); + return bp; +} + +static char * NEAR +xrealloc(void *blk, size_t n) /* realloc that calls error() on failure */ +{ + char *bp; + + if ((bp = realloc(blk, n)) == (char *) 0) + error(ERNOMEM); +#ifdef _MSC_VER + _heapmin(); +#endif + return bp; +} +static void NEAR +xfree(void *p) +{ + if (p) + free(p); +#ifdef _MSC_VER + _heapmin(); +#endif +} + +/* patch 1997.1.29 by K.Okabe */ +static void NEAR +dbcs_alloc(void) /* builds dbcs_table once: marks every byte inside the lo..hi ranges read from the DOS-supplied table */ +{ + struct SREGS segs; + union REGS regs; + unsigned char far *ptr; + + if (dbcs_table != 0) + return; + + dbcs_table = (unsigned char *) xmalloc(DBCS_TABLE_SIZE); + segs.ds = regs.x.si = 0; + regs.x.ax = 0x6300; + intdosx(&regs, &regs, &segs); + ptr = (unsigned char far *) MK_FP(segs.ds + (regs.x.si >> 4), regs.x.si & 15); + if (ptr == (unsigned char far *) 0L) + return; + + for (;;) { + int lo, hi, c; + + lo = *ptr++; + hi = *ptr++; + if (lo == 0 && hi == 0) /* a 0,0 pair ends the range list */ + break; + + for (c = lo; c <= hi; c++) + dbcs_table[c] = 1; + } +} +/* end of patch */ + +/* patch 1997.4.1 by K.Okabe */ +static int NEAR +get_drive(void) /* returns AL from the AH=19h DOS call; check_lfn() adds 'A' to it to form a drive letter */ +{ + reg.h.ah = 0x19; + intdos(&reg, &nreg); + return (unsigned char) nreg.h.al; +} + +static void NEAR +check_lfn(char *path) +{ + char buf[32]; + char *root; + 
char *env; + + find_file = find_file_sfn; /* default; switched to find_file_lfn only if every check below passes */ + + if (lfn_supported == -1) + lfn_supported = ((env = getenv("XARGS")) != 0 && strstr(env, "LFN") != 0) + || ((env = getenv("LFN")) != 0 && Tolower(env[0]) != 'n'); + + if (lfn_supported == 0) + return; + + if ((xargs_rule & XARGS_USELFN) == 0) + return; + + root = xmalloc(strlen(path) + 4); + if (path[0] == '\\' && path[1] == '\\') { + strcpy(root, path); + } else { + root[0] = Isalpha(path[0]) && path[1] == ':' ? path[0] : get_drive() + 'A' ; + root[1] = ':'; + root[2] = '\\'; + root[3] = '\0'; + } + + reg.x.ax = 0x71a0; +#ifdef LARGE_DATA + reg.x.di = FP_OFF(buf); + sreg.es = FP_SEG(buf); + reg.x.dx = FP_OFF(root); + sreg.ds = FP_SEG(root); +#else + reg.x.di = (unsigned) buf; + reg.x.dx = (unsigned) root; +#endif + + INTDOS(&reg, &nreg, &sreg); + + xfree(root); + + if (CFLAG(nreg)) /* call failed: keep the short-name search */ + return; + + find_file = find_file_lfn; +} +/* end of patch */ + +static int NEAR +has_glob_char(char *pat) /* 1 if pat holds a wildcard byte outside double-byte characters, else 0 */ +{ + int c; + + while ((c = *pat++) != '\0') { + if (Iswild(c)) + return 1; + if (isk1(c) && *pat) + pat++; + } + return 0; +} + +#define Scan(c, p) {\ + c = (unsigned char)(*p++);\ + if (isk1(c) && *p) \ + c = (c << 8) + (unsigned char)(*p++);\ + else if (Isupper(c))\ + c = Tolower(c);\ + } +#define ScanRaw(c, p) {\ + c = (unsigned char)(*p++);\ + if (isk1(c) && *p)\ + c = (c << 8) + (unsigned char)(*p++);\ + } + +static int NEAR +glob_match(char *text, char *pattern) +{ + int include, exclude; + unsigned int ks, ke, kp, kt; + char *start, *stop; + + while (*pattern) { + switch (*pattern) { + case LCURLY: + if (Isend(*text)) + return 0; + stop = start = text; + pattern++; + do { + text = start; + include = 1; + do { + if (Isend(*pattern)) + return 0; /*ERMISSCURLY*/ + Scan(kt, text); + Scan(kp, pattern); + if (kt != kp) + include = 0; + } while (*pattern != ',' && *pattern != RCURLY); + + if (include && text - start > stop - start) + stop = text; + if (*pattern == ',') + pattern++; + } while (*pattern != RCURLY); + if (stop == start) + 
return 0; + pattern++; + text = stop; + break; + + case '?': + if (Isend(*text)) + return 0; + ScanRaw(kt, text); + pattern++; + break; + + case '*': + while ((include = glob_match(text, pattern + 1)) == 0 && !Isend(*text)) { + ScanRaw(kt, text); + } + return include; + + case LBRACE: + if (Isend(*text)) + return 0; + include = exclude = 0; + pattern++; + if (*pattern == INVERTCHAR) { + exclude = 1; + pattern++; + } + Scan(kt, text); + do { + if (Isend(*pattern)) + return 0; /*ERMISSBRACKET*/ + Scan(ks, pattern); + ke = ks; + if (*pattern == '-' && *(pattern + 1) != RBRACE) { + pattern++; + if (Isend(*pattern)) + return 0; /*ERMISSBRACKET*/ + Scan(ke, pattern); + } + if (ks <= kt && kt <= ke) { + include = 1; + /* no break to checking missing brace */ + /*break;*/ + } + } while (*pattern != RBRACE); + + if (include == exclude) + return 0; + pattern++; + break; + + case '.': + if (xargs_rule & XARGS_DOTBREAK) { + if (!Isend(*text)) + return 0; + if (*text == '.') + text++; + pattern++; + break; + } + default: + Scan(kt, text); + Scan(kp, pattern); + if (kt != kp) + return 0; + } + } + return *text == '\0' ? 1 : 0; +} + +static char * NEAR +jrpathdlm(char *s) +{ + char *ap, *ap1, *ap2; + + ap1 = jrindex(s, '\\'); + ap2 = jrindex(s, '/'); + if ((ap = (ap1 < ap2) ? 
ap2 : ap1) == (char *) 0) + ap = jrindex(s, ':'); + return ap; +} + +char ** +glob_filename(char *pattern) +{ + char *ap, *ap1, *ap2; + char **res; + char *pat; + char **paths; + char *dirp; + char *wld; + char *pp; + int ra, ri, i, plen; + int np; + int attr = ((int) (xargs_rule & 0xFF)); + + /* patch 1997.1.29 by K.Okabe */ + dbcs_alloc(); + /* end of patch */ + ap = jrpathdlm(pattern); + if (ap && ap[0] != ':' && ap[1] == '\0') { + attr = AllFindAttr | OnlySubDir; + pp = (char *) xmalloc(strlen(pattern) + 1); + strcpy(pp, pattern); + pp[(int) (ap - pattern)] = '\0'; + ap = jrpathdlm(pp); + if (ap) + ap = pattern + (int) (ap - pp); + xfree(pp); + } + if (ap) { + pat = xmalloc(strlen(ap + 1) + 1); + strcpy(pat, ap + 1); + ap2 = dirp = (char *) xmalloc((int) (ap - pattern) + 2); + for (ap1 = pattern; ap1 <= ap; ap1++) + *ap2++ = *ap1; + *ap2 = '\0'; + if (ap[0] != ':' && has_glob_char(dirp)) { + paths = glob_filename(dirp); + xfree(dirp); + } else { + paths = (char **) xmalloc(sizeof(paths[0]) * 2); + paths[0] = dirp; + paths[1] = (char *) 0; + } + } else { + pat = xmalloc(strlen(pattern) + 1); + strcpy(pat, pattern); + paths = (char **) xmalloc(sizeof(paths[0]) * 2); + paths[0] = (char *) xmalloc(2); + paths[1] = (char *) 0; + strcpy(paths[0], ""); + } + ap = jrpathdlm(pat); + if (ap && ap[0] != ':' && ap[1] == '\0') { + ap[0] = '\0'; + } + res = (char **) xmalloc(sizeof(char *) * (ra = 128)); + ri = 0; + res[ri] = 0; + for (i = 0; paths[i]; i++) { + check_lfn(paths[i]); + plen = strlen(paths[i]); + wld = xmalloc(plen + 5); + strcpy(wld, paths[i]); + ap = jrpathdlm(wld); + if (plen == 0 || (ap && ap[1] == '\0')) { + np = 0; + } else { + strcat(wld, "/"); + np = 1; + } + strcat(wld, "*.*"); + if ((ap = find_file(wld, attr)) != (char *) 0) { + do { + if (glob_match(ap, pat)) { + if (xargs_rule & XARGS_CVLOWER) + jstrtolwr(ap); + res[ri] = xmalloc(plen + strlen(ap) + 2); + strcat(strcat(strcpy(res[ri], paths[i]), np ? 
"/" : ""), ap); + res[++ri] = (char *) 0; + if (ri >= ra - 2) { + res = (char **) xrealloc(res, sizeof(char *) * (ra += 128)); + } + } + } while ((ap = find_file((char *) 0, attr)) != (char *) 0); + } + xfree(wld); + } + if (ri) { + if (xargs_rule & XARGS_DOSORT) + qsort(&res[0], ri, sizeof(res[0]), ustrcmp); + } else { + strcpy(res[ri] = xmalloc(strlen(pattern) + 1), pattern); + res[++ri] = (char *) 0; + } + xfree(pat); + for (i = 0; paths[i]; i++) + xfree(paths[i]); + xfree(paths); + return res; +} + +/******/ +struct inputredir { + int type; +#define IP_FILE 1 +#define IP_MEM 2 + struct inputredir *next; + union { + FILE *fp; + char *cp; + } i; +}; +static struct inputredir *iroot = 0; + +static void NEAR +add_input(void *p, int type) +{ + struct inputredir *ip; + + ip = (struct inputredir *) xmalloc(sizeof(struct inputredir)); + ip->type = type; + ip->i.cp = p; + if (iroot) { + ip->next = iroot; + iroot = ip; + } else { + iroot = ip; + iroot->next = 0; + } +} + +static int pushbackc = -1; + +static int NEAR +nextc(void) +{ + int c; + struct inputredir *lip; + + if (pushbackc != -1) { + c = pushbackc; + pushbackc = -1; + return c; + } + if (iroot == 0) + return EOF; + switch (iroot->type) { + case IP_FILE: + if ((c = fgetc(iroot->i.fp)) == EOF) { + fclose(iroot->i.fp); + lip = iroot; + iroot = iroot->next; + xfree(lip); + c = nextc(); + } + break; + case IP_MEM: + if ((c = *(iroot->i.cp++)) == '\0') { + lip = iroot; + iroot = iroot->next; + xfree(lip); + c = nextc(); + } + break; + default: + c = EOF; + break; + } + return c; +} +static void NEAR +pushback(int c) +{ + pushbackc = c; +} + +/******/ + +static void NEAR +setav(char *NCptr, int *_ARGC, char ***_ARGV) +{ + int c, i, jf, at; + int quote, withwild; + char *ap; + char *buf; + size_t absiz; + size_t bfsiz; +#define argbuf (*_ARGV) +#define argcnt (*_ARGC) + char **res; + FILE *fp; + + buf = (char *) xmalloc((bfsiz = REALLOC_UNIT * 2) * sizeof(char)); + argbuf = (char **) xmalloc((absiz = REALLOC_UNIT) * 
sizeof(char *)); + argcnt = 0; + + add_input(NCptr, IP_MEM); + + while ((c = nextc()) != EOF) { + withwild = quote = 0; + while (Isspace(c)) + c = nextc(); + if (c == EOF) + break; + at = (c == '@') ? 1 : 0; + ap = buf; + jf = 0; + do { + if (ap == &buf[bfsiz - 1]) { + buf = (char *) xrealloc(buf, (bfsiz + REALLOC_UNIT) * sizeof(buf[0])); + ap = &buf[bfsiz - 1]; + bfsiz += REALLOC_UNIT; + } + if (jf || isk1(c)) { + *ap++ = (char) c; + jf = !jf; + continue; + } + if (!quote && Iswild(c)) + withwild = 1; + if (c == quote) { /* end of quoted string */ + quote = 0; + } else if (!quote && (c == '\'' || c == '"')) { + quote = c; + } else { + if (c == '\\') { + c = nextc(); + if (c != '"' && c != '\'') { + pushback(c); + c = '\\'; + } + } + *ap++ = (char) c; + } + } while ((c = nextc()) != EOF && (quote || !Isspace(c))); + *ap = 0; + if ((xargs_rule & XARGS_ATRESP) && at) { + if ((fp = fopen(buf + 1, "r")) != 0) { + add_input(fp, IP_FILE); + continue; + } else { + /* ignore */ + } + } + if (withwild) { + res = glob_filename(buf); + } else { + res = (char **) xmalloc(sizeof(char *) * 2); + strcpy(res[0] = (char *) xmalloc(strlen(buf) + 1), buf); + res[1] = (char *) 0; + } + for (i = 0; res[i]; i++) { + argbuf[argcnt++] = res[i]; + if ((size_t) argcnt >= absiz) { + argbuf = (char **) xrealloc(argbuf, (absiz += REALLOC_UNIT) * sizeof(argbuf[0])); + } + } + xfree(res); + } + argbuf[argcnt] = (char *) 0; + argbuf = (char **) xrealloc(argbuf, (argcnt + 1) * sizeof(argbuf[0])); + xfree(buf); +} + +unsigned long +xargs_set_rule(unsigned long rule) +{ + unsigned long lastrule; + + lastrule = xargs_rule; + xargs_rule = rule; + return lastrule; +} + +unsigned long +xargs_modify_rule(unsigned long mask, unsigned long rule) +{ + unsigned long lastrule; + + lastrule = xargs_rule; + xargs_rule = (xargs_rule & ~mask) | rule; + return lastrule; +} + +void +xargs(int *argc, char ***argv) +{ + char far *pp; + char *s; + int i, j; + int jf = 0; + int li = 0; + int sa = 0; + +#if 
defined(GAWK) && defined(OS2) && (_MSC_VER != 510) /* patch 1997.2.20 by K.Okabe */ + if (_osmode == OS2_MODE) + return; +#endif + + /* patch 1997.1.29 by K.Okabe */ + dbcs_alloc(); + /* end of patch */ + if (!(xargs_rule & XARGS_USECMDL) + || (s = getenv("CMDLINE")) == 0 + || getenv("NOCMDLINE") != 0) { + sa = 1; + if (_osmajor >= 3) { + pp = MK_FP(*(unsigned int far *)MK_FP(_psp, 0x2C), 0); /* environ */ + while (pp[0] != 0 || pp[1] != 0) + pp++; + pp += 4; + } else { + pp = "xargs"; + } + s = xmalloc(1024); + for (i = 0; (s[i] = *pp++) != '\0'; i++) { + if (jf) { + jf = 0; + continue; + } + if (isk1(s[i]) && !jf) { + jf = 1; + continue; + } + if ((xargs_rule & XARGS_0CVLOWER) && Isupper(s[i])) + s[i] = (char) Tolower(s[i]); + if (s[i] == '.') + li = i; + if (s[i] == '/' || s[i] == '\\') + li = 0; + if ((xargs_rule & XARGS_0CVSLASH) && s[i] == '\\') + s[i] = '/'; + } + if ((xargs_rule & XARGS_0REMOVEXT) && li && li != i - 1) { + i = li; + s[i] = 0; + } + pp = MK_FP(_psp, 128); + j = *pp++; + while ((*pp == ' ' || *pp == '\t') && j) { + pp++; + j--; + } + if (j) + s[i++] = ' '; + while (j--) { + s[i++] = *pp++; + } + s[i] = 0; + } + setav(s, argc, argv); + if (sa) + xfree(s); +} + + +#ifdef TEST +#include + +static char QUOTE[]= "\033[33m\"\033[m"; + +void +main(int argc, char **argv) +{ + int a = 0; + + /*xargs_modify_rule(XARGS_DOTBREAK, XARGS_DOTBREAK);*/ + xargs(&argc, &argv); + + printf("[argv]-------------------------- %d argument(s)\n", argc); + while (argc--) { + printf("[%4d]%s%s%s\n", a++, QUOTE, *argv++, QUOTE); + } + printf("-------------------------------- Thats all\n"); +} + +#endif diff -u2rN -x awktab.c gawk-3.0.6/pc/xargs.h gawk-3.0.6+mb1.15/pc/xargs.h --- gawk-3.0.6/pc/xargs.h Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/xargs.h Sat May 5 17:26:12 2001 @@ -0,0 +1,66 @@ +/* $Id: xargs.h 2.1 1993/01/20 10:43:04 serow Exp serow $ + * + * header for xargs.c + * This code is in the public domain. 
+ * + * $Log: xargs.h $ + * Revision 2.1 1993/01/20 10:43:04 serow + * v2 + * + */ + +#ifndef _INC_XARGS +#ifdef __cplusplus +extern "C" { +#endif + +/* The following bits are used in the xargs_rule variable + to select the globbing rules */ + +#define XARGS_F_RO (1L ) /* file attributes for the find-file call: read-only */ +#define XARGS_F_HID (1L << 1L) /* hidden */ +#define XARGS_F_SYS (1L << 2L) /* system */ +#define XARGS_F_VOL (1L << 3L) /* volume label */ +#define XARGS_F_DIR (1L << 4L) /* directory */ + +#define XARGS_DOTBREAK (1L << 8L) /* ? and * do not match '.' */ +#define XARGS_HIDDENDOT (1L << 9L) /* treat names starting with '.' as hidden files */ +#define XARGS_DOSORT (1L << 12L) /* wildcard results are sorted by name */ +#define XARGS_CVLOWER (1L << 13L) /* convert filenames to lower case */ +#define XARGS_ATRESP (1L << 14L) /* @responsefile */ +#define XARGS_USECMDL (1L << 16L) /* take the arguments from the "CMDLINE" environment variable if it is set */ +#define XARGS_USELFN (1L << 17L) /* enable long file names */ +#define XARGS_0CVLOWER (1L << 20L) /* convert argv[0] to lower case */ +#define XARGS_0CVSLASH (1L << 21L) /* convert '\\' to '/' in argv[0] */ +#define XARGS_0REMOVEXT (1L << 22L) /* remove the extension from argv[0] */ + +/* typical rules */ + +#define XARGS_CSH_LIKE_RULE (\ + XARGS_F_RO|XARGS_F_DIR|XARGS_F_SYS \ + |XARGS_HIDDENDOT \ + |XARGS_DOSORT \ + |XARGS_CVLOWER \ + |XARGS_ATRESP \ + |XARGS_0CVLOWER \ + |XARGS_0CVSLASH \ + |XARGS_0REMOVEXT \ + ) + +#define XARGS_MESSY_RULE (\ + XARGS_F_RO|XARGS_F_SYS \ + |XARGS_DOTBREAK \ + |XARGS_HIDDENDOT \ + ) + +/* prototypes */ + +char ** glob_filename(char *pattern); +unsigned long xargs_set_rule(unsigned long rule); +unsigned long xargs_modify_rule(unsigned long mask, unsigned long rule); +void xargs(int *argc, char ***argv); +#ifdef __cplusplus +} +#endif +#define _INC_XARGS 1 +#endif /* _INC_XARGS */ diff -u2rN -x awktab.c gawk-3.0.6/pc/xargs32.c gawk-3.0.6+mb1.15/pc/xargs32.c --- gawk-3.0.6/pc/xargs32.c Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/xargs32.c Sat May 5 17:26:12 2001 @@ -0,0 
+1,712 @@ +/* Id: xargs.c 2.3 1993/09/04 17:09:31 serow Exp serow + * + * wild card expand routine for BORLAND C, MS C, LSI C.. + * This code is in the public domain. + * + * $Log: xargs32.c $ + * Revision 1.2 1997/07/18 17:13:11 koichik + * 32bit + * + * Revision 1.1 1997/04/27 15:01:25 koichik + * Initial revision + * + * Revision 2.3 1993/09/04 17:09:31 serow + * _heapmin (MS-C) + * + * Revision 2.2 1993/05/22 04:33:02 serow + * ゥOargv[0] + * + * Revision 2.1 1993/01/20 10:43:04 serow + * v2 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include "xargs32.h" + +#ifdef TEST +#define static +#endif + +static unsigned long xargs_rule = XARGS_CSH_LIKE_RULE; + +#define OnlySubDir (1UL<<31) /* unsigned: 1<<31 on a 32-bit int shifts into the sign bit */ +#define AllFindAttr (0xB7) +#define INVERTCHAR '^' +#define LBRACE '[' +#define RBRACE ']' +#define LCURLY '{' +#define RCURLY '}' +#define Isend(c) ((c) == '\0' || ((xargs_rule & XARGS_DOTBREAK) && (c) == '.')) +#define Iswild(c) ((c) == '*' || (c) == '?' || (c) == LBRACE || (c) == LCURLY) +#define Isupper(c) ('A' <= (c) && (c) <= 'Z') +#define Isspace(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r') +/* ascii only */ +#define Tolower(c) ((c) | 0x20) +/* Shift-JIS only */ +#define isk1(c) ((0x81 <= (unsigned char)(c) && (unsigned char)(c) <= 0x9F)\ + || (0xE0 <= (unsigned char)(c) && (unsigned char)(c) <= 0xFC)) + +#define REALLOC_UNIT 512 + +static char ERNOMEM[] = "not enought memory.\r\n"; + +/***************************************************************/ + +static void +error(char * s) +{ + write(2, "xargs: ", 7); + write(2, s, strlen(s)); + exit(1); +} + +static char * +find_file(char *dir, HANDLE *hFind, DWORD Attr, WIN32_FIND_DATA *dta) /* next accepted entry name, or NULL when the search is exhausted; *hFind == INVALID_HANDLE_VALUE starts a new search on dir; the caller still closes *hFind */ +{ + char *ret; + + if (hFind == NULL) /* no handle slot to work with */ + return NULL; + if (*hFind == INVALID_HANDLE_VALUE) { /* get first entry */ + if ((*hFind = FindFirstFile(dir, dta)) == INVALID_HANDLE_VALUE) + return NULL; + } else { /* get next entry */ +retry: + if 
(!FindNextFile(*hFind, dta)) + return NULL; /* exhausted: never re-test the stale previous entry */ + } + ret = dta->cFileName; + if (!strcmp(ret, "..") || !strcmp(ret, ".")) + goto retry; + if ((Attr & OnlySubDir) && (dta->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) /* directories only: skip everything else */ + goto retry; + + if (ret[0] == '.' && (xargs_rule & XARGS_HIDDENDOT) + && (dta->dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) == 0) + goto retry; + + return ret; +} + +static char * +jstrtolwr(char *p) +{ + char *op = p; + + while (*p) { + if (isk1(*p) && p[1]) + p++; + else if (Isupper(*p)) + *p = (char) Tolower(*p); + p++; + } + return op; +} + +static char * +jrindex(char *p, int c) +{ + char *oldp; + + c &= 0xFF; + for (oldp = 0; *p; p++) { + if (isk1(*p) && p[1]) { + p++; + } else if (((*p) & 0xFF) == c) { + oldp = p; + } + } + return oldp; +} + +static int +ustrcmp(void const *s1, void const *s2) +{ + unsigned char *p1 = *(unsigned char **)s1; + unsigned char *p2 = *(unsigned char **)s2; + + while ((*p1 != '\0') && (*p1 == *p2)) { + p1++; + p2++; + } + return *p1 - *p2; +} + +static char * +xmalloc(size_t n) +{ + char *bp; + + if ((bp = calloc(1, n)) == (char *) 0) + error(ERNOMEM); + return bp; +} + +static char * +xrealloc(void *blk, size_t n) +{ + char *bp; + + if ((bp = realloc(blk, n)) == (char *) 0) + error(ERNOMEM); + + return bp; +} +static void +xfree(void *p) +{ + if (p) + free(p); + +} + +static int +has_glob_char(char *pat) +{ + int c; + + while ((c = *pat++) != '\0') { + if (Iswild(c)) + return 1; + if (isk1(c) && *pat) + pat++; + } + return 0; +} + +#define Scan(c, p) {\ + c = (unsigned char)(*p++);\ + if (isk1(c) && *p) \ + c = (c << 8) + (unsigned char)(*p++);\ + else if (Isupper(c))\ + c = Tolower(c);\ + } +#define ScanRaw(c, p) {\ + c = (unsigned char)(*p++);\ + if (isk1(c) && *p)\ + c = (c << 8) + (unsigned char)(*p++);\ + } + +static int +glob_match(char *text, char *pattern) +{ + int include, exclude; + unsigned int ks, ke, kp, kt; + char *start, *stop; + + while (*pattern) { + switch (*pattern) { + case LCURLY: + if 
(Isend(*text)) + return 0; + stop = start = text; + pattern++; + do { + text = start; + include = 1; + do { + if (Isend(*pattern)) + return 0; /*ERMISSCURLY*/ + Scan(kt, text); + Scan(kp, pattern); + if (kt != kp) + include = 0; + } while (*pattern != ',' && *pattern != RCURLY); + + if (include && text - start > stop - start) + stop = text; + if (*pattern == ',') + pattern++; + } while (*pattern != RCURLY); + if (stop == start) + return 0; + pattern++; + text = stop; + break; + + case '?': + if (Isend(*text)) + return 0; + ScanRaw(kt, text); + pattern++; + break; + + case '*': + while ((include = glob_match(text, pattern + 1)) == 0 && !Isend(*text)) { + ScanRaw(kt, text); + } + return include; + + case LBRACE: + if (Isend(*text)) + return 0; + include = exclude = 0; + pattern++; + if (*pattern == INVERTCHAR) { + exclude = 1; + pattern++; + } + Scan(kt, text); + do { + if (Isend(*pattern)) + return 0; /*ERMISSBRACKET*/ + Scan(ks, pattern); + ke = ks; + if (*pattern == '-' && *(pattern + 1) != RBRACE) { + pattern++; + if (Isend(*pattern)) + return 0; /*ERMISSBRACKET*/ + Scan(ke, pattern); + } + if (ks <= kt && kt <= ke) { + include = 1; + /* no break to checking missing brace */ + /*break;*/ + } + } while (*pattern != RBRACE); + + if (include == exclude) + return 0; + pattern++; + break; + + case '.': + if (xargs_rule & XARGS_DOTBREAK) { + if (!Isend(*text)) + return 0; + if (*text == '.') + text++; + pattern++; + break; + } + default: + Scan(kt, text); + Scan(kp, pattern); + if (kt != kp) + return 0; + } + } + return *text == '\0' ? 1 : 0; +} + +static char * +jrpathdlm(char *s) +{ + char *ap, *ap1, *ap2; + + ap1 = jrindex(s, '\\'); + ap2 = jrindex(s, '/'); + if ((ap = (ap1 < ap2) ? 
ap2 : ap1) == (char *) 0) + ap = jrindex(s, ':'); + return ap; +} + +char ** +glob_filename(char *pattern) +{ + char *ap, *ap1, *ap2; + char **res; + char *pat; + char **paths; + char *dirp; + char *wld; + char *pp; + int ra, ri, i, plen; + int np; + DWORD Attr = ((DWORD) (xargs_rule & 0xFFFF)); + + HANDLE hFind = INVALID_HANDLE_VALUE; + WIN32_FIND_DATA dta; + + ap = jrpathdlm(pattern); + if (ap && ap[0] != ':' && ap[1] == '\0') { + Attr = AllFindAttr | OnlySubDir; + pp = (char *) xmalloc(strlen(pattern) + 1); + strcpy(pp, pattern); + pp[(int) (ap - pattern)] = '\0'; + ap = jrpathdlm(pp); + if (ap) + ap = pattern + (int) (ap - pp); + xfree(pp); + } + if (ap) { + pat = xmalloc(strlen(ap + 1) + 1); + strcpy(pat, ap + 1); + ap2 = dirp = (char *) xmalloc((int) (ap - pattern) + 2); + for (ap1 = pattern; ap1 <= ap; ap1++) + *ap2++ = *ap1; + *ap2 = '\0'; + if (ap[0] != ':' && has_glob_char(dirp)) { + paths = glob_filename(dirp); + xfree(dirp); + } else { + paths = (char **) xmalloc(sizeof(paths[0]) * 2); + paths[0] = dirp; + paths[1] = (char *) 0; + } + } else { + pat = xmalloc(strlen(pattern) + 1); + strcpy(pat, pattern); + paths = (char **) xmalloc(sizeof(paths[0]) * 2); + paths[0] = (char *) xmalloc(2); + paths[1] = (char *) 0; + strcpy(paths[0], ""); + } + ap = jrpathdlm(pat); + if (ap && ap[0] != ':' && ap[1] == '\0') { + ap[0] = '\0'; + } + res = (char **) xmalloc(sizeof(char *) * (ra = 260)); + ri = 0; + res[ri] = 0; + for (i = 0; paths[i]; i++) { + plen = strlen(paths[i]); + wld = xmalloc(plen + 5); + strcpy(wld, paths[i]); + ap = jrpathdlm(wld); + + if (plen == 0 || (ap && ap[1] == '\0')) { + np = 0; + } else { + strcat(wld, "/"); + np = 1; + } + + strcat(wld, "*.*"); + + if ((ap = find_file(wld, &hFind, Attr, &dta)) != NULL) { + do { + if (glob_match(ap, pat)) { + if (xargs_rule & XARGS_CVLOWER) + jstrtolwr(ap); + res[ri] = xmalloc(plen + strlen(ap) + 2); + strcat(strcat(strcpy(res[ri], paths[i]), np ? 
"/" : ""), ap); + res[++ri] = (char *) 0; + if (ri >= ra - 2) { + res = (char **) xrealloc(res, sizeof(char *) * (ra += 260)); + } + } + } while (find_file(wld, &hFind, Attr, &dta)); + + FindClose(hFind); + hFind=INVALID_HANDLE_VALUE; + } + + xfree(wld); + } + if (ri) { + if (xargs_rule & XARGS_DOSORT) + qsort(&res[0], ri, sizeof(res[0]), ustrcmp); + } else { + strcpy(res[ri] = xmalloc(strlen(pattern) + 1), pattern); + res[++ri] = (char *) 0; + } + xfree(pat); + for (i = 0; paths[i]; i++) + xfree(paths[i]); + xfree(paths); + return res; +} + +/******/ +struct inputredir { + int type; +#define IP_FILE 1 +#define IP_MEM 2 + struct inputredir *next; + union { + FILE *fp; + char *cp; + } i; +}; +static struct inputredir *iroot = 0; + +static void +add_input(void *p, int type) +{ + struct inputredir *ip; + + ip = (struct inputredir *) xmalloc(sizeof(struct inputredir)); + ip->type = type; + ip->i.cp = p; + if (iroot) { + ip->next = iroot; + iroot = ip; + } else { + iroot = ip; + iroot->next = 0; + } +} + +static int pushbackc = -1; + +static int +nextc(void) +{ + int c; + struct inputredir *lip; + + if (pushbackc != -1) { + c = pushbackc; + pushbackc = -1; + return c; + } + if (iroot == 0) + return EOF; + switch (iroot->type) { + case IP_FILE: + if ((c = fgetc(iroot->i.fp)) == EOF) { + fclose(iroot->i.fp); + lip = iroot; + iroot = iroot->next; + xfree(lip); + c = nextc(); + } + break; + case IP_MEM: + if ((c = *(iroot->i.cp++)) == '\0') { + lip = iroot; + iroot = iroot->next; + xfree(lip); + c = nextc(); + } + break; + default: + c = EOF; + break; + } + return c; +} +static void +pushback(int c) +{ + pushbackc = c; +} + +/******/ + +static void +setav(char *NCptr, int *_ARGC, char ***_ARGV) +{ + int c, i, jf, at; + int quote, withwild; + char *ap; + char *buf; + size_t absiz; + size_t bfsiz; +#define argbuf (*_ARGV) +#define argcnt (*_ARGC) + char **res; + FILE *fp; + + buf = (char *) xmalloc((bfsiz = REALLOC_UNIT * 2) * sizeof(char)); + argbuf = (char **) 
xmalloc((absiz = REALLOC_UNIT) * sizeof(char *)); + argcnt = 0; + + add_input(NCptr, IP_MEM); + + while ((c = nextc()) != EOF) { + withwild = quote = 0; + while (Isspace(c)) + c = nextc(); + if (c == EOF) + break; + at = (c == '@') ? 1 : 0; + ap = buf; + jf = 0; + do { + if (ap == &buf[bfsiz - 1]) { + buf = (char *) xrealloc(buf, (bfsiz + REALLOC_UNIT) * sizeof(buf[0])); + ap = &buf[bfsiz - 1]; + bfsiz += REALLOC_UNIT; + } + if (jf || isk1(c)) { + *ap++ = (char) c; + jf = !jf; + continue; + } + if (quote != '\'' && Iswild(c)) + withwild = 1; + if (c == quote) { /* end of quoted string */ + quote = 0; + } else if (!quote && (c == '\'' || c == '"')) { + quote = c; + } else { + if (c == '\\') { + c = nextc(); + if (c != '"' && c != '\'') { + pushback(c); + c = '\\'; + } + } + *ap++ = (char) c; + } + } while ((c = nextc()) != EOF && (quote || !Isspace(c))); + *ap = 0; + if ((xargs_rule & XARGS_ATRESP) && at) { + if ((fp = fopen(buf + 1, "r")) != 0) { + add_input(fp, IP_FILE); + continue; + } else { + /* ignore */ + } + } + if (withwild) { + res = glob_filename(buf); + } else { + res = (char **) xmalloc(sizeof(char *) * 2); + strcpy(res[0] = (char *) xmalloc(strlen(buf) + 1), buf); + res[1] = (char *) 0; + } + for (i = 0; res[i]; i++) { + argbuf[argcnt++] = res[i]; + if ((size_t) argcnt >= absiz) { + argbuf = (char **) xrealloc(argbuf, (absiz += REALLOC_UNIT) * sizeof(argbuf[0])); + } + } + xfree(res); + } + argbuf[argcnt] = (char *) 0; + argbuf = (char **) xrealloc(argbuf, (argcnt + 1) * sizeof(argbuf[0])); + xfree(buf); +} + +unsigned long +xargs_set_rule(unsigned long rule) +{ + unsigned long lastrule; + + lastrule = xargs_rule; + xargs_rule = rule; + return lastrule; +} + +unsigned long +xargs_modify_rule(unsigned long mask, unsigned long rule) +{ + unsigned long lastrule; + + lastrule = xargs_rule; + xargs_rule = (xargs_rule & ~mask) | rule; + return lastrule; +} + +unsigned char cmdlinebuf[1024]; + +void +xargs(int *argc, char ***argv) +{ + char far *pp; + char 
*s; + int i, j; + int jf = 0; + int li = 0; + int sa = 0; /* set when s is owned here and must be freed */ + + if (!(xargs_rule & XARGS_USECMDL) + || (s = getenv("CMDLINE")) == 0 + || getenv("NOCMDLINE") != 0) { + sa = 1; + pp = "xargs"; + s = xmalloc(1024); + +#if 0 + { + cmdlinebuf[0] = '\0'; + //wcstombs(cmdlinebuf, GetCommandLine(), (sizeof (cmdlinebuf) - 1)); + //pp = cmdlinebuf; + strcpy(cmdlinebuf, GetCommandLine()); + pp = cmdlinebuf; + } + + for (i = 0; (s[i] = *pp++) != '\0'; i++) { + //if (isspace(s[i])) { + // s[i] = '\0'; + // break; + //} + + if (jf) { + jf = 0; + continue; + } + if (isk1(s[i]) && !jf) { + jf = 1; + continue; + } + if ((xargs_rule & XARGS_0CVLOWER) && Isupper(s[i])) + s[i] = (char) Tolower(s[i]); + if (s[i] == '.') + li = i; + if (s[i] == '/' || s[i] == '\\') + li = 0; + if ((xargs_rule & XARGS_0CVSLASH) && s[i] == '\\') + s[i] = '/'; + } + if ((xargs_rule & XARGS_0REMOVEXT) && li && li != i - 1) { + i = li; + s[i] = 0; + } + { + //cmdlinebuf[0] = '\0'; + //wcstombs(cmdlinebuf, GetCommandLine(), (sizeof (cmdlinebuf) - 1)); + //pp = cmdlinebuf; + //strcpy(cmdlinebuf, GetCommandLine()); + // pp = cmdlinebuf; + //while (*pp && !isspace(*pp)) + // *pp++; + //j = *pp; + pp = cmdlinebuf+i; + j = strlen(cmdlinebuf+i); + + } + while ((*pp == ' ' || *pp == '\t') && j) { + pp++; + j--; + } + if (j) + s[i++] = ' '; + while (j--) { + s[i++] = *pp++; + } + s[i] = 0; +#else + xfree(s); /* the fixed 1024-byte buffer can be shorter than the command line */ + strcpy(s = xmalloc(strlen(GetCommandLine()) + 1), GetCommandLine()); +#endif + } + setav(s, argc, argv); + if (sa) + xfree(s); +} + + +#ifdef TEST +#include + +static char QUOTE[]= "\033[33m\"\033[m"; + +int +main(int argc, char **argv) +{ + int a = 0; + + /*xargs_modify_rule(XARGS_DOTBREAK, XARGS_DOTBREAK);*/ + xargs(&argc, &argv); + + printf("[argv]-------------------------- %d argument(s)\n", argc); + while (argc--) { + printf("[%4d]%s%s%s\n", a++, QUOTE, *argv++, QUOTE); + } + printf("-------------------------------- Thats all\n"); + + return 0; +} + +#endif diff -u2rN -x awktab.c gawk-3.0.6/pc/xargs32.h gawk-3.0.6+mb1.15/pc/xargs32.h --- 
gawk-3.0.6/pc/xargs32.h Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/xargs32.h Sat May 5 17:26:12 2001 @@ -0,0 +1,68 @@ +/* Id: xargs.h 2.1 1993/01/20 10:43:04 serow Exp serow + * + * header for xargs.c + * This code is in the public domain. + * + * $Log: xargs32.h $ + * Revision 1.1 1997/04/27 15:04:01 koichik + * Initial revision + * + * Revision 2.1 1993/01/20 10:43:04 serow + * v2 + * + */ + +#ifndef _INC_XARGS +#ifdef __cplusplus +extern "C" { +#endif + +/* The following bits are used in the xargs_rule variable + to select the globbing rules */ + +#define XARGS_F_RO (1L ) /* file attributes for the find-file call: read-only */ +#define XARGS_F_HID (1L << 1L) /* hidden */ +#define XARGS_F_SYS (1L << 2L) /* system */ +#define XARGS_F_VOL (1L << 3L) /* volume label */ +#define XARGS_F_DIR (1L << 4L) /* directory */ + +#define XARGS_DOTBREAK (1L << 22L) /* ? and * do not match '.' */ +#define XARGS_HIDDENDOT (1L << 23L) /* treat '.' & '..' as hidden files */ +#define XARGS_DOSORT (1L << 24L) /* wildcard results are sorted by name */ +#define XARGS_CVLOWER (1L << 25L) /* convert filenames to lower case */ +#define XARGS_ATRESP (1L << 26L) /* @responsefile */ +#define XARGS_USECMDL (1L << 27L) /* take the arguments from the "CMDLINE" environment variable if it is set */ +#define XARGS_0CVLOWER (1L << 28L) /* convert argv[0] to lower case */ +#define XARGS_0CVSLASH (1L << 29L) /* convert '\\' to '/' in argv[0] */ +#define XARGS_0REMOVEXT (1L << 30L) /* remove the extension from argv[0] */ + +/* typical rules */ + +#define XARGS_CSH_LIKE_RULE (\ + XARGS_F_RO|XARGS_F_DIR|XARGS_F_SYS \ + |XARGS_HIDDENDOT \ + |XARGS_DOSORT \ + |XARGS_CVLOWER \ + |XARGS_ATRESP \ + |XARGS_0CVLOWER \ + |XARGS_0CVSLASH \ + |XARGS_0REMOVEXT \ + ) + +#define XARGS_MESSY_RULE (\ + XARGS_F_RO|XARGS_F_SYS \ + |XARGS_DOTBREAK \ + |XARGS_HIDDENDOT \ + ) + +/* prototypes */ + +char ** glob_filename(char *pattern); +unsigned long xargs_set_rule(unsigned long rule); +unsigned long xargs_modify_rule(unsigned long mask, unsigned long rule); +void xargs(int *argc, char ***argv); +#ifdef __cplusplus 
+} +#endif +#define _INC_XARGS 1 +#endif /* _INC_XARGS */ diff -u2rN -x awktab.c gawk-3.0.6/pc/xsystem.c gawk-3.0.6+mb1.15/pc/xsystem.c --- gawk-3.0.6/pc/xsystem.c Thu Jan 1 09:00:00 1970 +++ gawk-3.0.6+mb1.15/pc/xsystem.c Sat May 5 17:26:12 2001 @@ -0,0 +1,847 @@ +/* $Id: xsystem.c,v 1.13 1992/02/24 06:59:13 serow Exp serow $ + * like system("cmd") but return with exit code of "cmd" + * for Turbo-C/MS-C/LSI-C + * This code is in the public domain. + * + * $Log: xsystem.c,v $ + * Revision 1.13 1992/02/24 06:59:13 serow + * *** empty log message *** + * + * Revision 1.12 1991/04/09 08:48:20 serow + * ignore new line at command line tail + * + * Revision 1.11 1991/03/12 07:12:50 serow + * CMDLINE + * + * Revision 1.10 91/02/24 05:10:14 serow + * 2>&1 + * + * Revision 1.9 91/02/22 07:01:17 serow + * NEAR for ms-c + * + */ +/* modified for djgpp v2.01 1997.1.29 by K.Okabe */ +/* Last modified 2000.1.29 by K.Okabe */ +#include +#include +#include +#include +#include +#include +#include +/* patch 1997.1.27 by K.Okabe */ +#include +#include +#ifdef __GO32__ +#include +#include +#include +#include +#endif +/* end of patch */ +/* patch 1998.8.4 by K.Okabe */ +#ifdef WIN32 +#include +#endif +/* end of patch */ + + +#ifndef USECMDLINE +#define USECMDLINE 0 +#endif + +/* patch 1997.1.27 by K.Okabe */ +#ifndef PATH_MAX +#define PATH_MAX 128 +#endif +/* end of patch */ + +extern char *mktemp(char *); + +#define UCH(c) ((unsigned char)(c)) +#if 1 /* patch 1997.1.28 by K.Okabe */ +#define isk1(c) dbcs_bitset_member(UCH(c)) +#else +#define isk1(c) ((0x81<=UCH(c)&&UCH(c)<=0x9F)||(0xE0<=UCH(c)&&UCH(c)<=0xFC)) +#endif +#define isq(c) ((c) == '"') +#define isspc(c) ((c) == ' ' || (c) == '\t') +#define issep(c) (isspc(c) || (c) == '"' || (c) == '\'' || (c) == '<' || (c) == '>' || (c) == '\0') +#define issep2(c) (issep(c) || (c) == '.' 
|| (c) == '\\' || (c) == '/') +#define isdeg(c) ('0' <= (c) && (c) <= '9') + +#if (defined (_MSC_VER) || defined (__TURBOC__)) && !defined (WIN32) +/* MS-C */ +#define NEAR _near +#else +#define NEAR +#endif + +/* patch 1997.4.2 by K.Okabe */ +#if defined (GAWK) && !defined (__GO32__) && !defined(WIN32) +extern int os_open (const char *, int, ...); +#define open os_open +#endif +/* end of patch */ + +/* patch 1997.1.27 by K.Okabe */ +#ifdef __GNUC__ +#define ALLOCA(n) __builtin_alloca((n)) +#define FREE(p) +#else +#define ALLOCA(n) xmalloc((n)) +#define FREE(p) free((p)) +#endif + +#if defined(DJGPP) && ((DJGPP > 2) || (DJGPP == 2 && DJGPP_MINOR >= 1)) +#define SPAWN(cmd,arg) djgpp201_spawn((cmd), (arg)) +#define CMDLEN_LIMIT 125 + +extern char **environ; + +static int NEAR +djgpp201_spawn(const char *cmd, const char *arg) +{ + if (strlen(arg) > CMDLEN_LIMIT) { + errno = E2BIG; + return -1; + } + return _dos_exec(cmd, arg, environ); +} +#else +#define SPAWN(cmd,arg) spawnl(P_WAIT, (cmd), (cmd), (arg), (char *) 0) +#endif + +#if defined(DJGPP) && DJGPP >= 2 +#define REGS __dpmi_regs +#define INTDOS(regs) __dpmi_int(0x21, &(regs)) +#define INTDOSX(regs,segs) __dpmi_int(0x21, &(regs)) +#else +#define REGS union REGS +#define INTDOS(regs) intdos(&(regs), &(regs)) +#define INTDOSX(regs,segs) intdosx(&(regs), &(regs), &(segs)) +#endif +#ifdef __GO32__ +#define OFFSET unsigned +#define MAKE_OFFSET(seg,off) ((OFFSET) (((seg) << 4) + (off))) +#define PEEKB(off) _farpeekb(_go32_info_block.selector_for_linear_memory, (off)) +#else +#define OFFSET unsigned char far * +#define MAKE_OFFSET(seg,off) \ + ((OFFSET) (((unsigned long) ((seg) + ((unsigned) (off) >> 4)) << 16) | (unsigned) ((off) & 15))) +#define PEEKB(off) (*(off)) +#endif + +#define CHARBITS 8 + +#define dbcs_bitset_access(n,op) \ + (dbcs_table[(unsigned) (n) / CHARBITS] op (1 << ((n) & (CHARBITS - 1)))) + +#define dbcs_bitset_member(n) dbcs_bitset_access(n, &) +#define dbcs_bitset_enjoin(n) dbcs_bitset_access(n, |=) 
+ +static unsigned char dbcs_table[(1 << CHARBITS) / CHARBITS]; + +static void NEAR +init_dbcs() +{ +/* patch 1998.8.5 by K.Okabe */ +#if defined(WIN32) + int c; + + static int done = 0; + + if (done) + return; + + done = 1; + memset(dbcs_table, 0, sizeof(dbcs_table)); + for (c = 0; c < 256; c++) + if (_ismbblead(c)) + dbcs_bitset_enjoin(c); +#else +/* end of patch */ +#if defined(DJGPP) && DJGPP >= 2 +#define SEG_DS regs.x.ds +#else +#define SEG_DS segs.ds + struct SREGS segs; +#endif + REGS regs; + OFFSET offset; + + static int done = 0; + + if (done) + return; + + done = 1; + memset(dbcs_table, 0, sizeof(dbcs_table)); + SEG_DS = regs.x.si = 0; + regs.x.ax = 0x6300; + INTDOSX(regs, segs); + offset = MAKE_OFFSET(SEG_DS, regs.x.si); + if (offset == (OFFSET) 0) + return; + + for (;;) { + int lo, hi, c; + + lo = (unsigned char) PEEKB(offset); + offset++; + hi = (unsigned char) PEEKB(offset); + offset++; + if (lo == 0 && hi == 0) + break; + + for (c = lo; c <= hi; c++) + dbcs_bitset_enjoin(c); + } +#endif +} +/* end of patch */ + +typedef struct _proc { + struct _proc *next; + char *line; + char *cmd; + char *arg; + char *inf; + int infmod; + char *outf; + int outfmod; + /* patch 1997.1.27 by K.Okabe */ + char *errf; + int errfmod; + /* end of patch */ + int ored[10]; + int sred[10]; +} PRO; + +static PRO *p1 = 0; + +static char *NEAR +xmalloc(size_t n) +{ + char *bp; + + if ((bp = calloc(1, n)) == (char *) 0) { + write(2, "xsystem: Out of memory.!\n", 25); + exit(1); + } + return bp; +} + +static char *NEAR +xrealloc(void *p, size_t n) +{ + char *bp; + + if ((bp = realloc(p, n)) == (char *) 0) { + write(2, "xsystem: Out of memory!.\n", 25); + exit(1); + } + return bp; +} + +static int NEAR +iscommandcommand(char *s) +{ + static char *cmdtab[]= + { + "dir", "type", "rem", "ren", "rename", "erase", "del", + "copy", "pause", "date", "time", "ver", "vol", + "cd", "chdir", "md", "mkdir", "rd", "rmdir", "break", + "verify", "set", "prompt", "path", "exit", "ctty", "echo", 
+ "if", "for", "cls", "goto", "shift" + }; + int i, l, lc; + + l = strlen(s); + for (i = 0; i < sizeof(cmdtab) / sizeof(cmdtab[0]); i++) { + if (stricmp(s, cmdtab[i]) == 0) + return 1; + lc = strlen(cmdtab[i]); + if (lc < l && strnicmp(s, cmdtab[i], lc) == 0 && issep2(s[lc])) + return 1; + } + return 0; +} + +static int NEAR +getswchar(void) +{ +#ifdef WIN32 + return '/'; +#else + REGS reg; + + reg.x.ax = 0x3700; + INTDOS(reg); + return reg.h.dl; +#endif +} + +static int NEAR +csystem(PRO * p) +{ + char *cmp; + char SW[3]; + int rc; + +#ifdef GAWK /* patch 1997.1.27 by K.Okabe */ + cmp = getenv("SHELL"); + if (! cmp) + cmp = getenv("COMSPEC"); + if (! cmp) + return -2; +#else + if ((cmp = getenv("COMSPEC")) == 0) + return -2; +#endif + SW[0] = getswchar(); + SW[1] = 'c'; + SW[2] = 0; +#if 1 /* patch 1997.1.27 by K.Okabe */ + { + char *cmdline = (char *) ALLOCA(strlen(p->cmd) + strlen(p->arg) + 4); + if (strpbrk(p->cmd, "\t ")) { + cmdline[0] = '"'; + strcat(strcpy(cmdline + 1, p->cmd), "\""); + } else + strcpy(cmdline, p->cmd); + strcat(strcat(cmdline, " "), p->arg); + rc = spawnl(P_WAIT, cmp, cmp, SW, cmdline, (char *) 0); + FREE(cmdline); + } +#else + rc = spawnl(P_WAIT, cmp, cmp, SW, p->cmd, p->arg, (char *) 0); +#endif + return rc < 0 ? -2 : rc; +} + +static PRO *NEAR +pars1c(char *s) +{ + PRO *pp; + char *fnp; + int ms, mi; + int fs, fi, inpf; + int q; + + pp = (PRO *) xmalloc(sizeof(PRO)); + for (q = 0; q < sizeof(pp->ored) / sizeof(pp->ored[0]); q++) + pp->ored[q] = q; + while (isspc(*s)) + s++; + pp->line = strdup(s); + pp->cmd = xmalloc(ms = 8); + mi = 0; +#if 1 /* patch 1997.1.28 by K.Okabe */ + if ((q = *s == '"')) + s++; + while (q ? *s != '"' && *s != '\0' : ! 
issep(*s)) { + if (mi >= ms - 1) + pp->cmd = xrealloc(pp->cmd, ms += 8); + pp->cmd[mi++] = *s++; + } + if (q && *s == '"') + s++; +#else + while (!issep(*s)) { + if (mi >= ms - 1) + pp->cmd = xrealloc(pp->cmd, ms += 8); + pp->cmd[mi++] = *s++; + } +#endif + pp->cmd[mi] = '\0'; + q = 0; + pp->arg = xmalloc(ms = 32); + if (isspc(*s)) + s++; + mi = 0; + while (*s) { + if (mi >= ms - 1) { + pp->arg = xrealloc(pp->arg, ms += 32); + } + if (q == 0) { + /* patch 1997.1.28 by K.Okabe */ + int errf = 0; + /* end of patch */ + inpf = 0; + if ((mi == 0 || isspc(s[-1])) && + isdeg(s[0]) && s[1] == '>' && + s[2] == '&' && isdeg(s[3])) { + + pp->ored[s[0] & 15] = s[3] & 15; + s += 4; + continue; + } else if (s[0] == '<') { + if (pp->inf == 0) { + pp->infmod = O_RDONLY; + } + inpf = 1; + } else if (s[0] == '>' && s[1] == '>') { + if (pp->outf == 0) { + pp->outfmod = O_WRONLY | O_CREAT | O_APPEND; + } + s++; + } else if (s[0] == '>') { + if (pp->outf == 0) { + pp->outfmod = O_WRONLY | O_CREAT | O_TRUNC; + } + /* patch 1997.1.28 by K.Okabe */ + } else if ((mi == 0 || isspc(s[-1])) && s[0] == '1' && s[1] == '>') { + if (s[2] == '>') { + if (pp->outf == 0) { + pp->outfmod = O_WRONLY | O_CREAT | O_APPEND; + } + s += 2; + } else { + if (pp->outf == 0) { + pp->outfmod = O_WRONLY | O_CREAT | O_TRUNC; + } + s++; + } + } else if ((mi == 0 || isspc(s[-1])) && s[0] == '2' && s[1] == '>') { + if (s[2] == '>') { + if (pp->errf == 0) { + pp->errfmod = O_WRONLY | O_CREAT | O_APPEND; + } + s += 2; + } else { + if (pp->errf == 0) { + pp->errfmod = O_WRONLY | O_CREAT | O_TRUNC; + } + s++; + } + errf = 1; + /* end of patch */ + } else { + if (*s == '"') + q = !q; + pp->arg[mi++] = *s++; + continue; + } + fnp = xmalloc(fs = 16); + fi = 0; + s++; + while (isspc(*s)) + s++; + while (!issep(*s)) { + if (fi >= fs - 1) + fnp = xrealloc(fnp, fs += 16); + fnp[fi++] = *s++; + } + fnp[fi] = 0; + if (inpf) { + if (pp->inf == 0) + pp->inf = fnp; + /* patch 1997.1.28 by K.Okabe */ + } else if (errf) { + if 
(pp->errf == 0) + pp->errf = fnp; + /* end of patch */ + } else { + if (pp->outf == 0) + pp->outf = fnp; + } + } else if (s[0] == '"') { + q = !q; + pp->arg[mi++] = *s++; + } else { + pp->arg[mi++] = *s++; + } + } + /* patch 1997.1.28 by K.Okabe */ + while (mi > 0 && isspc(pp->arg[mi - 1])) + mi--; + /* end of patch */ + pp->arg[mi] = '\0'; + return pp; +} + +static PRO *NEAR +pars(char *s) +{ + char *lb; + int li, ls, q; + int c; + PRO *pp; + + lb = xmalloc(ls = 128); /* about */ + li = q = 0; + p1 = 0; + + for (;;) { + c = *s++; + if (li >= ls - 3) + lb = xrealloc(lb, ls += 128); + if (isk1(c) && *s) { + lb[li++] = c; + lb[li++] = *s++; + } else if ((!q && c == '|') || c == '\0' || (c == '\n' && *s == '\0')) { + lb[li++] = '\0'; + if (p1 == 0) { + pp = p1 = pars1c(lb); + } else { + pp->next = pars1c(lb); + pp = pp->next; + } + li = 0; + if (c == '\0' || (c == '\n' && *s == '\0')) + break; +#if 0 /* patch 1997.1.27 by K.Okabe */ + } else if (c == '\\') { + lb[li++] = c; + if (*s) { + if (isk1(*s)) + lb[li++] = *s++; + lb[li++] = *s++; + } +#endif + } else if (c == '"') { + q = !q; + lb[li++] = c; + } else { + lb[li++] = c; + } + } + free(lb); + return p1; +} + +static int NEAR +try3(char *cnm, PRO * p) +{ + char cmdb[PATH_MAX]; + int rc; + + strcat(strcpy(cmdb, cnm), ".com"); + if ((rc = open(cmdb, O_RDONLY)) >= 0) { + close(rc); + return SPAWN(cmdb, p->arg); + } + strcat(strcpy(cmdb, cnm), ".exe"); + if ((rc = open(cmdb, O_RDONLY)) >= 0) { + close(rc); + return SPAWN(cmdb, p->arg); + } + strcat(strcpy(cmdb, cnm), ".bat"); + if ((rc = open(cmdb, O_RDONLY)) >= 0) { + close(rc); + return csystem(p); + } + return -1; +} + +static int NEAR +pgo(PRO * p) +{ + char *s; + char *extp = 0; + char cmdb[PATH_MAX]; + char *ep; + int rc, lc; + + s = p->cmd + strlen(p->cmd) - 1; + while (p->cmd <= s && *s != '\\' && *s != '/' && *s != ':') { + if (*s == '.') + extp = s; + s--; + } + if (iscommandcommand(p->cmd) || (extp && stricmp(extp, ".bat") == 0)) + return csystem(p); + + 
if (s < p->cmd) { /* cmd has no PATH nor Drive */ + ep = getenv("PATH"); + strcpy(cmdb, p->cmd); + for (;;) { + if (extp) { /* has extention */ + if ((rc = open(cmdb, O_RDONLY)) >= 0) { + close(rc); + rc = SPAWN(cmdb, p->arg); + } + } else { + rc = try3(cmdb, p); + } + if (rc >= 0) + return rc; + + if (ep && *ep) { + int i; + for (i = 0; *ep != ';' && *ep != '\0'; ep++, i++) + lc = cmdb[i] = *ep; + if (*ep == ';') + ep++; + if (i > 0 && lc != ':' && lc != '\\' && lc != '/') + cmdb[i++] = '\\'; + cmdb[i] = 0; + strcat(cmdb, p->cmd); + } else { + if (rc == -2) + return rc; + return -1; + } + } + } else { /* has PATH or Drive */ + if (extp) { /* has extention */ + if ((rc = open(p->cmd, O_RDONLY)) >= 0) { + close(rc); + return SPAWN(p->cmd, p->arg); + } + return -1; + } else { + return try3(p->cmd, p); + } + } +} + +static char *NEAR +tmpf(char *tp) +{ + char tplate[PATH_MAX]; + char *ev; + int i; + +#ifdef __GO32__ + ev = getenv("TMPDIR"); + if (! ev) + ev = getenv("TMP"); +#else + ev = getenv("TMP"); +#endif + if (ev != 0) { + strcpy(tplate, ev); + i = strlen(ev); + if (i && ev[i - 1] != '\\' && ev[i - 1] != '/') + strcat(tplate, "\\"); + } else { + tplate[0] = 0; + } + strcat(tplate, tp); + return strdup(mktemp(tplate)); +} + +static int NEAR +redopen(char *fn, int md, int sfd) +{ + int rc; + int fd; + + if ((fd = open(fn, md, 0666)) != -1) { + if (md & O_APPEND) + lseek(fd, 0L, SEEK_END); + rc = dup(sfd); + if (fd != sfd) { + dup2(fd, sfd); + close(fd); + } + return rc; + } + return -1; +} + +static int NEAR +redclose(int fd, int sfd) +{ + if (fd != -1) { + dup2(fd, sfd); + close(fd); + } + return -1; +} + +static void NEAR +redswitch(PRO * p) +{ + int d; + + for (d = 0; d < sizeof(p->ored) / sizeof(p->ored[0]); d++) { + if (d != p->ored[d]) { + p->sred[d] = dup(d); + dup2(p->ored[d], d); + } + } +} +static void NEAR +redunswitch(PRO * p) +{ + int d; + + for (d = 0; d < sizeof(p->ored) / sizeof(p->ored[0]); d++) { + if (d != p->ored[d]) { + dup2(p->sred[d], d); + 
close(p->sred[d]); + } + } +} + +/* patch 1997.1.31 by K.Okabe */ +static int +is_unixy_shell(const char *shell) +{ + static const char *shells[] = { + "SH", + "SH16", + "SH32", + "KSH", + "ZSH", + "BASH", + 0 + }; + char shellexe[16]; + const char *p; + int i; + + for (p = shell; *p != '\0'; p++) + if (*p == ':' || *p == '/' || *p == '\\') + shell = p + 1; + for (i = 0; shells[i]; i++) { + if (stricmp(shell, shells[i]) == 0) + return 1; + strcpy(shellexe, shells[i]); + strcat(shellexe, ".EXE"); + if (stricmp(shell, shellexe) == 0) + return 1; + } + return 0; +} +/* end of patch */ + +int +xsystem(char *cmd) +{ + PRO *p, *pn; + char *pof, *pif, *pxf; + int psstdin, psstdout; + int rdstdin, rdstdout; + /* patch 1997.1.27 by K.Okabe */ + int rdstderr; + /* end of patch */ + int rc = 0; +#if USECMDLINE + static char *cmdline = 0; + char *oldcmdline; +#endif + +#if defined(GAWK) && defined(OS2) && (_MSC_VER != 510) /* patch 1997.2.20 by K.Okabe */ + if (_osmode == OS2_MODE) + return system(cmd); +#endif + + /* patch 1997.1.31 by K.Okabe */ + { + char *shell = (char *) 0; +#ifdef GAWK + shell = getenv("AWKSHELL"); + if (! shell) + shell = getenv("SHELL"); +#endif + if (! shell) + shell = getenv("COMSPEC"); + if (shell && is_unixy_shell(shell)) { +#if defined(DJGPP) && ((DJGPP > 2) || (DJGPP == 2 && DJGPP_MINOR >= 1)) + rc = spawnl(P_WAIT, shell, shell, "-c", cmd, (char *) 0); +#else + char *quoted_cmd, *p; + quoted_cmd = (char *) ALLOCA(strlen(cmd) * 2 + 3); + p = quoted_cmd; + *p++ = '"'; + while (*cmd != '\0') { + if (*cmd == '"') + *p++ = '\\'; + *p++ = *cmd++; + } + *p++ = '"'; + *p = '\0'; + rc = spawnl(P_WAIT, shell, shell, "-c", quoted_cmd, (char *) 0); + FREE(quoted_cmd); +#endif + return rc < 0 ? 
0xff00 : (rc << 8) & 0xff00 ; + } + } + init_dbcs(); + /* end of patch */ + + pof = pif = pxf = 0; + p = pars(cmd); + pof = tmpf("p1XXXXXX"); + pif = tmpf("p2XXXXXX"); + psstdin = psstdout = rdstdin = rdstdout = -1; + /* patch 1997.1.27 by K.Okabe */ + rdstderr = -1; + /* end of patch */ + while (p) { +#if USECMDLINE + if (!getenv("NOCMDLINE")) { + oldcmdline = cmdline; + cmdline = xmalloc(strlen(p->cmd) + strlen(p->arg) + 10); + strcat(strcat(strcat(strcpy(cmdline, "CMDLINE="), p->cmd), " "), p->arg); + putenv(cmdline); + if (oldcmdline) + free(oldcmdline); + } +#endif + if (p->next) + psstdout = redopen(pof, O_WRONLY | O_CREAT | O_TRUNC, 1); + if (p->inf) + rdstdin = redopen(p->inf, p->infmod, 0); + if (p->outf) + rdstdout = redopen(p->outf, p->outfmod, 1); + /* patch 1997.1.27 by K.Okabe */ + if (p->errf) + rdstderr = redopen(p->errf, p->errfmod, 2); + /* end of patch */ + redswitch(p); + rc = pgo(p); + redunswitch(p); + rdstdin = redclose(rdstdin, 0); + rdstdout = redclose(rdstdout, 1); + /* patch 1997.1.27 by K.Okabe */ + rdstderr = redclose(rdstderr, 2); + /* end of patch */ + psstdout = redclose(psstdout, 1); + psstdin = redclose(psstdin, 0); + if ((p = p->next) != 0) { + pxf = pif; + pif = pof; + pof = pxf; + psstdin = redopen(pif, O_RDONLY, 0); + } + } + unlink(pif); + free(pif); + unlink(pof); + free(pof); + for (pn = p = p1; p; p = pn) { + pn = p->next; + if (p->line) + free(p->line); + if (p->cmd) + free(p->cmd); + if (p->arg) + free(p->arg); + if (p->inf) + free(p->inf); + if (p->outf) free(p->outf); + if (p->errf) free(p->errf); /* stderr redirect target allocated by pars1c(); was never released */ + free(p); + } + if (rc == -2) + return 127; + return rc < 0 ?
0xFF00 : (rc << 8) & 0xFF00; +} + +#ifdef TEST +#include + +void +main() +{ + char lb[128]; + while (fgets(lb, sizeof(lb), stdin)) { /* bounded read (gets() could overrun lb); pars() ignores a trailing newline */ + printf("\nreturn %04X\n", xsystem(lb)); + } +} +#endif /* TEST */ diff -u2rN -x awktab.c gawk-3.0.6/protos.h gawk-3.0.6+mb1.15/protos.h --- gawk-3.0.6/protos.h Sun Jun 18 20:48:47 2000 +++ gawk-3.0.6+mb1.15/protos.h Sat May 5 17:48:37 2001 @@ -24,5 +24,5 @@ */ -#ifdef __STDC__ +#if defined(__STDC__) || defined(MSDOS) #define aptr_t void * /* arbitrary pointer type */ #else @@ -64,5 +64,5 @@ extern size_t strftime P((char *, size_t, const char *, const struct tm *)); #endif -#ifdef __STDC__ +#if defined(__STDC__) || defined(MSDOS) extern time_t time P((time_t *)); #else diff -u2rN -x awktab.c gawk-3.0.6/re.c gawk-3.0.6+mb1.15/re.c --- gawk-3.0.6/re.c Wed Jun 7 17:46:25 2000 +++ gawk-3.0.6+mb1.15/re.c Sat May 5 17:54:01 2001 @@ -23,4 +23,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: May 22, 1997 by okabe katsuyuki */ #include "awk.h" @@ -57,5 +59,5 @@ while (src < end) { if (*src == '\\') { - c = *++src; + c = (unsigned char) *++src; switch (c) { case 'a': @@ -106,8 +108,24 @@ *dest++ = (char) c; src++; + if (ismbchar(c)) { + size_t n = mbclen(c) - 1; + if ((src + n) <= end) + while (n-- > 0) + *dest++ = *src++; + else + dest--; + } break; } /* switch */ - } else - *dest++ = *src++; /* not '\\' */ + } else { + if (ismbchar(*dest++ = *src++)) { /* not '\\' */ + size_t n = mbclen(dest[-1]) - 1; + if ((src + n) <= end) + while (n-- > 0) + *dest++ = *src++; + else + dest--; + } + } } /* for */ @@ -128,5 +146,5 @@ /* gack. this must be done *after* re_compile_pattern */ rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */ - if (dfa && ! ignorecase) { + if (dfa && ! ignorecase && !
save_memory && current_mbctype != MBCTYPE_UTF8) { dfacomp(temp, len, &(rp->dfareg), TRUE); rp->dfa = TRUE; diff -u2rN -x awktab.c gawk-3.0.6/regex.c gawk-3.0.6+mb1.15/regex.c --- gawk-3.0.6/regex.c Fri Aug 4 06:47:30 2000 +++ gawk-3.0.6+mb1.15/regex.c Sat May 5 18:09:08 2001 @@ -20,4 +20,7 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) + Last change: Dec 1, 2000 by okabe katsuyuki */ + /* AIX requires this to be the first thing in the file. */ #if defined (_AIX) && !defined (REGEX_MALLOC) @@ -101,5 +104,5 @@ #endif #ifndef bcopy -#define bcopy(s, d, n) memcpy ((d), (s), (n)) +#define bcopy(s, d, n) memmove ((d), (s), (n)) #endif #ifndef bzero @@ -129,4 +132,5 @@ /* Get the interface, including the syntax bits. */ #include "regex.h" +#include "mbc.h" /* isalpha etc. are used for the character classes. */ @@ -181,5 +185,5 @@ (Per Bothner suggested the basic approach.) */ #undef SIGN_EXTEND_CHAR -#if __STDC__ +#if __STDC__ || defined(MSDOS) #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) #else /* not __STDC__ */ @@ -255,5 +259,9 @@ #if 0 /* It is a bad idea to declare alloca. We always cast the result. */ #ifndef _AIX /* Already did AIX, up at the top. 
*/ +#ifdef __TURBOC__ +#include +#else char *alloca (); +#endif #endif /* not _AIX */ #endif @@ -551,4 +559,28 @@ #endif /* DEBUG */ + +#define STORE_MBC(p, wc) do { \ + unsigned long _wc = (wc); \ + (p)[3] = (unsigned char) _wc; \ + (p)[2] = (unsigned char) (_wc >>= 8); \ + (p)[1] = (unsigned char) (_wc >>= 8); \ + (p)[0] = (unsigned char) (_wc >> 8); \ +} while (0) +#define STORE_MBC_AND_INCR(p, wc) do { \ + STORE_MBC((p), (wc)); \ + (p) += 4; \ +} while (0) + +#define EXTRACT_MBC(p) \ + ((((mbchar_t) (unsigned char) (p)[0]) << 24) \ + | (((mbchar_t) (unsigned char) (p)[1]) << 16) \ + | (((mbchar_t) (unsigned char) (p)[2]) << 8) \ + | ((mbchar_t) (unsigned char) (p)[3])) +#define EXTRACT_MBC_AND_INCR(p) ((p) += 4, EXTRACT_MBC(p - 4)) + +#define EXTRACT_UNSIGNED(p) \ + ((unsigned char) (p)[0] | (unsigned char) (p)[1] << 8) +#define EXTRACT_UNSIGNED_AND_INCR(p) \ + ((p) += 2, (unsigned char) (p)[-2] | (unsigned char) (p)[-1] << 8) /* If DEBUG is defined, Regex prints many voluminous messages about what @@ -645,5 +677,12 @@ do { + size_t n; putchar ('/'); + if (ismbchar (*p) && (n = mbclen(*p)) <= mcnt) { + printf ("/%.*s", (int) n, (char *) p); /* `*' precision must be an int, not size_t */ + p += n; + mcnt -= (n - 1); + continue; + } putchar (*p++); } @@ -706,7 +745,20 @@ putchar (last); - putchar (']'); - p += 1 + *p; + { + unsigned short i, size; + char buf0[8]; + char buf1[8]; + + size = EXTRACT_UNSIGNED_AND_INCR (p); + for (i = 0; i < size; i++) { + bzero (buf0, sizeof buf0); + bzero (buf1, sizeof buf1); + mbcstore (buf0, EXTRACT_MBC_AND_INCR (p)); + mbcstore (buf1, EXTRACT_MBC_AND_INCR (p)); + printf ("%s-%s", buf0, buf1); + } + } + putchar (']'); } break; @@ -1037,4 +1089,12 @@ change it ourselves. */ +#ifndef INT_IS_16BIT +#ifndef WIN32 +#if defined (_MSC_VER) || defined (__TURBOC__) +#define INT_IS_16BIT +#endif +#endif +#endif + #ifdef INT_IS_16BIT @@ -1460,4 +1520,5 @@ static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend, reg_syntax_t syntax)); +#if 0 /* We don't use this.
*/ static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr, const char *pend, @@ -1465,4 +1526,5 @@ reg_syntax_t syntax, unsigned char *b)); +#endif /* Fetch the next character in the uncompiled pattern---translating it @@ -1474,5 +1536,6 @@ do {if (p == pend) return REG_EEND; \ c = (unsigned char) *p++; \ - if (translate) c = (unsigned char) translate[c]; \ + if (translate && !ismbchar (c)) \ + c = (unsigned char) translate[(unsigned char) (c)]; \ } while (0) #endif @@ -1488,4 +1551,12 @@ #define PATUNFETCH p-- +#define PATFETCH_MBC(wc) \ + do {if (p == pend || p + mbclen(*p) > pend) return REG_EEND; \ + { char *end; wc = mbcfetch (p, &end); p = end; } \ + if (translate && wc < (1 << BYTEWIDTH)) \ + wc = (mbchar_t) translate[(unsigned char) (wc)]; \ + } while (0) + + /* If `translate' is non-null, return translate[D], else just D. We @@ -1494,6 +1565,7 @@ when we use a character as a subscript we must make it unsigned. */ #ifndef TRANSLATE -#define TRANSLATE(d) \ - (translate ? (char) translate[(unsigned char) (d)] : (d)) +#define TRANSLATE(d) (translate \ + ? (unsigned char) translate[(unsigned char) (d)] \ + : (d)) #endif @@ -1562,5 +1634,5 @@ reallocating to 0 bytes. Such thing is not going to work too well. You have been warned!! */ -#if defined(_MSC_VER) && !defined(WIN32) +#if (defined(_MSC_VER) || defined(__TURBOC__)) && !defined(WIN32) /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. The REALLOC define eliminates a flurry of conversion warnings, @@ -1716,4 +1788,249 @@ #endif +/* Handle charset(_not)?. + + Structure of charset(_not)? in compiled pattern. + + struct { + unsinged char id; charset(_not)? + unsigned char sbc_size; + unsigned char sbc_map[sbc_size]; same as original up to here. + unsigned short mbc_size; number of intervals. + struct { + uint32_t beg; beginning of interval. + uint32_t end; end of interval. 
+ } intervals[mbc_size]; + }; */ +#define UTF8_CHARSET_SIZE 128 + +static reg_errcode_t +#if defined(__STDC__) || defined(MSDOS) +set_list_bits (mbchar_t c1, mbchar_t c2, + reg_syntax_t syntax, unsigned char *b, const char *translate) +#else +set_list_bits (c1, c2, syntax, b, translate) + mbchar_t c1, c2; + reg_syntax_t syntax; + unsigned char *b; + const char *translate; +#endif +{ + unsigned char sbc_size = b[-1]; + unsigned short mbc_size = EXTRACT_UNSIGNED (&b[sbc_size]); + mbchar_t beg, end, upb; + boolean isutf8 = current_mbctype == MBCTYPE_UTF8; + size_t charset_size; + + if (c1 > c2) + return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; + charset_size = isutf8 ? UTF8_CHARSET_SIZE : (1 << BYTEWIDTH); + if (c1 < charset_size) { + upb = c2; + if (charset_size <= upb) + upb = charset_size - 1; /* The last single-byte char */ + if (sbc_size <= upb / BYTEWIDTH) { + /* Allocate maximum size so it never happens again. */ + /* NOTE: memcpy() would not work here. */ + bcopy (&b[sbc_size], &b[charset_size / BYTEWIDTH], 2 + mbc_size*8); + bzero (&b[sbc_size], charset_size / BYTEWIDTH - sbc_size); + b[-1] = sbc_size = charset_size / BYTEWIDTH; + } + if (!translate) { + for (; c1 <= upb; c1++) + if (!ismbchar (c1)) + SET_LIST_BIT (c1); + } + else + for (; c1 <= upb; c1++) + if (!ismbchar (c1)) + SET_LIST_BIT (TRANSLATE (c1)); + if (c2 < charset_size) + return REG_NOERROR; + c1 = isutf8 ? 0x80 : 0x8000 ; /* The first wide char */ + } + b = &b[sbc_size + 2]; + + /* intervals[beg] + ●----------● ●----------● + c1 + ○----------------------● + + 上図のような区間のインデックス beg を決定する. */ + for (beg = 0, upb = mbc_size; beg < upb; ) { + mbchar_t mid = (beg + upb) >> 1; + + if (c1 - 1 > EXTRACT_MBC (&b[mid*8 + 4])) + beg = mid + 1; + else + upb = mid; + } + + /* intervals[end] + ●-------● ●----------● + c2 + ●---------------○ + + 上図のような区間のインデックス end を決定する. 
*/ + for (end = beg, upb = mbc_size; end < upb; ) { + mbchar_t mid = (end + upb) >> 1; + + if (c2 >= EXTRACT_MBC (&b[mid*8]) - 1) + end = mid + 1; + else + upb = mid; + } + + if (beg != end) { + /* 既存の区間を少なくとも1つ統合する場合, + 区間の始点, 終点を修正する. */ + if (c1 > EXTRACT_MBC (&b[beg*8])) + c1 = EXTRACT_MBC (&b[beg*8]); + if (c2 < EXTRACT_MBC (&b[end*8 - 4])) + c2 = EXTRACT_MBC (&b[end*8 - 4]); + } + if (end < mbc_size && end != beg + 1) + /* 追加される区間の後ろに既存の区間を移動する. */ + /* NOTE: memcpy() would not work here. */ + bcopy (&b[end*8], &b[(beg + 1)*8], (mbc_size - end)*8); + STORE_MBC (&b[beg*8 + 0], c1); + STORE_MBC (&b[beg*8 + 4], c2); + mbc_size += beg + 1 - end; + STORE_NUMBER (&b[-2], mbc_size); + return REG_NOERROR; +} + +static int +#if defined(__STDC__) || defined(MSDOS) +is_in_list (mbchar_t c, const unsigned char *b) +#else +is_in_list (c, b) + mbchar_t c; + const unsigned char *b; +#endif +{ + unsigned short size; + int in = (re_opcode_t) b[-1] == charset_not; + boolean isutf8 = current_mbctype == MBCTYPE_UTF8; + + size = *b++; + if (c < (isutf8 ? UTF8_CHARSET_SIZE : (1 << BYTEWIDTH))) { + if (c / BYTEWIDTH < size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH) + in = !in; + } + else { + mbchar_t i, j; + + b += size + 2; + size = EXTRACT_UNSIGNED (&b[-2]); + + /* intervals[i] + ●-------● ●--------● + c + ○----------------● + + 上図のような区間のインデックス i を決定する. */ + for (i = 0, j = size; i < j; ) { + mbchar_t k = (i + j) >> 1; + + if (c > EXTRACT_MBC (&b[k*8 + 4])) + i = k + 1; + else + j = k; + } + if (i < size && EXTRACT_MBC (&b[i*8]) <= c + /* [...] から, 無効なマルチバイト文字を除外する. ここでは簡単の + ため2バイト目が '\n' または '\0' だけを無効とした. [^...] + の場合は, 逆に無効なマルチバイト文字をマッチさせる. 
*/ + && (((unsigned char) c != '\n' && (unsigned char) c != '\0') || isutf8)) + in = !in; + } + return in; +} + +static boolean +#if defined(__STDC__) || defined(MSDOS) +is_mblist_nooverlap (const unsigned char *b1, const unsigned char *b2) +#else +is_mblist_nooverlap (b1, b2) + const unsigned char *b1; + const unsigned char *b2; +#endif +{ + unsigned short size1; + mbchar_t c11; + mbchar_t c12; + unsigned short size2; + mbchar_t c21; + mbchar_t c22; + unsigned short i; + unsigned short j; + + b1 = &b1[2]; + size1 = EXTRACT_UNSIGNED (&b1[-2]); + b2 = &b2[2]; + size2 = EXTRACT_UNSIGNED (&b2[-2]); + for (i = 0; i < size1; i++) + { + c11 = EXTRACT_MBC (&b1[i * 8]); + c12 = EXTRACT_MBC (&b1[i * 8 + 4]); + for (j = 0; j < size2; j++) + { + c21 = EXTRACT_MBC (&b2[j * 8]); + c22 = EXTRACT_MBC (&b2[j * 8 + 4]); + if (! ((c22 < c11) || (c12 < c21))) + return false; + } + } + return true; +} + +static boolean +#if defined(__STDC__) || defined(MSDOS) +is_invmblist_nooverlap (const unsigned char *b1, const unsigned char *b2) +#else +is_invmblist_nooverlap (b1, b2) + const unsigned char *b1; + const unsigned char *b2; +#endif +{ + unsigned short size2; + unsigned char *inv_b2; + unsigned short inv_size2; + boolean isutf8 = current_mbctype == MBCTYPE_UTF8; + unsigned short i; + mbchar_t c2; + mbchar_t c; + boolean ret; + + b2 = &b2[2]; + size2 = EXTRACT_UNSIGNED (&b2[-2]); + inv_b2 = REGEX_ALLOCATE (2 + 8 * (size2 + 2)); + inv_b2 = &inv_b2[2]; + + c = isutf8 ? 
0x80 : 0x8000; /* The first wide char */ + inv_size2 = 0; + for (i = 0; i < size2; i++) { + c2 = EXTRACT_MBC (&b2[i * 8]) - 1; + if (c <= c2) { + STORE_MBC (&inv_b2[inv_size2 * 8], c); + STORE_MBC (&inv_b2[inv_size2 * 8 + 4], c2); + inv_size2++; + } + c = EXTRACT_MBC (&b2[i * 8 + 4]) + 1; + } + c2 = 0x7fffffffL; + STORE_MBC (&inv_b2[inv_size2 * 8], c); + STORE_MBC (&inv_b2[inv_size2 * 8 + 4], c2); + inv_size2++; + STORE_NUMBER (&inv_b2[-2], inv_size2); + inv_b2 = &inv_b2[-2]; + + ret = is_mblist_nooverlap(b1, inv_b2); + + REGEX_FREE (inv_b2); + + return ret; +} + #ifndef MATCH_MAY_ALLOCATE @@ -2082,4 +2399,6 @@ { boolean had_char_class = false; + mbchar_t c, c1; + long last_char = -1L; if (p == pend) FREE_STACK_RETURN (REG_EBRACK); @@ -2087,5 +2406,6 @@ /* Ensure that we have enough space to push a charset: the opcode, the length count, and the bitset; 34 bytes in all. */ - GET_BUFFER_SPACE (34); + /* + 2 + 8 for mbcharset(_not)? with just one interval. */ + GET_BUFFER_SPACE (34 + 2 + 8); laststart = b; @@ -2104,5 +2424,5 @@ /* Clear the whole map. */ - bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); + bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH + 2); /* charset_not matches newline according to a syntax bit. */ @@ -2114,7 +2434,14 @@ for (;;) { + int size; + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - PATFETCH (c); + if ((size = EXTRACT_UNSIGNED (&b[(1 << BYTEWIDTH) / BYTEWIDTH]))) + /* Ensure the space is enough to hold another interval + of multi-byte chars in charset(_not)?. */ + GET_BUFFER_SPACE (32 + 2 + size*8 + 8); + + PATFETCH_RAW (c); /* \ might escape characters inside [...] and [^...]. 
*/ @@ -2123,6 +2450,14 @@ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); - PATFETCH (c1); - SET_LIST_BIT (c1); + PATFETCH_RAW (c1); + if (ismbchar (c1)) { + PATUNFETCH; + PATFETCH_MBC (c1); + (void) set_list_bits (c1, c1, syntax, b, translate); + last_char = c1; + continue; + } + SET_LIST_BIT (TRANSLATE (c1)); + last_char = c1; continue; } @@ -2139,4 +2474,9 @@ FREE_STACK_RETURN (REG_ERANGE); + if (ismbchar (c)) { + PATUNFETCH; + PATFETCH_MBC (c); + } + /* Look ahead to see if it's a range when the last thing was a character: if this is a hyphen not at the @@ -2144,10 +2484,23 @@ operator. */ if (c == '-' +#if 0 /* The original was: */ && !(p - 2 >= pattern && p[-2] == '[') && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') +#else /* I wonder why he did not write like this. + Have we got any problems? */ + && p != p1 + 1 +#endif && *p != ']') { - reg_errcode_t ret - = compile_range (&p, pend, translate, syntax, b); + reg_errcode_t ret; + + assert (last_char >= 0); + PATFETCH_RAW (c1); + if (ismbchar (c1)) { + PATUNFETCH; + PATFETCH_MBC (c1); + } + ret = set_list_bits (last_char, c1, syntax, b, translate); + last_char = c1; if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); } @@ -2158,7 +2511,13 @@ /* Move past the `-'. */ - PATFETCH (c1); + PATFETCH_RAW (c1); - ret = compile_range (&p, pend, translate, syntax, b); + PATFETCH_RAW (c1); + if (ismbchar (c1)) { + PATUNFETCH; + PATFETCH_MBC (c1); + } + ret = set_list_bits (c, c1, syntax, b, translate); + last_char = c1; if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); } @@ -2171,5 +2530,5 @@ char str[CHAR_CLASS_MAX_LENGTH + 1]; - PATFETCH (c); + PATFETCH_RAW (c); c1 = 0; @@ -2268,4 +2627,7 @@ had_char_class = true; #endif /* libc || wctype.h */ +#ifdef DEBUG + last_char = -1; +#endif } else @@ -2274,7 +2636,13 @@ while (c1--) PATUNFETCH; +#if 0 /* The original was: */ SET_LIST_BIT ('['); SET_LIST_BIT (':'); +#else /* I think this is the right way. 
*/ + SET_LIST_BIT (TRANSLATE ('[')); + SET_LIST_BIT (TRANSLATE (':')); +#endif had_char_class = false; + last_char = ':'; } } @@ -2282,5 +2650,6 @@ { had_char_class = false; - SET_LIST_BIT (c); + (void) set_list_bits (c, c, syntax, b, translate); + last_char = c; } } @@ -2290,5 +2659,9 @@ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) b[-1]--; - b += b[-1]; + if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH) + bcopy (&b[(1 << BYTEWIDTH) / BYTEWIDTH], &b[b[-1]], + 2 + EXTRACT_UNSIGNED (&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8); + b += b[-1] + 2 + EXTRACT_UNSIGNED (&b[b[-1]])*8; + break; } break; @@ -2773,5 +3146,6 @@ not to translate; but if we don't translate it it will never match anything. */ - c = TRANSLATE (c); + if (!ismbchar (c)) + c = TRANSLATE (c); goto normal_char; } @@ -2782,4 +3156,12 @@ /* Expects the character in `c'. */ normal_char: + + p1 = p; + if (ismbchar (c)) { + p += mbclen (c) - 1; + if (p > pend) + return REG_EEND; + } + /* If no exactn currently being built. */ if (!pending_exact @@ -2789,5 +3171,5 @@ /* We have only one byte following the exactn for the count. */ - || *pending_exact == (1 << BYTEWIDTH) - 1 + || *pending_exact >= (1 << BYTEWIDTH) - mbclen(c) /* If followed by a repetition operator. */ @@ -2811,4 +3193,8 @@ BUF_PUSH (c); (*pending_exact)++; + while (p1 < p) { + BUF_PUSH (*p1++); + (*pending_exact)++; + } break; } /* switch (c) */ @@ -3016,4 +3402,5 @@ +#if 0 /* We use set_list_bits() now. */ /* Read the ending character of a range (in a bracket expression) from the uncompiled pattern *P_PTR (which ends at PEND). We assume the @@ -3072,4 +3459,5 @@ return REG_NOERROR; } +#endif /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in @@ -3170,18 +3558,73 @@ case charset: + /* NOTE: Charset for single-byte chars never contain + multi-byte char. See set_list_bits(). 
*/ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) fastmap[j] = 1; + { + unsigned short size; + unsigned c, end; /* wider than unsigned char so that `c <= end' can fail when end is 0xff */ + char buf[8], bufend[8]; + + p += p[-1] + 2; + size = EXTRACT_UNSIGNED (&p[-2]); + for (j = 0; j < size; j++) + { + /* set bits for 1st bytes of multi-byte chars. */ + mbcstore (buf, EXTRACT_MBC (&p[j*8])); + mbcstore (bufend, EXTRACT_MBC (&p[j*8 + 4])); + for (c = (unsigned char) buf[0], end = (unsigned char) bufend[0]; + c <= end; + c++) + /* NOTE: Charset for multi-byte chars might contain + single-byte chars. We must reject them. */ + if (ismbchar (c)) + fastmap[c] = 1; + } + } break; case charset_not: + /* S: set of all single-byte chars. + M: set of all first bytes that can start multi-byte chars. + s: any set of single-byte chars. + m: any set of first bytes that can start multi-byte chars. + + We assume S+M = U. + ___ _ _ + s+m = (S*s+M*m). */ /* Chars beyond end of map must be allowed. */ + /* NOTE: Charset_not for single-byte chars might contain + multi-byte chars. See set_list_bits(). */ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; + if (!ismbchar (j)) + fastmap[j] = 1; for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) - fastmap[j] = 1; + if (!ismbchar (j)) + fastmap[j] = 1; + { + unsigned short size; + unsigned c, beg; + char buf[8]; + + p += p[-1] + 2; + size = EXTRACT_UNSIGNED (&p[-2]); + c = 0x00; + for (j = 0; j < size; j++) { + mbcstore (buf, EXTRACT_MBC (&p[j*8 + 0])); + for (beg = (unsigned char) buf[0]; c <= beg; c++) + if (ismbchar (c)) + fastmap[c] = 1; + mbcstore (buf, EXTRACT_MBC (&p[j*8 + 4])); + c = (unsigned char) buf[0]; + } + for (beg = 0xff; c <= beg; c++) + if (ismbchar (c)) + fastmap[c] = 1; + } break; @@ -3517,4 +3960,5 @@ register int lim = 0; int irange = range; + unsigned char c; if (startpos < size1 && startpos + range >= size1) @@ -3526,11 +3970,28 @@ inside the loop. 
*/ if (translate) - while (range > lim - && !fastmap[(unsigned char) - translate[(unsigned char) *d++]]) + while (range > lim) { + c = *d++; + if (ismbchar (c)) { + size_t n; + if (fastmap[c]) + break; + d += (n = mbclen (c)) - 1; + range -= n; + continue; + } + if (fastmap[(unsigned char) translate[c]]) + break; range--; + } else - while (range > lim && !fastmap[(unsigned char) *d++]) + while (range > lim && (c = *d++, !fastmap[c])) { + if (ismbchar (c)) { + size_t n; + d += (n = mbclen (c)) - 1; + range -= n; + continue; + } range--; + } startpos += irange - range; @@ -3571,11 +4032,45 @@ else if (range > 0) { - range--; - startpos++; + const char *d = ((startpos >= size1 ? string2 - size1 : string1) + + startpos); + + if (ismbchar (*d)) { + size_t n; + range -= (n = mbclen (*d) - 1); + startpos += n; + if (range <= 0) + break; + } + range--, startpos++; } else { - range++; - startpos--; + range++, startpos--; + { + const char *s, *d, *p; + + if (startpos < size1) + s = string1, d = string1 + startpos; + else + s = string2, d = string2 + startpos - size1; + if (current_mbctype == MBCTYPE_UTF8) { + for (p = d; p > s && !ismbchar(*p) && !((unsigned char) *p < 0x80); p--) { + range++; + startpos--; + } + if (range > 0) + break; + } else { + for (p = d; p-- > s && ismbchar(*p); ) + /* Writing this as `--p >= s' might not work on the 80[12]?86 (outside + the huge memory model, when the offset of s is 0). */ + ; + if (!((d - p) & 1)) { + if (!range) + break; + range++, startpos--; + } + } + } } } @@ -3972,5 +4467,5 @@ for (;;) { -#ifdef _LIBC +#if defined (_LIBC) || defined (MSDOS) DEBUG_PRINT2 ("\n%p: ", p); #else @@ -4168,7 +4663,22 @@ do { + unsigned char c; + PREFETCH (); - if ((unsigned char) translate[(unsigned char) *d++] - != (unsigned char) *p++) + c = *d++; + if (ismbchar (c)) { + size_t n; + if (c != (unsigned char) *p++) + goto fail; + for (n = mbclen (c) - 1; n > 0; n--) + if (!--mcnt /* As long as the pattern was + compiled correctly this check + is redundant; kept just in case. 
*/ + || d == dend + || (unsigned char) *d++ != (unsigned char) *p++) + goto fail; + continue; + } + if ((unsigned char) translate[c] != (unsigned char) *p++) goto fail; } @@ -4180,5 +4690,26 @@ { PREFETCH (); +#if 0 + /* The rest of the code does not allow a multi-byte character + to straddle string1 and string2. To check for that here as + well, at the cost of speed, change this `#if 0' to + `#if 1'. */ + { + unsigned char c; + size_t n; + + c = (unsigned char) *d++; + if (c != (unsigned char) *p++) + goto fail; + if (ismbchar (c)) + for (n = mbclen (c) - 1; n > 0; n--) + if (!--mcnt + || d == dend + || (unsigned char) *d++ != (unsigned char) *p++) + goto fail; + } +#else if (*d++ != (char) *p++) goto fail; +#endif } while (--mcnt); @@ -4193,4 +4724,22 @@ PREFETCH (); + if (ismbchar (*d)) { + size_t i, n = mbclen (*d); + const char *p; + + if (d + n > dend) + goto fail; + for (p = d, i = n - 1; i > 0; i--) + if (*++p == '\n' || *p == '\0') + /* Do not match invalid multi-byte characters. For simplicity, + only a second or later byte equal to '\n' or '\0' is treated + as invalid here. */ + goto fail; + + SET_REGS_MATCHED (); + DEBUG_PRINT2 (" Matched `%ld'.\n", (long) mbcfetch (d, NULL)); + d += n; + break; + } if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') @@ -4207,5 +4756,5 @@ case charset_not: { - register unsigned char c; + register mbchar_t c; boolean not = (re_opcode_t) *(p - 1) == charset_not; @@ -4213,18 +4762,21 @@ PREFETCH (); - c = TRANSLATE (*d); /* The character to match. */ + c = (unsigned char) *d; + if (ismbchar (c)) { + if (d + mbclen (c) > dend) + goto fail; + c = mbcfetch (d, NULL); + } + else + c = TRANSLATE (c); /* The character to match. */ - /* Cast to `unsigned' instead of `unsigned char' in case the - bit list is a full 32 bytes long. 
*/ - if (c < (unsigned) (*p * BYTEWIDTH) - && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; + not = is_in_list (c, p); - p += 1 + *p; + p += 1 + *p + 2 + EXTRACT_UNSIGNED (&p[1 + *p])*8; if (!not) goto fail; SET_REGS_MATCHED (); - d++; + d += mbclen (*d); break; } @@ -4555,5 +5107,5 @@ EXTRACT_NUMBER_AND_INCR (mcnt, p); -#ifdef _LIBC +#if defined (_LIBC) || defined (MSDOS) DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt); #else @@ -4582,5 +5134,5 @@ EXTRACT_NUMBER_AND_INCR (mcnt, p); -#ifdef _LIBC +#if defined (_LIBC) || defined (MSDOS) DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt); #else @@ -4679,5 +5231,5 @@ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) { - register unsigned char c + register mbchar_t c = *p2 == (unsigned char) endline ? '\n' : p2[2]; @@ -4692,13 +5244,10 @@ || (re_opcode_t) p1[3] == charset_not) { - int not = (re_opcode_t) p1[3] == charset_not; - - if (c < (unsigned char) (p1[4] * BYTEWIDTH) - && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; + if (ismbchar (c)) + c = mbcfetch (&p2[2], NULL); - /* `not' is equal to 1 if c would match, which means + /* `is_in_list()' is TRUE if c would match, which means that we can't change to pop_failure_jump. */ - if (!not) + if (!is_in_list (c, p1 + 4)) { p[-3] = (unsigned char) pop_failure_jump; @@ -4710,19 +5259,10 @@ { #ifdef DEBUG - register unsigned char c - = *p2 == (unsigned char) endline ? '\n' : p2[2]; + register mbchar_t c + = *p2 == (unsigned char) endline ? '\n' : p2[2]; #endif -#if 0 - if ((re_opcode_t) p1[3] == exactn - && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] - && (p2[2 + p1[5] / BYTEWIDTH] - & (1 << (p1[5] % BYTEWIDTH))))) -#else - if ((re_opcode_t) p1[3] == exactn - && ! 
((int) p2[1] * BYTEWIDTH > (int) p1[4] - && (p2[2 + p1[4] / BYTEWIDTH] - & (1 << (p1[4] % BYTEWIDTH))))) -#endif + mbchar_t wc = mbcfetch (&p1[5], NULL); + if ((re_opcode_t) p1[3] == exactn && !is_in_list (wc, p2 + 1)) { p[-3] = (unsigned char) pop_failure_jump; @@ -4742,5 +5282,6 @@ break; - if (idx == p2[1]) + if (idx == p2[1] + && is_invmblist_nooverlap (&p2[2 + p2[1]], &p1[5 + p1[4]])) { p[-3] = (unsigned char) pop_failure_jump; @@ -4759,5 +5300,6 @@ break; - if (idx == p2[1] || idx == p1[4]) + if ((idx == p2[1] || idx == p1[4]) + && is_mblist_nooverlap (&p2[2 + p2[1]], &p1[5 + p1[4]])) { p[-3] = (unsigned char) pop_failure_jump; @@ -4802,5 +5344,5 @@ unconditional_jump: -#ifdef _LIBC +#if defined (_LIBC) || defined (MSDOS) DEBUG_PRINT2 ("\n%p: ", p); #else @@ -4814,5 +5356,5 @@ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); p += mcnt; /* Do the jump. */ -#ifdef _LIBC +#if defined (_LIBC) || defined (MSDOS) DEBUG_PRINT2 ("(to %p).\n", p); #else @@ -4867,5 +5409,5 @@ p += 2; STORE_NUMBER_AND_INCR (p, mcnt); -#ifdef _LIBC +#if defined (_LIBC) || defined (MSDOS) DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt); #else @@ -4875,5 +5417,5 @@ else if (mcnt == 0) { -#ifdef _LIBC +#if defined (_LIBC) || defined (MSDOS) DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2); #else @@ -4895,5 +5437,5 @@ mcnt--; STORE_NUMBER (p + 2, mcnt); -#ifdef _LIBC +#if defined (_LIBC) || defined (MSDOS) DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt); #else @@ -4914,5 +5456,5 @@ p1 = p + mcnt; EXTRACT_NUMBER_AND_INCR (mcnt, p); -#ifdef _LIBC +#if defined (_LIBC) || defined (MSDOS) DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); #else @@ -5376,7 +5918,20 @@ register const unsigned char *p1 = (const unsigned char *) s1; register const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c; + while (len) { - if (translate[*p1++] != translate[*p2++]) return 1; + c = *p1++; + if (ismbchar(c)) { + size_t n; + if (c != *p2++) + return 1; + for (n = mbclen (c) - 1; n > 0; 
n--) + if (!--len || *p1++ != *p2++) + return 1; + } + else + if (translate[c] != translate[*p2++]) + return 1; len--; } diff -u2rN -x awktab.c gawk-3.0.6/regex.h gawk-3.0.6+mb1.15/regex.h --- gawk-3.0.6/regex.h Wed May 14 01:40:38 1997 +++ gawk-3.0.6+mb1.15/regex.h Sat May 5 17:57:43 2001 @@ -422,5 +422,5 @@ worth it. */ -#if __STDC__ +#if __STDC__ || defined(MSDOS) #define _RE_ARGS(args) args diff -u2rN -x awktab.c gawk-3.0.6/stamp-h.in gawk-3.0.6+mb1.15/stamp-h.in --- gawk-3.0.6/stamp-h.in Mon Jun 12 21:56:11 2000 +++ gawk-3.0.6+mb1.15/stamp-h.in Wed May 16 01:28:22 2001 @@ -1 +1 @@ -Mon Jun 12 15:56:11 IDT 2000 +2001年 5月16日 水曜日 01時28分22秒 JST diff -u2rN -x awktab.c gawk-3.0.6/test/badargs.ok gawk-3.0.6+mb1.15/test/badargs.ok --- gawk-3.0.6/test/badargs.ok Fri Dec 10 00:09:46 1999 +++ gawk-3.0.6+mb1.15/test/badargs.ok Sat May 5 18:16:20 2001 @@ -19,6 +19,13 @@ -W usage --usage -W version --version + -W ctype=ASCII --ctype=ASCII + -W ctype=EUC --ctype=EUC + -W ctype=SJIS --ctype=SJIS + -W ctype=UTF8 --ctype=UTF8 + -W memory --memory To report bugs, see node `Bugs' in `gawk.info', which is section `Reporting Problems and Bugs' in the printed version. + +Report multi-byte extension version bugs to HGC02147@nifty.ne.jp