Index: less/LICENSE diff -u less/LICENSE:1.1.1.2 less/LICENSE:1.2 --- less/LICENSE:1.1.1.2 Sun Oct 29 02:03:35 2000 +++ less/LICENSE Tue Oct 31 04:30:38 2000 @@ -4,6 +4,9 @@ Less Copyright (C) 1984-2000 Mark Nudelman +Japanized part of Less +Copyright (c) 1994-2000 Kazushi (Jam) Marukawa + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Index: less/Makefile.aut diff -u less/Makefile.aut:1.1.1.7 less/Makefile.aut:1.10 --- less/Makefile.aut:1.1.1.7 Sun Oct 29 02:19:12 2000 +++ less/Makefile.aut Sun Oct 29 02:34:40 2000 @@ -14,14 +14,14 @@ help.c ifile.c input.c jump.c line.c linenum.c \ lsystem.c mark.c optfunc.c option.c opttbl.c os.c \ output.c position.c prompt.c search.c signal.c \ - tags.c ttyin.c version.c + tags.c ttyin.c version.c multi.c unify.c DISTFILES_W = \ defines.ds Makefile.dsb Makefile.dsg Makefile.dsu \ defines.o2 Makefile.o2e \ defines.o9 Makefile.o9c Makefile.o9u \ defines.wn Makefile.wnm Makefile.wnb DISTFILES = \ - ${SRC} regexp.c regexp.h \ + ${SRC} regexp.c regexp.h multi.h \ COPYING INSTALL LICENSE Makefile.in Makefile.aut NEWS README \ configure configure.in acconfig.h lesskey.c lessecho.c \ cmd.h funcs.h lglob.h less.h lesskey.h option.h pckeys.h position.h \ Index: less/Makefile.dsb diff -u less/Makefile.dsb:1.1.1.1 less/Makefile.dsb:1.4 --- less/Makefile.dsb:1.1.1.1 Sat Nov 30 14:41:23 1996 +++ less/Makefile.dsb Thu Mar 12 17:29:03 1998 @@ -23,7 +23,7 @@ input.obj jump.obj line.obj linenum.obj lsystem.obj \ mark.obj optfunc.obj option.obj opttbl.obj os.obj output.obj \ position.obj prompt.obj search.obj signal.obj tags.obj \ - ttyin.obj version.obj + ttyin.obj version.obj multi.obj unify.obj all: less$(EXT) lesskey$(EXT) @@ -41,7 +41,7 @@ -del defines.h -copy defines.ds defines.h -$(OBJ): less.h defines.h +$(OBJ): less.h multi.h defines.h clean: -del *.obj Index: less/Makefile.dsg diff -u less/Makefile.dsg:1.1.1.3 less/Makefile.dsg:1.6 --- 
less/Makefile.dsg:1.1.1.3 Sun Oct 29 02:03:39 2000 +++ less/Makefile.dsg Sun Oct 29 02:17:16 2000 @@ -28,7 +28,7 @@ help.${O} ifile.${O} input.${O} jump.${O} line.${O} linenum.${O} \ lsystem.${O} mark.${O} optfunc.${O} option.${O} opttbl.${O} os.${O} \ output.${O} position.${O} prompt.${O} search.${O} signal.${O} \ - tags.${O} ttyin.${O} version.${O} + tags.${O} ttyin.${O} version.${O} multi.${O} unify.${O} all: less lesskey lessecho @@ -44,7 +44,7 @@ defines.h: defines.ds command.com /c copy $< $@ -${OBJ}: less.h defines.h funcs.h +${OBJ}: less.h multi.h defines.h funcs.h info: install-info: Index: less/Makefile.dsm diff -u /dev/null less/Makefile.dsm:1.4 --- /dev/null Wed Dec 6 22:29:41 2000 +++ less/Makefile.dsm Thu Mar 12 17:29:03 1998 @@ -0,0 +1,58 @@ +# Makefile for less. +# MS-DOS version + +#### Start of system configuration section. #### + +CC = cl +# Change the following directories to match your installation. +LIBDIR = c:\msvc\lib +INCDIR = c:\msvc\include + +# CFLAGS are compile-time options and LDFLAGS are link-time options. They are +# customized for MSVC 1.0 (MSC 8.0). If you have a different version of the +# compiler, you may need to change some of the options to their equivalents. +# -Ot optimize for speed +# -AL large memory model +# -Za ANSI C conformance +# -nologo suppress MSVC banners +# -onerror:noexe no .EXE file if link errors occur +CFLAGS = -Ot -AL -Za -nologo +LDFLAGS = -onerror:noexe -nologo +LIBS = $(LIBDIR)\llibce.lib $(LIBDIR)\graphics.lib + +#### End of system configuration section. #### + +# This rule allows us to supply the necessary -D options +# in addition to whatever the user asks for. +.c.obj: + $(CC) -c -I. 
-I$(INCDIR) $(CPPFLAGS) $(CFLAGS) $< + +OBJ = main.obj screen.obj brac.obj ch.obj charset.obj cmdbuf.obj command.obj \ + decode.obj edit.obj filename.obj forwback.obj help.obj ifile.obj \ + input.obj jump.obj line.obj linenum.obj lsystem.obj \ + mark.obj optfunc.obj option.obj opttbl.obj os.obj output.obj \ + position.obj prompt.obj search.obj signal.obj tags.obj \ + ttyin.obj version.obj multi.obj unify.obj + +all: less lesskey + +# This is really horrible, but the command line is too long for +# MS-DOS if we try to link $(OBJ). +less: $(OBJ) + -if exist lesskey.obj del lesskey.obj + $(CC) $(LDFLAGS) -o $@ *.obj $(LIBS) + +lesskey: lesskey.obj version.obj + $(CC) $(LDFLAGS) -o $@ lesskey.obj version.obj $(LIBS) + +defines.h: defines.ds + -del defines.h + -copy defines.ds defines.h + +$(OBJ): less.h multi.h defines.h + +clean: + -del *.obj + -del less.exe + -del lesskey.exe + Index: less/Makefile.in diff -u less/Makefile.in:1.1.1.11 less/Makefile.in:1.14 --- less/Makefile.in:1.1.1.11 Mon Dec 27 19:18:58 1999 +++ less/Makefile.in Sun Oct 29 01:55:51 2000 @@ -42,7 +42,7 @@ help.${O} ifile.${O} input.${O} jump.${O} line.${O} linenum.${O} \ lsystem.${O} mark.${O} optfunc.${O} option.${O} opttbl.${O} os.${O} \ output.${O} position.${O} prompt.${O} search.${O} signal.${O} \ - tags.${O} ttyin.${O} version.${O} @REGEX_O@ + tags.${O} ttyin.${O} version.${O} @REGEX_O@ multi.${O} unify.${O} all: less lesskey lessecho @@ -55,7 +55,7 @@ lessecho: lessecho.${O} version.${O} ${CC} ${LDFLAGS} -o $@ lessecho.${O} version.${O} -${OBJ}: ${srcdir}/less.h ${srcdir}/funcs.h defines.h +${OBJ}: ${srcdir}/less.h ${srcdir}/funcs.h ${srcdir}/multi.h defines.h install: all ${srcdir}/less.nro ${srcdir}/lesskey.nro installdirs ${INSTALL_PROGRAM} less ${bindir}/${binprefix}less Index: less/Makefile.o2e diff -u less/Makefile.o2e:1.1.1.1 less/Makefile.o2e:1.4 --- less/Makefile.o2e:1.1.1.1 Sat Nov 30 14:41:23 1996 +++ less/Makefile.o2e Thu Mar 12 17:29:04 1998 @@ -23,7 +23,7 @@ help.${O} ifile.${O} 
input.${O} jump.${O} line.${O} linenum.${O} \ lsystem.${O} mark.${O} optfunc.${O} option.${O} opttbl.${O} os.${O} \ output.${O} position.${O} prompt.${O} search.${O} signal.${O} \ - tags.${O} ttyin.${O} version.${O} regexp.${O} + tags.${O} ttyin.${O} version.${O} regexp.${O} multi.${O} unify.${O} all: less.exe lesskey.exe @@ -33,7 +33,7 @@ lesskey.exe: lesskey.${O} version.${O} ${CC} lesskey.${O} version.${O} -o $@ ${LDFLAGS} -${OBJ}: defines.h less.h +${OBJ}: defines.h less.h multi.h defines.h: defines.o2 copy defines.o2 defines.h Index: less/Makefile.o9c diff -u less/Makefile.o9c:1.1.1.2 less/Makefile.o9c:1.5 --- less/Makefile.o9c:1.1.1.2 Sat Nov 30 14:41:24 1996 +++ less/Makefile.o9c Thu Mar 12 17:29:04 1998 @@ -27,7 +27,7 @@ help.${O} ifile.${O} input.${O} jump.${O} line.${O} linenum.${O} \ lsystem.${O} mark.${O} optfunc.${O} option.${O} opttbl.${O} os.${O} \ output.${O} position.${O} prompt.${O} search.${O} signal.${O} \ - tags.${O} ttyin.${O} version.${O} regexp.${O} + tags.${O} ttyin.${O} version.${O} regexp.${O} multi.${O} unify.${O} all: less lessecho lesskey @@ -40,7 +40,7 @@ lessecho: lessecho.${O} version.${O} ${CC} lessecho.${O} version.${O} -f=$@ ${LDFLAGS} -${OBJ}: defines.h less.h +${OBJ}: defines.h less.h multi.h defines.h: defines.o9 copy defines.o9 defines.h -rf Index: less/Makefile.o9u diff -u less/Makefile.o9u:1.1.1.1 less/Makefile.o9u:1.4 --- less/Makefile.o9u:1.1.1.1 Fri Sep 13 10:36:40 1996 +++ less/Makefile.o9u Thu Mar 12 17:29:04 1998 @@ -26,7 +26,7 @@ help.${O} ifile.${O} input.${O} jump.${O} line.${O} linenum.${O} \ lsystem.${O} mark.${O} optfunc.${O} option.${O} opttbl.${O} os.${O} \ output.${O} position.${O} prompt.${O} search.${O} signal.${O} \ - tags.${O} ttyin.${O} version.${O} regexp.${O} + tags.${O} ttyin.${O} version.${O} regexp.${O} multi.${O} unify.${O} all: less lesskey @@ -36,7 +36,7 @@ lesskey: lesskey.${O} version.${O} ${CC} lesskey.${O} version.${O} -f=$@ ${LDFLAGS} -${OBJ}: defines.h less.h +${OBJ}: defines.h less.h 
multi.h defines.h: defines.o9 copy defines.o9 defines.h -rf Index: less/Makefile.wnb diff -u less/Makefile.wnb:1.1.1.1 less/Makefile.wnb:1.4 --- less/Makefile.wnb:1.1.1.1 Sat Nov 30 14:41:25 1996 +++ less/Makefile.wnb Thu Mar 12 17:29:04 1998 @@ -23,7 +23,7 @@ input.obj jump.obj line.obj linenum.obj lsystem.obj \ mark.obj optfunc.obj option.obj opttbl.obj os.obj output.obj \ position.obj prompt.obj search.obj signal.obj tags.obj \ - ttyin.obj version.obj regexp.obj + ttyin.obj version.obj regexp.obj multi.obj unify.obj all: less lesskey @@ -40,7 +40,7 @@ -del defines.h -copy defines.wn defines.h -${OBJ}: less.h defines.h funcs.h cmd.h +${OBJ}: less.h multi.h defines.h funcs.h cmd.h clean: -del *.obj Index: less/Makefile.wnm diff -u less/Makefile.wnm:1.1.1.2 less/Makefile.wnm:1.5 --- less/Makefile.wnm:1.1.1.2 Tue Sep 21 20:02:13 1999 +++ less/Makefile.wnm Tue Sep 21 22:44:01 1999 @@ -28,7 +28,7 @@ input.obj jump.obj line.obj linenum.obj lsystem.obj \ mark.obj optfunc.obj option.obj opttbl.obj os.obj output.obj \ position.obj prompt.obj search.obj signal.obj tags.obj \ - ttyin.obj version.obj regexp.obj + ttyin.obj version.obj regexp.obj multi.obj unify.obj all: less.exe lesskey.exe @@ -45,7 +45,7 @@ -del defines.h -copy defines.wn defines.h -$(OBJ): less.h defines.h funcs.h cmd.h +$(OBJ): less.h multi.h defines.h funcs.h cmd.h clean: -del *.obj Index: less/README.iso diff -u /dev/null less/README.iso:1.19 --- /dev/null Wed Dec 6 22:29:41 2000 +++ less/README.iso Tue Nov 21 09:01:27 2000 @@ -0,0 +1,234 @@ +========================================================================= + This is the distribution of enhanced less. + It support ISO 2022 code extension techniques and Japanese codes. + Please report any problems of it to the author at jam@pobox.com. + See http://www.pobox.com/~jam/less/ for the latest info. 
+========================================================================= + + +**Features** + + - Support ISO 2022 code extension techniques in 7bits and 8bits. + - Understand all following codes. + - ISO 2022 level 3. + - a special sequence, ESC 2/4[$] 2/12[,] FE, which is used in MULE. + - LESS-JIS, UJIS and SJIS. + - LESS-JIS may contain JIS C 6226-1978, JIS X 0208-1983, + JIS X 0208:1990/1997, JIS X 0212:1990, JIS X 0213:2000, + JIS X 0201:1976/1997 left/right planes, and ASCII. + - UJIS may contain one of JIS C 6226-1978, JIS X 0208-1983, + JIS X 0208:1990/1997, or JIS X 0213:2000 plane 1, one of + JIS X 0201:1976/1997 left plane or ASCII, JIS X 0201:1976/1997 + right plane, and both JIS X 0212:1990 and JIS X 0213:2000 + plane 2. + - SJIS may contain one of JIS C 6226-1978, JIS X 0208-1983, + JIS X 0208:1990/1997, or JIS X 0213:2000 plane 1, one of + JIS X 0201:1976/1997 left plane or ASCII, JIS X 0201:1976/1997 + right plane, and JIS X 0213:2000 plane 2. + - Output following codes. + - iso8 (CTEXT, which is defined in X11) + - iso7 with only GZDM4, GZD4, G1DM4, G1DM6, G1D4, G1D6, SI and SO + escape sequences. + - LESS-JIS, UJIS and SJIS. + - LESS-JIS may contain JIS X 0208-1983 with an assumption + that its face is the same as JIS X 0213:2000 plane 1, + JIS X 0201:1976/1997 left/right planes and ASCII. + It does not contain JIS X 0212:1990 or JIS X 0213:2000 plane 2, + so those are marked as wrong character on the fly. + If you need the same JIS sequence as the input, please use iso7 + or iso8. + - UJIS may contain one of JIS C 6226-1978, JIS X 0208-1983, + JIS X 0208:1990/1997, or JIS X 0213:2000 plane 1, one of + JIS X 0201:1976/1997 left plane or ASCII, JIS X 0201:1976/1997 + right plane, and both JIS X 0212:1990 and JIS X 0213:2000 + plane 2. 
+ - SJIS may contain one of JIS C 6226-1978, JIS X 0208-1983, + JIS X 0208:1990/1997, or JIS X 0213:2000 plane 1, one of + JIS X 0201:1976/1997 left plane or ASCII, JIS X 0201:1976/1997 + right plane, and JIS X 0213:2000 plane 2. + - Automatic decompression of compressed files. + +**Problems** + + - To search, the less compare characters after conversion into + internal codes. If converted codes contain some gaps, a regular + expression library is confused. e.g. "$B$"(Ba" which contain an escape + sequence and codes which is JIS X 0201 Japanese (Katakana) in UJIS + make problems. + - If you have a regular expression library with ability to detect + character set, you don't get this problem. + - Or if you search it without using regular expression library by + typing '^R', you can solve this problem. + +**Enhancements** + + - The JLESSCHARSET environment variable choose codes for input and + output. For example, if you set "japanese-iso7", the less read all + Japanese codes and iso7 and write iso7. If you set "ujis", the + less read and write only UJIS. Currently you can use followings. + - Same as LESSCHARSET: ascii, latin1, dos, koi8-r, next + - Others: iso7, iso8, ujis-iso7, euc-iso7, sjis-iso7, jis, + ujis-jis, euc-jis, sjis-jis, ujis, euc, jis-ujis, + jis-euc, sjis, jis-sjis, japanese, japanese-iso7, + japanese-jis, japanese-ujis, japanese-euc, japanese-sjis + - Following values of JLESSCHARSET are treated for input and output + separately. For example, if you set "ujis-sjis", the less read + only UJIS and write only SJIS. + - Values: ujis-sjis, euc-sjis, sjis-ujis, sjis-euc + - The JLESSPLANESET environment variable choose plane of iso8 and + iso7. Please read a manual about details. + - The JLESSKEYCHARSET environment variable choose codes for input + from keyboard. All values for the JLESSCHARSET environment + variable are used for this. + - Sometime the less may mistake automatic handling of Japanese codes. + The user change the status for it by '@' key. 
The function name of + it is "rotate-right". + - If you have a regular expression library with character set + detection, invoke configure script with "--with-cs-regex" option to + use it. Then compile the less. + +**History about Japanized and ISO 2022-ized less** + + - About Japanization of less + - Kato-san posted a difference which give less an ability for + special escape sequence. + - Kameyama-san posted a patch for UJIS code. + - Shinoda-san posted a less.exe for SJIS code. + - Tachi-san posted two patches for JIS and SJIS code. + - Saito-san posted kanji conversion filter for less. + - Ohta-san posted other less which convert roma-ji into kana. + - Yamakita-san posted other patch for convert them into kanji. + - Yoshida-san posted two patches of less-73 for UJIS and SJIS code. + - Yoshida-san posted two patches of less-97 for UJIS and SJIS code. + - Kato-san posted a patch of less-97(UJIS) for JIS code. + - Ishii-san posted a patch of less-97(UJIS or SJIS) for JIS code. + - Tanaka-san posted a modified less-123 for SJIS code. + - Kawakami-san posted a patch of it for JIS code. + - I changed less-177 for all Japanese codes. + - Kusakabe-san changed less-178 by using patches for less-177. + - Senda-san add printing function of Japanese code for less-177. + - Sakai-san changed less-200 by using patches for less-178. + - I changed less-205 for all Japanese codes and ISO 2022 codes. + - Ushine-san changed less-321 by using patches for less-290. + - I changed less-330 by using patches for less-290. It is named + iso202. + - I removed some bugs and made jless-330-iso203. + - I re-wrote jless and made jless-330-iso208. + - I released patches to correct bugs. + + - About jless + - I put 2 unofficial patches of KANJI searching enhancements for + ISO 2022-ized less-237 on ftp. + - I put an unofficial patch to use gawk-2.15.6-mb1.04's KANJI + regular expression library for jless-330-iso210 on ftp. It will + works with iso212 and following. 
+ - I put a less.hlp file which is translated into Japanese. It will + works with iso212 and following. + +**FAQ** + + Q: When I try to view a Japanese text, the less shows inverse + characters only. How do I view them? + A: You have to set "japanese" to the JLESSCHARSET environment + variable. Otherwise you have to set "ja_JP" to the LANG + environment variable if your computer support it. + + Q: When I try to view a Japanese text, the less shows strange + characters only like '$' and others. How do I view them? + A: Do your terminal or terminal emulator have ability to show + Japanese? If so, I guess you have to adjust output of the less by + setting "japanese-ujis" or "japanese-sjis" as the JLESSCHARSET + environment variable. + + Q: When I try to view compressed text, the less doesn't work. How do + I view them? + A: The less uses the name of a text file to check whether it is + compressed or not, and uses a "zcat" program to uncompress it. So + the compressed file has to be finished with ".z", ".gz" or ".Z" + and a "zcat" program has to have ability to uncompress it. + + Q: How to set the default value of the JLESSCHARSET environment + variable. + A: You can use a DEFCHARSET declaration in "defines.h". If your + computer has a LANG property, you have to set HAVE_LOCALE to 0 in + "defines.h". Note: before editing it, you should execute a + "configure" program. + + Q: Sometime my terminal shows non ASCII characters only. + A: Some terminal emulators have bugs in handling the ISO 2022. You + should tell the less to display only JIS by setting "japanese-jis" + to the JLESSCHARSET environment variable. + + Q: Sometime the less shows wrong Japanese characters when I try to + view SJIS coded files. + A: A less's automatic Japanese code handling routine is not perfect. + Please type '^L' to re-display it. If it doesn't work fine, + please type '@' to change it to the manually handling routine. 
+ + On the other hand, if you use the less with a -Z option, the less + gives priority to SJIS over UJIS in automatic handling routine. + And you can change the default value of priority and invert the + meaning of a -Z option by compiling with SJIS_PRE declaration. + + Or if you know the file is SJIS before executing the less, you + should set "sjis" to the JLESSCHARSET environment variable to + disable the automatic handling routine. + + Q: How do I use SJIS as typing string? + A: You should set "sjis-jis" to the JLESSKEYCHARSET environment + variable. Otherwise you should set "sjis-jis" to a DEFKEYCHARSET + declaration in "defines.h". + + Q: Can I use regular expression with Japanese? + A: There are 2 ways. You can use multi bytes regular expression + library. Or you can use regular expression library with ability + to detect character sets. + + Please read "http://www.pobox.com/~jam/less/". + + Q: When I search with Kanji, the less shows strange behavior. + A: The less enables MSB of bytes of all non ASCII characters to + decrease mis-matching between an ASCII character and a non ASCII + character if they have the same value. But your regular expression + library might not support characters enabled MSB. Please invoke + configure script with "--disable-msb" option to disable it. Then + compile the less. + + Q: How do I input search pattern by JIS? + A: Normally the less uses "\e$" sequence. You should write following + in your "~/.lesskey" file and execute a "lesskey" program to + cancel it. + +#line-edit +\e$ insert ^V\e$ + + Q: How do I use emacs-like key binding? + A: You should write following in your "~/.lesskey" file and execute a + "lesskey" program. + +#line-edit +^A home +^E end +^P up +^N down +^F right +^B left +\ef word-right +^D delete +\ed word-delete + + Q: The os.c was not compiled on NEWS-OS 4.0C. + A: Add following before "#include ".
+ +#define LANGUAGE_C 1 + +**Conclusion** + + - Please mail to "jam@pobox.com" if you have a problem or a suggestion. + - See http://www.pobox.com/~jam/less/ for the latest info. + + +Local Variables: +mode: indented-text +fill-column: 70 +End: Index: less/README.iso.jp diff -u /dev/null less/README.iso.jp:1.17 --- /dev/null Wed Dec 6 22:29:41 2000 +++ less/README.iso.jp Tue Nov 21 09:01:27 2000 @@ -0,0 +1,267 @@ +========================================================================= + This is the distribution of enhanced less. + It support ISO 2022 code extension techniques and Japanese codes. + Please report any problems of it to the author at jam@pobox.com. + See http://www.pobox.com/~jam/less/ for the latest info. +========================================================================= + This file is written by Japanese and JIS code and contains + almost same information in README.iso. +========================================================================= + + +**$BFCD'(B** + + - ISO 2022 in 7bits$B$H!"(Bin 8bits$B$r%5%]!<%H!#(B + - $BF~NO$H$7$FJ}$,MxMQ$G$-$^$9!#(B + - SJIS$B$O!"(BJIS C 6226-1978$B!"(BJIS X 0208-1983$B!"(BJIS X 0208:1990/1997$B!"(B + JIS X 0213:2000$B$N(B1$BLL$N$&$A$N$I$l$+!"(BJIS X 0201:1976/1997$B:8LL(B + $B$+(BASCII$B$N$I$A$i$+!"(BJIS X 0201:1976/1997$B1&LL!"$=$7$F(B + JIS X 0213:2000$B$N(B2$BLL$,MxMQ$G$-$^$9!#(B + - $B=PNO$H$7$FMxMQ$G$-$kJ8;z%3!<%I$NDj$7$?>e$G$N(BJIS X 0208-1983$B$H!"(B + JIS X 0201:1976/1997$B:8(B/$B1&LL!"(BASCII$B$rMxMQ$7$F=PNO$7$^(B + $B$9!#(BJIS X 0212:1990$B$H(BJIS X 0213:2000$B$N(B2$BLL$O=PNO$G$-$J(B + $B$$$?$a!"=PNO;~$K@5$7$/$J$$J8;z$H$7$F%^!<%/$7$FI=<($5(B + $B$l$^$9!#$b$7!"F~NODL$j$N=PNO$,M_$7$$>l9g$O!"(Biso7$B$d(B + iso8$B$rMxMQ$7$F=PNO$7$F2<$5$$!#(B + - UJIS$B$O!"(BJIS C 6226-1978$B!"(BJIS X 0208-1983$B!"(BJIS X 0208:1990/1997$B!"(B + JIS X 0213:2000$B$N(B1$BLL$N$&$A$N$I$l$+!"(BJIS X 0201:1976/1997$B:8LL(B + $B$+(BASCII$B$N$I$A$i$+!"(BJIS X 0201:1976/1997$B1&LL!"$=$7$F(B + JIS X 0212:1990$B$H(BJIS X 
0213:2000$B$N(B2$BLL$NN>J}$,MxMQ$7$F(B + $B=PNO$7$^$9!#(B + - SJIS$B$O!"(BJIS C 6226-1978$B!"(BJIS X 0208-1983$B!"(BJIS X 0208:1990/1997$B!"(B + JIS X 0213:2000$B$N(B1$BLL$N$&$A$N$I$l$+!"(BJIS X 0201:1976/1997$B:8LL(B + $B$+(BASCII$B$N$I$A$i$+!"(BJIS X 0201:1976/1997$B1&LL!"$=$7$F(B + JIS X 0213:2000$B$N(B2$BLL$,MxMQ$7$F=PNO$7$^$9!#(B + - compress$B$d(Bgzip$B$5$l$?%U%!%$%k$r4JC1$KD/$a$k$3$H$,$G$-$k!#(B + +**$BLdBjE@(B** + + - $BJ8;z$N8!:w$O!"FbIt%3!<%I$KJQ49$7$?8e$K=hM}$7$^$9!#$=$NJQ(B + $B49$N:]$KJ8;z$HJ8;z$N4V$K7d4V$,@8$8$?>l9g!"@55,I=8=%i%$%V(B + $B%i%j$K$h$k8!:w$,$G$-$^$;$s!#6qBNE*$K$O!"(B"$B$"(Ba"$B$N$h$&$J%(%9(B + $B%1!<%W%7!<%1%s%9$r69$s$@J8;z$N8!:w$d!"H>3Q%+%?%+%J$,(BUJIS + $B$G=q$+$l$F$$$?>l9g$J$I$K!"8!:w$,$G$-$^$;$s!#(B + - character set$B$rM}2r$9$k@55,I=8=%i%$%V%i%j$rMxMQ$9$k$3$H$K(B + $B$h$C$F!"$3$NLdBj$O2r7h$G$-$^$9!#(B + - $B$^$?!"8!:w$N:]$K!"(B'^R'$B$HF~NO$7$F!"Hs@55,I=8=%b!<%I$G8!:w(B + $B$9$k$3$H$K$h$C$F$b!"$3$NLdBj$r2r7h$G$-$^$9!#(B + +**$B3HD%E@(B** + + - JLESSCHARSET$B4D6-JQ?t$G!"F~NO$H=PNO$KMxMQ$9$k%3!<%I$rF1;~(B + $B$K!"$b$7$/$O$=$l$>$l;XDj$G$-$^$9!#F1;~$K;XDj$9$k>l9g$O!"(B + $BC1=c$K$=$N%3!<%I$r;XDj$7$^$9!#Nc$($P(B"ujis"$B$H$9$k$H!"(BUJIS + $B$@$1$rF~NO$H=PNO$KMxMQ$7$^$9!#JL!9$K;XDj$9$k>l9g$O!"F~NO(B + $B$KMxMQ$9$k%3!<%I$r(B"-"$B$G@\B3$7$F;XDj$7$^$9!#$3$N>l9g:G8e$K(B + $B;XDj$7$?%3!<%I$,=PNOMQ$N%3!<%I$K$J$j$^$9!#Nc$($P!"(B + "japanese-iso7"$B$H$9$k$H!"F|K\8l$N%3!<%I$9$Y$F$H(Biso7$B$rF~NO(B + $B$H$7$F\$7$/$O%^(B + $B%K%e%"%k$rFI$s$G2<$5$$!#(B + - JLESSKEYCHARSET$B4D6-JQ?t$G!"%-!<%\!<%I$+$i$NF~NO$KMxMQ$9$k(B + $B%3!<%I$r;XDj$G$-$^$9!#(BJLESSCHARSET$B$HF1$8CM$,MxMQ$G$-$^$9!#(B + - $B<+F0H=JL$@$1$G$OET9g$,0-$$$3$H$b$"$k$?$a!":G>e0L%S%C%H$,(B + $BN)$C$?%3!<%I$NH=JLJ}K!$r=g!9$KJQ99$9$k$?$a$N%-!<$rMQ0U$7(B + $B$F$$$^$9!#(B'@'$B%-!$O(B"rotate-right"$B$G$9!#(B + - $B%-%c%i%/%?%;%C%H$r2rL>$KJQ49$b9T$&$h$&$KJQ99$7$?(B + $B$b$N$rEj9F$5$l$?!#(B + - $B;3KL$5$s$,!"99$K(BWnn$B$KDL$7$F<+F04A;zJQ49$9$k$?$a$N%Q%C%A(B + $B$rEj9F$5$l$?!#(B + - 
$B5HED$5$s$,!"(Bless-73$B$r(BUJIS$B!"(BSJIS$B2=$7$?$b$N$rEj9F$5$l$?!#(B + - $B5HED$5$s$,!"(Bless-97$B$r(BUJIS$B!"(BSJIS$B2=$7$?$b$N$rEj9F$5$l$?!#(B + - $B2CF#$5$s$,!"(Bless-97$B$N(BUJIS$B2=HG$r!"99$K(BJIS$B2=$9$k%Q%C%A$rEj(B + $B9F$5$l$?!#(B + - $B@P0f$5$s$,!"2CF#$5$s$N%Q%C%A$r;29M$K!"$I$C$A$NHG$G$b!"(B + JIS$B$H$rDL$9$h$&$K$9$k%Q%C%A$rEj9F$5$l$?!#(B + - $BEDCf$5$s$,!"(Bless-123$B$r(BSJIS$B2=$7$FEj9F$5$l$?!#(B + - $B@n>e$5$s$,!"99$K(BJIS$B2=$9$k%Q%C%A$rEj9F$5$l$?!#(B + - $B4]@n$,!"(Bless-171$B$rF|K\8l2=$7$FEj9F$7$?!#(B + - $B4]@n$,!"(Bless-177$B$rF|K\8l2=$7$FEj9F$7$?!#(B + - $BF|2A0$rIU$1$?!#(B + - $B%P%0$r$7$F(Bjless-330-iso208$B$H$7$FEj9F$7$?!#(B + - $B?'!9$H%P%0$re$O!"H?E>I=<($5$l$?J8;z$G0l(B + $BGU$G$9!#(B + A: JLESSCHARSET$B4D6-JQ?t$K(B"japanese"$B$H@_Dj$7$^$7$g$&!#(BLANG$B4D(B + $B6-JQ?t$K(B"ja_JP"$B$J$I@_Dj$9$k$H$$$&J}K!$b$"$j$^$9!#9%$-$J(B + $BJ}K!$rA*$s$G2<$5$$!#(B + + Q: $BF|K\8l$,I=<($5$l$^$;$s!#2hLL>e$O!"$J$s$+5-9f$G0lGU$G$9!#(B + A: less$B$,=PNO$7$F$$$k4A;z$rCA0$G%$%s(B + $B%9%H!<%k$7$F$*$/$3$H$r$*A&$a$7$^$9!#$^$?!"(Bless$B$N%^%K%e%"(B + $B%k$K=q$+$l$F$$$k$h$&$K!"(BLESSOPEN$B$H(BLESSCLOSE$B$r@_Dj$9$k$3(B + $B$H$K$h$C$F!"B>$N05=L%D!<%k$rMxMQ$9$k$3$H$b$G$-$^$9!#(B + + Q: $BAG?M$N?M$G$b!"4A;z$,I=<($G$-$k$h$&$K!"%G%#%U%)%k%H$N@_Dj(B + $B$r$7$F%$%s%9%H!<%k$7$?$$$N$G$9$,!#(B + A: less$B$O!"(BJLESSCHARSET$B$J$I$N4D6-JQ?t$,@_Dj$5$l$F$$$J$$>l9g(B + $B$K$O!"(Bdefines.h$BCf$N(BDEFCHARSET$B%^%/%m$NJ8;zNs$rMxMQ$9$k$h(B + $B$&$K$J$C$F$$$^$9!#:G=i$K(Bconfigure$B$rl(B + $B9g$O!"(BJLESSCHARSET$B4D6-JQ?t$K!"(B"japanese-jis"$B$J$I$H!"(BJIS + $B$@$1$rMxMQ$9$k$3$H$rL@3N$K@_Dj$7$F;H$C$F2<$5$$!#(B + + Q: SJIS$B$N%U%!%$%k$rFI$`$H!"$?$^$KJQ$JJ8;z$,I=<($5$l$^$9!#(B + A: $B4A;z$N<+F0H=JL$O40`z$G$O$"$j$^$;$s!#$=$N$?$a!"B?>/LdBj$,(B + $BH/@8$7$^$9!#(B'^L'$B$HF~NO$7!"2hLL$r:FIA2h$9$k$HD>$k>l9g$,B?(B + $B$$$G$9!#$=$l$G$bBLL\$J$i!"(B'@'$B%-!<$r?t2s2!$7$F!"2hLL2<$K(B + "sjis codeset..."$B$HI=<($5$;$F2<$5$$!#<+F0H=JL$r6X;_$G$-(B + 
$B$^$9!#85$N>uBV$KLa$9$K$O!"$3$N%-!<$r7+$jJV$72!$7$F2<$5$$!#(B + + $B0lJ}!"0z?t$K(B-Z$B$H;XDj$9$k$3$H$G!"(BSJIS$B$rM%@h$7$F<+F0H=JL$r(B + $B9T$&$3$H$b$G$-$^$9!#%3%s%Q%$%k;~$K(BSJIS_PRE$B$rDj5A$9$l$P!"(B + $B:G=i$+$i(BSJIS$B$rM%@h$7!"(B-Z$B$G:#EY$O5U$K(BUJIS$B$rM%@h$9$k$H$$$C(B + $B$?!"5U$NF0:n$r$9$k$h$&$K$b$G$-$^$9!#(B + + $B$^$?!"(BJLESSCHARSET$B4D6-JQ?t$r(B"sjis"$B$K$9$k$3$H$G!":G=i$+$i(B + $B<+F0H=JL$r6X;_$9$k$3$H$b$G$-$^$9!#(B + + Q: SJIS$B$G8!:w$9$kF|K\8l$rF~$l$k$HJQ$K$J$j$^$9!#(B + A: JLESSKEYCHARSET$B4D6-JQ?t$K(B"sjis-jis"$B$H@_Dj$7$^$7$g$&!#$^(B + $B$?%3%s%Q%$%k$9$k:]$K!"(Bdefines.h$BCf$G!"(BDEFKEYCHARSET$B$KDj5A(B + $B$7$F$$$kJ8;zNs$r!"(B"sjis-jis"$B$KJQ49$9$k$H$$$&\$7$/$O!"(Bhttp://www.pobox.com/~jam/less/$B$r;2>H$7$F2<$5(B + $B$$!#(B + + Q: $B4A;z$N8!:w$r$9$k$H$H$s$G$b$J$$F0:n$r$9$k$s$G$9$,!#(B + A: character set$B$rM}2r$7$J$$@55,I=8=%i%$%V%i%j$rMxMQ$7$?>l(B + $B9g!"(BASCII$B$HHs(BASCII$B$H$GF1$8CM$r;H$C$?J8;z$K$D$$$F!"4V0c$C(B + $B$F8!:w$K@.8y$7$F$7$^$&>l9g$,$"$j$^$9!#6qBNE*$K$O!"(BJIS + X 0201 Japanese (Katakana)$B$N(B"$B%+(B"$B$H(B"6"$B$J$I$,F1$8CM$K$J$j$^(B + $B$9!#$3$&$$$C$?8m$C$?8!:w$N2DG=@-$r>/$7$G$b8:$i$9$?$a!"(B + less$B$OJ8;z$,Hs(BASCII$B$G$"$C$?>l9g$9$Y$F$N%P%$%H$N(BMSB$B$rN)$F(B + $B$^$9!#$?$@$7!"%i%$%V%i%j$K$h$C$F$O!"(BMSB$B$,N)$C$?J8;z$rl9g!"$D$^$j$I$&$b8!(B + $B:w$,@5$7$/9T$($J$$>l9g$K$O!"(B"--disable-msb"$B$H$$$&%*%W%7%g(B + $B%s$H6&$K!"(Bconfigure$B%9%/%j%W%H$re"$B$NA0$K!"0J2<$NDj5A$rF~$l$F2<$5$$!#(B + +#define LANGUAGE_C 1 + +**$B:G8e$K(B** + + - $BLdBj$d2~A1MW5a$,$"$l$P(Bjam@pobox.com$B$K%a%$%k$7$F2<$5$$!#(B + $BBP=h$G$-$k$+$b$7$l$^$;$s!#$^$?!"2~A1MW5a$K$D$$$F$O!"B??t(B + $B=8$^$l$P!"BP=h$9$k2DG=@-$b9b$/$J$j$^$9!#(B + - http://www.pobox.com/~jam/less/$B$K:G?7$N>pJs$r=8$a$F$$$^$9!#(B + + +Local Variables: +mode: indented-text +fill-column: 60 +End: Index: less/acconfig.h diff -u less/acconfig.h:1.1.1.14 less/acconfig.h:1.10 --- less/acconfig.h:1.1.1.14 Sun Oct 29 02:19:14 2000 +++ less/acconfig.h Wed Nov 22 03:43:22 2000 @@ -7,18 +7,30 
@@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1998-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* * Regular expression library. + * Some are extended to detect character set. * Define exactly one of the following to be 1: + * HAVE_POSIX_REGCOMP_CS: extended POSIX regcomp() and regex.h + * HAVE_V8_REGCOMP_CS: extended Henry Spencer V8 regcomp() and regexp.h * HAVE_POSIX_REGCOMP: POSIX regcomp() and regex.h * HAVE_PCRE: PCRE (Perl-compatible regular expression) library * HAVE_RE_COMP: BSD re_comp() * HAVE_REGCMP: System V regcmp() * HAVE_V8_REGCOMP: Henry Spencer V8 regcomp() and regexp.h - * NO_REGEX: pattern matching is supported, but without metacharacters. + * NO_REGEX: pattern matching with character set detection is supported, + * but without metacharacters. */ +#undef HAVE_POSIX_REGCOMP_CS +#undef HAVE_V8_REGCOMP_CS #undef HAVE_POSIX_REGCOMP #undef HAVE_PCRE #undef HAVE_RE_COMP @@ -26,6 +38,26 @@ #undef HAVE_V8_REGCOMP #undef NO_REGEX #undef HAVE_REGEXEC2 + +/* + * Does regular expression library detect character set? + */ +#undef CS_REGEX + +/* + * Define MSB_ENABLE if you want to enable a MSB of non ASCII characters. + * It will be help if your library has not ability to detect a character set. + * If it has ability, this should be 0. If your system was not worked well, + * try to change following into 0 by hand. + */ +#undef MSB_ENABLE + +/* + * Define SJIS0213 or UJIS0213 if you want to enable a JIS X 0213:2000 + * mapping of SJIS and UJIS encoding. + */ +#undef SJIS0213 +#undef UJIS0213 /* Define HAVE_VOID if your compiler supports the "void" type. 
*/ #undef HAVE_VOID Index: less/charset.c diff -u less/charset.c:1.1.1.12 less/charset.c:1.65 --- less/charset.c:1.1.1.12 Sun Oct 29 02:03:31 2000 +++ less/charset.c Tue Dec 5 07:27:45 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -24,32 +30,232 @@ /* * Predefined character sets, - * selected by the LESSCHARSET environment variable. + * selected by the JLESSCHARSET or LESSCHARSET environment variable. */ struct charset { char *name; int *p_flag; char *desc; + CODESET left; + CODESET right; + CODESET output; } charsets[] = { - { "ascii", NULL, "8bcccbcc18b95.b" }, - { "dos", NULL, "8bcccbcc12bc5b95.b." }, - { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." }, - { "iso8859", NULL, "8bcccbcc18b95.33b." }, - { "koi8-r", NULL, "8bcccbcc18b95.b128." }, - { "latin1", NULL, "8bcccbcc18b95.33b." }, - { "next", NULL, "8bcccbcc18b95.bb125.bb" }, - { "utf-8", &utf_mode, "8bcccbcc18b." 
}, - { NULL, NULL, NULL } + { "ascii", NULL, "8bcccbcc18b95.b", + noconv, none, noconv }, + { "dos", NULL, "8bcccbcc12bc5b95.b.", + noconv, noconv, noconv }, + { "iso8859", NULL, "8bcccbcc18b95.33b.", + noconv, noconv, noconv }, + { "koi8-r", NULL, "8bcccbcc18b95.b128.", + noconv, noconv, noconv }, + { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b.", + noconv, noconv, noconv }, + { "latin1", NULL, "8bcccbcc18b95.33b.", + noconv, noconv, noconv }, + { "next", NULL, "8bcccbcc18b95.bb125.bb", + noconv, noconv, noconv }, + { "utf-8", &utf_mode, "8bcccbcc18b.", + noconv, noconv, noconv }, +#if ISO + { "iso7", NULL, "8bcccb4c11bc4b96.b", + iso7, noconv, iso7 }, + { "iso8", NULL, "8bcccb4c11bc4b95.15b2.16b.", + iso7, iso8, iso8 }, +# if JAPANESE + /* read all KANJI code sets */ + { "japanese", NULL, "8bcccb4c11bc4b95.b127.b", + jis, japanese, jis }, + { "japanese-iso7", NULL, "8bcccb4c11bc4b95.b127.b", + iso7, japanese, iso7 }, + { "japanese-jis", NULL, "8bcccb4c11bc4b95.b127.b", + jis, japanese, jis }, + { "japanese-ujis", NULL, "8bcccb4c11bc4b95.b127.b", + jis, japanese, ujis }, + { "japanese-euc", NULL, "8bcccb4c11bc4b95.b127.b", + jis, japanese, ujis }, + { "japanese-sjis", NULL, "8bcccb4c11bc4b95.b127.b", + jis, japanese, sjis }, + /* read JIS */ + { "jis", NULL, "8bcccb4c11bc4b95.b", + jis, none, jis }, + { "jis-ujis", NULL, "8bcccb4c11bc4b95.15b2.17b94.b", + jis, ujis, ujis }, + { "jis-euc", NULL, "8bcccb4c11bc4b95.15b2.17b94.b", + jis, ujis, ujis }, + { "jis-sjis", NULL, "8bcccb4c11bc4b95.b125.3b", + jis, sjis, sjis }, + /* read UJIS */ + { "ujis", NULL, "8bcccbcc18b95.15b2.17b94.b", + noconv, ujis, ujis }, + { "euc", NULL, "8bcccbcc18b95.15b2.17b94.b", + noconv, ujis, ujis }, + { "ujis-iso7", NULL, "8bcccb4c11bc4b96.14b2.17b94.b", + iso7, ujis, iso7 }, + { "euc-iso7", NULL, "8bcccb4c11bc4b96.14b2.17b94.b", + iso7, ujis, iso7 }, + { "ujis-jis", NULL, "8bcccb4c11bc4b95.15b2.17b94.b", + jis, ujis, jis }, + { "euc-jis", 
NULL, "8bcccb4c11bc4b95.15b2.17b94.b", + jis, ujis, jis }, + /* disallow UJIS's katakana to improve the encoding detection */ + { "ujis-sjis", NULL, "8bcccbcc18b95.15b.18b94.b", + noconv, ujis, sjis }, + { "euc-sjis", NULL, "8bcccbcc18b95.15b.18b94.b", + noconv, ujis, sjis }, + /* read SJIS */ + { "sjis", NULL, "8bcccbcc18b95.b125.3b", + noconv, sjis, sjis }, + { "sjis-iso7", NULL, "8bcccb4c11bc4b221.b", + iso7, sjis, iso7 }, + { "sjis-jis", NULL, "8bcccb4c11bc4b95.b125.3b", + jis, sjis, jis }, + { "sjis-ujis", NULL, "8bcccbcc18b95.b125.3b", + noconv, sjis, ujis }, + { "sjis-euc", NULL, "8bcccbcc18b95.b125.3b", + noconv, sjis, ujis }, +# endif +#endif + { NULL, NULL, NULL, noconv, noconv, noconv } }; +#if HAVE_LOCALE && ISO +/* + * Predefined local languages, + * selected by the setlocale(). + */ +struct charlocale { + char *name; + char *charset; +} charlocales[] = { + { "C", "ascii" }, + { "wr_WR.ct", "iso8" }, + { "ja_JP.jis8", "iso8" }, +# if JAPANESE + { "ja_JP.JIS", "japanese-jis" }, + { "ja_JP.jis7", "japanese-jis" }, + { "ja_JP.EUC", "japanese-ujis" }, + { "ja_JP.ujis", "japanese-ujis" }, + { "ja_JP.SJIS", "japanese-sjis" }, + { "ja_JP.mscode", "japanese-sjis" }, +/* Other local locales */ +# ifdef _AIX + /* AIX's */ + { "Ja_JP", "japanese-sjis" }, + { "ja_JP.IBM-eucJP", "japanese-ujis" }, + { "Ja_JP.IBM-932", "japanese-sjis" }, +# endif +# ifdef __hpux + /* HPUX */ + { "japanese", "japanese-sjis" }, + { "japanese.euc", "japanese-ujis" }, +# endif + { "ja", "japanese-ujis" }, + { "ja_JP", "japanese-ujis" }, + { "japan", "japanese-ujis" }, + { "Japan", "japanese-ujis" }, + { "japanese", "japanese-ujis" }, + { "Japanese", "japanese-ujis" }, + /* DEC OSF/1's */ + { "ja_JP.eucJP", "japanese-ujis" }, + { "ja_JP.deckanji", "japanese-ujis" }, + { "ja_JP.sdeckanji", "japanese-ujis" }, + /* BSDI's */ + { "Japanese-EUC", "japanese-ujis" }, + /* Win32 */ + { "Japanese_Japan.932", "japanese-sjis" }, +# endif + { NULL, "" } +}; +#endif + #define IS_BINARY_CHAR 01 
#define IS_CONTROL_CHAR 02 static char chardef[256]; static char *binfmt = NULL; public int binattr = AT_STANDOUT; +public char* opt_charset = NULL; + + +/* + * Look for an appropriate charset and return it. + */ + static struct charset * +search_charset(name) + char *name; +{ + struct charset *p; + char *name2, *n2; + int namelen, name2len; + int maxscore, score; + struct charset *result; + + if (!name) + name = ""; + namelen = strlen(name); + name2 = strchr(name, '-'); + if (name2) + { + name2len = namelen; + namelen = (name2 - name); + name2len -= namelen; + } else + { + name2len = 0; + } + maxscore = 0; + result = NULL; + for (p = charsets; p->name != NULL; p++) + { + score = 0; + n2 = strchr(p->name, '-'); + if (strncmp(name, p->name, namelen) == 0) { + score += namelen; + if ((int) strlen(p->name) == namelen) + score++; /* add bonus point for exactly match */ + } + if (name2 && n2 && strncmp(name2, n2, name2len) == 0) { + score += name2len - 1; /* decrease score of '-' */ + if ((int) strlen(n2) == name2len) + score++; /* add bonus point for exactly match */ + } + if (score > maxscore) + { + maxscore = score; + result = p; + } + } + return (result); +} + +/* + * Return the CODESET of left plane of named charset. + */ + public CODESET +left_codeset_of_charset(name) + register char *name; +{ + struct charset *p = search_charset(name); + + if (p) + return (p->left); + return (noconv); +} +/* + * Return the CODESET of right plane of named charset. + */ + public CODESET +right_codeset_of_charset(name) + register char *name; +{ + struct charset *p = search_charset(name); + if (p) + return (p->right); + return (none); +} + /* * Define a charset, given a description string. 
* The string consists of 256 letters, @@ -130,15 +336,16 @@ if (name == NULL || *name == '\0') return (0); - for (p = charsets; p->name != NULL; p++) + p = search_charset(name); + if (p) { - if (strcmp(name, p->name) == 0) - { - ichardef(p->desc); - if (p->p_flag != NULL) - *(p->p_flag) = 1; - return (1); - } + ichardef(p->desc); + if (p->p_flag != NULL) + *(p->p_flag) = 1; +#if ISO + init_def_codesets(p->left, p->right, p->output); +#endif + return (1); } error("invalid charset name", NULL_PARG); @@ -154,6 +361,41 @@ ilocale() { register int c; +#if ISO + /* + * We cannot trust in a system's ctype because it + * cannot treat any coding system are not like EUC. + */ + register char *name; + register struct charlocale *p; + +#if MSB_ENABLE + /* HP-UX is used LC_COLLATE to specify codes in the regexp library. */ + (void) setlocale(LC_COLLATE, ""); +#endif + name = setlocale(LC_CTYPE, ""); +#ifdef __hpux + if (name != NULL) + name = getlocale(LOCALE_STATUS)->LC_CTYPE_D; +#endif + /* + * Search some environment variable like a setlocale() + * because some poor system's setlocale treat only + * system's local locale. + */ + if (name == NULL) + name = getenv("LC_CTYPE"); + if (name == NULL) + name = getenv("LANG"); + for (p = charlocales; name && p->name != NULL; p++) + { + if (strcmp(name, p->name) == 0) + { + (void) icharset(p->charset); + return; + } + } +#endif setlocale(LC_ALL, ""); for (c = 0; c < (int) sizeof(chardef); c++) @@ -196,6 +438,27 @@ } /* + * Initialize planeset data structures. + */ + public void +init_planeset() +{ + char *s; + +#if ISO + s = lgetenv("JLESSPLANESET"); + if (s == NULL) + s = DEFPLANESET; + if (set_planeset(s) < 0) + { + error("invalid plane set", NULL_PARG); + quit(1); + /*NOTREACHED*/ + } +#endif +} + +/* * Initialize charset data structures. */ public void @@ -206,6 +469,22 @@ s = lgetenv("LESSBINFMT"); setbinfmt(s); +#if JAPANESE + /* + * See if option -K is defined. 
+ */ + s = opt_charset; + if (icharset(s)) + return; +#endif +#if ISO + /* + * See if environment variable JLESSCHARSET is defined. + */ + s = lgetenv("JLESSCHARSET"); + if (icharset(s)) + return; +#endif /* * See if environment variable LESSCHARSET is defined. */ @@ -213,7 +492,7 @@ if (icharset(s)) return; /* - * LESSCHARSET is not defined: try LESSCHARDEF. + * JLESSCHARSET and LESSCHARSET are not defined: try LESSCHARDEF. */ s = lgetenv("LESSCHARDEF"); if (s != NULL && *s != '\0') @@ -243,9 +522,10 @@ ilocale(); #else /* - * Default to "latin1". + * All variables are not defined either, default to DEFCHARSET. + * DEFCHARSET is defined in defines.h. */ - (void) icharset("latin1"); + (void) icharset(DEFCHARSET); #endif } @@ -271,18 +551,38 @@ return (chardef[c] & IS_CONTROL_CHAR); } +#if ISO +/* + * Change a database to check "control" character. + * This function is called by multi.c only to support iso2022 charset. + */ + public void +change_control_char(c, flag) + int c, flag; +{ + c &= 0377; + if (flag) + chardef[c] |= IS_CONTROL_CHAR; + else + chardef[c] &= ~IS_CONTROL_CHAR; +} +#endif + /* * Return the printable form of a character. * For example, in the "ascii" charset '\3' is printed as "^C". */ public char * -prchar(c) +prchar(c, cs) int c; + CHARSET cs; { static char buf[8]; c &= 0377; - if (!control_char(c)) + if (CSISWRONG(cs) && c > 127) + sprintf(buf, binfmt, c); + else if (!control_char(c)) sprintf(buf, "%c", c); else if (c == ESC) sprintf(buf, "ESC"); Index: less/cmd.h diff -u less/cmd.h:1.1.1.12 less/cmd.h:1.18 --- less/cmd.h:1.1.1.12 Sun Oct 29 02:03:36 2000 +++ less/cmd.h Tue Oct 31 04:30:39 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. 
+ */ #define MAX_USERCMD 500 @@ -63,6 +69,10 @@ #define A_F_FOREVER 50 #define A_GOPOS 51 #define A_REMOVE_FILE 52 + +#if JAPANESE +#define A_ROT_RCODESET 80 +#endif #define A_INVALID 100 #define A_NOACTION 101 Index: less/cmdbuf.c diff -u less/cmdbuf.c:1.1.1.14 less/cmdbuf.c:1.39 --- less/cmdbuf.c:1.1.1.14 Sun Oct 29 02:03:31 2000 +++ less/cmdbuf.c Tue Oct 31 04:30:39 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -25,6 +31,11 @@ static char *cp; /* Pointer into cmdbuf */ static int cmd_offset; /* Index into cmdbuf of first displayed char */ static int literal; /* Next input char should not be interpreted */ +static CHARSET cmdcs[CMDBUF_SIZE]; /* Buffer for holding a CHARSET of cmdbuf */ +static CHARSET *csp; /* Pointer into cmdcs */ +#if ISO +static MULBUF *mp = NULL; +#endif #if TAB_COMPLETE_FILENAME static int cmd_complete(); @@ -57,22 +68,23 @@ struct mlist *prev; struct mlist *curr_mp; char *string; + CHARSET *charset; }; /* * These are the various command histories that exist. */ struct mlist mlist_search = - { &mlist_search, &mlist_search, &mlist_search, NULL }; + { &mlist_search, &mlist_search, &mlist_search, NULL, NULL }; public void constant *ml_search = (void *) &mlist_search; struct mlist mlist_examine = - { &mlist_examine, &mlist_examine, &mlist_examine, NULL }; + { &mlist_examine, &mlist_examine, &mlist_examine, NULL, NULL }; public void constant *ml_examine = (void *) &mlist_examine; #if SHELL_ESCAPE || PIPEC struct mlist mlist_shell = - { &mlist_shell, &mlist_shell, &mlist_shell, NULL }; + { &mlist_shell, &mlist_shell, &mlist_shell, NULL, NULL }; public void constant *ml_shell = (void *) &mlist_shell; #endif @@ -95,6 +107,25 @@ /* + * Count the width of strings. 
+ */ + static int +strwidth(s, cs) + char *s; + CHARSET cs; +{ +#if ISO + int w = 0; + + while (*s != '\0') + w += mwidth(*s++, cs); + return (w); +#else + return (strlen(s)); +#endif +} + +/* * Reset command buffer (to empty). */ public void @@ -102,9 +133,25 @@ { cp = cmdbuf; *cp = '\0'; + csp = cmdcs; + *csp = NULLCS; cmd_col = 0; cmd_offset = 0; literal = 0; +#if ISO + if (mp == NULL) + { + char *s = NULL; + mp = new_multi(); + s = getenv("JLESSKEYCHARSET"); + if (s == NULL) + s = DEFKEYCHARSET; + set_codesets(mp, left_codeset_of_charset(s), + right_codeset_of_charset(s)); + init_priority(mp); + } + init_multi(mp); +#endif } /* @@ -125,8 +172,8 @@ char *s; { putstr(s); - cmd_col += strlen(s); - prompt_col += strlen(s); + cmd_col += strwidth(s, ASCII); + prompt_col += strwidth(s, ASCII); } /* @@ -135,7 +182,7 @@ public int len_cmdbuf() { - return (strlen(cmdbuf)); + return (strlen_cs(cmdbuf, cmdcs)); } /* @@ -152,13 +199,13 @@ * Repaint the line from the current position. */ clear_eol(); - for ( ; *cp != '\0'; cp++) + for ( ; *cp != '\0'; cp++, csp++) { - p = prchar(*cp); - if (cmd_col + strlen(p) >= sc_width) + p = prchar(*cp, *csp); + if (cmd_col + strwidth(p, *csp) >= sc_width) break; - putstr(p); - cmd_col += strlen(p); + putmchrs(p, *csp); + cmd_col += strwidth(p, *csp); } /* @@ -182,6 +229,7 @@ } cp = &cmdbuf[cmd_offset]; + csp = &cmdcs[cmd_offset]; } /* @@ -191,6 +239,7 @@ cmd_lshift() { char *s; + CHARSET *t; char *save_cp; int cols; @@ -199,9 +248,10 @@ * right we'd have to move to reach the center of the screen. 
*/ s = cmdbuf + cmd_offset; - cols = 0; - while (cols < (sc_width - prompt_col) / 2 && *s != '\0') - cols += strlen(prchar(*s++)); + t = cmdcs + cmd_offset; + for (cols = 0; cols < (sc_width - prompt_col) / 2 && *s != '\0'; + s++, t++) + cols += strwidth(prchar(*s, *t), *t); cmd_offset = s - cmdbuf; save_cp = cp; @@ -216,6 +266,7 @@ cmd_rshift() { char *s; + CHARSET *t; char *p; char *save_cp; int cols; @@ -226,11 +277,12 @@ * of displayed characters. */ s = cmdbuf + cmd_offset; + t = cmdcs + cmd_offset; cols = 0; while (cols < (sc_width - prompt_col) / 2 && s > cmdbuf) { - p = prchar(*--s); - cols += strlen(p); + p = prchar(*--s, *--t); + cols += strwidth(p, *t); } cmd_offset = s - cmdbuf; @@ -247,6 +299,9 @@ { char *p; +#if ISO + do { +#endif if (*cp == '\0') { /* @@ -254,14 +309,18 @@ */ return (CC_OK); } - p = prchar(*cp); - if (cmd_col + strlen(p) >= sc_width) + p = prchar(*cp, *csp); + if (cmd_col + strwidth(p, *csp) >= sc_width) cmd_lshift(); - else if (cmd_col + strlen(p) == sc_width - 1 && cp[1] != '\0') + else if (cmd_col + strwidth(p, *csp) == sc_width - 1 && cp[1] != '\0') cmd_lshift(); + putmchrs(p, *csp); + cmd_col += strwidth(p, *csp); cp++; - putstr(p); - cmd_col += strlen(p); + csp++; +#if ISO + } while (CSISREST(*csp)); +#endif return (CC_OK); } @@ -273,18 +332,25 @@ { char *p; +#if ISO + do { +#endif if (cp <= cmdbuf) { /* Already at the beginning of the line */ return (CC_OK); } - p = prchar(cp[-1]); - if (cmd_col < prompt_col + strlen(p)) + p = prchar(cp[-1], csp[-1]); + if (cmd_col < prompt_col + strwidth(p, csp[-1])) cmd_rshift(); cp--; - cmd_col -= strlen(p); + csp--; + cmd_col -= strwidth(p, *csp); while (*p++ != '\0') putbs(); +#if ISO + } while (CSISREST(*csp)); +#endif return (CC_OK); } @@ -296,8 +362,13 @@ int c; { char *s; + CHARSET *t; - if (strlen(cmdbuf) >= sizeof(cmdbuf)-2) +#if ISO + if (strlen_cs(cmdbuf, cmdcs) >= (int)sizeof(cmdbuf)-5) +#else + if (strlen_cs(cmdbuf, cmdcs) >= (int)sizeof(cmdbuf)-2) +#endif { /* * No room in the 
command buffer for another char. @@ -309,9 +380,46 @@ /* * Insert the character into the buffer. */ - for (s = &cmdbuf[strlen(cmdbuf)]; s >= cp; s--) +#if ISO + if (in_mca()) + { + char *cbuf; + CHARSET *csbuf; + int i, j; + + buffering_multi(mp, c, &cbuf, &csbuf, &i); + if (i > 0) + for ((s = &cmdbuf[strlen_cs(cmdbuf, cmdcs)]), + t = &cmdcs[strlen_cs(cmdbuf, cmdcs)]; + s >= cp; s--, t--) + { + s[i] = s[0]; + t[i] = t[0]; + } + for (j = 0; j < i; j++) + { + cp[j] = cbuf[j]; + csp[j] = csbuf[j]; + } + cbuf = &cp[i]; + /* + * Reprint the tail of the line from the inserted char. + */ + cmd_repaint(cp); + while (cp < cbuf) + cmd_right(); + return (CC_OK); + } +#endif + for ((s = &cmdbuf[strlen_cs(cmdbuf, cmdcs)]), + t = &cmdcs[strlen_cs(cmdbuf, cmdcs)]; + s >= cp; s--, t--) + { s[1] = s[0]; + t[1] = t[0]; + } *cp = c; + *csp = ASCII; /* * Reprint the tail of the line from the inserted char. */ @@ -328,6 +436,8 @@ cmd_erase() { register char *s; + register CHARSET *t; + int num; if (cp == cmdbuf) { @@ -340,12 +450,17 @@ /* * Move cursor left (to the char being erased). */ + s = cp; cmd_left(); + num = s - cp; /* * Remove the char from the buffer (shift the buffer left). */ - for (s = cp; *s != '\0'; s++) - s[0] = s[1]; + for ((s = cp), t = csp; *s != '\0'; s++, t++) + { + s[0] = s[num]; + t[0] = t[num]; + } /* * Repaint the buffer after the erased char. */ @@ -481,6 +596,8 @@ int action; { char *s; + CHARSET *t; + int i; if (curr_mlist == NULL) { @@ -503,14 +620,18 @@ * Copy the entry into cmdbuf and echo it on the screen. */ s = curr_mlist->curr_mp->string; + t = curr_mlist->curr_mp->charset; if (s == NULL) s = ""; - for (cp = cmdbuf; *s != '\0'; s++) + for ((cp = cmdbuf), (csp = cmdcs), i = 0; *s != '\0'; s++, t++, i++) { - *cp = *s; - cmd_right(); + cp[i] = *s; + csp[i] = *t; } - *cp = '\0'; + cp[i] = '\0'; + csp[i] = NULLCS; + while (*cp != '\0') + cmd_right(); return (CC_OK); } #endif @@ -519,9 +640,10 @@ * Add a string to a history list. 
*/ public void -cmd_addhist(mlist, cmd) +cmd_addhist(mlist, cmd, cs) struct mlist *mlist; char *cmd; + CHARSET *cs; { #if CMD_HISTORY struct mlist *ml; @@ -529,7 +651,7 @@ /* * Don't save a trivial command. */ - if (strlen(cmd) == 0) + if (strlen_cs(cmd, cs) == 0) return; /* * Don't save if a duplicate of a command which is already @@ -548,7 +670,7 @@ * Save the command and put it at the end of the history list. */ ml = (struct mlist *) ecalloc(1, sizeof(struct mlist)); - ml->string = save(cmd); + ml->string = strdup_cs(cmd, cs, &ml->charset); ml->next = mlist; ml->prev = mlist->prev; mlist->prev->next = ml; @@ -575,7 +697,7 @@ */ if (curr_mlist == NULL) return; - cmd_addhist(curr_mlist, cmdbuf); + cmd_addhist(curr_mlist, cmdbuf, cmdcs); #endif } @@ -1019,4 +1141,13 @@ get_cmdbuf() { return (cmdbuf); +} + +/* + * Return a pointer to the character set bufffer of the command buffer. + */ + public CHARSET * +get_cmdcs() +{ + return (cmdcs); } Index: less/command.c diff -u less/command.c:1.1.1.17 less/command.c:1.25 --- less/command.c:1.1.1.17 Sun Oct 29 02:19:09 2000 +++ less/command.c Tue Oct 31 04:30:39 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -176,15 +182,17 @@ exec_mca() { register char *cbuf; + CHARSET *csbuf; cmd_exec(); cbuf = get_cmdbuf(); + csbuf = get_cmdcs(); switch (mca) { case A_F_SEARCH: case A_B_SEARCH: - multi_search(cbuf, number); + multi_search(cbuf, csbuf, number); break; case A_FIRSTCMD: /* @@ -726,8 +734,9 @@ * If SRCH_PAST_EOF is set, continue the search thru multiple files. 
*/ static void -multi_search(pattern, n) +multi_search(pattern, charset, n) char *pattern; + CHARSET *charset; int n; { register int nomore; @@ -758,7 +767,7 @@ for (;;) { - n = search(search_type, pattern, n); + n = search(search_type, pattern, charset, n); /* * The SRCH_NO_MOVE flag doesn't "stick": it gets cleared * after being used once. This allows "n" to work after @@ -1210,7 +1219,7 @@ #define DO_SEARCH() if (number <= 0) number = 1; \ mca_search(); \ cmd_exec(); \ - multi_search((char *)NULL, number); + multi_search((char *)NULL, (CHARSET *)NULL, number); case A_F_SEARCH: @@ -1534,6 +1543,16 @@ } c = getcc(); goto again; + +#if JAPANESE + case A_ROT_RCODESET: + parg.p_string = + rotate_right_codeset(get_mulbuf(curr_ifile)); + screen_trashed = 1; + make_display(); + error("%s codeset is used in right plane", &parg); + break; +#endif case A_NOACTION: break; Index: less/configure.in diff -u less/configure.in:1.1.1.14 less/configure.in:1.9 --- less/configure.in:1.1.1.14 Sun Oct 29 02:19:14 2000 +++ less/configure.in Wed Nov 22 03:43:25 2000 @@ -2,6 +2,13 @@ AC_INIT(forwback.c) AC_CONFIG_HEADER(defines.h) +dnl Checks for arguments. +AC_ARG_WITH(cs-regex, [ --with-cs-regex Regular expression library with character set detection], have_regex_cs=$withval, have_regex_cs=no) +msb_enable=unknown +AC_ARG_ENABLE(msb, [ --disable-msb Disable the MSB of non ASCII characters], msb_enable=$enableval) +jisx0213_enable=unknown +AC_ARG_ENABLE(jisx0213, [ --disable-jisx0213 Disable the JIS X 0213 mapping for SJIS and UJIS], jisx0213_enable=$enableval) + dnl Checks for programs. AC_PROG_CC AC_ISC_POSIX @@ -226,7 +233,23 @@ fi dnl Checks for regular expression functions. +if test $have_regex_cs != no; then +AC_DEFINE(CS_REGEX) +dnl try to find it with character set detection. 
have_regex=no +if test $have_regex = no && test -f ${srcdir}/regex_cs.c; then +AC_MSG_RESULT(using POSIX regcomp_cs -- local source); AC_DEFINE(HAVE_POSIX_REGCOMP_CS) REGEX_O='regex_cs.$(O)' AC_SUBST(REGEX_O) have_regex=yes +fi +if test $have_regex = no && test -f ${srcdir}/regexp_cs.c; then +AC_MSG_RESULT(using V8 regcomp_cs -- local source); AC_DEFINE(HAVE_V8_REGCOMP_CS) REGEX_O='regexp_cs.$(O)' AC_SUBST(REGEX_O) have_regex=yes +fi +if test $have_regex = no; then +AC_MSG_RESULT(cannot find regular expression library with character set detection); AC_DEFINE(NO_REGEX) +fi +else +AC_DEFINE(CS_REGEX, 0) +dnl try to find it without character set detection. +have_regex=no have_posix_regex=unknown AC_MSG_CHECKING(for regcomp) @@ -306,6 +329,32 @@ if test $have_regex = no; then AC_MSG_RESULT(cannot find regular expression library); AC_DEFINE(NO_REGEX) fi +fi + +case $msb_enable in +yes) AC_MSG_RESULT(enable the MSB of non ASCII characters) + AC_DEFINE(MSB_ENABLE);; +no) AC_MSG_RESULT(disable the MSB of non ASCII characters) + AC_DEFINE(MSB_ENABLE, 0);; +unknown) + if test $have_regex_cs = no; then + AC_MSG_RESULT(decide to enable the MSB of non ASCII characters) + AC_DEFINE(MSB_ENABLE) + else + AC_MSG_RESULT(decide to disable the MSB of non ASCII characters) + AC_DEFINE(MSB_ENABLE, 0) + fi;; +esac + +case $jisx0213_enable in +unknown|yes) + AC_MSG_RESULT(enable the JIS X 0213 mapping for SJIS and UJIS) + AC_DEFINE(SJIS0213, 1) + AC_DEFINE(UJIS0213, 1);; +no) AC_MSG_RESULT(disable the JIS X 0213 mapping for SJIS and UJIS) + AC_DEFINE(SJIS0213, 0) + AC_DEFINE(UJIS0213, 0);; +esac AC_ARG_WITH(editor, [ --with-editor=PROGRAM use PROGRAM as the default editor [vi]], Index: less/configure diff -u less/configure:1.1.1.15 less/configure:1.9 --- less/configure:1.1.1.15 Sun Oct 29 02:19:13 2000 +++ less/configure Wed Nov 22 03:43:23 2000 @@ -12,6 +12,12 @@ ac_default_prefix=/usr/local # Any additions from configure.in: ac_help="$ac_help + --with-cs-regex Regular expression library 
with character set detection" +ac_help="$ac_help + --disable-msb Disable the MSB of non ASCII characters" +ac_help="$ac_help + --disable-jisx0213 Disable the JIS X 0213 mapping for SJIS and UJIS" +ac_help="$ac_help --with-regex={auto,pcre,posix,regcmp,re_comp,regcomp,regcomp-local} Select a regular expression library [auto]" ac_help="$ac_help --with-editor=PROGRAM use PROGRAM as the default editor [vi]" @@ -524,10 +530,33 @@ +# Check whether --with-cs-regex or --without-cs-regex was given. +if test "${with_cs_regex+set}" = set; then + withval="$with_cs_regex" + have_regex_cs=$withval +else + have_regex_cs=no +fi + +msb_enable=unknown +# Check whether --enable-msb or --disable-msb was given. +if test "${enable_msb+set}" = set; then + enableval="$enable_msb" + msb_enable=$enableval +fi + +jisx0213_enable=unknown +# Check whether --enable-jisx0213 or --disable-jisx0213 was given. +if test "${enable_jisx0213+set}" = set; then + enableval="$enable_jisx0213" + jisx0213_enable=$enableval +fi + + # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:531: checking for $ac_word" >&5 +echo "configure:560: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -556,7 +585,7 @@ # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:560: checking for $ac_word" >&5 +echo "configure:589: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -604,7 +633,7 @@ fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... 
$ac_c" 1>&6 -echo "configure:608: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 +echo "configure:637: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. @@ -614,11 +643,11 @@ cross_compiling=$ac_cv_prog_cc_cross cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:651: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then ac_cv_prog_cc_works=yes # If we can't run a trivial program, we are probably using a cross compiler. if (./conftest; exit) 2>/dev/null; then @@ -638,12 +667,12 @@ { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6 -echo "configure:642: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 +echo "configure:671: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 cross_compiling=$ac_cv_prog_cc_cross echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 -echo "configure:647: checking whether we are using GNU C" >&5 +echo "configure:676: checking whether we are using GNU C" >&5 if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -652,7 +681,7 @@ yes; #endif EOF -if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:656: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then +if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:685: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then ac_cv_prog_gcc=yes else ac_cv_prog_gcc=no @@ -667,7 +696,7 @@ ac_save_CFLAGS="$CFLAGS" CFLAGS= echo $ac_n "checking whether ${CC-cc} accepts -g""... 
$ac_c" 1>&6 -echo "configure:671: checking whether ${CC-cc} accepts -g" >&5 +echo "configure:700: checking whether ${CC-cc} accepts -g" >&5 if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -695,7 +724,7 @@ fi echo $ac_n "checking for POSIXized ISC""... $ac_c" 1>&6 -echo "configure:699: checking for POSIXized ISC" >&5 +echo "configure:728: checking for POSIXized ISC" >&5 if test -d /etc/conf/kconfig.d && grep _POSIX_VERSION /usr/include/sys/unistd.h >/dev/null 2>&1 then @@ -716,7 +745,7 @@ fi echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 -echo "configure:720: checking how to run the C preprocessor" >&5 +echo "configure:749: checking how to run the C preprocessor" >&5 # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= @@ -731,13 +760,13 @@ # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:741: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:770: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out` if test -z "$ac_err"; then : @@ -748,13 +777,13 @@ rm -rf conftest* CPP="${CC-cc} -E -traditional-cpp" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:758: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:787: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out` if test -z "$ac_err"; then : @@ -778,13 +807,13 @@ if test $ac_cv_prog_gcc = yes; then echo $ac_n "checking whether ${CC-cc} needs -traditional""... 
$ac_c" 1>&6 -echo "configure:782: checking whether ${CC-cc} needs -traditional" >&5 +echo "configure:811: checking whether ${CC-cc} needs -traditional" >&5 if eval "test \"`echo '$''{'ac_cv_prog_gcc_traditional'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else ac_pattern="Autoconf.*'x'" cat > conftest.$ac_ext < Autoconf TIOCGETP @@ -802,7 +831,7 @@ if test $ac_cv_prog_gcc_traditional = no; then cat > conftest.$ac_ext < Autoconf TCGETA @@ -853,7 +882,7 @@ # SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" # ./install, which can be erroneously created by make from ./install.sh. echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 -echo "configure:857: checking for a BSD compatible install" >&5 +echo "configure:886: checking for a BSD compatible install" >&5 if test -z "$INSTALL"; then if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -904,7 +933,7 @@ echo $ac_n "checking for initscr in -lxcurses""... $ac_c" 1>&6 -echo "configure:908: checking for initscr in -lxcurses" >&5 +echo "configure:937: checking for initscr in -lxcurses" >&5 ac_lib_var=`echo xcurses'_'initscr | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -912,7 +941,7 @@ ac_save_LIBS="$LIBS" LIBS="-lxcurses $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:956: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else @@ -945,7 +974,7 @@ fi echo $ac_n "checking for initscr in -lncurses""... 
$ac_c" 1>&6 -echo "configure:949: checking for initscr in -lncurses" >&5 +echo "configure:978: checking for initscr in -lncurses" >&5 ac_lib_var=`echo ncurses'_'initscr | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -953,7 +982,7 @@ ac_save_LIBS="$LIBS" LIBS="-lncurses $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:997: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else @@ -986,7 +1015,7 @@ fi echo $ac_n "checking for initscr in -lcurses""... $ac_c" 1>&6 -echo "configure:990: checking for initscr in -lcurses" >&5 +echo "configure:1019: checking for initscr in -lcurses" >&5 ac_lib_var=`echo curses'_'initscr | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -994,7 +1023,7 @@ ac_save_LIBS="$LIBS" LIBS="-lcurses $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1038: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else @@ -1027,7 +1056,7 @@ fi echo $ac_n "checking for tgetent in -ltermcap""... 
$ac_c" 1>&6 -echo "configure:1031: checking for tgetent in -ltermcap" >&5 +echo "configure:1060: checking for tgetent in -ltermcap" >&5 ac_lib_var=`echo termcap'_'tgetent | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1035,7 +1064,7 @@ ac_save_LIBS="$LIBS" LIBS="-ltermcap $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1079: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else @@ -1068,7 +1097,7 @@ fi echo $ac_n "checking for tgetent in -ltermlib""... $ac_c" 1>&6 -echo "configure:1072: checking for tgetent in -ltermlib" >&5 +echo "configure:1101: checking for tgetent in -ltermlib" >&5 ac_lib_var=`echo termlib'_'tgetent | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1076,7 +1105,7 @@ ac_save_LIBS="$LIBS" LIBS="-ltermlib $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1120: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else @@ -1109,7 +1138,7 @@ fi echo $ac_n "checking for regcmp in -lgen""... 
$ac_c" 1>&6 -echo "configure:1113: checking for regcmp in -lgen" >&5 +echo "configure:1142: checking for regcmp in -lgen" >&5 ac_lib_var=`echo gen'_'regcmp | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1117,7 +1146,7 @@ ac_save_LIBS="$LIBS" LIBS="-lgen $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1161: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else @@ -1156,7 +1185,7 @@ fi echo $ac_n "checking for regcmp in -lintl""... $ac_c" 1>&6 -echo "configure:1160: checking for regcmp in -lintl" >&5 +echo "configure:1189: checking for regcmp in -lintl" >&5 ac_lib_var=`echo intl'_'regcmp | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1164,7 +1193,7 @@ ac_save_LIBS="$LIBS" LIBS="-lintl $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1208: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else @@ -1203,7 +1232,7 @@ fi echo $ac_n "checking for regcmp in -lPW""... $ac_c" 1>&6 -echo "configure:1207: checking for regcmp in -lPW" >&5 +echo "configure:1236: checking for regcmp in -lPW" >&5 ac_lib_var=`echo PW'_'regcmp | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -1211,7 +1240,7 @@ ac_save_LIBS="$LIBS" LIBS="-lPW $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1255: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else @@ -1251,7 +1280,7 @@ echo $ac_n "checking for working terminal libraries""... 
$ac_c" 1>&6 -echo "configure:1255: checking for working terminal libraries" >&5 +echo "configure:1284: checking for working terminal libraries" >&5 TERMLIBS= curses_broken=0 @@ -1268,14 +1297,14 @@ SAVE_LIBS=$LIBS LIBS="$LIBS $TERMLIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1308: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* termok=yes else @@ -1296,14 +1325,14 @@ SAVE_LIBS=$LIBS LIBS="$LIBS $TERMLIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1336: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* termok=yes else @@ -1324,14 +1353,14 @@ SAVE_LIBS=$LIBS LIBS="$LIBS $TERMLIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1364: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* termok=yes else @@ -1353,14 +1382,14 @@ SAVE_LIBS=$LIBS LIBS="$LIBS $TERMLIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1393: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* termok=yes else @@ -1383,14 +1412,14 @@ SAVE_LIBS=$LIBS LIBS="$LIBS $TERMLIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1423: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* termok=yes else @@ -1411,14 +1440,14 @@ SAVE_LIBS=$LIBS LIBS="$LIBS $TERMLIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1451: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* termok=yes else @@ -1445,17 +1474,17 @@ do ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` echo $ac_n "checking for $ac_hdr""... 
$ac_c" 1>&6 -echo "configure:1449: checking for $ac_hdr" >&5 +echo "configure:1478: checking for $ac_hdr" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1459: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1488: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out` if test -z "$ac_err"; then rm -rf conftest* @@ -1483,12 +1512,12 @@ echo $ac_n "checking for ANSI C header files""... $ac_c" 1>&6 -echo "configure:1487: checking for ANSI C header files" >&5 +echo "configure:1516: checking for ANSI C header files" >&5 if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #include @@ -1496,7 +1525,7 @@ #include EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1500: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1529: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out` if test -z "$ac_err"; then rm -rf conftest* @@ -1513,7 +1542,7 @@ if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat > conftest.$ac_ext < EOF @@ -1531,7 +1560,7 @@ if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat > conftest.$ac_ext < EOF @@ -1552,7 +1581,7 @@ : else cat > conftest.$ac_ext < #define ISLOWER(c) ('a' <= (c) && (c) <= 'z') @@ -1563,7 +1592,7 @@ exit (0); } EOF -if { (eval echo configure:1567: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null +if { (eval echo configure:1596: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null then : else @@ -1587,12 +1616,12 @@ fi echo $ac_n "checking for off_t""... 
$ac_c" 1>&6 -echo "configure:1591: checking for off_t" >&5 +echo "configure:1620: checking for off_t" >&5 if eval "test \"`echo '$''{'ac_cv_type_off_t'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #if STDC_HEADERS @@ -1620,16 +1649,16 @@ fi echo $ac_n "checking for void""... $ac_c" 1>&6 -echo "configure:1624: checking for void" >&5 +echo "configure:1653: checking for void" >&5 cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1662: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* echo "$ac_t""yes" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_VOID 1 @@ -1643,16 +1672,16 @@ fi rm -f conftest* echo $ac_n "checking for const""... $ac_c" 1>&6 -echo "configure:1647: checking for const" >&5 +echo "configure:1676: checking for const" >&5 cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1685: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* echo "$ac_t""yes" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_CONST 1 @@ -1666,16 +1695,16 @@ fi rm -f conftest* echo $ac_n "checking for time_t""... $ac_c" 1>&6 -echo "configure:1670: checking for time_t" >&5 +echo "configure:1699: checking for time_t" >&5 cat > conftest.$ac_ext < int main() { time_t t = 0; ; return 0; } EOF -if { (eval echo configure:1679: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1708: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* echo "$ac_t""yes" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_TIME_T 1 @@ -1690,12 +1719,12 @@ rm -f conftest* echo $ac_n "checking return type of signal handlers""... 
$ac_c" 1>&6 -echo "configure:1694: checking return type of signal handlers" >&5 +echo "configure:1723: checking return type of signal handlers" >&5 if eval "test \"`echo '$''{'ac_cv_type_signal'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #include @@ -1712,7 +1741,7 @@ int i; ; return 0; } EOF -if { (eval echo configure:1716: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then +if { (eval echo configure:1745: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_type_signal=void else @@ -1733,12 +1762,12 @@ for ac_func in memcpy popen _setjmp sigprocmask sigsetmask stat strchr strstr system do echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:1737: checking for $ac_func" >&5 +echo "configure:1766: checking for $ac_func" >&5 if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1794: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_func_$ac_func=yes" else @@ -1787,12 +1816,12 @@ echo $ac_n "checking for tcgetattr""... $ac_c" 1>&6 -echo "configure:1791: checking for tcgetattr" >&5 +echo "configure:1820: checking for tcgetattr" >&5 if eval "test \"`echo '$''{'ac_cv_func_tcgetattr'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1848: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_func_tcgetattr=yes" else @@ -1839,9 +1868,9 @@ echo $ac_n "checking for fileno""... 
$ac_c" 1>&6 -echo "configure:1843: checking for fileno" >&5 +echo "configure:1872: checking for fileno" >&5 cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1884: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""yes" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_FILENO 1 @@ -1866,9 +1895,9 @@ rm -f conftest* echo $ac_n "checking for strerror""... $ac_c" 1>&6 -echo "configure:1870: checking for strerror" >&5 +echo "configure:1899: checking for strerror" >&5 cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1917: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""yes" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_STRERROR 1 @@ -1899,16 +1928,16 @@ rm -f conftest* echo $ac_n "checking for sys_errlist""... $ac_c" 1>&6 -echo "configure:1903: checking for sys_errlist" >&5 +echo "configure:1932: checking for sys_errlist" >&5 cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1941: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""yes" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_SYS_ERRLIST 1 @@ -1923,9 +1952,9 @@ rm -f conftest* echo $ac_n "checking for sigset_t""... $ac_c" 1>&6 -echo "configure:1927: checking for sigset_t" >&5 +echo "configure:1956: checking for sigset_t" >&5 cat > conftest.$ac_ext < @@ -1934,7 +1963,7 @@ sigset_t s; s = 0; ; return 0; } EOF -if { (eval echo configure:1938: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1967: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""yes" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_SIGSET_T 1 @@ -1949,9 +1978,9 @@ rm -f conftest* echo $ac_n "checking for sigemptyset""... 
$ac_c" 1>&6 -echo "configure:1953: checking for sigemptyset" >&5 +echo "configure:1982: checking for sigemptyset" >&5 cat > conftest.$ac_ext < @@ -1960,7 +1989,7 @@ sigset_t s; sigemptyset(&s); ; return 0; } EOF -if { (eval echo configure:1964: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:1993: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""yes" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_SIGEMPTYSET 1 @@ -1976,9 +2005,9 @@ have_errno=no echo $ac_n "checking for errno""... $ac_c" 1>&6 -echo "configure:1980: checking for errno" >&5 +echo "configure:2009: checking for errno" >&5 cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:2021: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""yes - in errno.h" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_ERRNO 1 @@ -2001,7 +2030,7 @@ rm -f conftest* if test $have_errno = no; then cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:2044: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""yes - must define" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_ERRNO 1 @@ -2030,9 +2059,9 @@ fi echo $ac_n "checking for locale""... $ac_c" 1>&6 -echo "configure:2034: checking for locale" >&5 +echo "configure:2063: checking for locale" >&5 cat > conftest.$ac_ext < #include @@ -2040,7 +2069,7 @@ setlocale(LC_CTYPE,""); isprint(0); iscntrl(0); ; return 0; } EOF -if { (eval echo configure:2044: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:2073: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""yes" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_LOCALE 1 @@ -2054,9 +2083,9 @@ fi rm -f conftest* echo $ac_n "checking for ctype functions""... 
$ac_c" 1>&6 -echo "configure:2058: checking for ctype functions" >&5 +echo "configure:2087: checking for ctype functions" >&5 cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:2099: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""yes" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_UPPER_LOWER 1 @@ -2082,9 +2111,9 @@ have_ospeed=no echo $ac_n "checking termcap for ospeed""... $ac_c" 1>&6 -echo "configure:2086: checking termcap for ospeed" >&5 +echo "configure:2115: checking termcap for ospeed" >&5 cat > conftest.$ac_ext < @@ -2098,7 +2127,7 @@ ospeed = 0; ; return 0; } EOF -if { (eval echo configure:2102: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:2131: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""yes - in termcap.h" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_OSPEED 1 @@ -2111,14 +2140,14 @@ rm -f conftest* if test $have_ospeed = no; then cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:2151: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""yes - must define" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_OSPEED 1 @@ -2136,10 +2165,39 @@ rm -f conftest* fi +if test $have_regex_cs != no; then +cat >> confdefs.h <<\EOF +#define CS_REGEX 1 +EOF + +have_regex=no +if test $have_regex = no && test -f ${srcdir}/regex_cs.c; then +echo "$ac_t""using POSIX regcomp_cs -- local source" 1>&6; cat >> confdefs.h <<\EOF +#define HAVE_POSIX_REGCOMP_CS 1 +EOF + REGEX_O='regex_cs.$(O)' have_regex=yes +fi +if test $have_regex = no && test -f ${srcdir}/regexp_cs.c; then +echo "$ac_t""using V8 regcomp_cs -- local source" 1>&6; cat >> confdefs.h <<\EOF +#define HAVE_V8_REGCOMP_CS 1 +EOF + REGEX_O='regexp_cs.$(O)' have_regex=yes +fi +if test $have_regex = no; then +echo 
"$ac_t""cannot find regular expression library with character set detection" 1>&6; cat >> confdefs.h <<\EOF +#define NO_REGEX 1 +EOF + +fi +else +cat >> confdefs.h <<\EOF +#define CS_REGEX 0 +EOF + have_regex=no have_posix_regex=unknown echo $ac_n "checking for regcomp""... $ac_c" 1>&6 -echo "configure:2143: checking for regcomp" >&5 +echo "configure:2201: checking for regcomp" >&5 WANT_REGEX=auto # Check whether --with-regex or --without-regex was given. @@ -2155,7 +2213,7 @@ have_posix_regex=unknown else cat > conftest.$ac_ext < @@ -2170,7 +2228,7 @@ #endif exit(0); } EOF -if { (eval echo configure:2174: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null +if { (eval echo configure:2232: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null then have_posix_regex=yes else @@ -2191,7 +2249,7 @@ have_regex=yes elif test $have_posix_regex = unknown; then cat > conftest.$ac_ext < @@ -2200,7 +2258,7 @@ regex_t *r; regfree(r); ; return 0; } EOF -if { (eval echo configure:2204: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:2262: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""using POSIX regcomp" 1>&6 cat >> confdefs.h <<\EOF @@ -2221,7 +2279,7 @@ if test $have_regex = no; then if test $WANT_REGEX = auto -o $WANT_REGEX = pcre; then echo $ac_n "checking for pcre_compile in -lpcre""... 
$ac_c" 1>&6 -echo "configure:2225: checking for pcre_compile in -lpcre" >&5 +echo "configure:2283: checking for pcre_compile in -lpcre" >&5 ac_lib_var=`echo pcre'_'pcre_compile | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -2229,7 +2287,7 @@ ac_save_LIBS="$LIBS" LIBS="-lpcre $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:2302: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else @@ -2269,12 +2327,12 @@ if test $have_regex = no; then if test $WANT_REGEX = auto -o $WANT_REGEX = regcmp; then echo $ac_n "checking for regcmp""... $ac_c" 1>&6 -echo "configure:2273: checking for regcmp" >&5 +echo "configure:2331: checking for regcmp" >&5 if eval "test \"`echo '$''{'ac_cv_func_regcmp'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:2359: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_func_regcmp=yes" else @@ -2325,7 +2383,7 @@ if test $have_regex = no; then if test $WANT_REGEX = auto -o $WANT_REGEX = regcomp; then cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:2395: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* echo "$ac_t""using V8 regcomp" 1>&6; cat >> confdefs.h <<\EOF #define HAVE_V8_REGCOMP 1 @@ -2362,12 +2420,12 @@ if test $have_regex = no; then if test $WANT_REGEX = auto -o $WANT_REGEX = re_comp; then echo "$ac_t""using re_comp" 1>&6; echo $ac_n "checking for re_comp""... 
$ac_c" 1>&6 -echo "configure:2366: checking for re_comp" >&5 +echo "configure:2424: checking for re_comp" >&5 if eval "test \"`echo '$''{'ac_cv_func_re_comp'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest; then +if { (eval echo configure:2452: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then rm -rf conftest* eval "ac_cv_func_re_comp=yes" else @@ -2421,6 +2479,56 @@ EOF fi +fi + +case $msb_enable in +yes) echo "$ac_t""enable the MSB of non ASCII characters" 1>&6 + cat >> confdefs.h <<\EOF +#define MSB_ENABLE 1 +EOF +;; +no) echo "$ac_t""disable the MSB of non ASCII characters" 1>&6 + cat >> confdefs.h <<\EOF +#define MSB_ENABLE 0 +EOF +;; +unknown) + if test $have_regex_cs = no; then + echo "$ac_t""decide to enable the MSB of non ASCII characters" 1>&6 + cat >> confdefs.h <<\EOF +#define MSB_ENABLE 1 +EOF + + else + echo "$ac_t""decide to disable the MSB of non ASCII characters" 1>&6 + cat >> confdefs.h <<\EOF +#define MSB_ENABLE 0 +EOF + + fi;; +esac + +case $jisx0213_enable in +unknown|yes) + echo "$ac_t""enable the JIS X 0213 mapping for SJIS and UJIS" 1>&6 + cat >> confdefs.h <<\EOF +#define SJIS0213 1 +EOF + + cat >> confdefs.h <<\EOF +#define UJIS0213 1 +EOF +;; +no) echo "$ac_t""disable the JIS X 0213 mapping for SJIS and UJIS" 1>&6 + cat >> confdefs.h <<\EOF +#define SJIS0213 0 +EOF + + cat >> confdefs.h <<\EOF +#define UJIS0213 0 +EOF +;; +esac # Check whether --with-editor or --without-editor was given. if test "${with_editor+set}" = set; then Index: less/decode.c diff -u less/decode.c:1.1.1.16 less/decode.c:1.22 --- less/decode.c:1.1.1.16 Sun Oct 29 02:19:10 2000 +++ less/decode.c Tue Oct 31 04:30:39 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. 
+ * + * You may distribute under the terms of the Less License. + */ /* @@ -150,6 +156,10 @@ 'v',0, A_VISUAL, '!',0, A_SHELL, '+',0, A_FIRSTCMD, + +#if JAPANESE + '@',0, A_ROT_RCODESET, +#endif 'H',0, A_HELP, 'h',0, A_HELP, Index: less/defines.ds diff -u less/defines.ds:1.1.1.5 less/defines.ds:1.15 --- less/defines.ds:1.1.1.5 Sun Oct 29 02:19:18 2000 +++ less/defines.ds Tue Oct 31 04:30:39 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines reserved. + * + * You may distribute under the terms of the Less License. + */ /* DOS definition file for less. */ @@ -175,6 +181,46 @@ #define HAVE_DUP 1 /* + * COMPRESS is 1 if you want to read compressed file. + */ +#define COMPRESS 1 + +/* + * ISO is 1 if you want to read code which contain many character sets + * by using iso 2022 code extension techniques. + */ +#define ISO 1 + +/* + * JAPANESE is 1 if you want to read several KANJI code. + */ +#define JAPANESE 1 +#if JAPANESE && !ISO +# define ISO 1 +#endif + +/* + * DEFCHARSET is name of the default character set. + * This used as LESSCHARSET environment variable if user did not define it. + * The value of this must equal one of the available value which can use as + * LESSCHARSET environment variable + */ +#define DEFCHARSET "iso8" + +/* + * DEFPLANESET is name of the default plane set of iso 2022 extension. + * This used as LESSPLANESET environment variable if user did not define it. + * The value of this must equal one of the available value which can use as + * LESSPLANESET environment variable or any escape sequence for setting up. + */ +#define DEFPLANESET "ctext" + +/* + * DEFKEYCHARSET is name of the default character set for keyboard input. + */ +#define DEFKEYCHARSET "sjis-iso7" + +/* * Sizes of various buffers.
*/ #define CMDBUF_SIZE 512 /* Buffer for multichar commands */ @@ -200,13 +246,19 @@ /* * Regular expression library. + * Some are extended to detect character set. * Define exactly one of the following to be 1: + * HAVE_POSIX_REGCOMP_CS: extended POSIX regcomp() and regex.h + * HAVE_V8_REGCOMP_CS: extended Henry Spencer V8 regcomp() and regexp.h * HAVE_POSIX_REGCOMP: POSIX regcomp() and regex.h * HAVE_RE_COMP: BSD re_comp() * HAVE_REGCMP: System V regcmp() * HAVE_V8_REGCOMP: Henry Spencer V8 regcomp() and regexp.h - * NO_REGEX: pattern matching is supported, but without metacharacters. + * NO_REGEX: pattern matching with character set detection is supported, + * but without metacharacters. */ +/* #undef HAVE_POSIX_REGCOMP_CS */ +/* #undef HAVE_V8_REGCOMP_CS */ /* #undef HAVE_POSIX_REGCOMP */ /* #undef HAVE_RE_COMP */ /* #undef HAVE_REGCMP */ @@ -215,6 +267,27 @@ #define HAVE_POSIX_REGCOMP 1 #else #define NO_REGEX 1 +#endif + +/* + * Does regular expression library detect character set? + */ +#if NO_REGEX +#define CS_REGEX 1 +#else +#define CS_REGEX 0 +#endif + +/* + * Define MSB_ENABLE if you want to enable a MSB of non ASCII characters. + * It will be help if your library has not ability to detect a character set. + * If it has ability, this should be 0. If your system was not worked well, + * try to change following into 0 by hand. + */ +#if CS_REGEX +#define MSB_ENABLE 0 +#else +#define MSB_ENABLE 1 #endif /* Define HAVE_VOID if your compiler supports the "void" type. */ Index: less/defines.h.in diff -u less/defines.h.in:1.1.1.14 less/defines.h.in:1.33 --- less/defines.h.in:1.1.1.14 Sun Oct 29 02:19:15 2000 +++ less/defines.h.in Wed Nov 22 03:43:25 2000 @@ -159,6 +159,46 @@ #define HAVE_DUP 1 /* + * COMPRESS is 1 if you want to read compressed file. + */ +#define COMPRESS 1 + +/* + * ISO is 1 if you want to read code which contain many character sets + * by using iso 2022 code extension techniques.
+ */ +#define ISO 1 + +/* + * JAPANESE is 1 if you want to read several KANJI code. + */ +#define JAPANESE 1 +#if JAPANESE && !ISO +# define ISO 1 +#endif + +/* + * DEFCHARSET is name of the default character set. + * This used as LESSCHARSET environment variable if user did not define it. + * The value of this must equal one of the available value which can use as + * LESSCHARSET environment variable + */ +#define DEFCHARSET "iso8" + +/* + * DEFPLANESET is name of the default plane set of iso 2022 extension. + * This used as LESSPLANESET environment variable if user did not define it. + * The value of this must equal one of the available value which can use as + * LESSPLANESET environment variable or any escape sequence for setting up. + */ +#define DEFPLANESET "ctext" + +/* + * DEFKEYCHARSET is name of the default character set for keyboard input. + */ +#define DEFKEYCHARSET "japanese-iso7" + +/* * Sizes of various buffers. */ #define CMDBUF_SIZE 512 /* Buffer for multichar commands */ @@ -186,14 +226,20 @@ /* * Regular expression library. + * Some are extended to detect character set. * Define exactly one of the following to be 1: + * HAVE_POSIX_REGCOMP_CS: extended POSIX regcomp() and regex.h + * HAVE_V8_REGCOMP_CS: extended Henry Spencer V8 regcomp() and regexp.h * HAVE_POSIX_REGCOMP: POSIX regcomp() and regex.h * HAVE_PCRE: PCRE (Perl-compatible regular expression) library * HAVE_RE_COMP: BSD re_comp() * HAVE_REGCMP: System V regcmp() * HAVE_V8_REGCOMP: Henry Spencer V8 regcomp() and regexp.h - * NO_REGEX: pattern matching is supported, but without metacharacters. + * NO_REGEX: pattern matching with character set detection is supported, + * but without metacharacters. */ +#undef HAVE_POSIX_REGCOMP_CS +#undef HAVE_V8_REGCOMP_CS #undef HAVE_POSIX_REGCOMP #undef HAVE_PCRE #undef HAVE_RE_COMP @@ -201,6 +247,26 @@ #undef HAVE_V8_REGCOMP #undef NO_REGEX #undef HAVE_REGEXEC2 + +/* + * Does regular expression library detect character set?
+ */ +#undef CS_REGEX + +/* + * Define MSB_ENABLE if you want to enable a MSB of non ASCII characters. + * It will be help if your library has not ability to detect a character set. + * If it has ability, this should be 0. If your system was not worked well, + * try to change following into 0 by hand. + */ +#undef MSB_ENABLE + +/* + * Define SJIS0213 or UJIS0213 if you want to enable a JIS X 0213:2000 + * mapping of SJIS and UJIS encoding. + */ +#undef SJIS0213 +#undef UJIS0213 /* Define HAVE_VOID if your compiler supports the "void" type. */ #undef HAVE_VOID Index: less/defines.h.top diff -u less/defines.h.top:1.1.1.10 less/defines.h.top:1.27 --- less/defines.h.top:1.1.1.10 Sun Oct 29 02:03:37 2000 +++ less/defines.h.top Sun Oct 29 02:17:18 2000 @@ -158,6 +158,46 @@ #define HAVE_DUP 1 /* + * COMPRESS is 1 if you want to read compressed file. + */ +#define COMPRESS 1 + +/* + * ISO is 1 if you want to read code which contain many character sets + * by using iso 2022 code extension techniques. + */ +#define ISO 1 + +/* + * JAPANESE is 1 if you want to read several KANJI code. + */ +#define JAPANESE 1 +#if JAPANESE && !ISO +# define ISO 1 +#endif + +/* + * DEFCHARSET is name of the default character set. + * This used as LESSCHARSET environment variable if user did not define it. + * The value of this must equal one of the available value which can use as + * LESSCHARSET environment variable + */ +#define DEFCHARSET "iso8" + +/* + * DEFPLANESET is name of the default plane set of iso 2022 extension. + * This used as LESSPLANESET environment variable if user did not define it. + * The value of this must equal one of the available value which can use as + * LESSPLANESET environment variable or any escape sequence for setting up. + */ +#define DEFPLANESET "ctext" + +/* + * DEFKEYCHARSET is name of the default character set for keyboard input. + */ +#define DEFKEYCHARSET "japanese-iso7" + +/* * Sizes of various buffers.
*/ #define CMDBUF_SIZE 512 /* Buffer for multichar commands */ Index: less/defines.o2 diff -u less/defines.o2:1.1.1.6 less/defines.o2:1.16 --- less/defines.o2:1.1.1.6 Sun Oct 29 02:19:18 2000 +++ less/defines.o2 Tue Oct 31 04:30:39 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines reserved. + * + * You may distribute under the terms of the Less License. + */ /* OS/2 definition file for less. */ @@ -156,6 +162,46 @@ #define HAVE_DUP 1 /* + * COMPRESS is 1 if you want to read compressed file. + */ +#define COMPRESS 1 + +/* + * ISO is 1 if you want to read code which contain many character sets + * by using iso 2022 code extension techniques. + */ +#define ISO 1 + +/* + * JAPANESE is 1 if you want to read several KANJI code. + */ +#define JAPANESE 1 +#if JAPANESE && !ISO +# define ISO 1 +#endif + +/* + * DEFCHARSET is name of the default character set. + * This used as LESSCHARSET environment variable if user did not define it. + * The value of this must equal one of the available value which can use as + * LESSCHARSET environment variable + */ +#define DEFCHARSET "iso8" + +/* + * DEFPLANESET is name of the default plane set of iso 2022 extension. + * This used as LESSPLANESET environment variable if user did not define it. + * The value of this must equal one of the available value which can use as + * LESSPLANESET environment variable or any escape sequence for setting up. + */ +#define DEFPLANESET "ctext" + +/* + * DEFKEYCHARSET is name of the default character set for keyboard input. + */ +#define DEFKEYCHARSET "japanese-iso7" + +/* * Sizes of various buffers. */ #define CMDBUF_SIZE 512 /* Buffer for multichar commands */ @@ -179,19 +225,38 @@ /* * Regular expression library. + * Some are extended to detect character set.
* Define exactly one of the following to be 1: + * HAVE_POSIX_REGCOMP_CS: extended POSIX regcomp() and regex.h + * HAVE_V8_REGCOMP_CS: extended Henry Spencer V8 regcomp() and regexp.h * HAVE_POSIX_REGCOMP: POSIX regcomp() and regex.h * HAVE_RE_COMP: BSD re_comp() * HAVE_REGCMP: System V regcmp() * HAVE_V8_REGCOMP: Henry Spencer V8 regcomp() and regexp.h - * NO_REGEX: pattern matching is supported, but without metacharacters. + * NO_REGEX: pattern matching with character set detection is supported, + * but without metacharacters. */ +/* #undef HAVE_POSIX_REGCOMP_CS */ +/* #undef HAVE_V8_REGCOMP_CS */ /* #undef HAVE_POSIX_REGCOMP */ /* #undef HAVE_RE_COMP */ /* #undef HAVE_REGCMP */ #define HAVE_V8_REGCOMP 1 /* #undef NO_REGEX */ #define HAVE_REGEXEC2 1 + +/* + * Does regular expression library detect character set? + */ +#define CS_REGEX 0 + +/* + * Define MSB_ENABLE if you want to enable a MSB of non ASCII characters. + * It will be help if your library has not ability to detect a character set. + * If it has ability, this should be 0. If your system was not worked well, + * try to change following into 0 by hand. + */ +#define MSB_ENABLE 1 /* Define HAVE_VOID if your compiler supports the "void" type. */ #define HAVE_VOID 1 Index: less/defines.o9 diff -u less/defines.o9:1.1.1.8 less/defines.o9:1.18 --- less/defines.o9:1.1.1.8 Sun Oct 29 02:19:18 2000 +++ less/defines.o9 Tue Oct 31 04:30:39 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines reserved. + * + * You may distribute under the terms of the Less License. + */ /* OS/9 definition file for less. */ @@ -169,6 +175,46 @@ #define HAVE_DUP 0 /* + * COMPRESS is 1 if you want to read compressed file.
+ */ +#define COMPRESS 1 + +/* + * ISO is 1 if you want to read code which contain many character sets + * by using iso 2022 code extension techniques. + */ +#define ISO 1 + +/* + * JAPANESE is 1 if you want to read several KANJI code. + */ +#define JAPANESE 1 +#if JAPANESE && !ISO +# define ISO 1 +#endif + +/* + * DEFCHARSET is name of the default character set. + * This used as LESSCHARSET environment variable if user did not define it. + * The value of this must equal one of the available value which can use as + * LESSCHARSET environment variable + */ +#define DEFCHARSET "iso8" + +/* + * DEFPLANESET is name of the default plane set of iso 2022 extension. + * This used as LESSPLANESET environment variable if user did not define it. + * The value of this must equal one of the available value which can use as + * LESSPLANESET environment variable or any escape sequence for setting up. + */ +#define DEFPLANESET "ctext" + +/* + * DEFKEYCHARSET is name of the default character set for keyboard input. + */ +#define DEFKEYCHARSET "japanese-iso7" + +/* * Sizes of various buffers. */ #define CMDBUF_SIZE 512 /* Buffer for multichar commands */ @@ -196,19 +242,38 @@ /* * Regular expression library. + * Some are extended to detect character set. * Define exactly one of the following to be 1: + * HAVE_POSIX_REGCOMP_CS: extended POSIX regcomp() and regex.h + * HAVE_V8_REGCOMP_CS: extended Henry Spencer V8 regcomp() and regexp.h * HAVE_POSIX_REGCOMP: POSIX regcomp() and regex.h * HAVE_RE_COMP: BSD re_comp() * HAVE_REGCMP: System V regcmp() * HAVE_V8_REGCOMP: Henry Spencer V8 regcomp() and regexp.h - * NO_REGEX: pattern matching is supported, but without metacharacters. + * NO_REGEX: pattern matching with character set detection is supported, + * but without metacharacters.
*/ +#define HAVE_POSIX_REGCOMP_CS 0 +#define HAVE_V8_REGCOMP_CS 0 #define HAVE_POSIX_REGCOMP 0 #define HAVE_RE_COMP 0 #define HAVE_REGCMP 0 #define HAVE_V8_REGCOMP 1 #define NO_REGEX 0 #define HAVE_REGEXEC2 1 + +/* + * Does regular expression library detect character set? + */ +#define CS_REGEX 0 + +/* + * Define MSB_ENABLE if you want to enable a MSB of non ASCII characters. + * It will be help if your library has not ability to detect a character set. + * If it has ability, this should be 0. If your system was not worked well, + * try to change following into 0 by hand. + */ +#define MSB_ENABLE 1 /* Define HAVE_VOID if your compiler supports the "void" type. */ #define HAVE_VOID 1 Index: less/defines.wn diff -u less/defines.wn:1.1.1.6 less/defines.wn:1.16 --- less/defines.wn:1.1.1.6 Sun Oct 29 02:19:18 2000 +++ less/defines.wn Tue Oct 31 04:30:39 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines reserved. + * + * You may distribute under the terms of the Less License. + */ /* Windows definition file for less. */ @@ -157,6 +163,46 @@ #define HAVE_DUP 1 /* + * COMPRESS is 1 if you want to read compressed file. + */ +#define COMPRESS 1 + +/* + * ISO is 1 if you want to read code which contain many character sets + * by using iso 2022 code extension techniques. + */ +#define ISO 1 + +/* + * JAPANESE is 1 if you want to read several KANJI code. + */ +#define JAPANESE 1 +#if JAPANESE && !ISO +# define ISO 1 +#endif + +/* + * DEFCHARSET is name of the default character set. + * This used as LESSCHARSET environment variable if user did not define it. + * The value of this must equal one of the available value which can use as + * LESSCHARSET environment variable + */ +#define DEFCHARSET "iso8" + +/* + * DEFPLANESET is name of the default plane set of iso 2022 extension.
+ * This used as LESSPLANESET environment variable if user did not define it. + * The value of this must equal one of the available value which can use as + * LESSPLANESET environment variable or any escape sequence for setting up. + */ +#define DEFPLANESET "ctext" + +/* + * DEFKEYCHARSET is name of the default character set for keyboard input. + */ +#define DEFKEYCHARSET "sjis-iso7" + +/* * Sizes of various buffers. */ #define CMDBUF_SIZE 512 /* Buffer for multichar commands */ @@ -180,19 +226,38 @@ /* * Regular expression library. + * Some are extended to detect character set. * Define exactly one of the following to be 1: + * HAVE_POSIX_REGCOMP_CS: extended POSIX regcomp() and regex.h + * HAVE_V8_REGCOMP_CS: extended Henry Spencer V8 regcomp() and regexp.h * HAVE_POSIX_REGCOMP: POSIX regcomp() and regex.h * HAVE_RE_COMP: BSD re_comp() * HAVE_REGCMP: System V regcmp() * HAVE_V8_REGCOMP: Henry Spencer V8 regcomp() and regexp.h - * NO_REGEX: pattern matching is supported, but without metacharacters. + * NO_REGEX: pattern matching with character set detection is supported, + * but without metacharacters. */ +/* #undef HAVE_POSIX_REGCOMP_CS */ +/* #undef HAVE_V8_REGCOMP_CS */ /* #undef HAVE_POSIX_REGCOMP */ /* #undef HAVE_RE_COMP */ /* #undef HAVE_REGCMP */ #define HAVE_V8_REGCOMP 1 /* #undef NO_REGEX */ #define HAVE_REGEXEC2 1 + +/* + * Does regular expression library detect character set? + */ +#define CS_REGEX 0 + +/* + * Define MSB_ENABLE if you want to enable a MSB of non ASCII characters. + * It will be help if your library has not ability to detect a character set. + * If it has ability, this should be 0. If your system was not worked well, + * try to change following into 0 by hand. + */ +#define MSB_ENABLE 1 /* Define HAVE_VOID if your compiler supports the "void" type.
*/ #define HAVE_VOID 1 Index: less/edit.c diff -u less/edit.c:1.1.1.15 less/edit.c:1.26 --- less/edit.c:1.1.1.15 Sun Oct 29 02:03:32 2000 +++ less/edit.c Sun Oct 29 02:17:18 2000 @@ -350,6 +350,9 @@ curr_altpipe = alt_pipe; set_open(curr_ifile); /* File has been opened */ get_pos(curr_ifile, &initial_scrpos); +#if ISO + init_multi(get_mulbuf(curr_ifile)); +#endif new_file = TRUE; ch_init(f, chflags); @@ -381,7 +384,7 @@ #if HILITE_SEARCH clr_hilite(); #endif - cmd_addhist(ml_examine, filename); + cmd_addhist(ml_examine, filename, NULL); if (no_display && errmsgs > 0) { /* Index: less/filename.c diff -u less/filename.c:1.1.1.14 less/filename.c:1.24 --- less/filename.c:1.1.1.14 Sun Oct 29 02:03:32 2000 +++ less/filename.c Tue Oct 31 04:30:39 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of compress file treating routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -758,7 +764,31 @@ return (gfilename); } +#if COMPRESS /* + * Map a compressed-file suffix (.Z/.z, .gz/.GZ, .bz2/.BZ2) to a LESSOPEN-style pipe command; return NULL if none matches or the name is shorter than the suffix (the length guards keep the index from going before the start of the string, e.g. for "-"). + */ + static char* +easy_lessopen_for_compressedfile(filename) + register char *filename; +{ + register int length = strlen(filename); + + if (length >= 2 && (strcmp(".Z", &filename[length - 2]) == 0 || + strcmp(".z", &filename[length - 2]) == 0)) + return ("| zcat %s"); + else if (length >= 3 && (strcmp(".gz", &filename[length - 3]) == 0 || + strcmp(".GZ", &filename[length - 3]) == 0)) + return ("| gzip -cd %s"); + else if (length >= 4 && (strcmp(".bz2", &filename[length - 4]) == 0 || + strcmp(".BZ2", &filename[length - 4]) == 0)) + return ("| bzip2 -cd %s"); + else + return (NULL); +} +#endif + +/* + * See if we should open a "replacement file" * instead of the file we're about to open.
*/ @@ -783,6 +813,9 @@ return (NULL); ch_ungetchar(-1); if ((lessopen = lgetenv("LESSOPEN")) == NULL) +#if COMPRESS + if ((lessopen = easy_lessopen_for_compressedfile(filename)) == NULL) +#endif return (NULL); if (strcmp(filename, "-") == 0) return (NULL); Index: less/funcs.h diff -u less/funcs.h:1.1.1.16 less/funcs.h:1.37 --- less/funcs.h:1.1.1.16 Sun Oct 29 02:19:14 2000 +++ less/funcs.h Sun Oct 29 02:34:41 2000 @@ -49,9 +49,13 @@ public void ch_close (); public int ch_getflags (); public void ch_dump (); + public CODESET left_codeset_of_charset (); + public CODESET right_codeset_of_charset (); + public void init_planeset (); public void init_charset (); public int binary_char (); public int control_char (); + public void change_control_char (); public char * prchar (); public void cmd_reset (); public void clear_cmd (); @@ -63,6 +67,7 @@ public int cmd_char (); public int cmd_int (); public char * get_cmdbuf (); + public CHARSET * get_cmdcs (); public int in_mca (); public void dispversion (); public int getcc (); @@ -124,6 +129,7 @@ public void store_pos (); public void get_pos (); public void set_open (); + public MULBUF * get_mulbuf (); public int opened (); public void hold_ifile (); public int held_ifile (); @@ -167,12 +173,14 @@ public void opt__O (); public void opt_l (); public void opt_k (); + public void opt_K (); public void opt_t (); public void opt__T (); public void opt_p (); public void opt__P (); public void opt_b (); public void opt_i (); + public void opt_Z (); public void opt__V (); public void opt_D (); public void opt_quote (); @@ -198,8 +206,13 @@ public int isatty (); public void put_line (); public void flush (); + public int putchr_raw (); + public void putstr_raw (); public int putchr (); public void putstr (); + public int putmchr (); + public int putmchrs (); + public void putmstr (); public void get_return (); public void error (); public void ierror (); Index: less/help.c diff -u less/help.c:1.1.1.11 less/help.c:1.14 --- 
less/help.c:1.1.1.11 Sun Oct 29 02:19:10 2000 +++ less/help.c Sun Oct 29 03:45:38 2000 @@ -85,6 +85,8 @@ ' ',' ',':','x',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','*',' ',' ','E','x','a','m','i','n','e',' ','t','h','e',' ','f','i','r','s','t',' ','(','o','r',' ','_','\b','N','-','t','h',')',' ','f','i','l','e',' ','f','r','o','m',' ','t','h','e',' ','c','o','m','m','a','n','d',' ','l','i','n','e','.','\n', ' ',' ',':','d',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','D','e','l','e','t','e',' ','t','h','e',' ','c','u','r','r','e','n','t',' ','f','i','l','e',' ','f','r','o','m',' ','t','h','e',' ','c','o','m','m','a','n','d',' ','l','i','n','e',' ','l','i','s','t','.','\n', ' ',' ','=',' ',' ','^','G',' ',' ',':','f',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','P','r','i','n','t',' ','c','u','r','r','e','n','t',' ','f','i','l','e',' ','n','a','m','e','.','\n', +' ',' ','@',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','R','o','t','a','t','e',' ','t','h','e',' ','c','o','d','e',' ','r','e','c','o','g','n','i','t','i','o','n',' ','m','e','t','h','o','d',' ','o','f',' ','t','h','e',' ','c','u','r','r','e','n','t',' ','f','i','l','e','.','\n', +' ',' ','V',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','P','r','i','n','t',' ','v','e','r','s','i','o','n',' ','n','u','m','b','e','r',' ','o','f',' ','"','l','e','s','s','"','.','\n', ' ','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','\n', '\n', ' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' 
','M','\b','M','I','\b','I','S','\b','S','C','\b','C','E','\b','E','L','\b','L','L','\b','L','A','\b','A','N','\b','N','E','\b','E','O','\b','O','U','\b','U','S','\b','S',' ','C','\b','C','O','\b','O','M','\b','M','M','\b','M','A','\b','A','N','\b','N','D','\b','D','S','\b','S','\n', @@ -144,6 +146,8 @@ ' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','D','i','s','p','l','a','y',' ','a',' ','s','t','a','t','u','s',' ','c','o','l','u','m','n',' ','a','t',' ','l','e','f','t',' ','e','d','g','e',' ','o','f',' ','s','c','r','e','e','n','.','\n', ' ',' ','-','k',' ','[','_','\b','f','_','\b','i','_','\b','l','_','\b','e',']',' ',' ','.',' ',' ','-','-','l','e','s','s','k','e','y','-','f','i','l','e','=','[','_','\b','f','_','\b','i','_','\b','l','_','\b','e',']','\n', ' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','U','s','e',' ','a',' ','l','e','s','s','k','e','y',' ','f','i','l','e','.','\n', +' ',' ','-','K',' ','[','_','\b','c','_','\b','h','_','\b','a','_','\b','r','_','\b','s','_','\b','e','_','\b','t',']',' ',' ','-','-','c','h','a','r','s','e','t','=','[','_','\b','c','_','\b','h','_','\b','a','_','\b','r','_','\b','s','_','\b','e','_','\b','t',']','\n', +' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','S','p','e','c','i','f','y',' ','c','h','a','r','a','c','t','e','r',' ','s','e','t','.','\n', ' ',' ','-','m',' ',' ','-','M',' ',' ','.','.','.','.',' ',' ','-','-','l','o','n','g','-','p','r','o','m','p','t',' ',' ','-','-','L','O','N','G','-','P','R','O','M','P','T','\n', ' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','S','e','t',' ','p','r','o','m','p','t',' ','s','t','y','l','e','.','\n', ' ',' ','-','n',' ',' ','-','N',' ',' ','.','.','.','.',' ',' ','-','-','l','i','n','e','-','n','u','m','b','e','r','s',' ',' ','-','-','L','I','N','E','-','N','U','M','B','E','R','S','\n', @@ -176,6 +180,8 @@ ' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' 
','H','i','g','h','l','i','g','h','t',' ','f','i','r','s','t',' ','n','e','w',' ','l','i','n','e',' ','a','f','t','e','r',' ','f','o','r','w','a','r','d','-','s','c','r','e','e','n','.','\n', ' ',' ','-','W',' ',' ','.','.','.','.','.','.','.','.',' ',' ','-','-','H','I','L','I','T','E','-','U','N','R','E','A','D','\n', ' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','H','i','g','h','l','i','g','h','t',' ','f','i','r','s','t',' ','n','e','w',' ','l','i','n','e',' ','a','f','t','e','r',' ','a','n','y',' ','f','o','r','w','a','r','d',' ','m','o','v','e','m','e','n','t','.','\n', +' ',' ','-',' ',' ',' ','.','.','.','.','.','.','.','.',' ',' ','-','-','m','a','r','k','-','w','r','o','n','g','-','c','h','a','r','\n', +'\t','\t',' ',' ','D','i','s','p','l','a','y',' ','m','a','r','k',' ','c','h','a','r','a','c','t','e','r',' ','i','n','s','t','e','a','d',' ','o','f',' ','w','r','o','n','g',' ','c','h','a','r','a','c','t','e','r','.','\n', ' ',' ','-','x',' ','[','_','\b','N',']',' ',' ','.','.','.','.',' ',' ','-','-','t','a','b','s','=','[','_','\b','N',']','\n', ' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','S','e','t',' ','t','a','b',' ','s','t','o','p','s','.','\n', ' ',' ','-','X',' ',' ','.','.','.','.','.','.','.','.',' ',' ','-','-','n','o','-','i','n','i','t','\n', @@ -184,6 +190,8 @@ ' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','F','o','r','w','a','r','d',' ','s','c','r','o','l','l',' ','l','i','m','i','t','.','\n', ' ',' ','-','z',' ','[','_','\b','N',']',' ',' ','.','.','.','.',' ',' ','-','-','w','i','n','d','o','w','=','[','_','\b','N',']','\n', ' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','S','e','t',' ','s','i','z','e',' ','o','f',' ','w','i','n','d','o','w','.','\n', +' ',' ','-','Z',' ',' ','.','.','.','.','.','.','.','.',' ',' ','-','-','c','h','a','n','g','e','-','p','r','i','o','r','i','t','y','\n', +'\t','\t',' ',' ','G','i','v','e',' 
','p','r','i','o','r','i','t','y',' ','t','o',' ','t','h','e',' ','S','J','I','S',' ','o','v','e','r',' ','t','h','e',' ','U','J','I','S',' ','i','f',' ','y','o','u',' ','u','s','e',' ','J','a','p','a','n','e','s','e','.','\n', ' ',' ','-','"',' ','[','_','\b','c','[','_','\b','c',']',']',' ',' ','.',' ',' ','-','-','q','u','o','t','e','s','=','[','_','\b','c','[','_','\b','c',']',']','\n', ' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','S','e','t',' ','s','h','e','l','l',' ','q','u','o','t','e',' ','c','h','a','r','a','c','t','e','r','s','.','\n', ' ',' ','-','~',' ',' ','.','.','.','.','.','.','.','.',' ',' ','-','-','t','i','l','d','e','\n', Index: less/ifile.c diff -u less/ifile.c:1.1.1.12 less/ifile.c:1.21 --- less/ifile.c:1.1.1.12 Sun Oct 29 02:03:32 2000 +++ less/ifile.c Tue Oct 31 04:30:39 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -33,6 +39,9 @@ int h_hold; /* Hold count */ char h_opened; /* Has this ifile been opened? */ struct scrpos h_scrpos; /* Saved position within the file */ +#if ISO + MULBUF *h_mp; /* MULBUF for multi bytes character */ +#endif }; /* @@ -46,7 +55,11 @@ * Anchor for linked list. 
*/ static struct ifile anchor = { &anchor, &anchor, NULL, NULL, 0, 0, '\0', - { NULL_POSITION, 0 } }; + { NULL_POSITION, 0 } +#if ISO + ,NULL +#endif + }; static int ifiles = 0; static void @@ -119,6 +132,10 @@ p->h_opened = 0; p->h_hold = 0; p->h_filestate = NULL; +#if ISO + p->h_mp = new_multi(); + init_priority(p->h_mp); +#endif link_ifile(p, prev); return (p); } @@ -144,6 +161,9 @@ p = int_ifile(h); unlink_ifile(p); free(p->h_filename); +#if ISO + free(p->h_mp); +#endif free(p); } @@ -277,6 +297,20 @@ struct scrpos *scrpos; { *scrpos = int_ifile(ifile)->h_scrpos; +} + +/* + * Get the MULBUF associated with a ifile. + */ + public MULBUF * +get_mulbuf(ifile) + IFILE ifile; +{ +#if ISO + if (ifile != NULL) + return (int_ifile(ifile)->h_mp); +#endif + return (NULL); } /* Index: less/input.c diff -u less/input.c:1.1.1.10 less/input.c:1.25 --- less/input.c:1.1.1.10 Sun Oct 29 02:19:10 2000 +++ less/input.c Tue Oct 31 04:30:40 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -33,6 +39,7 @@ extern int hilite_search; extern int size_linebuf; #endif +extern IFILE curr_ifile; /* * Get the next line. @@ -73,7 +80,7 @@ return (NULL_POSITION); } - prewind(); + prewind(curr_pos); plinenum(curr_pos); (void) ch_seek(curr_pos); @@ -117,20 +124,33 @@ do { c = ch_forw_get(); +#if ISO + /* + * Parse all character for finding + * escape sequences. 
+ */ + parsing_multi(get_mulbuf(curr_ifile), + c); +#endif } while (c != '\n' && c != EOI); new_pos = ch_tell(); endline = TRUE; quit_if_one_screen = FALSE; } else { +#if ISO + new_pos = ch_tell() - + get_bufbytes(get_mulbuf(curr_ifile)); +#else new_pos = ch_tell() - 1; +#endif endline = FALSE; } break; } c = ch_forw_get(); } - pdone(endline); + pdone(endline, new_pos); if (squeeze && blankline) { @@ -265,7 +285,7 @@ endline = FALSE; loop: begin_new_pos = new_pos; - prewind(); + prewind(new_pos); plinenum(new_pos); (void) ch_seek(new_pos); @@ -295,15 +315,25 @@ endline = TRUE; quit_if_one_screen = FALSE; break; + } +#if ISO + for (c = get_bufbytes(get_mulbuf(curr_ifile)); + --c >= 0; ) + { + (void) ch_back_get(); + new_pos--; } - pdone(0); + pdone(0, new_pos); +#else + pdone(0, new_pos); (void) ch_back_get(); new_pos--; +#endif goto loop; } } while (new_pos < curr_pos); - pdone(endline); + pdone(endline, new_pos); return (begin_new_pos); } Index: less/less.h diff -u less/less.h:1.1.1.17 less/less.h:1.52 --- less/less.h:1.1.1.17 Sun Oct 29 02:19:15 2000 +++ less/less.h Mon Nov 20 02:05:56 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -149,7 +155,7 @@ /* * Flags for open() */ -#if MSDOS_COMPILER || OS2 +#if MSDOS_COMPILER || OS2 || __CYGWIN__ #define OPEN_READ (O_RDONLY|O_BINARY) #else #ifdef _OSK @@ -179,7 +185,7 @@ #if MSDOS_COMPILER==MSOFTC #define SET_BINARY(f) _setmode(f, _O_BINARY); #else -#if MSDOS_COMPILER +#if MSDOS_COMPILER || __CYGWIN__ #define SET_BINARY(f) setmode(f, O_BINARY) #else #define SET_BINARY(f) @@ -282,6 +288,12 @@ #define AT_BLINK (3) #define AT_INVIS (4) #define AT_STANDOUT (5) + +/* + * Defines for multi character set and code set. 
+ */ +#include "multi.h" + #if IS_EBCDIC_HOST /* Index: less/less.hlp diff -u less/less.hlp:1.1.1.12 less/less.hlp:1.16 --- less/less.hlp:1.1.1.12 Sun Oct 29 02:19:18 2000 +++ less/less.hlp Sun Oct 29 03:45:41 2000 @@ -82,6 +82,8 @@ :x * Examine the first (or _N-th) file from the command line. :d Delete the current file from the command line list. = ^G :f Print current file name. + @ Rotate the code recognition method of the current file. + V Print version number of "less". --------------------------------------------------------------------------- MMIISSCCEELLLLAANNEEOOUUSS CCOOMMMMAANNDDSS @@ -141,6 +143,8 @@ Display a status column at left edge of screen. -k [_f_i_l_e] . --lesskey-file=[_f_i_l_e] Use a lesskey file. + -K [_c_h_a_r_s_e_t] --charset=[_c_h_a_r_s_e_t] + Specify character set. -m -M .... --long-prompt --LONG-PROMPT Set prompt style. -n -N .... --line-numbers --LINE-NUMBERS @@ -173,6 +177,8 @@ Highlight first new line after forward-screen. -W ........ --HILITE-UNREAD Highlight first new line after any forward movement. + - ........ --mark-wrong-char + Display mark character instead of wrong character. -x [_N] .... --tabs=[_N] Set tab stops. -X ........ --no-init @@ -181,6 +187,8 @@ Forward scroll limit. -z [_N] .... --window=[_N] Set size of window. + -Z ........ --change-priority + Give priority to the SJIS over the UJIS if you use Japanese. -" [_c[_c]] . --quotes=[_c[_c]] Set shell quote characters. -~ ........ 
--tilde Index: less/less.nro diff -u less/less.nro:1.1.1.17 less/less.nro:1.46 --- less/less.nro:1.1.1.17 Sun Oct 29 02:19:15 2000 +++ less/less.nro Tue Oct 31 04:30:40 2000 @@ -10,12 +10,14 @@ .br .B "less --version" .br -.B "less [-[+]aBcCdeEfgGiImMnNqQrsSuUVwX]" +.B "less [-[+]aBcCdeEfgGiImMnNqQrsSuUVWwXZ]" .br .B " [-b \fIbufs\fP] [-h \fIlines\fP] [-j \fIline\fP] [-k \fIkeyfile\fP]" .br -.B " [-{oO} \fIlogfile\fP] [-p \fIpattern\fP] [-P \fIprompt\fP] [-t \fItag\fP]" +.B " [-K \fIcharacter set\fP] [-{oO} \fIlogfile\fP]" .br +.B " [-p \fIpattern\fP] [-P \fIprompt\fP] [-t \fItag\fP]" +.br .B " [-T \fItagsfile\fP] [-x \fItab\fP] [-y \fIlines\fP] [-[z] \fIlines\fP]" .br .B " [+[+]\fIcmd\fP] [--] [\fIfilename\fP]..." @@ -294,6 +296,13 @@ If possible, it also prints the length of the file, the number of lines in the file and the percent of the file above the last displayed line. +.IP "@" +Rotate the code recognition method of the current file. +Enhanced +.I less +has 7 methods for recognition, default (which is chosen +by the JLESSCHARSET environment variable), japanese, ujis, sjis, +iso8, noconv and none. .IP \- Followed by one of the command line option letters (see OPTIONS below), this will change the setting of that option @@ -390,6 +399,8 @@ .I less is running, via the "\-" command. .PP +Options are also taken from the environment variable "LESS" and "JLESS". +.PP Most options may be given in one of two forms: either a dash followed by a single letter, or two dashes followed by a long option name. @@ -421,8 +432,8 @@ percent signs in the options string by double percent signs. .sp The environment variable is parsed before the command line, -so command line options override the LESS environment variable. -If an option appears in the LESS variable, it can be reset +so command line options override the LESS and JLESS environment variables. 
+If an option appears in the LESS and JLESS variables, it can be reset to its default value on the command line by beginning the command line option with "\-+". .sp @@ -567,6 +578,11 @@ it is also used as a .I lesskey file. +.IP -K\fIcharset\fP +Causes +.I less +to use this charset instead of a charset defined in the JLESSCHARSET or +LESSCHARSET environment variable. .IP "-m or --long-prompt" Causes .I less @@ -622,11 +638,11 @@ .IP "-P\fIprompt\fP or --prompt=\fIprompt\fP" Provides a way to tailor the three prompt styles to your own preference. -This option would normally be put in the LESS environment -variable, rather than being typed in with each +This option would normally be put in the LESS and JLESS environment +variables, rather than being typed in with each .I less command. -Such an option must either be the last option in the LESS variable, +Such an option must either be the last option in the LESS and JLESS variables, or be terminated by a dollar sign. -Ps followed by a string changes the default (short) prompt to that string. @@ -738,6 +754,9 @@ .IP "-W or --HILITE-UNREAD" Like -w, but temporarily highlights the first new line after any forward movement command larger than one line. +.IP -XXX +Causes mark characters to be used to represent wrong characters. +By default, such wrong characters displayed as binary. .IP "-x\fIn\fP or --tabs=\fIn\fP" Sets tab stops every \fIn\fP positions. The default for \fIn\fP is 8. @@ -767,6 +786,10 @@ For example, if the screen is 24 lines, \fI-z-4\fP sets the scrolling window to 20 lines. If the screen is resized to 40 lines, the scrolling window automatically changes to 36 lines. +.IP -Z +Causes to give priority to the SJIS over the UJIS if a "japanese" was selected +by the JLESSCHARSET environment variable. The default value is +to give priority to the UJIS over the SJIS. .IP -"\fIcc\fP\ or\ --quotes=\fIcc\fP Changes the filename quoting character. 
This may be necessary if you are trying to name a file @@ -1066,8 +1089,15 @@ .PP A "character set" is simply a description of which characters are to be considered normal, control, and binary. -The LESSCHARSET environment variable may be used to select a character set. -Possible values for LESSCHARSET are: +The JLESSCHARSET and LESSCHARSET environment variables may be used to select +a character set. There is no difference between them in program of +.I less. +But I suppose you should use the JLESSCHARSET environment variable +because not enhanced +.I less +will make errors if you use enhanced character set in +your LESSCHARSET environment variable. +Possible values for them are: .IP ascii BS, TAB, NL, CR, and formfeed are control characters, all chars with values between 32 and 126 are normal, @@ -1089,6 +1119,151 @@ .IP utf-8 Selects the UTF-8 encoding of the ISO 10646 character set. .PP +And possible values for only JLESSCHARSET are: +.IP iso7 +Multi character sets with the ISO 2022 code extension techniques +in 7 bits are assumed. +Characters with values between 128 and 255 are treated as binary. +The level of implementation of +.I Less +is level 3 of ISO 2022. +.IP iso8 +Multi character sets with the ISO 2022 code extension techniques +in 8 bits are assumed. +The level of implementation of +.I Less +is level 3 of ISO 2022. +.IP jis +Only Japanese character sets with the ISO 2022 code extension +techniques in 7 bits are assumed. +.IP ujis +If characters has values between 32 and 127, +the ASCII character set are assumed. +If characters has values between 162 and 254, +the JISX 0208 character set, +a right half of the JISX 0201 character set and +the JISX 0212 character set with the UJIS coding are assumed. +Otherwise, characters are treated as binary. +.IP euc +Same as ujis. +.IP sjis +If characters has values between 32 and 127, +the ASCII character set are assumed. 
+If characters have values between 128 and 252, +the JISX 0208 character set and +a right half of the JISX 0201 character set are assumed. +Otherwise, characters are treated as binary. +.IP japanese +All Japanese character sets, jis, ujis and sjis, are assumed. But +.I less +outputs only the jis. +.PP +Japanese has several code sets (not character sets). +Thus +.I less +must convert among them to display them correctly. +Possible values with this conversion for only JLESSCHARSET are: +.IP ujis-iso7 +The ujis and iso7 are assumed. But +.I less +outputs only the iso7. +.IP euc-iso7 +Same as ujis-iso7. +.IP sjis-iso7 +The sjis and iso7 are assumed. But +.I less +outputs only the iso7. +.IP ujis-jis +The ujis and jis are assumed. But +.I less +outputs only the jis. +.IP euc-jis +Same as ujis-jis. +.IP sjis-jis +The sjis and jis are assumed. But +.I less +outputs only the jis. +.IP jis-ujis +The jis and ujis are assumed. But +.I less +outputs only the ujis. +.IP jis-euc +Same as jis-ujis. +.IP jis-sjis +The jis and sjis are assumed. But +.I less +outputs only the sjis. +.IP japanese-iso7 +The japanese and iso7 are assumed. But +.I less +outputs only the iso7. +.IP japanese-jis +The japanese is assumed. But +.I less +outputs only the jis. +Same as japanese. +.IP japanese-ujis +The japanese is assumed. But +.I less +outputs only the ujis. +.IP japanese-euc +Same as japanese-ujis. +.IP japanese-sjis +The japanese is assumed. But +.I less +outputs only the sjis. +.IP ujis-sjis +The ujis is assumed. But +.I less +outputs only the sjis. +.IP euc-sjis +Same as ujis-sjis. +.IP sjis-ujis +The sjis is assumed. But +.I less +outputs only the ujis. +.IP sjis-euc +Same as sjis-ujis. +.PP +Another way to select a character set is to use the LANG environment variable. +If it starts with "ja_JP" or "japan", +.I less +reads all Japanese coded characters as some Japanese character set, +and the rest of the LANG environment variable specifies the output coding.
+.PP +The ISO 2022 code extension techniques define +4 planes to display many character sets easily. +The default setting of planes is selected by +the JLESSPLANESET environment variable. +If the JLESSPLANESET variable is equal to "japanese", "ujis" or "euc", +.I less +treats g1 plane as JISX 0208, g2 plane as JISX 0201 right half, g3 plane +as JISX 0212. If it is equal to "latin1", "latin2", "latin3", "latin4", +"greek", "alabic", "hebrew", "cyrillic" or "latin5", +.I less +treats g1 plane as one of ISO 8859. +Otherwise, +.I less +tries to parse the JLESSPLANESET variable as +real escape sequences for setting up, and "\\e" in JLESSPLANESET +is treated as escape code when parsing. +.PP +.I Less +understands almost all escape sequences about character sets +in the ISO 2022 code extension techniques. There are many escape sequences +to select the character set. On the other hand, +.I less +outputs only 6 escape sequences to select a character +set: '^[(', '^[-', '^[$(', '^[$-', '^N' and '^O'. +It means +.I less +is friendly to a terminal and a terminal emulator. +.PP +And there is a special "character set" for keyboard inputs. +The JLESSKEYCHARSET environment variable is used +for such purpose. Possible values of it are equal +to those of the JLESSCHARSET environment variable. +.PP If the LESSCHARSET environment variable is not set, the default character set is latin1. However, if the string "UTF-8" is found in the LC_ALL, LC_CTYPE or LANG @@ -1216,6 +1391,8 @@ Usually used at the end of the string, but may appear anywhere. .IP "%x" Replaced by the name of the next input file in the list. +.IP "%K" +Replaced by the name of the last non ASCII character set or code set. .PP If any item is unknown (for example, the file size if input is a pipe), a question mark is printed instead. @@ -1395,6 +1572,8 @@ Options which are passed to .I less automatically. +.IP JLESS +Same as the LESS environment variable.
.IP LESSANSIENDCHARS Characters which are assumed to end an ANSI color escape sequence (default "m"). @@ -1402,8 +1581,14 @@ Format for displaying non-printable, non-control characters. .IP LESSCHARDEF Defines a character set. -.IP LESSCHARSET +.IP JLESSCHARSET Selects a predefined character set. +.IP LESSCHARSET +Selects a predefined character set if JLESSCHARSET is not defined. +.IP JLESSKEYCHARSET +Selects a predefined character set for keyboard inputs. +.IP JLESSPLANESET +Selects a predefined plane set of the ISO 2022 code extension techniques. .IP LESSCLOSE Command line to invoke the (optional) input-postprocessor. .IP LESSECHO @@ -1512,4 +1697,11 @@ Mark Nudelman .br Send bug reports or comments to the above address or to bug-less@gnu.org. + +.SH PATCH +Copyright (c) 1994-2000 Kazushi (Jam) Marukawa, Japanized routines only +.br +Comments about this part to: jam@pobox.com +.br +You may distribute under the terms of the Less License. Index: less/lesskey.c diff -u less/lesskey.c:1.1.1.17 less/lesskey.c:1.23 --- less/lesskey.c:1.1.1.17 Sun Oct 29 02:19:14 2000 +++ less/lesskey.c Tue Oct 31 04:30:40 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -146,6 +152,9 @@ "undo-hilite", A_UNDO_SEARCH, "version", A_VERSION, "visual", A_VISUAL, +#if JAPANESE + "rotate-right", A_ROT_RCODESET, +#endif NULL, 0 }; Index: less/lesskey.nro diff -u less/lesskey.nro:1.1.1.15 less/lesskey.nro:1.21 --- less/lesskey.nro:1.1.1.15 Sun Oct 29 02:19:16 2000 +++ less/lesskey.nro Tue Oct 31 04:30:40 2000 @@ -215,6 +215,7 @@ v visual ! shell + firstcmd + @ rotate-right H help h help V version @@ -377,3 +378,9 @@ .br Send bug reports or comments to the above address or to bug-less@gnu.org. 
+.SH PATCH +Copyright (c) 1994-2000 Kazushi (Jam) Marukawa, Japanized routines only +.br +Comments to: jam@pobox.com +.br +You may distribute under the terms of the Less License. Index: less/line.c diff -u less/line.c:1.1.1.13 less/line.c:1.85 --- less/line.c:1.1.1.13 Sun Oct 29 02:19:10 2000 +++ less/line.c Tue Oct 31 04:30:40 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -27,6 +33,8 @@ public int cshift; /* Current left-shift of output line buffer */ public int hshift; /* Desired left-shift of output line buffer */ +static CHARSET charset[LINEBUF_SIZE]; + /* Extension of linebuf to hold character set */ static char attr[LINEBUF_SIZE]; /* Extension of linebuf to hold attributes */ static int curr; /* Index into linebuf */ static int column; /* Printable length, accounting for @@ -36,9 +44,12 @@ static int lmargin; /* Left margin */ static char pendc; static POSITION pendpos; +static POSITION lastpos; static char *end_ansi_chars; +static int pwidth(); static int do_append(); +static int real_do_append(); extern int bs_mode; extern int tabstop; @@ -53,6 +64,8 @@ extern int bl_s_width, bl_e_width; extern int so_s_width, so_e_width; extern int sc_width, sc_height; +extern IFILE curr_ifile; + extern int utf_mode; extern POSITION start_attnpos; extern POSITION end_attnpos; @@ -72,13 +85,24 @@ * Rewind the line buffer. */ public void -prewind() +prewind(pos) + POSITION pos; { curr = 0; column = 0; overstrike = 0; is_null_line = 0; pendc = '\0'; +#if ISO + /* + * If an input string is not contigious string of the last string, + * clear all status in multi bytes character buffering routine. 
+ */ + if (lastpos != pos || pos < 0) + init_multi(get_mulbuf(curr_ifile)); + else + clear_multi(get_mulbuf(curr_ifile)); +#endif lmargin = 0; if (status_col) lmargin += 1; @@ -131,8 +155,10 @@ { sprintf(&linebuf[curr], "%*d", LINENUM_WIDTH, lno); column += LINENUM_WIDTH; - for (i = 0; i < LINENUM_WIDTH; i++) - attr[curr++] = 0; + for (i = 0; i < LINENUM_WIDTH; i++) { + charset[curr] = ASCII; + attr[curr++] = AT_NORMAL; + } } /* * Append enough spaces to bring us to the lmargin. @@ -140,6 +166,7 @@ while (column < lmargin) { linebuf[curr] = ' '; + charset[curr] = ASCII; attr[curr++] = AT_NORMAL; column++; } @@ -177,13 +204,67 @@ int shift; { int i; - int real_shift; + int j; + int real_shift; /* exact columns to shift */ + int exact_length; /* exact bytes to shift */ + int padding; /* columns for padding */ if (shift > column - lmargin) shift = column - lmargin; if (shift > curr - lmargin) shift = curr - lmargin; +#if ISO + /* + * Calculate exact bytes to shift. + * + * We would like to shift linebuf, charset, and attr by "shift" + * characters. The problem is we don't know how many bytes we + * need to shift. So, calculate it first. + */ + padding = 0; + real_shift = 0; + exact_length = 0; + /* + * Skip rest of multi bytes character. + */ + for (j = lmargin; j < curr && pwidth(linebuf[j], charset[j], attr[j]) == 0; j++) + { + padding++; + exact_length++; + } + /* + * Calculate how many bytes we need to shift. + */ + for (; j < curr && real_shift < shift; j++) + { + real_shift += pwidth(linebuf[j], charset[j], attr[j]); + exact_length++; + } + /* + * Skip following rest bytes of a last multi bytes character. + */ + for (; j < curr && pwidth(linebuf[j], charset[j], attr[j]) == 0; j++) + { + exact_length++; + } + + /* + * Put characters. 
+ */ + for (i = 0; i < padding; i++) + { + linebuf[lmargin + i] = ' '; + charset[lmargin + i] = ASCII; + attr[lmargin + i] = AT_NORMAL;; + } + for (i = 0; i < curr - exact_length; i++) + { + linebuf[lmargin + i + padding] = linebuf[lmargin + i + exact_length]; + charset[lmargin + i + padding] = charset[lmargin + i + exact_length]; + attr[lmargin + i + padding] = attr[lmargin + i + exact_length]; + } +#else if (!utf_mode) real_shift = shift; else @@ -197,6 +278,8 @@ linebuf[lmargin + i] = linebuf[lmargin + i + real_shift]; attr[lmargin + i] = attr[lmargin + i + real_shift]; } +#endif + column -= shift; curr -= real_shift; cshift += shift; @@ -245,8 +328,9 @@ * attribute sequence to be inserted, so this must be taken into account. */ static int -pwidth(c, a) +pwidth(c, cs, a) int c; + int cs; int a; { register int w; @@ -272,7 +356,11 @@ * Other characters take one space, * plus the width of any attribute enter/exit sequence. */ +#if ISO + w = mwidth(c, cs); +#else w = 1; +#endif if (curr > 0 && attr[curr-1] != a) w += attr_ewidth(attr[curr-1]); if (a && (curr == 0 || attr[curr-1] != a)) @@ -286,8 +374,15 @@ static void backc() { - curr--; - column -= pwidth(linebuf[curr], attr[curr]); + /* remove garbage in the buffer. */ + if (CSISREST(charset[curr])) + charset[curr] = 0; + /* delete the previous character. */ + do + { + curr--; + column -= pwidth(linebuf[curr], charset[curr], attr[curr]); + } while (curr > 0 && CSISREST(charset[curr])); } /* @@ -316,8 +411,9 @@ * Append a character and attribute to the line buffer. */ static int -storec(c, a, pos) +storec(c, cs, a, pos) int c; + int cs; int a; POSITION pos; { @@ -330,18 +426,38 @@ * Override the attribute passed in. */ a = AT_STANDOUT; +#if ISO + /* + * All data of multi bytes character has same position. + * However search mark data position and not character position. + * Thus we must check all data position of each character. 
+ */ + else if (CSISREST(cs)) + { + register int i; + for (i = 1; CSISREST(charset[curr - i]); i++) + ; + if (is_hilited(pos - i, pos, 0)) + { + for (i = curr - 1; CSISREST(charset[i]); --i) + attr[i] = AT_STANDOUT; + attr[i] = AT_STANDOUT; + a = AT_STANDOUT; + } + } +#endif #endif if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq()) w = 0; else - w = pwidth(c, a); + w = pwidth(c, cs, a); if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width) /* * Won't fit on screen. */ return (1); - if (curr >= sizeof(linebuf)-2) + if (curr >= (int)sizeof(linebuf)-2) /* * Won't fit in line buffer. */ @@ -381,6 +497,7 @@ /* End of magic cookie handling. */ linebuf[curr] = c; + charset[curr] = cs; attr[curr] = a; column += w; return (0); @@ -438,30 +555,56 @@ int c; POSITION pos; { +#if ISO + char *cbuf; + CHARSET *csbuf; + int i, j; + + buffering_multi(get_mulbuf(curr_ifile), c, &cbuf, &csbuf, &i); + j = i; + while (--i >= 0) + { + if (real_do_append(*cbuf++, *csbuf++, pos)) + { + set_bufbytes(get_mulbuf(curr_ifile), + get_bufbytes(get_mulbuf(curr_ifile)) - + (j - (i + 1))); + return (1); + } + } + return (0); +#else + return (real_do_append(c, control_char(c) ? WRONGCS : ASCII, pos)); +#endif +} + + static int +real_do_append(c, cs, pos) + register int c; + register CHARSET cs; + POSITION pos; +{ register char *s; register int a; -#define STOREC(c,a) \ - if (storec((c),(a),pos)) return (1); else curr++ +#define STOREC(c,cs,a) \ + if (storec((c),(cs),(a),pos)) return (1); else curr++ - if (c == '\b') + if (CSISWRONG(cs) && c != '\b' && c != '\t') + goto do_control_char; + if (overstrike) { - switch (bs_mode) + /* + * Check about multi '\b' for multi bytes character. 
+ */ + if (c == '\b') { - case BS_NORMAL: - STOREC(c, AT_NORMAL); - break; - case BS_CONTROL: - goto do_control_char; - case BS_SPECIAL: - if (curr == 0) - break; - backc(); - overstrike = 1; - break; + if (linebuf[curr] == '_' && CSISASCII(charset[curr])) + goto do_bs_char; /* do backc on underline */ + else + return (0); /* ignore it */ } - } else if (overstrike) - { + /* * Overstrike the character at the current position * in the line buffer. This will cause either @@ -470,17 +613,42 @@ * or just deletion of the character in the buffer. */ overstrike = 0; - if ((char)c == linebuf[curr]) - STOREC(linebuf[curr], AT_BOLD); - else if (c == '_') - STOREC(linebuf[curr], AT_UNDERLINE); - else if (linebuf[curr] == '_') - STOREC(c, AT_UNDERLINE); - else if (control_char(c)) + if (CSISWRONG(cs) && control_char(c)) goto do_control_char; + if ((char)c == linebuf[curr] && charset[curr] == cs) + STOREC(c, cs, AT_BOLD); + else if (c == '_' && CSISASCII(cs)) + { + STOREC(linebuf[curr], charset[curr], AT_UNDERLINE); +#if ISO + while (CSISREST(charset[curr])) + STOREC(linebuf[curr], charset[curr], + AT_UNDERLINE); +#endif + } else if (linebuf[curr] == '_' && CSISASCII(charset[curr])) + STOREC(c, cs, AT_UNDERLINE); else - STOREC(c, AT_NORMAL); - } else if (c == '\t') + STOREC(c, cs, AT_NORMAL); + } else if (c == '\b' && CSISWRONG(cs)) + { + do_bs_char: + switch (bs_mode) + { + case BS_NORMAL: + STOREC(c, cs, AT_NORMAL); + break; + case BS_CONTROL: + goto do_control_char; + case BS_SPECIAL: + if (curr == 0) + goto do_control_char; + if (CSISWRONG(charset[curr - 1])) + goto do_control_char; + backc(); + overstrike = 1; + break; + } + } else if (c == '\t' && CSISWRONG(cs)) { /* * Expand a tab into spaces. 
@@ -495,11 +663,11 @@ case BS_SPECIAL: do { - STOREC(' ', AT_NORMAL); + STOREC(' ', ASCII, AT_NORMAL); } while (((column + cshift - lmargin) % tabstop) != 0); break; } - } else if (control_char(c)) + } else if (CSISWRONG(cs) && control_char(c)) { do_control_char: if (ctldisp == OPT_ON || (ctldisp == OPT_ONPLUS && c == ESC)) @@ -507,13 +675,13 @@ /* * Output as a normal character. */ - STOREC(c, AT_NORMAL); + STOREC(c, cs, AT_NORMAL); } else { /* * Convert to printable representation. */ - s = prchar(c); + s = prchar(c, cs); a = binattr; /* @@ -525,11 +693,16 @@ return (1); for ( ; *s != 0; s++) - STOREC(*s, a); + STOREC(*s, WRONGCS, a); } +#if ISO + } else if (CSISREST(cs)) + { + STOREC(c, cs, attr[curr - 1]); +#endif } else { - STOREC(c, AT_NORMAL); + STOREC(c, cs, AT_NORMAL); } return (0); @@ -539,9 +712,17 @@ * Terminate the line in the line buffer. */ public void -pdone(endline) +pdone(endline, pos) int endline; + POSITION pos; { +#if ISO + /* + * Check and flush some buffering characters. + */ + (void) do_append(-1, pos); +#endif + if (pendc && (pendc != '\r' || !endline)) /* * If we had a pending character, put it in the buffer. @@ -563,11 +744,14 @@ if (column < sc_width || !auto_wrap || ignaw || ctldisp == OPT_ON) { linebuf[curr] = '\n'; + charset[curr] = ASCII; attr[curr] = AT_NORMAL; curr++; } linebuf[curr] = '\0'; + charset[curr] = ASCII; attr[curr] = AT_NORMAL; + lastpos = pos; /* * If we are done with this line, reset the current shift. */ @@ -581,8 +765,9 @@ * and the character attribute in *ap. */ public int -gline(i, ap) +gline(i, csp, ap) register int i; + register int *csp; register int *ap; { char *s; @@ -593,11 +778,13 @@ * If there is no current line, we pretend the line is * either "~" or "", depending on the "twiddle" flag. */ + *csp = ASCII; *ap = AT_BOLD; s = (twiddle) ? 
"~\n" : "\n"; return (s[i]); } + *csp = charset[i]; *ap = attr[i]; return (linebuf[i] & 0377); } Index: less/main.c diff -u less/main.c:1.1.1.16 less/main.c:1.25 --- less/main.c:1.1.1.16 Sun Oct 29 02:03:30 2000 +++ less/main.c Tue Oct 31 04:30:40 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1998-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -103,12 +109,18 @@ get_term(); init_cmds(); init_prompt(); + init_planeset(); init_charset(); init_line(); init_option(); s = lgetenv("LESS"); if (s != NULL) scan_option(save(s)); +#if ISO + s = lgetenv("JLESS"); + if (s != NULL) + scan_option(save(s)); +#endif #define isoptstring(s) (((s)[0] == '-' || (s)[0] == '+') && (s)[1] != '\0') while (argc > 0 && (isoptstring(*argv) || isoptpending())) Index: less/multi.c diff -u /dev/null less/multi.c:1.113 --- /dev/null Wed Dec 6 22:29:44 2000 +++ less/multi.c Tue Dec 5 07:27:45 2000 @@ -0,0 +1,2033 @@ +/* + * Copyright (c) 1994-2000 Kazushi (Jam) Marukawa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice in the documentation and/or other materials provided with + * the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +/* + * Routines to manipulate a buffer that holds a string of multi-byte characters. + * Detect the character set of the input string and convert it to internal + * codes, then convert those to other codes to display them. + */ + +#include "defines.h" +#include "multi.h" + +#include <stdio.h> +#include <assert.h> + +#if STDC_HEADERS +#include <stdlib.h> +#include <string.h> +#endif + +/* TODO: remove caller control_char(), change_control_char() and ecalloc() */ +extern int control_char (); +extern void change_control_char (); +extern void* ecalloc (); + + +#if ISO + +static void rebuffering_multi(); + + +#if JAPANESE + +int markwrongchar = 1; + + +/* + * Kanji convention + */ +#define ISJIS(c) (0x21 <= (c) && (c) <= 0x7e) +#define ISUJIS(c) (0xa1 <= (c) && (c) <= 0xfe) +#define ISUJISSS(c) ((c) == 0x8e || (c) == 0x8f) +#define ISUJISKANJI(c1,c2) (ISUJIS(c1) && ISUJIS(c2)) +#define ISUJISKANA(c1,c2) ((c1) == 0x8e && ISUJIS(c2)) +#define ISUJISKANA1(c) ((c) == 0x8e) +#define ISUJISKANJISUP(c1,c2,c3) ((c1) == 0x8f && ISUJIS(c2) && ISUJIS(c3)) +#define ISSJISKANJI(c1,c2) (((0x81 <= (c1) && (c1) <= 0x9f) || \ + (0xe0 <= (c1) && (c1) <= 0xfc)) && \ + (0x40 <= (c2) && (c2) <= 0xfc && (c2) != 0x7f)) +#define ISSJISKANA(c) (0xa1 <= (c) && (c) <= 0xdf) +#endif + + +/* + * Definitions for understanding the escape sequence.
+ * The following escape sequences are understood by less: + * ESC 2/4 2/8,2/9,2/10,2/11,2/13,2/14,2/15 F + * ESC 2/4 4/0,4/1,4/2 + * ESC 2/6 F + * ESC 2/8,2/9,2/10,2/11,2/13,2/14,2/15 F + * ESC 2/12 F This is used in MULE. Less supports this as input. + * 0/14,0/15 + * ESC 4/14,4/15,6/14,6/15,7/12,7/13,7/14 + * 8/14,8/15 + */ +enum escape_sequence { + NOESC, /* No */ ESC_, /* ^[ */ + ESC_2_4, /* ^[$ */ ESC_2_4_8, /* ^[$( */ + ESC_2_4_9, /* ^[$) */ ESC_2_4_10, /* ^[$* */ + ESC_2_4_11, /* ^[$+ */ ESC_2_4_13, /* ^[$- */ + ESC_2_4_14, /* ^[$. */ ESC_2_4_15, /* ^[$/ */ + ESC_2_6, /* ^[& */ ESC_2_8, /* ^[( */ + ESC_2_9, /* ^[) */ ESC_2_10, /* ^[* */ + ESC_2_11, /* ^[+ */ ESC_2_12, /* ^[, */ + ESC_2_13, /* ^[- */ ESC_2_14, /* ^[. */ + ESC_2_15 /* ^[/ */ +}; + + +static CODESET def_left = iso7; /* Default code set of left plane */ +static CODESET def_right = iso8; /* Default code set of right plane */ +static int def_g0 = ASCII; /* Default g0 plane status */ +static int def_g1 = WRONGCS; /* Default g1 plane status */ +static int def_g2 = WRONGCS; /* Default g2 plane status */ +static int def_g3 = WRONGCS; /* Default g3 plane status */ + +static CODESET output = iso8; /* Code set for output */ +#if JAPANESE +static CODESET def_priority = ujis; /* Which code was given priority. */ +#endif + +struct multibuf { + struct { + CODESET left; + CODESET right; + } io; + + CODESET orig_io_right; + int rotation_io_right; + + enum escape_sequence eseq; + /* + * Variables to control of escape sequences as output. + */ + int cs; /* Current character set */ + int g0; /* Current g0 plane status */ + int g1; /* Current g1 plane status */ + int g2; /* Current g2 plane status */ + int g3; /* Current g3 plane status */ + int *gl; /* Current gl plane status */ + int *gr; /* Current gr plane status */ + int *sg; /* Current status of single-shifted plane */ + int irr; /* Identify revised registration number */ +#if JAPANESE + CODESET priority; /* Which code was given priority.
*/ + int sequence_counter; /* Special counter for detect UJIS KANJI. */ +#endif + + int icharset; /* Last non ASCII character set of input */ + + /* + * Buffers to keep all bytes of a multi-bytes character until it is + * proved to be right sequence. + */ + /* + * First buffer. + * All input stream is buffered here until recognized. + */ + unsigned char multibuf[10]; /* Buffer for input */ + int multiindex; /* Index of multibuf */ + /* + * Second buffer. + * All recognized data is buffered with character set tag. + */ + unsigned char multiint[10]; /* Buffer for recognized data */ + CHARSET multics[10]; /* Buffer for character set marks */ + int intindex; /* Index of multiint */ + int bufbytes; /* The length of multi bytes character in */ + /* the original stream (before conversion), */ + /* and use this to adjust a current position */ + /* if the width of a character doesn't fit */ + /* to the screen width. */ +}; + + +static int code_length(cs) +int cs; +{ + if (CSISWRONG(cs)) + return (1); + switch (CS2TYPE(cs)) + { + case TYPE_94_CHARSET: + case TYPE_96_CHARSET: + return (1); + case TYPE_94N_CHARSET: + case TYPE_96N_CHARSET: + switch (CS2FT(cs) & 0x70) + { + case 0x30: /* for private use */ + return (2); + case 0x40: + case 0x50: + return (2); + case 0x60: + return (3); + case 0x70: + return (4); /* or more bytes */ + } + } + assert(0); + return (0); +} + +/* + * Check and normalize all codes which don't need conversion. + */ +static void internalize_noconv(mp) +MULBUF *mp; +{ + if (mp->multiindex != 1) + { + rebuffering_multi(mp); + } else + { + mp->multiint[mp->intindex] = mp->multibuf[0]; + mp->multics[mp->intindex] = ASCII; + mp->intindex += 1; + mp->multiindex = 0; + } +} + +/* + * Internalize and mark all codes as wrong since code is not valid. 
+ */ +static void internalize_wrongcode(mp) +MULBUF *mp; +{ + register int i; + + assert(mp->intindex + mp->multiindex <= (int)sizeof(mp->multiint)); + for (i = 0; i < mp->multiindex; i++) + { + mp->multiint[mp->intindex + i] = mp->multibuf[i]; + mp->multics[mp->intindex + i] = WRONGCS; + } + mp->intindex += mp->multiindex; + mp->multiindex = 0; +} + +/* + * Put a wrongmark into internalize buffer. + */ +static void internalize_wrongmark(mp) +MULBUF *mp; +{ + mp->multiint[mp->intindex + 0] = '"'; + mp->multiint[mp->intindex + 1] = '.'; + mp->multics[mp->intindex + 0] = JISX0208KANJI; + mp->multics[mp->intindex + 1] = REST_MASK | + JISX0208KANJI; + mp->intindex += 2; + mp->multiindex = 0; +} + +/* + * Internalize and mark all codes as wrong since character is not valid. + * It is not the same as internalize_wrongcode because code is valid. + */ +static void internalize_wrongchar(mp) +MULBUF *mp; +{ + register int i; + + assert(mp->intindex + mp->multiindex <= (int)sizeof(mp->multiint)); + if (markwrongchar) + { + switch (mp->multics[mp->intindex]) { + case JISX0201KANA: + case JISX0201ROMAN: + case LATIN1: + case LATIN2: + case LATIN3: + case LATIN4: + case GREEK: + case ARABIC: + case HEBREW: + case CYRILLIC: + case LATIN5: + /* Should I use one byte character, like '?' or '_'? */ + internalize_wrongmark(mp); + return; + break; + case JISX0208_78KANJI: + case JISX0208KANJI: + case JISX0208_90KANJI: + case JISX0212KANJISUP: + case JISX0213KANJI1: + case JISX0213KANJI2: + case UJIS: + case SJIS: + internalize_wrongmark(mp); + return; + break; + case GB2312: + case KSC5601: + default: + break; + } + } + for (i = 0; i < mp->multiindex; i++) + { + mp->multiint[mp->intindex + i] = mp->multibuf[i]; + mp->multics[mp->intindex + i] = WRONGCS; + } + mp->intindex += mp->multiindex; + mp->multiindex = 0; +} + +/* + * Check and normalize all ISO codes + */ +static void internalize_iso(mp) +MULBUF *mp; +{ + register int i; + + /* + * Reject empty character set. 
+ */ + if (CSISWRONG(mp->cs)) + { + rebuffering_multi(mp); + return; + } + + /* + * Reject DEL and SPACE codes if currently 94 character set is + * selected. + */ + if (CS2TYPE(mp->cs) == TYPE_94_CHARSET || + CS2TYPE(mp->cs) == TYPE_94N_CHARSET) + { + if ((mp->multibuf[mp->multiindex - 1] & 0x7f) == 0x7f) + { + rebuffering_multi(mp); + return; + } else if ((mp->multibuf[mp->multiindex - 1] & 0x7f) == 0x20) + { + /* + * A 0x20 (SPACE) code is wrong, but I treat it as + * a SPACE. Should I treat it wrong character? + */ + internalize_noconv(mp); + return; + } + } + + /* + * Buffering all ISO 2022 coding. If multi bytes code + * finished, flush them. + */ + if (mp->multiindex < code_length(mp->cs)) + return; + + /* + * Normalize all codes + */ + mp->multiint[mp->intindex] = mp->multibuf[0] & 0x7f; + mp->multics[mp->intindex] = mp->cs; + for (i = 1; i < mp->multiindex; i++) + { + mp->multiint[mp->intindex + i] = mp->multibuf[i] & 0x7f; + mp->multics[mp->intindex + i] = REST_MASK | mp->cs; + } + /* + * Eliminate some wrong codes + */ + if (chisvalid_cs(&mp->multiint[mp->intindex], + &mp->multics[mp->intindex])) { + mp->intindex += mp->multiindex; + mp->multiindex = 0; + } else + { + /* + * less ignore the undefined codes + */ + internalize_wrongchar(mp); + } +} + +#if JAPANESE +/* + * Check and normalize all UJIS codes + */ +static void internalize_ujis(mp) +MULBUF *mp; +{ + if (mp->multiindex == 1) { + /* do nothing */ + } else if (mp->multiindex == 2) { + if (ISUJISKANA(mp->multibuf[0], mp->multibuf[1])) { + mp->multiint[mp->intindex] = mp->multibuf[1] & 0x7f; + mp->multics[mp->intindex] = mp->cs; + mp->intindex += 1; + mp->multiindex = 0; + } else if (ISUJISKANJI(mp->multibuf[0], mp->multibuf[1])) { + mp->multiint[mp->intindex] = mp->multibuf[0]; + mp->multics[mp->intindex] = UJIS; + mp->multiint[mp->intindex + 1] = mp->multibuf[1]; + mp->multics[mp->intindex + 1] = REST_MASK | UJIS; + + /* + * Eliminate some wrong codes + */ + if 
(chisvalid_cs(&mp->multiint[mp->intindex], + &mp->multics[mp->intindex])) { + /* JIS X 0208:1997 */ + mp->multiint[mp->intindex] = mp->multiint[0] & 0x7f; + mp->multics[mp->intindex] = mp->cs; + mp->multiint[mp->intindex + 1] = mp->multiint[1] & 0x7f; + mp->multics[mp->intindex + 1] = REST_MASK | mp->cs; + mp->intindex += 2; + mp->multiindex = 0; + } else { + /* + * less ignore the undefined codes + */ + internalize_wrongchar(mp); + } + } + } else if (mp->multiindex == 3 && + ISUJISKANJISUP(mp->multibuf[0], mp->multibuf[1], + mp->multibuf[2])) { + mp->multiint[mp->intindex] = mp->multibuf[0]; + mp->multics[mp->intindex] = UJIS; + mp->multiint[mp->intindex + 1] = mp->multibuf[1]; + mp->multics[mp->intindex + 1] = REST_MASK | UJIS; + mp->multiint[mp->intindex + 2] = mp->multibuf[2]; + mp->multics[mp->intindex + 2] = REST_MASK | UJIS; + + /* + * Eliminate some wrong codes + */ + if (chisvalid_cs(&mp->multiint[mp->intindex], + &mp->multics[mp->intindex])) { + register int c1; + static unsigned char table[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +#if UJIS0213 + 0, 0x21, 0, 0x23, 0x24, 0x25, 0, 0, + 0x28, 0, 0, 0, 0x2C, 0x2D, 0x2E, 0x2F, +#else + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +#endif + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +#if UJIS0213 + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0 +#else + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +#endif + }; + c1 = mp->multiint[1] & 0x7f; + if (table[c1] != 0) { + /* JIS X 0213:2000 plane 2 */ + if (output == jis) { + /* JIS cannot output JIS X 0213:2000 plane 2 */ + rebuffering_multi(mp); + } else { + mp->multiint[mp->intindex] = c1; + mp->multics[mp->intindex] = + JISX0213KANJI2; + 
mp->multiint[mp->intindex + 1] = + mp->multiint[2] & 0x7f; + mp->multics[mp->intindex + 1] = + REST_MASK | JISX0213KANJI2; + mp->intindex += 2; + mp->multiindex = 0; + } + } else { + /* JIS X 0212:1990 */ + if (output == sjis || output == jis) { + /* SJIS cannot output JIS X 0212:1990 */ + rebuffering_multi(mp); + } else { + mp->multiint[mp->intindex] = c1; + mp->multics[mp->intindex] = mp->cs; + mp->multiint[mp->intindex + 1] = + mp->multiint[2] & 0x7f; + mp->multics[mp->intindex + 1] = + REST_MASK | mp->cs; + mp->intindex += 2; + mp->multiindex = 0; + } + } + } else { + /* + * less ignore the undefined codes + */ + internalize_wrongchar(mp); + } + } else { + rebuffering_multi(mp); + } +} + +/* + * Check and normalize all SJIS codes + */ +static void internalize_sjis(mp) +MULBUF *mp; +{ + if (mp->multiindex == 1) { + if (!ISSJISKANA(mp->multibuf[0])) { + rebuffering_multi(mp); + } else { + mp->multiint[mp->intindex] = mp->multibuf[0] & 0x7f; + mp->multics[mp->intindex] = mp->cs; + mp->intindex += 1; + mp->multiindex = 0; + } + } else if (mp->multiindex == 2 && + ISSJISKANJI(mp->multibuf[0], mp->multibuf[1])) { + mp->multiint[mp->intindex] = mp->multibuf[0]; + mp->multics[mp->intindex] = SJIS; + mp->multiint[mp->intindex + 1] = mp->multibuf[1]; + mp->multics[mp->intindex + 1] = REST_MASK | SJIS; + + /* + * Check the correctness of SJIS encoded characters and + * convert them into internal representation. 
+ */ + if (chisvalid_cs(&mp->multiint[mp->intindex], + &mp->multics[mp->intindex])) { + register int c1, c2, c3; + static unsigned char table[] = { + 0, 0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, + 0x2F, 0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, + 0x3F, 0x41, 0x43, 0x45, 0x47, 0x49, 0x4B, 0x4D, + 0x4F, 0x51, 0x53, 0x55, 0x57, 0x59, 0x5B, 0x5D, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0x5F, 0x61, 0x63, 0x65, 0x67, 0x69, 0x6B, 0x6D, +#if SJIS0213 + 0x6F, 0x71, 0x73, 0x75, 0x77, 0x79, 0x7B, 0x7D, + 0x80, 0xA3, 0x81, 0xAD, 0x82, 0xEF, 0xF1, 0xF3, + 0xF5, 0xF7, 0xF9, 0xFB, 0xFD, 0, 0, 0 +#else + 0x6F, 0x71, 0x73, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +#endif + }; + + c1 = table[mp->multibuf[0] & 0x7f]; + c2 = mp->multibuf[1] - + ((unsigned char)mp->multibuf[1] >= 0x80 ? 1 : 0); + c3 = c2 >= 0x9e; + if (c1 < 0x80) { + /* JIS X 0213:2000 plane 1 or JIS X 0208:1997 */ + mp->multiint[mp->intindex] = + (c1 + (c3 ? 1 : 0)); + mp->multics[mp->intindex] = mp->cs; + mp->multiint[mp->intindex + 1] = + (c2 - (c3 ? 0x9e - 0x21 : 0x40 - 0x21)); + mp->multics[mp->intindex + 1] = + REST_MASK | mp->cs; + mp->intindex += 2; + mp->multiindex = 0; + } else { + /* JIS X 0213:2000 plane 2 */ + if (output == jis) { + /* JIS cannot output JIS X 0213:2000 plane 2 */ + rebuffering_multi(mp); + } else { + if (c1 > 0xA0) { + /* row 3-4, 13-14, and 79-94 */ + mp->multiint[mp->intindex] = + (c1 + (c3 ? 1 : 0)); + } else if (c1 == 0x80) { + /* row 1 or 8 */ + mp->multiint[mp->intindex] = + c3 ? 0x28 : 0x21; + } else if (c1 == 0x81) { + /* row 5 or 12 */ + mp->multiint[mp->intindex] = + c3 ? 0x2C : 0x25; + } else { + /* row 15 or 78 */ + mp->multiint[mp->intindex] = + c3 ? 0x6E : 0x2F; + } + mp->multics[mp->intindex] = JISX0213KANJI2; + mp->multiint[mp->intindex + 1] = + (c2 - (c3 ? 
0x9e - 0x21 : 0x40 - 0x21)); + mp->multics[mp->intindex + 1] = + REST_MASK | JISX0213KANJI2; + mp->intindex += 2; + mp->multiindex = 0; + } + } + } else { + /* + * Less ignores undefined characters after marking + * them as wrong characters. + */ + internalize_wrongchar(mp); + } + } else { + rebuffering_multi(mp); + } +} +#endif + +static void internalize(mp) +MULBUF *mp; +{ + int c = mp->multibuf[mp->multiindex - 1]; + + if (mp->multiindex == 1) + { + if ((c <= 0x7f && mp->io.left == noconv) || + (c >= 0x80 && mp->io.right == noconv)) + { +#if JAPANESE + mp->sequence_counter = 0; +#endif + if (control_char(c)) + internalize_wrongcode(mp); + else + internalize_noconv(mp); + return; + } else if (c >= 0x80 && mp->io.right == none) + { +#if JAPANESE + mp->sequence_counter = 0; +#endif + rebuffering_multi(mp); + return; + } + + mp->cs = ASCII; + if (c < 0x20) + { +#if JAPANESE + mp->sequence_counter = 0; +#endif + internalize_wrongcode(mp); + return; + } else if (c <= 0x7f || + (mp->io.right == iso8 && (0xa0 <= c && c <= 0xff))) + { +#if JAPANESE + mp->sequence_counter = 0; +#endif + /* + * Decide current character set. + */ + mp->cs = (mp->sg ? *mp->sg : + (c & 0x80) ? *mp->gr : *mp->gl); + /* + * Check cs that fit for output code set. 
+ */ + /* JIS cannot output JISX0212, JISX0213_2, or ISO2022 */ + if (output == jis && mp->cs != ASCII && + mp->cs != JISX0201KANA && + mp->cs != JISX0201ROMAN && + mp->cs != JISX0208_78KANJI && + mp->cs != JISX0208KANJI && + mp->cs != JISX0208_90KANJI && + mp->cs != JISX0213KANJI1) + { + rebuffering_multi(mp); + return; + } + /* UJIS cannot output regular ISO2022 except JIS */ + if (output == ujis && mp->cs != ASCII && + mp->cs != JISX0201KANA && + mp->cs != JISX0201ROMAN && + mp->cs != JISX0208_78KANJI && + mp->cs != JISX0208KANJI && + mp->cs != JISX0208_90KANJI && + mp->cs != JISX0212KANJISUP && + mp->cs != JISX0213KANJI1 && + mp->cs != JISX0213KANJI2) + { + rebuffering_multi(mp); + return; + } + /* SJIS cannot output JISX0212 or ISO2022 */ + if (output == sjis && mp->cs != ASCII && + mp->cs != JISX0201KANA && + mp->cs != JISX0201ROMAN && + mp->cs != JISX0208_78KANJI && + mp->cs != JISX0208KANJI && + mp->cs != JISX0208_90KANJI && + mp->cs != JISX0213KANJI1 && + mp->cs != JISX0213KANJI2) + { + rebuffering_multi(mp); + return; + } + + if (mp->cs != ASCII) + mp->icharset = mp->cs; + internalize_iso(mp); + return; + } else if (control_char(c)) + { +#if JAPANESE + mp->sequence_counter = 0; +#endif + internalize_wrongcode(mp); + return; + } +#if JAPANESE + if (mp->priority == sjis && ISSJISKANA(c)) + { + if (mp->io.right == japanese) + { + mp->sequence_counter++; + if (mp->sequence_counter % 2 == 1 && + mp->multibuf[0] != 0xa4) + mp->sequence_counter = 0; + if (mp->sequence_counter >= 6) + /* + * It looks like a sequence of UJIS + * hiragana. Thus we give priority + * to not sjis. 
+ */ + mp->priority = ujis; + } + mp->cs = JISX0201KANA; + mp->icharset = SJIS; + internalize_sjis(mp); + return; + } else if (mp->io.right == ujis || mp->io.right == sjis || + mp->io.right == japanese) + { + mp->sequence_counter = 0; + return; + } + mp->sequence_counter = 0; +#endif + internalize_wrongcode(mp); + return; + } + +#if JAPANESE + assert(mp->sequence_counter == 0); +#endif + if (c < 0x20) + { + rebuffering_multi(mp); + return; + } else if (mp->cs != ASCII && + (c <= 0x7f || + (mp->io.right == iso8 && 0xa0 <= c && c <= 0xff))) + { + if (mp->cs != (mp->sg ? *mp->sg : + (c & 0x80) ? *mp->gr : *mp->gl)) + rebuffering_multi(mp); + else + internalize_iso(mp); + return; + } else if (control_char(c)) + { + rebuffering_multi(mp); + return; + } +#if JAPANESE + if (mp->multiindex == 2) + { + if (mp->priority == sjis && ISSJISKANJI(mp->multibuf[0], c)) + { +#if UJIS0213 + mp->cs = JISX0213KANJI1; +#else + mp->cs = JISX0208KANJI; +#endif + mp->icharset = SJIS; + internalize_sjis(mp); + return; + } else if (mp->priority == ujis) + { + if (ISUJISKANA(mp->multibuf[0], c)) + { + mp->cs = JISX0201KANA; + mp->icharset = UJIS; + internalize_ujis(mp); + return; + } else if (ISUJISKANJI(mp->multibuf[0], c)) + { +#if UJIS0213 + mp->cs = JISX0213KANJI1; +#else + mp->cs = JISX0208KANJI; +#endif + mp->icharset = UJIS; + internalize_ujis(mp); + return; + } else if (ISUJISKANJISUP(mp->multibuf[0], c, 0xa1)) + { + return; + } + } + + if ((mp->io.right == sjis || mp->io.right == japanese) && + ISSJISKANJI(mp->multibuf[0], c)) + { +#if UJIS0213 + mp->cs = JISX0213KANJI1; +#else + mp->cs = JISX0208KANJI; +#endif + mp->priority = sjis; + mp->icharset = SJIS; + internalize_sjis(mp); + return; + } else if ((mp->io.right == ujis || mp->io.right == japanese)) + { + if (ISUJISKANA(mp->multibuf[0], c)) + { + mp->cs = JISX0201KANA; + mp->priority = ujis; + mp->icharset = UJIS; + internalize_ujis(mp); + return; + } else if (ISUJISKANJI(mp->multibuf[0], c)) + { +#if UJIS0213 + mp->cs = 
JISX0213KANJI1; +#else + mp->cs = JISX0208KANJI; +#endif + mp->priority = ujis; + mp->icharset = UJIS; + internalize_ujis(mp); + return; + } else if (ISUJISKANJISUP(mp->multibuf[0], c, 0xa1)) + { + return; + } + } + } else if (mp->multiindex == 3 && + (mp->priority == ujis || + mp->io.right == ujis || mp->io.right == japanese) && + ISUJISKANJISUP(mp->multibuf[0], mp->multibuf[1], c)) + { + mp->cs = JISX0212KANJISUP; + mp->priority = ujis; + mp->icharset = UJIS; + internalize_ujis(mp); + return; + } +#endif + rebuffering_multi(mp); +} + +/* + * Check routines + */ +static int check_ft(mp, c, type, plane) +MULBUF *mp; +register int c; +int type; +int *plane; +{ + if (mp->io.left == jis) + { + /* + * If the target code system is traditional jis, + * allow only JIS C6226-1978, JIS X0208-1983, JIS X0208-1990, + * JIS X0213-2000, JIS X0212-1990, ASCII, + * JIS X0201 right, and JIS X0201 left. + */ + if ((type == TYPE_94N_CHARSET && + (c == '@' || c == 'B' || c == 'D' || + c == 'O' || c == 'P')) || + (type == TYPE_94_CHARSET && + (c == 'B' || c == 'I' || c == 'J'))) + { + *plane = (mp->irr ? IRR2CS(mp->irr) : 0) | + TYPE2CS(type) | FT2CS(c); + mp->irr = 0; + mp->eseq = NOESC; + return (0); + } + } else if (0x30 <= c && c <= 0x7e) + { + /* + * Otherwise, accept all. + */ + *plane = (mp->irr ? IRR2CS(mp->irr) : 0) | + TYPE2CS(type) | FT2CS(c); + mp->irr = 0; + mp->eseq = NOESC; + return (0); + } + return (-1); +} + +static int check_irr(mp, c) +MULBUF *mp; +register int c; +{ + if (0x40 <= c && c <= 0x7e) + { + mp->irr = CODE2IRR(c); + mp->eseq = NOESC; + return (0); + } + return (-1); +} + +static void fix_status_for_escape_sequence(mp) +MULBUF *mp; +{ + if (mp->eseq == NOESC) + { + switch (CS2TYPE(mp->sg ? *mp->sg : *mp->gl)) + { + case TYPE_96_CHARSET: + case TYPE_96N_CHARSET: + change_control_char(0177, 0); + break; + case TYPE_94_CHARSET: + case TYPE_94N_CHARSET: + change_control_char(0177, 1); + break; + } + switch (CS2TYPE(mp->sg ? 
*mp->sg : *mp->gr)) + { + case TYPE_96_CHARSET: + case TYPE_96N_CHARSET: + change_control_char(0377, 0); + break; + case TYPE_94_CHARSET: + case TYPE_94N_CHARSET: + change_control_char(0377, 1); + break; + } + } +} + +static int check_escape_sequence(mp) +MULBUF *mp; +{ + int c = mp->multibuf[mp->multiindex - 1]; + + switch (mp->eseq) + { + case ESC_: + switch (c) + { + case '$': mp->eseq = ESC_2_4; break; + case '&': mp->eseq = ESC_2_6; break; + case '(': mp->eseq = ESC_2_8; break; + case ')': mp->eseq = ESC_2_9; break; + case '*': mp->eseq = ESC_2_10; break; + case '+': mp->eseq = ESC_2_11; break; + case ',': mp->eseq = ESC_2_12; break; + case '-': mp->eseq = ESC_2_13; break; + case '.': mp->eseq = ESC_2_14; break; + case '/': mp->eseq = ESC_2_15; break; + case 'N': mp->sg = &mp->g2; mp->eseq = NOESC; /*SS2*/break; + case 'O': mp->sg = &mp->g3; mp->eseq = NOESC; /*SS3*/break; + case 'n': mp->gl = &mp->g2; mp->eseq = NOESC; break; + case 'o': mp->gl = &mp->g3; mp->eseq = NOESC; break; + case '|': if (mp->io.right != iso8) goto wrong; + mp->gr = &mp->g3; mp->eseq = NOESC; break; + case '}': if (mp->io.right != iso8) goto wrong; + mp->gr = &mp->g2; mp->eseq = NOESC; break; + case '~': if (mp->io.right != iso8) goto wrong; + mp->gr = &mp->g1; mp->eseq = NOESC; break; + default: goto wrong; + } + break; + case ESC_2_4: + switch (c) + { + case '(': mp->eseq = ESC_2_4_8; break; + case ')': mp->eseq = ESC_2_4_9; break; + case '*': mp->eseq = ESC_2_4_10; break; + case '+': mp->eseq = ESC_2_4_11; break; + case '-': mp->eseq = ESC_2_4_13; break; + case '.': mp->eseq = ESC_2_4_14; break; + case '/': mp->eseq = ESC_2_4_15; break; + case '@': + case 'A': + case 'B': if (check_ft(mp, c, TYPE_94N_CHARSET, &mp->g0) == 0) + break; + default: goto wrong; + } + break; + case ESC_2_6: + if (check_irr(mp, c) == 0) + break; + goto wrong; + case ESC_2_8: + if (check_ft(mp, c, TYPE_94_CHARSET, &mp->g0) == 0) + break; + goto wrong; + case ESC_2_9: + if (check_ft(mp, c, TYPE_94_CHARSET, 
&mp->g1) == 0) + break; + goto wrong; + case ESC_2_10: + if (check_ft(mp, c, TYPE_94_CHARSET, &mp->g2) == 0) + break; + goto wrong; + case ESC_2_11: + if (check_ft(mp, c, TYPE_94_CHARSET, &mp->g3) == 0) + break; + goto wrong; + case ESC_2_12: + if (check_ft(mp, c, TYPE_96_CHARSET, &mp->g0) == 0) + break; + goto wrong; + case ESC_2_13: + if (check_ft(mp, c, TYPE_96_CHARSET, &mp->g1) == 0) + break; + goto wrong; + case ESC_2_14: + if (check_ft(mp, c, TYPE_96_CHARSET, &mp->g2) == 0) + break; + goto wrong; + case ESC_2_15: + if (check_ft(mp, c, TYPE_96_CHARSET, &mp->g3) == 0) + break; + goto wrong; + case ESC_2_4_8: + if (check_ft(mp, c, TYPE_94N_CHARSET, &mp->g0) == 0) + break; + goto wrong; + case ESC_2_4_9: + if (check_ft(mp, c, TYPE_94N_CHARSET, &mp->g1) == 0) + break; + goto wrong; + case ESC_2_4_10: + if (check_ft(mp, c, TYPE_94N_CHARSET, &mp->g2) == 0) + break; + goto wrong; + case ESC_2_4_11: + if (check_ft(mp, c, TYPE_94N_CHARSET, &mp->g3) == 0) + break; + goto wrong; + case ESC_2_4_13: + if (check_ft(mp, c, TYPE_96N_CHARSET, &mp->g1) == 0) + break; + goto wrong; + case ESC_2_4_14: + if (check_ft(mp, c, TYPE_96N_CHARSET, &mp->g2) == 0) + break; + goto wrong; + case ESC_2_4_15: + if (check_ft(mp, c, TYPE_96N_CHARSET, &mp->g3) == 0) + break; + goto wrong; + case NOESC: + /* + * These sequences are wrong if bytes are currently being buffered.
+ */ + if (mp->multiindex != 1) + { + switch (c) + { + case 0033: + case 0016: + case 0017: + case 0031: goto wrong; + case 0216: + case 0217: if (mp->io.right == iso8) goto wrong; + default: goto wrongone; + } + } + switch (c) + { + case 0033: mp->eseq = ESC_; break; + case 0016: mp->gl = &mp->g1; mp->eseq = NOESC; break; + case 0017: mp->gl = &mp->g0; mp->eseq = NOESC; break; + case 0031: mp->sg = &mp->g2; mp->eseq = NOESC; /*SS2*/ break; + case 0216: if (mp->io.right != iso8) goto wrongone; + mp->sg = &mp->g2; mp->eseq = NOESC; /*SS2*/ break; + case 0217: if (mp->io.right != iso8) goto wrongone; + mp->sg = &mp->g3; mp->eseq = NOESC; /*SS3*/ break; + default: goto wrongone; + } + break; + default: + assert(0); + } + if (mp->eseq == NOESC) + { + fix_status_for_escape_sequence(mp); + mp->multiindex = 0; + return (0); + } + return (0); +wrong: + if (mp->eseq != NOESC) + { + mp->eseq = NOESC; + fix_status_for_escape_sequence(mp); + } + assert(mp->multiindex != 1); + rebuffering_multi(mp); + return (0); +wrongone: + assert(mp->eseq == NOESC); + return (-1); +} + +struct planeset { /* maps a symbolic plane-set name to the designation string that set_planeset() parses; "\\e" (or "\\E") in the string stands for ESC */ + char *name; + char *planeset; +} planesets[] = { + { "ascii", "" }, + { "ctext", "\\e-A" }, + { "latin1", "\\e-A" }, + { "latin2", "\\e-B" }, + { "latin3", "\\e-C" }, + { "latin4", "\\e-D" }, + { "greek", "\\e-F" }, + { "alabic", "\\e-G" }, { "arabic", "\\e-G" }, /* "alabic" is the historical misspelling, kept as an alias */ + { "hebrew", "\\e-H" }, + { "cyrillic", "\\e-L" }, + { "latin5", "\\e-M" }, + { "japanese", "\\e$)B\\e*I\\e$+D" }, + { "ujis", "\\e$)B\\e*I\\e$+D" }, + { "euc", "\\e$)B\\e*I\\e$+D" }, + { NULL, "" } +}; + +int set_planeset(name) +register char *name; +{ + register struct planeset *p; + MULBUF *mp; + int ret; + + if (name == NULL) + return -1; + for (p = planesets; p->name != NULL; p++) + if (strcmp(name, p->name) == 0) + { + name = p->planeset; + break; + } + mp = new_multi(); + init_priority(mp); + while (*name) + { + if (*name == '\\' && + (*(name + 1) == 'e' || *(name + 1) == 'E')) + { + mp->multibuf[mp->multiindex++] = '\033'; + ret =
check_escape_sequence(mp); + name += 2; + } else + { + mp->multibuf[mp->multiindex++] = *name++; + ret = check_escape_sequence(mp); + } + if (ret < 0 || mp->intindex > 0) + { + free(mp); + return -1; + } + } + def_g0 = mp->g0; + def_g1 = mp->g1; + def_g2 = mp->g2; + def_g3 = mp->g3; + free(mp); + return 0; +} + +void init_def_codesets(left, right, out) +CODESET left; +CODESET right; +CODESET out; +{ + def_left = left; + def_right = right; + output = out; +} + +void init_def_priority(pri) +CODESET pri; +{ +#if JAPANESE + assert(pri == sjis || pri == ujis); + def_priority = pri; +#endif +} + +void init_priority(mp) +MULBUF *mp; +{ +#if JAPANESE + if (mp->io.right == sjis) + mp->priority = sjis; + else if (mp->io.right == ujis) + mp->priority = ujis; + else if (mp->io.right == japanese) + mp->priority = def_priority; + else + mp->priority = noconv; + mp->sequence_counter = 0; +#endif +} + +CODESET get_priority(mp) +MULBUF *mp; +{ +#if JAPANESE + return (mp->priority); +#else + return (noconv); +#endif +} + +void set_priority(mp, pri) +MULBUF *mp; +CODESET pri; +{ +#if JAPANESE + assert(pri == sjis || pri == ujis || pri == noconv); + mp->priority = pri; +#endif +} + +MULBUF *new_multi() +{ + MULBUF *mp = (MULBUF*) ecalloc(1, sizeof(MULBUF)); + mp->io.left = def_left; + mp->io.right = def_right; + mp->orig_io_right = def_right; + mp->rotation_io_right = 0; + mp->eseq = NOESC; + init_multi(mp); + return (mp); +} + +void clear_multi(mp) +MULBUF *mp; +{ + mp->multiindex = 0; + mp->intindex = 0; + mp->bufbytes = 0; +} + +void init_multi(mp) +MULBUF *mp; +{ + if (mp->eseq != NOESC) + { + mp->eseq = NOESC; + fix_status_for_escape_sequence(mp); + } + mp->cs = ASCII; + mp->g0 = def_g0; + mp->g1 = def_g1; + mp->g2 = def_g2; + mp->g3 = def_g3; + mp->gl = &mp->g0; + mp->gr = &mp->g1; + mp->sg = NULL; + mp->irr = 0; +#if JAPANESE + mp->sequence_counter = 0; +#endif + mp->icharset = ASCII; + clear_multi(mp); +} + +/* + * Buffering characters untile get a guarantee that it is right 
sequence. + */ +static void buffering_multi_internal(mp, c) +MULBUF *mp; +int c; +{ + mp->multibuf[mp->multiindex] = c; + mp->multiindex++; + mp->bufbytes++; + + if (mp->io.left == jis || mp->io.left == iso7 || + mp->io.right == iso8) + if (check_escape_sequence(mp) == 0) + return; /* the escape-sequence parser handled the byte */ + + /* it is not an escape sequence; try to use it as a character */ + internalize(mp); + + /* + * If a character was detected in internalize() (multiindex is back to 0), + * clear sg, since a single shift affects only one character. + */ + if (mp->multiindex == 0 && mp->sg) + { + mp->sg = NULL; + fix_status_for_escape_sequence(mp); + } +} + +static void rebuffering_multi(mp) /* give up on multibuf[0]: emit it as a WRONGCS byte, then feed the remaining buffered bytes through buffering_multi_internal() again */ +MULBUF *mp; +{ + int i; + int oldindex; + + /* + * The buffered data could not be parsed, so clear the single-shifted + * character set: it affects only one character, and that character + * has just been detected as wrong. + */ + if (mp->sg) + { + mp->sg = NULL; + fix_status_for_escape_sequence(mp); + } + + /* + * Emit the first buffered byte as wrong multi-byte data (WRONGCS). + */ + assert(mp->intindex + 1 <= (int)sizeof(mp->multiint)); + mp->multiint[mp->intindex] = mp->multibuf[0]; + mp->multics[mp->intindex] = WRONGCS; + mp->intindex++; + +#if JAPANESE + /* + * Quick Japanese code hack. + * Check whether the byte just emitted is SJIS KANA. If so, the earlier + * guess about the encoding has failed: re-tag it as JISX0201KANA and prefer sjis. + */ + if ((mp->priority == sjis || + mp->io.right == sjis || mp->io.right == japanese) && + ISSJISKANA(mp->multiint[mp->intindex - 1])) + { + mp->cs = JISX0201KANA; + mp->priority = sjis; + mp->icharset = SJIS; + mp->multiint[mp->intindex - 1] &= 0x7f; + mp->multics[mp->intindex - 1] = mp->cs; + } +#endif + + /* + * Re-parse the rest of the buffered data, starting from the second byte. + */ + oldindex = mp->multiindex; + mp->bufbytes -= mp->multiindex - 1; + mp->multiindex = 0; + for (i = 1; i < oldindex; i++) + buffering_multi_internal(mp, mp->multibuf[i]); +} + +/* + * Buffering characters untile get a guarantee that it is right sequence.
+ */ +void buffering_multi(mp, c, strbuf, csbuf, length) +MULBUF *mp; +int c; +unsigned char **strbuf; +CHARSET **csbuf; +unsigned int *length; +{ + assert(mp->intindex == 0); + mp->bufbytes = mp->multiindex; + + if (c < 0) + { + /* + * A negative c is a flush request: drop any pending escape sequence and push every buffered byte out through rebuffering_multi(). + */ + if (mp->eseq != NOESC) + { + mp->eseq = NOESC; + fix_status_for_escape_sequence(mp); + } + while (mp->multiindex) + { + rebuffering_multi(mp); + } + } else + { + /* + * Append c to the buffer and try to parse what is buffered. + */ + buffering_multi_internal(mp, c); + } + + *strbuf = mp->multiint; + *csbuf = mp->multics; + *length = mp->intindex; + mp->intindex = 0; +} + +/* + * Feed c through buffering_multi() and discard the converted output. This routine is used for chopping lines. + */ +void parsing_multi(mp, c) +MULBUF *mp; +int c; +{ + unsigned char *strbuf; + CHARSET *csbuf; + unsigned int length; + + buffering_multi(mp, c, &strbuf, &csbuf, &length); +} + +void set_codesets(mp, left, right) /* set the left and right input code sets of this buffer */ +MULBUF *mp; +CODESET left; +CODESET right; +{ + mp->io.left = left; + mp->io.right = right; +} + +/* + * Return the buffered-byte counter (mp->bufbytes). + */ +int get_bufbytes(mp) +MULBUF *mp; +{ + return (mp->bufbytes); +} + +/* + * Overwrite the buffered-byte counter (mp->bufbytes) with bufbytes. + */ +void set_bufbytes(mp, bufbytes) +MULBUF *mp; +int bufbytes; +{ + mp->bufbytes = bufbytes; +} + +/* + * Return string representation about multi bytes character + * which was buffered.
+ */ +char *get_icharset_string(mp) +MULBUF *mp; +{ + static char buf[10]; + + switch (mp->icharset) + { +#if JAPANESE + /* + * Code set + */ + case SJIS: return ("SJIS"); + case UJIS: return ("UJIS"); +#endif + /* + * Character set + */ + case ASCII: return ("ASCII"); + case JISX0201KANA: return ("JIS-KANA"); + case JISX0201ROMAN: return ("JIS-ROMAN"); + case LATIN1: return ("LATIN1"); + case LATIN2: return ("LATIN2"); + case LATIN3: return ("LATIN3"); + case LATIN4: return ("LATIN4"); + case GREEK: return ("GREEK"); + case ARABIC: return ("ARABIC"); + case HEBREW: return ("HEBREW"); + case CYRILLIC: return ("CYRILLIC"); + case LATIN5: return ("LATIN5"); + case JISX0208_78KANJI: return ("JIS-78KANJI"); + case GB2312: return ("GB2312"); + case JISX0208KANJI: return ("JIS-83KANJI"); + case JISX0208_90KANJI: return ("JIS-90KANJI"); + case KSC5601: return ("KSC5601"); + case JISX0212KANJISUP: return ("JIS-KANJISUP"); + case JISX0213KANJI1: return ("JISX0213KANJI1"); + case JISX0213KANJI2: return ("JISX0213KANJI2"); + } + switch (CS2TYPE(mp->icharset)) + { + case TYPE_94_CHARSET: + strcpy(buf, "94( )"); + buf[3] = CS2FT(mp->icharset); + break; + case TYPE_96_CHARSET: + strcpy(buf, "96( )"); + buf[3] = CS2FT(mp->icharset); + break; + case TYPE_94N_CHARSET: + strcpy(buf, "94N( )"); + buf[4] = CS2FT(mp->icharset); + break; + case TYPE_96N_CHARSET: + strcpy(buf, "96N( )"); + buf[4] = CS2FT(mp->icharset); + break; + default: + assert(0); + } + if (CS2IRR(mp->icharset) > 0) + { + char num[3]; + sprintf(num, "%d", CS2IRR(mp->icharset)); + strcat(buf, num); + } + return (buf); +} + +static int old_output_charset = ASCII; /* Last displayed character set */ + +static unsigned char *make_escape_sequence(charset) +int charset; +{ + static unsigned char p[9]; + int len; + + if (CSISWRONG(charset)) + { + charset = ASCII; + } + + p[0] = '\033'; + len = 1; + if ((output == iso7 || output == iso8) && CS2IRR(charset) > 0) + { + p[len] = '&'; + p[len + 1] = IRR2CODE(CS2IRR(charset)); + 
p[len + 2] = '\033'; + len += 3; + } + switch (CS2TYPE(charset)) + { + case TYPE_94_CHARSET: + p[len] = '('; + p[len + 1] = CS2FT(charset); + len += 2; + break; + case TYPE_94N_CHARSET: + switch (CS2FT(charset)) + { + case '@': + case 'A': + case 'B': + p[len] = '$'; + p[len + 1] = CS2FT(charset); + len += 2; + break; + default: + p[len] = '$'; + p[len + 1] = '('; + p[len + 2] = CS2FT(charset); + len += 3; + break; + } + break; + case TYPE_96_CHARSET: + p[len] = '-'; + p[len + 1] = CS2FT(charset); + len += 2; + break; + case TYPE_96N_CHARSET: + p[len] = '$'; + p[len + 1] = '-'; + p[len + 2] = CS2FT(charset); + len += 3; + break; + } + if (output != iso8) + { + switch (CS2TYPE(charset)) + { + case TYPE_94_CHARSET: + case TYPE_94N_CHARSET: + switch (CS2TYPE(old_output_charset)) + { + case TYPE_96_CHARSET: + case TYPE_96N_CHARSET: + p[len] = '\017'; + len++; + } + break; + case TYPE_96_CHARSET: + case TYPE_96N_CHARSET: + switch (CS2TYPE(old_output_charset)) + { + case TYPE_94_CHARSET: + case TYPE_94N_CHARSET: + p[len] = '\016'; + len++; + } + break; + } + } + p[len] = '\0'; + return (p); +} + +static char cvbuffer[32]; +static int cvindex = 0; +static char *nullcvbuffer = ""; + + +static char *convert_to_iso(c, cs) +int c; +int cs; +{ + register unsigned char *p; + static char buffer2[2]; + + if (output == iso8 && c != 0 && + (CS2TYPE(cs) == TYPE_96_CHARSET || + CS2TYPE(cs) == TYPE_96N_CHARSET)) + c |= 0x80; + + buffer2[0] = c; + buffer2[1] = '\0'; + + if (CSISREST(cs)) + { + return (buffer2); + } + if (CSISWRONG(cs)) + { + cs = ASCII; + } + + cs = CS2CHARSET(cs); + + if (cs == old_output_charset) + { + return (buffer2); + } + else + { + p = make_escape_sequence(cs); + old_output_charset = cs; + strcpy(cvbuffer, p); + strcat(cvbuffer, buffer2); + return (cvbuffer); + } +} + +static char *convert_to_jis(c, cs) +int c; +int cs; +{ + register unsigned char *p; + static char buffer2[3]; + + if (c == 0) + { + cvindex = 0; + return (nullcvbuffer); + } + + buffer2[cvindex++] 
= c; + buffer2[cvindex] = '\0'; + + if (CSISWRONG(cs)) + { + cs = ASCII; + } + + cs = CS2CHARSET(cs); + + if (cs == ASCII || cs == JISX0201ROMAN) + { + assert(cvindex == 1); + cvindex = 0; + } else if (cs == JISX0201KANA) + { + assert(cvindex == 1); + cvindex = 0; + } else if (cs == JISX0208_78KANJI) + { + if (cvindex == 1) + return (nullcvbuffer); + assert(cvindex == 2); + jis78to90(buffer2); + cs = JISX0208_90KANJI; + cvindex = 0; + } else if (cs == JISX0208KANJI || cs == JISX0208_90KANJI) + { + if (cvindex == 1) + return (nullcvbuffer); + assert(cvindex == 2); + cvindex = 0; + } else if (cs == JISX0213KANJI1) + { + if (cvindex == 1) + return (nullcvbuffer); + assert(cvindex == 2); + cvindex = 0; + cs = JISX0208KANJI; + } else + { + assert(0); + cvindex = 0; + } + + if (cs == old_output_charset) + { + return (buffer2); + } + else + { + p = make_escape_sequence(cs); + old_output_charset = cs; + strcpy(cvbuffer, p); + strcat(cvbuffer, buffer2); + return (cvbuffer); + } +} + +#if JAPANESE +static char *convert_to_ujis(c, cs) +int c; +int cs; +{ + if (c == 0) + { + cvindex = 0; + return (nullcvbuffer); + } + + cvbuffer[cvindex++] = c; + cvbuffer[cvindex] = '\0'; + + if (CSISWRONG(cs)) + { + cs = ASCII; + } + + cs = CS2CHARSET(cs); + + if (cs == ASCII || cs == JISX0201ROMAN) + { + assert(cvindex == 1); + cvindex = 0; + return (cvbuffer); + } else if (cs == JISX0201KANA) + { + assert(cvindex == 1); + cvbuffer[2] = '\0'; + cvbuffer[1] = cvbuffer[0] | 0x80; + cvbuffer[0] = 0x8e; + cvindex = 0; + return (cvbuffer); + } else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI || + cs == JISX0208_90KANJI || cs == JISX0213KANJI1) + { + if (cvindex == 1) + return (nullcvbuffer); + assert(cvindex == 2); + if (cs == JISX0208_78KANJI) + jis78to90(cvbuffer); + cvbuffer[0] |= 0x80; + cvbuffer[1] |= 0x80; + cvindex = 0; + return (cvbuffer); + } else if (cs == JISX0212KANJISUP) + { + if (cvindex == 1) + return (nullcvbuffer); + assert(cvindex == 2); + cvbuffer[2] = cvbuffer[1] | 0x80; 
+ cvbuffer[1] = cvbuffer[0] | 0x80; + cvbuffer[0] = 0x8f; + cvbuffer[3] = '\0'; + cvindex = 0; + return (cvbuffer); + } else if (cs == JISX0213KANJI2) + { + if (cvindex == 1) + return (nullcvbuffer); + assert(cvindex == 2); + cvbuffer[2] = cvbuffer[1] | 0x80; + cvbuffer[1] = cvbuffer[0] | 0x80; + cvbuffer[0] = 0x8f; + cvbuffer[3] = '\0'; + cvindex = 0; + return (cvbuffer); + } + assert(0); + cvindex = 0; + return (cvbuffer); +} + +static char *convert_to_sjis(c, cs) +int c; +int cs; +{ + if (c == 0) + { + cvindex = 0; + return (nullcvbuffer); + } + + cvbuffer[cvindex++] = c; + cvbuffer[cvindex] = '\0'; + + if (CSISWRONG(cs)) + { + cs = ASCII; + } + + cs = CS2CHARSET(cs); + + if (cs == ASCII || cs == JISX0201ROMAN) + { + assert(cvindex == 1); + cvindex = 0; + return (cvbuffer); + } else if (cs == JISX0201KANA) + { + assert(cvindex == 1); + cvbuffer[0] |= 0x80; + cvindex = 0; + return (cvbuffer); + } else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI || + cs == JISX0208_90KANJI || cs == JISX0213KANJI1) + { + register int c1, c2, c3; + static unsigned char table[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, +#if SJIS0213 + 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, +#else + 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0, +#endif + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, +#if SJIS0213 + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, +#else + 0xE8, 0xE9, 0xEA, 0, 0, 0, 0, 0, +#endif + }; + + if (cvindex == 1) + return (nullcvbuffer); + assert(cvindex == 2); + if (cs == JISX0208_78KANJI) + jis78to90(cvbuffer); + c3 = cvbuffer[0] & 0x7f; + c1 = c3 & 1; + c2 = (cvbuffer[1] & 0x7f) + (c1 ? 0x40 - 0x21 : 0x9e - 0x21); + c1 = table[c3 / 2 + c1]; + cvbuffer[0] = c1; + cvbuffer[1] = c2 + (c2 >= 0x7f ? 
1 : 0); + cvindex = 0; + return (cvbuffer); + } else if (cs == JISX0213KANJI2) + { + register int c1, c2, c3; + if (cvindex == 1) + return (nullcvbuffer); + assert(cvindex == 2); + c3 = cvbuffer[0] & 0x7f; + c1 = c3 & 1; + c2 = (cvbuffer[1] & 0x7f) + + (c1 ? 0x40 - 0x21 : 0x9e - 0x21); + if (c3 <= 0x25) { + /* Map 1, 3, 4, and 5-KU */ + /* Note: 2-KU is rejected already. */ + c1 = (c3 - 0x21) / 2 + 0xf0; + } else if (c3 == 0x28) { + /* Map 8-KU */ + c1 = 0xf0; + } else if (c3 <= 0x2f) { + /* Map 12, 13, 14, and 15-KU */ + c1 = (c3 - 0x2b) / 2 + 0xf2; + } else { + /* Map 78-94 KU. */ + /* Note: 16-77 KU is rejected already. */ + c1 = (c3 - 0x6d) / 2 + 0xf4; + } + cvbuffer[0] = c1; + cvbuffer[1] = c2 + (c2 >= 0x7f ? 1 : 0); + cvindex = 0; + return (cvbuffer); + } + assert(0); + cvindex = 0; + return (cvbuffer); +} +#endif + +char *outchar(c, cs) +int c; +CHARSET cs; +{ + if (c < 0) + { + c = 0; + cs = ASCII; + } + + if (output == iso7 || output == iso8) + return (convert_to_iso(c, cs)); + if (output == jis) + return (convert_to_jis(c, cs)); +#if JAPANESE + if (output == ujis) + return (convert_to_ujis(c, cs)); + if (output == sjis) + return (convert_to_sjis(c, cs)); +#endif + cvbuffer[0] = c; + cvbuffer[1] = '\0'; + return (cvbuffer); +} + +char *outbuf(p, cs) /* convert the NUL-terminated string p, every byte tagged with cs, through outchar(); returns a static buffer that the next call overwrites, truncated if the result does not fit */ +unsigned char *p; +CHARSET cs; +{ + static char buffer[1024]; + char *s; + int i = 0; + + while (*p != '\0') /* keep feeding outchar() even once buffer[] is full so its multi-byte state stays in step with the input */ + { + s = outchar(*p++, cs); + while (*s != '\0' && i < (int)sizeof(buffer) - 1) /* drop what does not fit instead of writing past buffer[]; one byte is reserved for the terminator */ + buffer[i++] = *s++; + assert(i < (int)sizeof(buffer)); + } + buffer[i] = '\0'; + return (buffer); +} + +int mwidth(c, cs) +int c; +CHARSET cs; +{ + if (CSISREST(cs)) + return (0); + switch (CS2TYPE(cs)) + { + case TYPE_94_CHARSET: + case TYPE_96_CHARSET: + return (1); + case TYPE_94N_CHARSET: + case TYPE_96N_CHARSET: + return (2); + default: + assert(0); + return (0); + } +} + +char *rotate_right_codeset(mp) +MULBUF *mp; +{ + char *p = NULL; + + mp->rotation_io_right++; + mp->rotation_io_right %= 7; + switch (mp->rotation_io_right) { + case 0: p =
"original"; mp->io.right = mp->orig_io_right; break; + case 1: p = "japanese"; mp->io.right = japanese; break; + case 2: p = "ujis"; mp->io.right = ujis; break; + case 3: p = "sjis"; mp->io.right = sjis; break; + case 4: p = "iso8"; mp->io.right = iso8; break; + case 5: p = "noconv"; mp->io.right = noconv; break; + case 6: p = "none"; mp->io.right = none; break; + default: assert(0); break; + } + init_priority(mp); + return (p); +} + +#endif + +int strlen_cs(str, cs) +char* str; +CHARSET* cs; +{ + int i = 0; + if (cs == NULL) + return strlen(str); + while (*str != NULCH || !CSISNULLCS(*cs)) { + str++; + cs++; + i++; + } + return i; +} + +int chlen_cs(chstr, cs) +char* chstr; +CHARSET* cs; +{ + int i; + if (cs == NULL) + { + if (chstr == NULL || *chstr == NULCH) + return 0; + else + return 1; + } + if (*chstr == NULCH && CSISNULLCS(*cs)) + return 0; + i = 0; + do { + i++; + cs++; + } while (CSISREST(*cs)); + return i; +} + +char* strdup_cs(str, cs, csout) +char* str; +CHARSET* cs; +CHARSET** csout; +{ + int len = strlen_cs(str, cs); + char* save_str = (char *)ecalloc(len + 1, 1); + CHARSET* save_cs = (CHARSET *)ecalloc(len + 1, sizeof(CHARSET)); + memcpy(save_str, str, sizeof(char) * (len + 1)); + if (cs) + memcpy(save_cs, cs, sizeof(CHARSET) * (len + 1)); + else { + cs = save_cs; + while (--len >= 0) + *cs++ = ASCII; + *cs = NULLCS; + } + *csout = save_cs; + return save_str; +} Index: less/multi.h diff -u /dev/null less/multi.h:1.16 --- /dev/null Wed Dec 6 22:29:44 2000 +++ less/multi.h Tue Nov 21 09:01:27 2000 @@ -0,0 +1,235 @@ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice in the documentation and/or other materials provided with + * the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +/* + * Definition of the values that specify a character set. + * Also defines some well-known character sets and the types of set. + */ +typedef unsigned short CHARSET; + +/* + * The structure of CHARSET: + * + * 151413121110 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |r|    IRR    |m|n|      F      | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * r: true if it is not the first byte of a multi-byte character. + * IRR: the identification of revisions of registered character + * sets (IRR), stored as 00/01 to 03/15; 00/00 means no IRR. + * The stored values 00/01 to 03/15 stand for the real IRR bytes, + * which are coded from 04/00 to 07/14. + * m: true if it is a byte of a multi-byte character (94x94 or 96x96 set). + * n: true if it is one of 96 or 96x96 graphic sets, otherwise it is one + * of 94 or 94x94 graphic sets. + * F: the final byte, which selects one of the graphic sets; stored as + * 00/00 to 04/14. The real final byte is coded from 03/00 to 07/14.
+ */ + +#define REST_MASK 0x8000 /* r */ +#define CSISHEAD(cs) (!((cs) & REST_MASK)) +#define CSISREST(cs) ((cs) & REST_MASK) + +#define IRR_MASK 0x7e00 /* IRR */ +#define IRR_SHIFT 9 +#define CS2IRR(cs) (((cs) & IRR_MASK) >> IRR_SHIFT) +#define IRR2CS(irr) (((irr) << IRR_SHIFT) & IRR_MASK) + +#define CODE_MASK 0x003f /* coded IRR in ISO 2022 */ +#define CODE_DIFF 0x0040 +#define IRR2CODE(irr) ((((irr) - 1) & CODE_MASK) + CODE_DIFF) +#define CODE2IRR(code) ((((code) - CODE_DIFF) & CODE_MASK) + 1) + +#define TYPE_94_CHARSET 0x0000 /* m & n */ +#define TYPE_96_CHARSET 0x0080 +#define TYPE_94N_CHARSET 0x0100 +#define TYPE_96N_CHARSET 0x0180 +#define TYPE_MASK 0x0180 +#define CS2TYPE(cs) ((cs) & TYPE_MASK) +#define TYPE2CS(type) ((type) & TYPE_MASK) + +#define FT_MASK 0x007f /* F */ +#define FT_DIFF 0x0030 +#define CS2FT(cs) (((cs) & FT_MASK) + FT_DIFF) +#define FT2CS(ft) (((ft) - FT_DIFF) & FT_MASK) + +/* + * Each character sets is represented by IRR, TYPE and FT. + */ +#define CHARSET_MASK (IRR_MASK | TYPE_MASK | FT_MASK) +#define CS2CHARSET(cs) ((cs) & CHARSET_MASK) + +/* + * There is a reserved empty set in every type of charset. 07/14. + * So we cannot use (CS2CHARSET(cs) == WRONGCS) to check it. + */ +#define CSISWRONG(cs) (CS2FT(cs) == '~') + +/* + * List of representative character sets. 
+ */ +#define ASCII (TYPE_94_CHARSET | FT2CS('B')) +#define WRONGCS (TYPE_94_CHARSET | FT2CS('~')) +#if ISO +#define JISX0201KANA (TYPE_94_CHARSET | FT2CS('I')) +#define JISX0201ROMAN (TYPE_94_CHARSET | FT2CS('J')) +#define LATIN1 (TYPE_96_CHARSET | FT2CS('A')) +#define LATIN2 (TYPE_96_CHARSET | FT2CS('B')) +#define LATIN3 (TYPE_96_CHARSET | FT2CS('C')) +#define LATIN4 (TYPE_96_CHARSET | FT2CS('D')) +#define GREEK (TYPE_96_CHARSET | FT2CS('F')) +#define ARABIC (TYPE_96_CHARSET | FT2CS('G')) +#define HEBREW (TYPE_96_CHARSET | FT2CS('H')) +#define CYRILLIC (TYPE_96_CHARSET | FT2CS('L')) +#define LATIN5 (TYPE_96_CHARSET | FT2CS('M')) +#define JISX0208_78KANJI (TYPE_94N_CHARSET | FT2CS('@')) +#define GB2312 (TYPE_94N_CHARSET | FT2CS('A')) +#define JISX0208KANJI (TYPE_94N_CHARSET | FT2CS('B')) +#define JISX0208_90KANJI (IRR2CS(1) | TYPE_94N_CHARSET | FT2CS('B')) +#define KSC5601 (TYPE_94N_CHARSET | FT2CS('C')) +#define JISX0212KANJISUP (TYPE_94N_CHARSET | FT2CS('D')) +#define JISX0213KANJI1 (TYPE_94N_CHARSET | FT2CS('O')) +#define JISX0213KANJI2 (TYPE_94N_CHARSET | FT2CS('P')) +#if JAPANESE +/* + * Special number for Japanese code set. Only input_set use following with + * above definitions. The 07/15 is not valid for F. Thus I use it to + * indicate the special character sets. + */ +#define SJIS (IRR2CS(1) | TYPE_94N_CHARSET | FT_MASK) +#define UJIS (IRR2CS(2) | TYPE_94N_CHARSET | FT_MASK) +#endif +#endif + +/* + * List of special characters and character set for it. + * + * A terminator of string with character set is represented by + * both a NULCH and a NULLCS. A padding character in string with + * character set is represented by both a PADCH and a NULLCS. A + * binary data '\0' and '\1' are represented by both '\0' and a + * WRONGCS, and both '\1' and a WRONGCS respectively. + */ +#define NULCH ('\0') +#define PADCH ('\1') +#define NULLCS (ASCII) + +/* + * Macros for easy checking. 
+ */ +#define CSISASCII(cs) (CS2CHARSET(cs) == ASCII) +#define CSISNULLCS(cs) (CS2CHARSET(cs) == NULLCS) + + +/* + * A CHARVAL packs a character (the low 8 * sizeof(char) bits) together with its CHARSET (the bits above it). + */ +typedef int CHARVAL; + +#define MAKECV(ch, cs) (((cs) << 8 * sizeof(char)) | (ch)) +#define CV2CH(cv) ((cv) & ((1 << 8 * sizeof(char)) - 1)) +#define CV2CS(cv) ((cv) >> 8 * sizeof(char)) + + +/* + * Definition of code sets. A code set is not a character set. + * It only names an encoding, and we use these values when we + * decide what the input data are. + */ +typedef enum { + /* code sets for left, right and output plane */ + noconv, /* A code set which doesn't need converting */ + /* code sets for left and output plane */ + jis, /* A subset of ISO 2022 */ + /* + * It may contain JIS C 6226-1978, JIS X 0208-1983, + * JIS X 0208:1990/1997, JIS X 0212:1990, JIS X 0213:2000, + * JIS X 0201:1976/1997 left/right planes, and ASCII as input. + * + * In the case of output, this means all JIS C 6226-1978, + * JIS X 0208-1983, JIS X 0208:1990/1997, and JIS X 0213:2000 + * are converted into JIS X 0208-1983 encode with an assumption + * that character set of JIS X 0208-1983 encode is + * JIS X 0213:2000. And JIS X 0212:1990 and 2nd plane of + * JIS X 0213:2000 are rejected when output. + * + * If you need the same code as the output, please use iso7 + * or iso8. + */ + iso7, /* A code set which is extended by iso2022 */ + /* code sets for only right plane */ + none, /* No code set */ + japanese, /* Both of UJIS and SJIS */ + /* code sets for right and output plane */ + ujis, /* Japanese code set named UJIS */ + sjis, /* Japanese code set named SJIS */ + iso8 /* A code set which is extended by iso2022 */ +} CODESET; + + +/* + * struct multibuf is an internal data structure of multi.c. + * Only its name is declared here.
+ */ +typedef struct multibuf MULBUF; + + +/* + * in multi.c + */ +extern int set_planeset (); +extern void init_def_codesets (); +extern void init_def_priority (); +extern void init_priority (); +extern CODESET get_priority (); +extern void set_priority (); +extern MULBUF * new_multi (); +extern void clear_multi (); +extern void init_multi (); +extern void buffering_multi (); +extern void parsing_multi (); +extern void set_codesets (); +extern int get_bufbytes (); +extern void set_bufbytes (); +extern char * get_icharset_string (); +extern char * outchar(); +extern char * outbuf(); +extern int mwidth(); +extern char * rotate_right_codeset (); +extern int strlen_cs(); +extern int chlen_cs(); +extern char* strdup_cs(); + +/* + * in unify.c + */ +extern void jis78to90(); +extern void chconvert_cs(); +extern void chunify_cs(); +extern int chcmp_cs(); +extern int chisvalid_cs(); Index: less/optfunc.c diff -u less/optfunc.c:1.1.1.15 less/optfunc.c:1.25 --- less/optfunc.c:1.1.1.15 Sun Oct 29 02:03:33 2000 +++ less/optfunc.c Fri Nov 24 14:43:38 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -61,6 +67,10 @@ extern int so_fg_color, so_bg_color; extern int bl_fg_color, bl_bg_color; #endif +#if JAPANESE +extern char *opt_charset; +extern int opt_Z_var; +#endif #if LOGFILE @@ -346,6 +356,66 @@ } } +#if JAPANESE +/* + * Handlers for -K option. + */ + public void +opt_K(type, s) + int type; + char *s; +{ + switch (type) + { + case INIT: + opt_charset = s; + init_charset(); + break; + } +} + +/* + * Handler for the -Z option. 
+ */ + /*ARGSUSED*/ + public void +opt_Z(type, s) + int type; + char *s; +{ + switch (type) + { + case INIT: + if (opt_Z_var == OPT_ON) + init_def_priority(sjis); + else if (opt_Z_var == OPT_OFF) + init_def_priority(ujis); + break; + case QUERY: + break; + case TOGGLE: + switch (get_priority(get_mulbuf(curr_ifile))) { + case sjis: + opt_Z_var = OPT_OFF; + break; + case ujis: + opt_Z_var = OPT_ON; + break; + case noconv: + default: + opt_Z_var = OPT_ONPLUS; /* we use this to mean error */ + return; + } + if (opt_Z_var == OPT_ON) + init_def_priority(sjis); + else if (opt_Z_var == OPT_OFF) + init_def_priority(ujis); + init_priority(get_mulbuf(curr_ifile)); + break; + } +} +#endif + /* * Handler for the -V option. */ @@ -372,6 +442,8 @@ putstr("less comes with NO WARRANTY, to the extent permitted by law.\n"); putstr("For information about the terms of redistribution,\n"); putstr("see the file named README in the less distribution.\n"); + putstr("\nJapanized part of Less\n"); + putstr("Copyright (c) 1994-2000 Kazushi (Jam) Marukawa\n"); quit(QUIT_OK); break; } Index: less/option.c diff -u less/option.c:1.1.1.10 less/option.c:1.18 --- less/option.c:1.1.1.10 Sun Oct 29 02:03:33 2000 +++ less/option.c Tue Oct 31 04:30:40 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -467,7 +473,7 @@ { static char buf[8]; - sprintf(buf, "-%s", prchar(c)); + sprintf(buf, "-%s", prchar(c, ASCII)); return (buf); } Index: less/opttbl.c diff -u less/opttbl.c:1.1.1.15 less/opttbl.c:1.27 --- less/opttbl.c:1.1.1.15 Sun Oct 29 02:19:11 2000 +++ less/opttbl.c Tue Oct 31 04:30:40 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. 
*/ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -49,6 +55,10 @@ #if HILITE_SEARCH public int hilite_search; /* Highlight matched search patterns? */ #endif +#if JAPANESE +public int opt_Z_var; /* Initial variable for opt_Z */ +extern int markwrongchar; /* Display marker instead of wrong character */ +#endif /* * Long option names. @@ -98,6 +108,11 @@ static struct optname X__optname = { "no-init", NULL }; static struct optname y_optname = { "max-forw-scroll", NULL }; static struct optname z_optname = { "window", NULL }; +#if JAPANESE +static struct optname K_optname = { "charset", NULL }; +static struct optname W_optname = { "mark-wrong-char", NULL }; +static struct optname Z_optname = { "change-priority", NULL }; +#endif static struct optname quote_optname = { "quotes", NULL }; static struct optname tilde_optname = { "tilde", NULL }; static struct optname query_optname = { "help", NULL }; @@ -202,6 +217,12 @@ NULL, NULL, NULL }, #endif +#if JAPANESE + { 'K', &K_optname, + STRING|NO_TOGGLE|NO_QUERY, 0, NULL, opt_K, + NULL, NULL, NULL + }, +#endif { 'l', NULL, STRING|NO_TOGGLE|NO_QUERY, 0, NULL, opt_l, NULL, NULL, NULL @@ -286,6 +307,14 @@ "Highlight first unread line after forward-screen", "Highlight first unread line after any forward movement", }, +#if JAPANESE + { 'W', &W_optname, + BOOL|REPAINT, OPT_OFF, &markwrongchar, NULL, + "Display marker instead of wrong character", + "Display wrong character", + NULL + }, +#endif { 'x', &x_optname, NUMBER|REPAINT, 8, &tabstop, NULL, "Tab stops: ", @@ -310,6 +339,19 @@ "Scroll window size is %d lines", NULL }, +#if JAPANESE +#ifdef SJIS_PRE +#define OPT_Z OPT_ON +#else +#define OPT_Z OPT_OFF +#endif + { 'Z', &Z_optname, + BOOL|REPAINT, OPT_Z, &opt_Z_var, opt_Z, + "Give priority to the UJIS over the SJIS", + "Give priority to the SJIS over the UJIS", + "Cannot give priority since Japanese 
is not treated now", + }, +#endif { '"', "e_optname, STRING, 0, NULL, opt_quote, "quotes: ", NULL, NULL @@ -350,6 +392,12 @@ if (o->ovar != NULL) *(o->ovar) = o->odefault; } +#if JAPANESE + if (opt_Z_var == OPT_ON) + init_def_priority(sjis); + else if (opt_Z_var == OPT_OFF) + init_def_priority(ujis); +#endif } /* Index: less/output.c diff -u less/output.c:1.1.1.13 less/output.c:1.32 --- less/output.c:1.1.1.13 Sun Oct 29 02:03:33 2000 +++ less/output.c Tue Oct 31 04:30:40 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -38,6 +44,7 @@ register int c; register int i; int a; + int cs; int curr_attr; if (ABORT_SIGS()) @@ -51,7 +58,7 @@ curr_attr = AT_NORMAL; - for (i = 0; (c = gline(i, &a)) != '\0'; i++) + for (i = 0; (c = gline(i, &cs, &a)) != '\0'; i++) { if (a != curr_attr) { @@ -81,7 +88,11 @@ if (c == '\b') putbs(); else +#if ISO + putmchr(c, cs); +#else putchr(c); +#endif } switch (curr_attr) @@ -221,7 +232,7 @@ * Output a character. */ public int -putchr(c) +putchr_raw(c) int c; { if (need_clr) @@ -233,12 +244,12 @@ if (c == '\n' && is_tty) { /* remove_top(1); */ - putchr('\r'); + putchr_raw('\r'); } #else #ifdef _OSK if (c == '\n' && is_tty) /* In OS-9, '\n' == 0x0D */ - putchr(0x0A); + putchr_raw(0x0A); #endif #endif /* @@ -255,11 +266,85 @@ * Output a string. */ public void -putstr(s) +putstr_raw(s) register char *s; { + while (*s != '\0') + putchr_raw(*s++); +} + +/* + * Output a character as ASCII. + */ + public int +putchr(c) + int c; +{ +#if ISO + char *p = outchar(c & 0377, ASCII); + putstr_raw(p); + return (c); +#else + return (putchr_raw(c)); +#endif +} + +/* + * Output a string as ASCII. 
+ */ + public void +putstr(s) + char *s; +{ +#if ISO + char *p = outbuf(s, ASCII); + putstr_raw(p); +#else + putstr_raw(s); +#endif +} + +/* + * Output a character which is a part of multi-bytes character. + */ + public int +putmchr(c, cs) + int c; + CHARSET cs; +{ +#if ISO + char *p = outchar(c & 0377, cs); + putstr_raw(p); +#else + putchr_raw(c); +#endif + return (c); +} + +/* + * Output a part of multi-bytes character. + */ + public int +putmchrs(s, cs) + char *s; + CHARSET cs; +{ + int c = *s; + while (*s != '\0') + putmchr(*s++, cs); + return (c); +} + +/* + * Output a string of multi-bytes character. + */ + public void +putmstr(s, cs) + char *s; + CHARSET *cs; +{ while (*s != '\0') - putchr(*s++); + putmchr(*s++, *cs++); } Index: less/prompt.c diff -u less/prompt.c:1.1.1.11 less/prompt.c:1.20 --- less/prompt.c:1.1.1.11 Sun Oct 29 02:03:33 2000 +++ less/prompt.c Tue Oct 31 04:30:40 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -47,7 +53,7 @@ static constant char M_proto[] = "?f%f .?n?m(file %i of %m) ..?ltlines %lt-%lb?L/%L. :byte %bB?s/%s. .?e(END) ?x- Next\\: %x.:?pB%pB\\%..%t"; static constant char e_proto[] = - "?f%f .?m(file %i of %m) .?ltlines %lt-%lb?L/%L. .byte %bB?s/%s. ?e(END) :?pB%pB\\%..%t"; + "?f%f .?m(file %i of %m) .?ltlines %lt-%lb?L/%L. .byte %bB?s/%s. ?e(END) :?pB%pB\\%.. code %K%t"; static constant char h_proto[] = "HELP -- ?eEND -- Press g to see it again:Press RETURN for more., or q when done"; @@ -330,6 +336,13 @@ free(s); } else ap_quest(); + break; + case 'K': /* Character set or code set of last non ASCII char. 
*/ +#if ISO + ap_str(get_icharset_string(get_mulbuf(curr_ifile))); +#else + ap_str(""); +#endif break; } } Index: less/search.c diff -u less/search.c:1.1.1.17 less/search.c:1.55 --- less/search.c:1.1.1.17 Sun Oct 29 02:19:11 2000 +++ less/search.c Tue Oct 31 04:30:40 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -16,9 +22,19 @@ #include "less.h" #include "position.h" +#include <assert.h> + #define MINPOS(a,b) (((a) < (b)) ? (a) : (b)) #define MAXPOS(a,b) (((a) > (b)) ? (a) : (b)) +#if HAVE_POSIX_REGCOMP_CS +#include <regex_cs.h> +#ifdef REG_EXTENDED +#define REGCOMP_FLAG REG_EXTENDED +#else +#define REGCOMP_FLAG 0 +#endif +#endif #if HAVE_POSIX_REGCOMP #include <regex.h> #ifdef REG_EXTENDED @@ -39,6 +55,9 @@ char *regex(); extern char *__loc1; #endif +#if HAVE_V8_REGCOMP_CS +#include "regexp_cs.h" +#endif #if HAVE_V8_REGCOMP #include "regexp.h" #endif @@ -52,6 +71,7 @@ extern int sc_height; extern int jump_sline; extern int bs_mode; +extern IFILE curr_ifile; extern int status_col; extern POSITION start_attnpos; extern POSITION end_attnpos; @@ -79,7 +99,7 @@ * These are the static variables that represent the "remembered" * search pattern. */ -#if HAVE_POSIX_REGCOMP +#if HAVE_POSIX_REGCOMP_CS || HAVE_POSIX_REGCOMP static regex_t *regpattern = NULL; #endif #if HAVE_PCRE @@ -91,7 +111,7 @@ #if HAVE_REGCMP static char *cpattern = NULL; #endif -#if HAVE_V8_REGCOMP +#if HAVE_V8_REGCOMP_CS || HAVE_V8_REGCOMP static struct regexp *regpattern = NULL; #endif @@ -99,16 +119,108 @@ static int is_ucase_pattern; static int last_search_type; static char *last_pattern = NULL; +static CHARSET *last_charset = NULL; +#if ISO && !NO_REGEX && (!CS_REGEX || MSB_ENABLE) /* + * Normalize text.
Add quote for some non ASCII characters or enable MSB + * of them since some regular expression library parse them as ASCII. + */ + static char * +normalize_text(src, cs, search_type) + char *src; + CHARSET *cs; + int search_type; +{ + static char *buf = NULL; + static int size = 0; + int len = strlen_cs(src, cs) * 2; + char *dst; + + if (len + 1 > size) + { + size = (len + 1 + 255) / 256 * 256; + if (buf) + free(buf); + buf = (char *) ecalloc(size, sizeof(char)); + } + dst = buf; + while (*src != '\0') + { +#if MSB_ENABLE + if (CSISASCII(*cs) || CSISWRONG(*cs)) + *dst++ = *src++; + else + *dst++ = *src++ | 0x80; + cs++; +#else + if (!CSISASCII(*cs++) && !(search_type & SRCH_NO_REGEX)) + { + switch (*src) { + /* Basic Regular Expressions */ + case '[': + case ']': + case '.': + case '*': + case '\\': + case '^': + case '$': +#if (HAVE_POSIX_REGCOMP_CS || HAVE_POSIX_REGCOMP) && defined(REG_EXTENDED) + /* Extended Regular Expressions */ + case '+': + case '?': + case '|': + case '(': + case ')': + case '{': + case '}': +#endif +#if HAVE_RE_COMP + /* No Extended Regular Expressions */ +#endif +#if HAVE_REGCMP + /* Extended Regular Expressions */ + case '+': + case '(': + case ')': + case '{': + case '}': +#endif +#if HAVE_V8_REGCOMP_CS || HAVE_V8_REGCOMP + /* Extended Regular Expressions */ + case '+': + case '?': + case '|': + case '(': + case ')': +#endif + *dst++ = '\\'; + /* fall through */ + default: + *dst++ = *src++; + break; + } + } else + *dst++ = *src++; +#endif + } + *dst = '\0'; + return (buf); +} +#endif + +/* * Convert text. 
Perform one or more of these transformations: */ #define CVT_TO_LC 01 /* Convert upper-case to lower-case */ #define CVT_BS 02 /* Do backspace processing */ #define CVT_CRLF 04 /* Remove CR after LF */ +#define CVT_TO_INT 010 /* Convert all multi bytes characters into */ + /* internal form */ +#define CVT_PAD 020 /* Remove padding character */ static void -cvt_text(odst, osrc, ops) +cvt_text_ascii(odst, osrc, ops) char *odst; char *osrc; int ops; @@ -127,23 +239,217 @@ else /* Just copy. */ *dst = *src; - } + } if ((ops & CVT_CRLF) && dst > odst && dst[-1] == '\r') dst--; *dst = '\0'; } + static void +cvt_text(odst, odstcs, osrc, osrccs, ops) + char *odst; + CHARSET *odstcs; + char *osrc; + CHARSET *osrccs; + int ops; +{ + char *src = osrc; + CHARSET *srccs = osrccs; + char *dst = odst; + CHARSET *dstcs = odstcs; + int bufcount; + char *cbuf; + CHARSET *csbuf; + char cbuffer[10]; + CHARSET csbuffer[10]; + int donef = 0; + +#if ISO + if (!(ops & CVT_TO_INT) && srccs == NULL) + { + cvt_text_ascii(dst, src, ops); + if (dstcs) + while (*dst++ != '\0') + *dstcs++ = ASCII; + return; + } + + while ((srccs != NULL && (*src != NULCH || !CSISNULLCS(*srccs))) || + (srccs == NULL && !donef)) + { + if (ops & CVT_TO_INT) + { + if (srccs == NULL) + { + int i, j; + if (*src == '\0') + { + /* flush buffer */ + buffering_multi(get_mulbuf(curr_ifile), + -1, &cbuf, &csbuf, + &bufcount); + donef = 1; + } else + { + /* make charset */ + buffering_multi(get_mulbuf(curr_ifile), + (unsigned char) *src, + &cbuf, &csbuf, + &bufcount); + } + if (bufcount == 0) + { + if (donef) + { + /* adjust the address */ + dst--; + if (dstcs) dstcs--; + } else + { + /* fill pad */ + *dst = PADCH; + if (dstcs) + *dstcs = ASCII; + } + } + cbuf[bufcount] = NULCH; + csbuf[bufcount] = NULLCS; + + /* unify character(s) in buffer */ + i = 0; + j = 0; + while (i < bufcount) { + chunify_cs(&cbuf[i], &csbuf[i], + &cbuffer[j], &csbuffer[j]); + i += chlen_cs(&cbuf[i], &csbuf[i]); + j += chlen_cs(&cbuffer[j], + 
&csbuffer[j]); + } + bufcount = j; + cbuf = cbuffer; + csbuf = csbuffer; + } else + { + int i; + cbuf = cbuffer; + csbuf = csbuffer; + chunify_cs(src, srccs, cbuf, csbuf); + bufcount = chlen_cs(src, srccs); + src += bufcount - 1; + if (srccs) srccs += bufcount - 1; + for (i = 0; i < bufcount - 1; i++) + { + *dst++ = PADCH; + if (dstcs) + *dstcs++ = ASCII; + } + bufcount = chlen_cs(cbuf, csbuf); + } + } else + { + static CHARSET dummy_cs[] = { ASCII, ASCII }; + bufcount = 1; + cbuf = src; + csbuf = srccs ? srccs : dummy_cs; + } + + assert(dst - odst >= bufcount - 1); + + while (--bufcount >= 0) + { +#if MSB_ENABLE + if (!CSISASCII(*csbuf) && !CSISWRONG(*csbuf)) + *cbuf |= 0x80; +#endif + if ((ops & CVT_TO_LC) && CSISASCII(*csbuf) && + isupper((unsigned char) *cbuf)) + { + /* Convert uppercase to lowercase. */ + dst[-bufcount] = tolower((unsigned char) *cbuf); + if (dstcs) + dstcs[-bufcount] = *csbuf; + } else if ((ops & CVT_BS) && CSISWRONG(*csbuf) && + *cbuf == '\b' && dst > odst) + { + /* Delete BS and preceding char. */ + if (bufcount == 0) + { + dst -= 2; + if (dstcs) + dstcs -= 2; + } else + { + dst -= 1; + if (dstcs) + dstcs -= 1; + } + } else + { + /* Just copy. 
*/ + dst[-bufcount] = *cbuf; + if (dstcs) + dstcs[-bufcount] = *csbuf; + } + cbuf++; + csbuf++; + } + src++; + dst++; + if (srccs) srccs++; + if (dstcs) dstcs++; + } + *dst = NULCH; + if (dstcs) *dstcs = NULLCS; + + if (odstcs && (ops & CVT_PAD)) + { + src = odst; + srccs = odstcs; + dst = odst; + dstcs = odstcs; + while (*src != NULCH || !CSISNULLCS(*srccs)) + { + if (*src != PADCH || !CSISNULLCS(*srccs)) + { + *dst++ = *src; + *dstcs++ = *srccs; + } + src++; + srccs++; + } + *dst = NULCH; + *dstcs = NULLCS; + } +#else + cvt_text_ascii(dst, src, ops); + if (dstcs) + { + while (*dst++ != '\0') + *dstcs++ = ASCII; + *dstcs = NULLCS; + } +#endif + if ((ops & CVT_CRLF) && dst > odst && dst[-1] == '\r') { + *--dst = NULCH; + if (dstcs) + { + *--dstcs = NULLCS; + } + } +} + /* * Are there any uppercase letters in this string? */ static int -is_ucase(s) +is_ucase(s, cs) char *s; + CHARSET *cs; { register char *p; - for (p = s; *p != '\0'; p++) - if (isupper((unsigned char) *p)) + for (p = s; *p != '\0'; p++, cs++) + if (CSISASCII(*cs) && isupper((unsigned char) *p)) return (1); return (0); } @@ -156,7 +462,7 @@ { if (last_search_type & SRCH_NO_REGEX) return (last_pattern != NULL); -#if HAVE_POSIX_REGCOMP +#if HAVE_POSIX_REGCOMP_CS || HAVE_POSIX_REGCOMP return (regpattern != NULL); #endif #if HAVE_PCRE @@ -168,7 +474,7 @@ #if HAVE_REGCMP return (cpattern != NULL); #endif -#if HAVE_V8_REGCOMP +#if HAVE_V8_REGCOMP_CS || HAVE_V8_REGCOMP return (regpattern != NULL); #endif #if NO_REGEX @@ -293,12 +599,27 @@ * Compile a search pattern, for future use by match_pattern. 
*/ static int -compile_pattern(pattern, search_type) +compile_pattern(pattern, charset, search_type) char *pattern; + CHARSET *charset; int search_type; { + int len = strlen_cs(pattern, charset); + if ((search_type & SRCH_NO_REGEX) == 0) { +#if HAVE_POSIX_REGCOMP_CS + regex_t *s = (regex_t *) ecalloc(1, sizeof(regex_t)); + if (regcomp_cs(s, pattern, charset, REGCOMP_FLAG)) + { + free(s); + error("Invalid pattern", NULL_PARG); + return (-1); + } + if (regpattern != NULL) + regfree_cs(regpattern); + regpattern = s; +#endif #if HAVE_POSIX_REGCOMP regex_t *s = (regex_t *) ecalloc(1, sizeof(regex_t)); if (regcomp(s, pattern, REGCOMP_FLAG)) @@ -346,6 +667,20 @@ free(cpattern); cpattern = s; #endif +#if HAVE_V8_REGCOMP_CS + struct regexp *s; + if ((s = regcomp_cs(pattern, charset)) == NULL) + { + /* + * regcomp has already printed an error message + * via regerror(). + */ + return (-1); + } + if (regpattern != NULL) + free(regpattern); + regpattern = s; +#endif #if HAVE_V8_REGCOMP struct regexp *s; if ((s = regcomp(pattern)) == NULL) @@ -364,9 +699,10 @@ if (last_pattern != NULL) free(last_pattern); - last_pattern = (char *) calloc(1, strlen(pattern)+1); - if (last_pattern != NULL) - strcpy(last_pattern, pattern); + if (last_charset != NULL) + free(last_charset); + + last_pattern = strdup_cs(pattern, charset, &last_charset); last_search_type = search_type; return (0); @@ -378,6 +714,11 @@ static void uncompile_pattern() { +#if HAVE_POSIX_REGCOMP_CS + if (regpattern != NULL) + regfree_cs(regpattern); + regpattern = NULL; +#endif #if HAVE_POSIX_REGCOMP if (regpattern != NULL) regfree(regpattern); @@ -396,12 +737,13 @@ free(cpattern); cpattern = NULL; #endif -#if HAVE_V8_REGCOMP +#if HAVE_V8_REGCOMP_CS || HAVE_V8_REGCOMP if (regpattern != NULL) free(regpattern); regpattern = NULL; #endif last_pattern = NULL; + last_charset = NULL; } /* @@ -409,8 +751,9 @@ * Set sp and ep to the start and end of the matched string. 
*/ static int -match_pattern(line, sp, ep, notbol) +match_pattern(line, charset, sp, ep, notbol) char *line; + CHARSET *charset; char **sp; char **ep; int notbol; @@ -418,8 +761,19 @@ int matched; if (last_search_type & SRCH_NO_REGEX) - return (match(last_pattern, line, sp, ep)); + return (match(last_pattern, last_charset, line, charset, + sp, ep)); +#if HAVE_POSIX_REGCOMP_CS + { + regmatch_t rm; + matched = !regexec_cs(regpattern, line, charset, 1, &rm, 0); + if (!matched) + return (0); + *sp = line + rm.rm_so; + *ep = line + rm.rm_eo; + } +#endif #if HAVE_POSIX_REGCOMP { regmatch_t rm; @@ -462,6 +816,13 @@ return (0); *sp = __loc1; #endif +#if HAVE_V8_REGCOMP_CS + matched = regexec_cs(regpattern, line, charset); + if (!matched) + return (0); + *sp = regpattern->startp[0]; + *ep = regpattern->endp[0]; +#endif #if HAVE_V8_REGCOMP #if HAVE_REGEXEC2 matched = regexec2(regpattern, line, notbol); @@ -474,7 +835,7 @@ *ep = regpattern->endp[0]; #endif #if NO_REGEX - matched = match(last_pattern, line, sp, ep); + matched = match(last_pattern, last_charset, line, charset, sp, ep); #endif return (matched); } @@ -641,8 +1002,9 @@ * forward by 2 relative to the processed line * which was searched in hilite_line. */ - npos += 2; - line += 2; + opos--; + npos++; + line++; } } } @@ -653,9 +1015,10 @@ * sp,ep delimit the first match already found. */ static void -hilite_line(linepos, line, sp, ep) +hilite_line(linepos, line, charset, sp, ep) POSITION linepos; char *line; + CHARSET *charset; char *sp; char *ep; { @@ -698,13 +1061,29 @@ * move to the first char after the string we matched. * If we matched zero, just move to the next char. 
*/ +#if ISO if (ep > searchp) + { + charset += ep - searchp; searchp = ep; + } else if (*searchp != '\0') + { + do + { + searchp++; + charset++; + } while (CSISREST(*charset)); + } else /* end of line */ + break; +#else + if (ep > searchp) + searchp = ep; else if (*searchp != '\0') searchp++; else /* end of line */ break; - } while (match_pattern(searchp, &sp, &ep, 1)); +#endif + } while (match_pattern(searchp, charset, &sp, &ep, 1)); if (bs_mode == BS_SPECIAL) { @@ -876,6 +1255,12 @@ int line_match; POSITION linepos, oldpos; + static CHARSET *charset = NULL; +#if ISO + static int charset_len = 0; + init_multi(get_mulbuf(curr_ifile)); +#endif + linenum = find_linenum(pos); oldpos = pos; for (;;) @@ -953,19 +1338,49 @@ * If it's a caseless search, convert the line to lowercase. * If we're doing backspace processing, delete backspaces. */ +#if ISO + if (1) +#else if (is_caseless || bs_mode == BS_SPECIAL) +#endif { int ops = 0; +#if ISO + int len; +#endif + if (is_caseless) ops |= CVT_TO_LC; if (bs_mode == BS_SPECIAL) ops |= CVT_BS; if (bs_mode != BS_CONTROL) ops |= CVT_CRLF; - cvt_text(line, line, ops); +#if ISO + ops |= CVT_TO_INT; +#endif + +#if ISO + /* + * Make charset buffer and convert input lines + * into internal codes and its charsets. + */ + len = (strlen(line) + 1 + 1023) / 1024 * 1024; + if (len > charset_len) + { + charset_len = len; + if (charset) + free(charset); + charset = (CHARSET *) + ecalloc(len, sizeof(CHARSET)); + } + + cvt_text(line, charset, line, NULL, ops); +#else + cvt_text(line, NULL, line, NULL, ops); +#endif } else if (bs_mode != BS_CONTROL) { - cvt_text(line, line, CVT_CRLF); + cvt_text(line, NULL, line, NULL, CVT_CRLF); } /* @@ -973,7 +1388,7 @@ * We are successful if we either want a match and got one, * or if we want a non-match and got one. 
*/ - line_match = match_pattern(line, &sp, &ep, 0); + line_match = match_pattern(line, charset, &sp, &ep, 0); line_match = (!(search_type & SRCH_NO_MATCH) && line_match) || ((search_type & SRCH_NO_MATCH) && !line_match); if (!line_match) @@ -990,7 +1405,7 @@ * hilite list and keep searching. */ if (line_match) - hilite_line(linepos, line, sp, ep); + hilite_line(linepos, line, charset, sp, ep); #endif } else if (--matches <= 0) { @@ -1007,7 +1422,8 @@ */ clr_hilite(); if (line_match) - hilite_line(linepos, line, sp, ep); + hilite_line(linepos, line, charset, + sp, ep); } #endif if (plinepos != NULL) @@ -1027,9 +1443,10 @@ * if less than n matches are found in this file. */ public int -search(search_type, pattern, n) +search(search_type, pattern, charset, n) int search_type; char *pattern; + CHARSET *charset; int n; { POSITION pos; @@ -1074,14 +1491,41 @@ } else { /* + * Save the pattern. + */ + char* save_pattern; + CHARSET* save_charset; + save_pattern = strdup_cs(pattern, charset, &save_charset); + pattern = save_pattern; + charset = save_charset; + /* * Compile the pattern. */ - ucase = is_ucase(pattern); + ucase = is_ucase(pattern, charset); if (caseless == OPT_ONPLUS) - cvt_text(pattern, pattern, CVT_TO_LC); - if (compile_pattern(pattern, search_type) < 0) + cvt_text(pattern, charset, pattern, charset, + CVT_TO_LC | CVT_TO_INT | CVT_PAD); + else + cvt_text(pattern, charset, pattern, charset, + CVT_TO_INT | CVT_PAD); +#if ISO && !NO_REGEX && (!CS_REGEX || MSB_ENABLE) + /* + * The normalize_text must not change charset if it is + * used in regex. Otherwise charset will be discarded + * in regex, so there is no problem. + */ + pattern = normalize_text(pattern, charset, search_type); +#endif + if (compile_pattern(pattern, charset, search_type) < 0) return (-1); /* + * Free the saved pattern.
+ */ + if (save_pattern != NULL) + free(save_pattern); + if (save_charset != NULL) + free(save_charset); + /* * Ignore case if -I is set OR * -i is set AND the pattern is all lowercase. */ @@ -1302,14 +1746,76 @@ * It supports no metacharacters like *, etc. */ static int -match(pattern, buf, pfound, pend) - char *pattern, *buf; +match(pattern, charset, buf, bufcharset, pfound, pend) + char *pattern; + CHARSET *charset; + char *buf; + CHARSET *bufcharset; char **pfound, **pend; { register char *pp, *lp; +#if ISO + register CHARSET *pc, *lc; +#endif - for ( ; *buf != '\0'; buf++) - { +#if 0 +write(2, "pa1: ", 5); +write(2, pattern, strlen(pattern)); +write(2, "\r\n", 2); +write(2, "cs1: ", 5); +write(2, charset, strlen(pattern)*2); +write(2, "\r\n", 2); +write(2, "pa2: ", 5); +write(2, buf, strlen(buf)); +write(2, "\r\n", 2); +write(2, "cs2: ", 5); +write(2, bufcharset, strlen(buf)*2); +write(2, "\r\n", 2); +#endif + while (*buf != '\0') + { +#if ISO + pp = pattern; + pc = charset; + lp = buf; + lc = bufcharset; + while (1) + { + if ((*pp == NULCH && *pc == NULLCS) || + (*lp == NULCH && *lc == NULLCS)) + break; + + while (*pp == PADCH && CSISASCII(*pc)) + { + pp++; + pc++; + } + while (*lp == PADCH && CSISASCII(*lc)) + { + lp++; + lc++; + } + if (*pp != *lp || *pc != *lc) + break; + pp++; + pc++; + lp++; + lc++; + } + if (*pp == NULCH && *pc == NULLCS) + { + if (pfound != NULL) + *pfound = buf; + if (pend != NULL) + *pend = lp; + return (1); + } + do + { + buf++; + bufcharset++; + } while (CSISREST(*bufcharset)); +#else for (pp = pattern, lp = buf; *pp == *lp; pp++, lp++) if (*pp == '\0' || *lp == '\0') break; @@ -1321,11 +1827,13 @@ *pend = lp; return (1); } + buf++; +#endif } return (0); } -#if HAVE_V8_REGCOMP +#if HAVE_V8_REGCOMP_CS || HAVE_V8_REGCOMP /* * This function is called by the V8 regcomp to report * errors in regular expressions. 
Index: less/unify.c diff -u /dev/null less/unify.c:1.28 --- /dev/null Wed Dec 6 22:29:44 2000 +++ less/unify.c Sat Dec 2 11:51:00 2000 @@ -0,0 +1,1653 @@ +/* + * Copyright (c) 1998-2000 Kazushi (Jam) Marukawa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice in the documentation and/or other materials provided with + * the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +/* + * Routines to unify a multi bytes character. 
+ */ + +#include "defines.h" +#include "multi.h" + + +#if ISO + +#include +#include +#include + +typedef struct { + char* input1; /* if input2 is null, convert input1 to output */ + char* input2; /* if input2 is here, convert input1-input2 to output */ + char* output; + CHARSET charset; +} convtab; + +typedef struct { + int num; + convtab tab[1]; +} sortedconvtab; + +typedef struct { + convtab* ctab; + sortedconvtab* sctab; +} convtable; + +static int comp_convtab(p, q) +void* p; +void* q; +{ + return strcmp(((convtab*)p)->input1, ((convtab*)q)->input1); +} + +static sortedconvtab* make_sortedconvtab(tab) +convtab tab[]; +{ + int i; + sortedconvtab* sctab; + + for (i = 0; tab[i].input1 != NULL; i++) + ; + sctab = (sortedconvtab*)malloc(sizeof(sortedconvtab) + + sizeof(convtab) * i - 1); + if (sctab == NULL) + return NULL; + sctab->num = i; + for (i = 0; i < sctab->num; i++) + sctab->tab[i] = tab[i]; + qsort(sctab->tab, sctab->num, sizeof(convtab), comp_convtab); + return sctab; +} + +static convtab* find_convtab_from_sctab(sctab, input) +sortedconvtab* sctab; +char* input; +{ + int from = 0; + int to = sctab->num; + int cur; + int cmp; + + if (to == 0) + return NULL; + while (1) { + cur = (from + to) / 2; + cmp = strcmp(input, sctab->tab[cur].input1); + if (cmp == 0) + return &sctab->tab[cur]; + if (sctab->tab[cur].input2 && + cmp > 0 && + strcmp(input, sctab->tab[cur].input2) <= 0) + return &sctab->tab[cur]; + if (to - from == 1) + return NULL; + if (cmp < 0) + to = cur; + if (0 < cmp) + from = cur; + } +} + +static void init_convtable(ctable) +convtable* ctable; +{ + if (ctable->sctab == NULL) + ctable->sctab = make_sortedconvtab(ctable->ctab); +} + +static convtab* find_convtab(ctable, input) +convtable* ctable; +char* input; +{ + convtab* ptab; + int cmp; + + if (ctable->sctab == NULL) + init_convtable(ctable); + + if (ctable->sctab != NULL) + return find_convtab_from_sctab(ctable->sctab, input); + + for (ptab = ctable->ctab; ptab->input1; ptab++) + if ((cmp = 
strcmp(input, ptab->input1)) == 0) + return ptab; + else if (ptab->input2 && + cmp > 0 && + strcmp(input, ptab->input2) <= 0) + return ptab; + return NULL; +} + +static convtab conv_jisx0208_78_90[] = { + /* 0x3646($@6F(B) -> 0x7421(&@$Bt!(B) */ + { "6F", NULL, "t!", JISX0208_90KANJI }, + /* 0x4B6A($@Kj(B) -> 0x7422(&@$Bt"(B) */ + { "Kj", NULL, "t\"", JISX0208_90KANJI }, + /* 0x4D5A($@MZ(B) -> 0x7423(&@$Bt#(B) */ + { "MZ", NULL, "t#", JISX0208_90KANJI }, + /* 0x6076($@`v(B) -> 0x7424(&@$Bt$(B) */ + { "`v", NULL, "t$", JISX0208_90KANJI }, + /* 0x3033($@03(B) -> 0x724D(&@$BrM(B) */ + { "03", NULL, "rM", JISX0208_90KANJI }, + /* 0x724D($@rM(B) -> 0x3033(&@$B03(B) */ + { "rM", NULL, "03", JISX0208_90KANJI }, + /* 0x3229($@2)(B) -> 0x7274(&@$Brt(B) */ + { "2)", NULL, "rt", JISX0208_90KANJI }, + /* 0x7274($@rt(B) -> 0x3229(&@$B2)(B) */ + { "rt", NULL, "2)", JISX0208_90KANJI }, + /* 0x3342($@3B(B) -> 0x695A(&@$BiZ(B) */ + { "3B", NULL, "iZ", JISX0208_90KANJI }, + /* 0x695A($@iZ(B) -> 0x3342(&@$B3B(B) */ + { "iZ", NULL, "3B", JISX0208_90KANJI }, + /* 0x3349($@3I(B) -> 0x5978(&@$BYx(B) */ + { "3I", NULL, "Yx", JISX0208_90KANJI }, + /* 0x5978($@Yx(B) -> 0x3349(&@$B3I(B) */ + { "Yx", NULL, "3I", JISX0208_90KANJI }, + /* 0x3376($@3v(B) -> 0x635E(&@$Bc^(B) */ + { "3v", NULL, "c^", JISX0208_90KANJI }, + /* 0x635E($@c^(B) -> 0x3376(&@$B3v(B) */ + { "c^", NULL, "3v", JISX0208_90KANJI }, + /* 0x3443($@4C(B) -> 0x5E75(&@$B^u(B) */ + { "4C", NULL, "^u", JISX0208_90KANJI }, + /* 0x5E75($@^u(B) -> 0x3443(&@$B4C(B) */ + { "^u", NULL, "4C", JISX0208_90KANJI }, + /* 0x3452($@4R(B) -> 0x6B5D(&@$Bk](B) */ + { "4R", NULL, "k]", JISX0208_90KANJI }, + /* 0x6B5D($@k](B) -> 0x3452(&@$B4R(B) */ + { "k]", NULL, "4R", JISX0208_90KANJI }, + /* 0x375B($@7[(B) -> 0x7074(&@$Bpt(B) */ + { "7[", NULL, "pt", JISX0208_90KANJI }, + /* 0x7074($@pt(B) -> 0x375B(&@$B7[(B) */ + { "pt", NULL, "7[", JISX0208_90KANJI }, + /* 0x395C($@9\(B) -> 0x6268(&@$Bbh(B) */ + { "9\\", NULL, "bh", JISX0208_90KANJI }, + /* 
0x6268($@bh(B) -> 0x395C(&@$B9\(B) */ + { "bh", NULL, "9\\", JISX0208_90KANJI }, + /* 0x3C49($@<I(B) -> 0x6922(&@$Bi"(B) */ + { "<I", NULL, "i\"", JISX0208_90KANJI }, + /* 0x6922($@i"(B) -> 0x3C49(&@$B<I(B) */ + { "i\"", NULL, "<I", JISX0208_90KANJI }, + /* 0x3F59($@?Y(B) -> 0x7057(&@$BpW(B) */ + { "?Y", NULL, "pW", JISX0208_90KANJI }, + /* 0x7057($@pW(B) -> 0x3F59(&@$B?Y(B) */ + { "pW", NULL, "?Y", JISX0208_90KANJI }, + /* 0x4128($@A((B) -> 0x6C4D(&@$BlM(B) */ + { "A(", NULL, "lM", JISX0208_90KANJI }, + /* 0x6C4D($@lM(B) -> 0x4128(&@$BA((B) */ + { "lM", NULL, "A(", JISX0208_90KANJI }, + /* 0x445B($@D[(B) -> 0x5464(&@$BTd(B) */ + { "D[", NULL, "Td", JISX0208_90KANJI }, + /* 0x5464($@Td(B) -> 0x445B(&@$BD[(B) */ + { "Td", NULL, "D[", JISX0208_90KANJI }, + /* 0x4557($@EW(B) -> 0x626A(&@$Bbj(B) */ + { "EW", NULL, "bj", JISX0208_90KANJI }, + /* 0x626A($@bj(B) -> 0x4557(&@$BEW(B) */ + { "bj", NULL, "EW", JISX0208_90KANJI }, + /* 0x456E($@En(B) -> 0x5B6D(&@$B[m(B) */ + { "En", NULL, "[m", JISX0208_90KANJI }, + /* 0x5B6D($@[m(B) -> 0x456E(&@$BEn(B) */ + { "[m", NULL, "En", JISX0208_90KANJI }, + /* 0x4573($@Es(B) -> 0x5E39(&@$B^9(B) */ + { "Es", NULL, "^9", JISX0208_90KANJI }, + /* 0x5E39($@^9(B) -> 0x4573(&@$BEs(B) */ + { "^9", NULL, "Es", JISX0208_90KANJI }, + /* 0x4676($@Fv(B) -> 0x6D6E(&@$Bmn(B) */ + { "Fv", NULL, "mn", JISX0208_90KANJI }, + /* 0x6D6E($@mn(B) -> 0x4676(&@$BFv(B) */ + { "mn", NULL, "Fv", JISX0208_90KANJI }, + /* 0x4768($@Gh(B) -> 0x6A24(&@$Bj$(B) */ + { "Gh", NULL, "j$", JISX0208_90KANJI }, + /* 0x6A24($@j$(B) -> 0x4768(&@$BGh(B) */ + { "j$", NULL, "Gh", JISX0208_90KANJI }, + /* 0x4930($@I0(B) -> 0x5B58(&@$B[X(B) */ + { "I0", NULL, "[X", JISX0208_90KANJI }, + /* 0x5B58($@[X(B) -> 0x4930(&@$BI0(B) */ + { "[X", NULL, "I0", JISX0208_90KANJI }, + /* 0x4B79($@Ky(B) -> 0x5056(&@$BPV(B) */ + { "Ky", NULL, "PV", JISX0208_90KANJI }, + /* 0x5056($@PV(B) -> 0x4B79(&@$BKy(B) */ + { "PV", NULL, "Ky", JISX0208_90KANJI }, + /* 0x4C79($@Ly(B) -> 0x692E(&@$Bi.(B) */ + { "Ly", NULL, "i.", JISX0208_90KANJI }, + /* 0x692E($@i.(B) -> 0x4C79(&@$BLy(B) */ + { "i.", NULL, "Ly", JISX0208_90KANJI }, + /*
0x4F36($@O6(B) -> 0x6446(&@$BdF(B) */ + { "O6", NULL, "dF", JISX0208_90KANJI }, + /* 0x6446($@dF(B) -> 0x4F36(&@$BO6(B) */ + { "dF", NULL, "O6", JISX0208_90KANJI }, + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable ctable_jisx0208_78_90 = { conv_jisx0208_78_90, NULL }; + +static convtab unify_jisx0208[] = { + /* 0x2121(&@$B!!(B) -> 0x20( ) */ + { "!!", NULL, " ", ASCII }, + /* 0x2122(&@$B!"(B) -> 0x2C(,) */ + { "!\"", NULL, ",", ASCII }, + /* 0x2123(&@$B!#(B) -> 0x2E(.) */ + { "!#", NULL, ".", ASCII }, + /* 0x2124(&@$B!$(B) -> 0x2C(,) */ + { "!$", NULL, ",", ASCII }, + /* 0x2125(&@$B!%(B) -> 0x2E(.) */ + { "!%", NULL, ".", ASCII }, + /* 0x2127(&@$B!'(B) -> 0x3A(:) */ + { "!'", NULL, ":", ASCII }, + /* 0x2128(&@$B!((B) -> 0x3B(;) */ + { "!(", NULL, ";", ASCII }, + /* 0x2129(&@$B!)(B) -> 0x3F(?) */ + { "!)", NULL, "?", ASCII }, + /* 0x212A(&@$B!*(B) -> 0x21(!) */ + { "!*", NULL, "!", ASCII }, + /* 0x2130(&@$B!0(B) -> 0x5E(^) */ + { "!0", NULL, "^", ASCII }, + /* 0x2132(&@$B!2(B) -> 0x5F(_) */ + { "!2", NULL, "_", ASCII }, + /* 0x213D(&@$B!=(B) -> 0x2D(-) */ + { "!=", NULL, "-", ASCII }, + /* 0x213E(&@$B!>(B) -> 0x2D(-) */ + { "!>", NULL, "-", ASCII }, + /* 0x213F(&@$B!?(B) -> 0x2F(/) */ + { "!?", NULL, "/", ASCII }, + /* 0x2140(&@$B!@(B) -> 0x5C(\) */ + { "!@", NULL, "\\", ASCII }, + /* 0x2141(&@$B!A(B) -> 0x2D(-) */ + { "!A", NULL, "-", ASCII }, + /* 0x2143(&@$B!C(B) -> 0x7C(|) */ + { "!C", NULL, "|", ASCII }, + /* 0x2146(&@$B!F(B) -> 0x27(') */ + { "!F", NULL, "'", ASCII }, + /* 0x2147(&@$B!G(B) -> 0x27(') */ + { "!G", NULL, "'", ASCII }, + /* 0x2148(&@$B!H(B) -> 0x22(") */ + { "!H", NULL, "\"", ASCII }, + /* 0x2149(&@$B!I(B) -> 0x22(") */ + { "!I", NULL, "\"", ASCII }, + /* 0x214A(&@$B!J(B) -> 0x28(() */ + { "!J", NULL, "(", ASCII }, + /* 0x214B(&@$B!K(B) -> 0x29()) */ + { "!K", NULL, ")", ASCII }, + /* 0x214C(&@$B!L(B) -> 0x5B([) */ + { "!L", NULL, "[", ASCII }, + /* 0x214D(&@$B!M(B) -> 0x5D(]) */ + { "!M", NULL, "]", ASCII }, + /* 0x214E(&@$B!N(B) -> 0x5B([) 
*/ + { "!N", NULL, "[", ASCII }, + /* 0x214F(&@$B!O(B) -> 0x5D(]) */ + { "!O", NULL, "]", ASCII }, + /* 0x2150(&@$B!P(B) -> 0x7B({) */ + { "!P", NULL, "{", ASCII }, + /* 0x2151(&@$B!Q(B) -> 0x7D(}) */ + { "!Q", NULL, "}", ASCII }, + /* 0x2152(&@$B!R(B) -> 0x5B([) */ + { "!R", NULL, "[", ASCII }, + /* 0x2153(&@$B!S(B) -> 0x5D(]) */ + { "!S", NULL, "]", ASCII }, + /* 0x2154(&@$B!T(B) -> 0x5B([) */ + { "!T", NULL, "[", ASCII }, + /* 0x2155(&@$B!U(B) -> 0x5D(]) */ + { "!U", NULL, "]", ASCII }, + /* 0x2156(&@$B!V(B) -> 0x5B([) */ + { "!V", NULL, "[", ASCII }, + /* 0x2157(&@$B!W(B) -> 0x5D(]) */ + { "!W", NULL, "]", ASCII }, + /* 0x2158(&@$B!X(B) -> 0x5B([) */ + { "!X", NULL, "[", ASCII }, + /* 0x2159(&@$B!Y(B) -> 0x5D(]) */ + { "!Y", NULL, "]", ASCII }, + /* 0x215A(&@$B!Z(B) -> 0x5B([) */ + { "!Z", NULL, "[", ASCII }, + /* 0x215B(&@$B![(B) -> 0x5D(]) */ + { "![", NULL, "]", ASCII }, + /* 0x215C(&@$B!\(B) -> 0x2B(+) */ + { "!\\", NULL, "+", ASCII }, + /* 0x215D(&@$B!](B) -> 0x2D(-) */ + { "!]", NULL, "-", ASCII }, + /* 0x215F(&@$B!_(B) -> 0x2A(*) */ + { "!_", NULL, "*", ASCII }, + /* 0x2160(&@$B!`(B) -> 0x2F(/) */ + { "!`", NULL, "/", ASCII }, + /* 0x2161(&@$B!a(B) -> 0x3D(=) */ + { "!a", NULL, "=", ASCII }, + /* 0x2163(&@$B!c(B) -> 0x3C(<) */ + { "!c", NULL, "<", ASCII }, + /* 0x2164(&@$B!d(B) -> 0x3E(>) */ + { "!d", NULL, ">", ASCII }, + /* 0x216C(&@$B!l(B) -> 0x27(') */ + { "!l", NULL, "'", ASCII }, + /* 0x216D(&@$B!m(B) -> 0x22(") */ + { "!m", NULL, "\"", ASCII }, + /* 0x2170(&@$B!p(B) -> 0x24($) */ + { "!p", NULL, "$", ASCII }, + /* 0x2173(&@$B!s(B) -> 0x25(%) */ + { "!s", NULL, "%", ASCII }, + /* 0x2174(&@$B!t(B) -> 0x23(#) */ + { "!t", NULL, "#", ASCII }, + /* 0x2175(&@$B!u(B) -> 0x26(&) */ + { "!u", NULL, "&", ASCII }, + /* 0x2176(&@$B!v(B) -> 0x2A(*) */ + { "!v", NULL, "*", ASCII }, + /* 0x2177(&@$B!w(B) -> 0x40(@) */ + { "!w", NULL, "@", ASCII }, + /* 0x2330(&@$B#0(B) -> 0x30(0) */ + { "#0", NULL, "0", ASCII }, + /* 0x2331(&@$B#1(B) -> 0x31(1) */ + { "#1", 
NULL, "1", ASCII }, + /* 0x2332(&@$B#2(B) -> 0x32(2) */ + { "#2", NULL, "2", ASCII }, + /* 0x2333(&@$B#3(B) -> 0x33(3) */ + { "#3", NULL, "3", ASCII }, + /* 0x2334(&@$B#4(B) -> 0x34(4) */ + { "#4", NULL, "4", ASCII }, + /* 0x2335(&@$B#5(B) -> 0x35(5) */ + { "#5", NULL, "5", ASCII }, + /* 0x2336(&@$B#6(B) -> 0x36(6) */ + { "#6", NULL, "6", ASCII }, + /* 0x2337(&@$B#7(B) -> 0x37(7) */ + { "#7", NULL, "7", ASCII }, + /* 0x2338(&@$B#8(B) -> 0x38(8) */ + { "#8", NULL, "8", ASCII }, + /* 0x2339(&@$B#9(B) -> 0x39(9) */ + { "#9", NULL, "9", ASCII }, + /* 0x2341(&@$B#A(B) -> 0x41(A) */ + { "#A", NULL, "A", ASCII }, + /* 0x2342(&@$B#B(B) -> 0x42(B) */ + { "#B", NULL, "B", ASCII }, + /* 0x2343(&@$B#C(B) -> 0x43(C) */ + { "#C", NULL, "C", ASCII }, + /* 0x2344(&@$B#D(B) -> 0x44(D) */ + { "#D", NULL, "D", ASCII }, + /* 0x2345(&@$B#E(B) -> 0x45(E) */ + { "#E", NULL, "E", ASCII }, + /* 0x2346(&@$B#F(B) -> 0x46(F) */ + { "#F", NULL, "F", ASCII }, + /* 0x2347(&@$B#G(B) -> 0x47(G) */ + { "#G", NULL, "G", ASCII }, + /* 0x2348(&@$B#H(B) -> 0x48(H) */ + { "#H", NULL, "H", ASCII }, + /* 0x2349(&@$B#I(B) -> 0x49(I) */ + { "#I", NULL, "I", ASCII }, + /* 0x234A(&@$B#J(B) -> 0x4A(J) */ + { "#J", NULL, "J", ASCII }, + /* 0x234B(&@$B#K(B) -> 0x4B(K) */ + { "#K", NULL, "K", ASCII }, + /* 0x234C(&@$B#L(B) -> 0x4C(L) */ + { "#L", NULL, "L", ASCII }, + /* 0x234D(&@$B#M(B) -> 0x4D(M) */ + { "#M", NULL, "M", ASCII }, + /* 0x234E(&@$B#N(B) -> 0x4E(N) */ + { "#N", NULL, "N", ASCII }, + /* 0x234F(&@$B#O(B) -> 0x4F(O) */ + { "#O", NULL, "O", ASCII }, + /* 0x2350(&@$B#P(B) -> 0x50(P) */ + { "#P", NULL, "P", ASCII }, + /* 0x2351(&@$B#Q(B) -> 0x51(Q) */ + { "#Q", NULL, "Q", ASCII }, + /* 0x2352(&@$B#R(B) -> 0x52(R) */ + { "#R", NULL, "R", ASCII }, + /* 0x2353(&@$B#S(B) -> 0x53(S) */ + { "#S", NULL, "S", ASCII }, + /* 0x2354(&@$B#T(B) -> 0x54(T) */ + { "#T", NULL, "T", ASCII }, + /* 0x2355(&@$B#U(B) -> 0x55(U) */ + { "#U", NULL, "U", ASCII }, + /* 0x2356(&@$B#V(B) -> 0x56(V) */ + { "#V", NULL, "V", ASCII }, 
+ /* 0x2357(&@$B#W(B) -> 0x57(W) */ + { "#W", NULL, "W", ASCII }, + /* 0x2358(&@$B#X(B) -> 0x58(X) */ + { "#X", NULL, "X", ASCII }, + /* 0x2359(&@$B#Y(B) -> 0x59(Y) */ + { "#Y", NULL, "Y", ASCII }, + /* 0x235A(&@$B#Z(B) -> 0x5A(Z) */ + { "#Z", NULL, "Z", ASCII }, + /* 0x2361(&@$B#a(B) -> 0x61(a) */ + { "#a", NULL, "a", ASCII }, + /* 0x2362(&@$B#b(B) -> 0x62(b) */ + { "#b", NULL, "b", ASCII }, + /* 0x2363(&@$B#c(B) -> 0x63(c) */ + { "#c", NULL, "c", ASCII }, + /* 0x2364(&@$B#d(B) -> 0x64(d) */ + { "#d", NULL, "d", ASCII }, + /* 0x2365(&@$B#e(B) -> 0x65(e) */ + { "#e", NULL, "e", ASCII }, + /* 0x2366(&@$B#f(B) -> 0x66(f) */ + { "#f", NULL, "f", ASCII }, + /* 0x2367(&@$B#g(B) -> 0x67(g) */ + { "#g", NULL, "g", ASCII }, + /* 0x2368(&@$B#h(B) -> 0x68(h) */ + { "#h", NULL, "h", ASCII }, + /* 0x2369(&@$B#i(B) -> 0x69(i) */ + { "#i", NULL, "i", ASCII }, + /* 0x236A(&@$B#j(B) -> 0x6A(j) */ + { "#j", NULL, "j", ASCII }, + /* 0x236B(&@$B#k(B) -> 0x6B(k) */ + { "#k", NULL, "k", ASCII }, + /* 0x236C(&@$B#l(B) -> 0x6C(l) */ + { "#l", NULL, "l", ASCII }, + /* 0x236D(&@$B#m(B) -> 0x6D(m) */ + { "#m", NULL, "m", ASCII }, + /* 0x236E(&@$B#n(B) -> 0x6E(n) */ + { "#n", NULL, "n", ASCII }, + /* 0x236F(&@$B#o(B) -> 0x6F(o) */ + { "#o", NULL, "o", ASCII }, + /* 0x2370(&@$B#p(B) -> 0x70(p) */ + { "#p", NULL, "p", ASCII }, + /* 0x2371(&@$B#q(B) -> 0x71(q) */ + { "#q", NULL, "q", ASCII }, + /* 0x2372(&@$B#r(B) -> 0x72(r) */ + { "#r", NULL, "r", ASCII }, + /* 0x2373(&@$B#s(B) -> 0x73(s) */ + { "#s", NULL, "s", ASCII }, + /* 0x2374(&@$B#t(B) -> 0x74(t) */ + { "#t", NULL, "t", ASCII }, + /* 0x2375(&@$B#u(B) -> 0x75(u) */ + { "#u", NULL, "u", ASCII }, + /* 0x2376(&@$B#v(B) -> 0x76(v) */ + { "#v", NULL, "v", ASCII }, + /* 0x2377(&@$B#w(B) -> 0x77(w) */ + { "#w", NULL, "w", ASCII }, + /* 0x2378(&@$B#x(B) -> 0x78(x) */ + { "#x", NULL, "x", ASCII }, + /* 0x2379(&@$B#y(B) -> 0x79(y) */ + { "#y", NULL, "y", ASCII }, + /* 0x237a(&@$B#z(B) -> 0x7A(z) */ + { "#z", NULL, "z", ASCII }, + /* 
0x2621(&@$B&!(B) -> 0x41(-FA) */ + { "&!", NULL, "A", GREEK }, + /* 0x2622(&@$B&"(B) -> 0x42(-FB) */ + { "&\"", NULL, "B", GREEK }, + /* 0x2623(&@$B&#(B) -> 0x43(-FC) */ + { "&#", NULL, "C", GREEK }, + /* 0x2624(&@$B&$(B) -> 0x44(-FD) */ + { "&$", NULL, "D", GREEK }, + /* 0x2625(&@$B&%(B) -> 0x45(-FE) */ + { "&%", NULL, "E", GREEK }, + /* 0x2626(&@$B&&(B) -> 0x46(-FF) */ + { "&&", NULL, "F", GREEK }, + /* 0x2627(&@$B&'(B) -> 0x47(-FG) */ + { "&'", NULL, "G", GREEK }, + /* 0x2628(&@$B&((B) -> 0x48(-FH) */ + { "&(", NULL, "H", GREEK }, + /* 0x2629(&@$B&)(B) -> 0x49(-FI) */ + { "&)", NULL, "I", GREEK }, + /* 0x262A(&@$B&*(B) -> 0x4A(-FJ) */ + { "&*", NULL, "J", GREEK }, + /* 0x262B(&@$B&+(B) -> 0x4B(-FK) */ + { "&+", NULL, "K", GREEK }, + /* 0x262C(&@$B&,(B) -> 0x4C(-FL) */ + { "&,", NULL, "L", GREEK }, + /* 0x262D(&@$B&-(B) -> 0x4D(-FM) */ + { "&-", NULL, "M", GREEK }, + /* 0x262E(&@$B&.(B) -> 0x4E(-FN) */ + { "&.", NULL, "N", GREEK }, + /* 0x262F(&@$B&/(B) -> 0x4F(-FO) */ + { "&/", NULL, "O", GREEK }, + /* 0x2630(&@$B&0(B) -> 0x50(-FP) */ + { "&0", NULL, "P", GREEK }, + /* 0x2631(&@$B&1(B) -> 0x51(-FQ) */ + { "&1", NULL, "Q", GREEK }, + /* 0x2632(&@$B&2(B) -> 0x53(-FS) */ + { "&2", NULL, "S", GREEK }, + /* 0x2633(&@$B&3(B) -> 0x54(-FT) */ + { "&3", NULL, "T", GREEK }, + /* 0x2634(&@$B&4(B) -> 0x55(-FU) */ + { "&4", NULL, "U", GREEK }, + /* 0x2635(&@$B&5(B) -> 0x56(-FV) */ + { "&5", NULL, "V", GREEK }, + /* 0x2636(&@$B&6(B) -> 0x57(-FW) */ + { "&6", NULL, "W", GREEK }, + /* 0x2637(&@$B&7(B) -> 0x58(-FX) */ + { "&7", NULL, "X", GREEK }, + /* 0x2638(&@$B&8(B) -> 0x59(-FY) */ + { "&8", NULL, "Y", GREEK }, + /* 0x2641(&@$B&A(B) -> 0x61(-Fa) */ + { "&A", NULL, "a", GREEK }, + /* 0x2642(&@$B&B(B) -> 0x62(-Fb) */ + { "&B", NULL, "b", GREEK }, + /* 0x2643(&@$B&C(B) -> 0x63(-Fc) */ + { "&C", NULL, "c", GREEK }, + /* 0x2644(&@$B&D(B) -> 0x64(-Fd) */ + { "&D", NULL, "d", GREEK }, + /* 0x2645(&@$B&E(B) -> 0x65(-Fe) */ + { "&E", NULL, "e", GREEK }, + /* 0x2646(&@$B&F(B) -> 
0x66(-Ff) */ + { "&F", NULL, "f", GREEK }, + /* 0x2647(&@$B&G(B) -> 0x67(-Fg) */ + { "&G", NULL, "g", GREEK }, + /* 0x2648(&@$B&H(B) -> 0x68(-Fh) */ + { "&H", NULL, "h", GREEK }, + /* 0x2649(&@$B&I(B) -> 0x69(-Fi) */ + { "&I", NULL, "i", GREEK }, + /* 0x264A(&@$B&J(B) -> 0x6A(-Fj) */ + { "&J", NULL, "j", GREEK }, + /* 0x264B(&@$B&K(B) -> 0x6B(-Fk) */ + { "&K", NULL, "k", GREEK }, + /* 0x264C(&@$B&L(B) -> 0x6C(-Fl) */ + { "&L", NULL, "l", GREEK }, + /* 0x264D(&@$B&M(B) -> 0x6D(-Fm) */ + { "&M", NULL, "m", GREEK }, + /* 0x264E(&@$B&N(B) -> 0x6E(-Fn) */ + { "&N", NULL, "n", GREEK }, + /* 0x264F(&@$B&O(B) -> 0x6F(-Fo) */ + { "&O", NULL, "o", GREEK }, + /* 0x2650(&@$B&P(B) -> 0x70(-Fp) */ + { "&P", NULL, "p", GREEK }, + /* 0x2651(&@$B&Q(B) -> 0x71(-Fq) */ + { "&Q", NULL, "q", GREEK }, + /* 0x2652(&@$B&R(B) -> 0x73(-Fs) */ + { "&R", NULL, "s", GREEK }, + /* 0x2653(&@$B&S(B) -> 0x74(-Ft) */ + { "&S", NULL, "t", GREEK }, + /* 0x2654(&@$B&T(B) -> 0x75(-Fu) */ + { "&T", NULL, "u", GREEK }, + /* 0x2655(&@$B&U(B) -> 0x76(-Fv) */ + { "&U", NULL, "v", GREEK }, + /* 0x2656(&@$B&V(B) -> 0x77(-Fw) */ + { "&V", NULL, "w", GREEK }, + /* 0x2657(&@$B&W(B) -> 0x78(-Fx) */ + { "&W", NULL, "x", GREEK }, + /* 0x2658(&@$B&X(B) -> 0x79(-Fy) */ + { "&X", NULL, "y", GREEK }, + /* 0x2721(&@$B'!(B) -> 0x30(-L0) */ + { "'!", NULL, "0", CYRILLIC }, + /* 0x2722(&@$B'"(B) -> 0x31(-L1) */ + { "'\"", NULL, "1", CYRILLIC }, + /* 0x2723(&@$B'#(B) -> 0x32(-L2) */ + { "'#", NULL, "2", CYRILLIC }, + /* 0x2724(&@$B'$(B) -> 0x33(-L3) */ + { "'$", NULL, "3", CYRILLIC }, + /* 0x2725(&@$B'%(B) -> 0x34(-L4) */ + { "'%", NULL, "4", CYRILLIC }, + /* 0x2726(&@$B'&(B) -> 0x35(-L5) */ + { "'&", NULL, "5", CYRILLIC }, + /* 0x2727(&@$B''(B) -> 0x21(-L!) 
*/ + { "''", NULL, "!", CYRILLIC }, + /* 0x2728(&@$B'((B) -> 0x36(-L6) */ + { "'(", NULL, "6", CYRILLIC }, + /* 0x2729(&@$B')(B) -> 0x37(-L7) */ + { "')", NULL, "7", CYRILLIC }, + /* 0x272A(&@$B'*(B) -> 0x38(-L8) */ + { "'*", NULL, "8", CYRILLIC }, + /* 0x272B(&@$B'+(B) -> 0x39(-L9) */ + { "'+", NULL, "9", CYRILLIC }, + /* 0x272C(&@$B',(B) -> 0x3A(-L:) */ + { "',", NULL, ":", CYRILLIC }, + /* 0x272D(&@$B'-(B) -> 0x3B(-L;) */ + { "'-", NULL, ";", CYRILLIC }, + /* 0x272E(&@$B'.(B) -> 0x3C(-L<) */ + { "'.", NULL, "<", CYRILLIC }, + /* 0x272F(&@$B'/(B) -> 0x3D(-L=) */ + { "'/", NULL, "=", CYRILLIC }, + /* 0x2730(&@$B'0(B) -> 0x3E(-L>) */ + { "'0", NULL, ">", CYRILLIC }, + /* 0x2731(&@$B'1(B) -> 0x3F(-L?) */ + { "'1", NULL, "?", CYRILLIC }, + /* 0x2732(&@$B'2(B) -> 0x40(-L@) */ + { "'2", NULL, "@", CYRILLIC }, + /* 0x2733(&@$B'3(B) -> 0x41(-LA) */ + { "'3", NULL, "A", CYRILLIC }, + /* 0x2734(&@$B'4(B) -> 0x42(-LB) */ + { "'4", NULL, "B", CYRILLIC }, + /* 0x2735(&@$B'5(B) -> 0x43(-LC) */ + { "'5", NULL, "C", CYRILLIC }, + /* 0x2736(&@$B'6(B) -> 0x44(-LD) */ + { "'6", NULL, "D", CYRILLIC }, + /* 0x2737(&@$B'7(B) -> 0x45(-LE) */ + { "'7", NULL, "E", CYRILLIC }, + /* 0x2738(&@$B'8(B) -> 0x46(-LF) */ + { "'8", NULL, "F", CYRILLIC }, + /* 0x2739(&@$B'9(B) -> 0x47(-LG) */ + { "'9", NULL, "G", CYRILLIC }, + /* 0x273A(&@$B':(B) -> 0x48(-LH) */ + { "':", NULL, "H", CYRILLIC }, + /* 0x273B(&@$B';(B) -> 0x49(-LI) */ + { "';", NULL, "I", CYRILLIC }, + /* 0x273C(&@$B'<(B) -> 0x4A(-LJ) */ + { "'<", NULL, "J", CYRILLIC }, + /* 0x273D(&@$B'=(B) -> 0x4B(-LK) */ + { "'=", NULL, "K", CYRILLIC }, + /* 0x273E(&@$B'>(B) -> 0x4C(-LL) */ + { "'>", NULL, "L", CYRILLIC }, + /* 0x273F(&@$B'?(B) -> 0x4D(-LM) */ + { "'?", NULL, "M", CYRILLIC }, + /* 0x2740(&@$B'@(B) -> 0x4E(-LN) */ + { "'@", NULL, "N", CYRILLIC }, + /* 0x2741(&@$B'A(B) -> 0x4F(-LO) */ + { "'A", NULL, "O", CYRILLIC }, + /* 0x2751(&@$B'Q(B) -> 0x50(-LP) */ + { "'Q", NULL, "P", CYRILLIC }, + /* 0x2752(&@$B'R(B) -> 0x51(-LQ) */ + { 
"'R", NULL, "Q", CYRILLIC }, + /* 0x2753(&@$B'S(B) -> 0x52(-LR) */ + { "'S", NULL, "R", CYRILLIC }, + /* 0x2754(&@$B'T(B) -> 0x53(-LS) */ + { "'T", NULL, "S", CYRILLIC }, + /* 0x2755(&@$B'U(B) -> 0x54(-LT) */ + { "'U", NULL, "T", CYRILLIC }, + /* 0x2756(&@$B'V(B) -> 0x55(-LU) */ + { "'V", NULL, "U", CYRILLIC }, + /* 0x2757(&@$B'W(B) -> 0x71(-Lq) */ + { "'W", NULL, "q", CYRILLIC }, + /* 0x2758(&@$B'X(B) -> 0x56(-LV) */ + { "'X", NULL, "V", CYRILLIC }, + /* 0x2759(&@$B'Y(B) -> 0x57(-LW) */ + { "'Y", NULL, "W", CYRILLIC }, + /* 0x275A(&@$B'Z(B) -> 0x58(-LX) */ + { "'Z", NULL, "X", CYRILLIC }, + /* 0x275B(&@$B'[(B) -> 0x59(-LY) */ + { "'[", NULL, "Y", CYRILLIC }, + /* 0x275C(&@$B'\(B) -> 0x5A(-LZ) */ + { "'\\", NULL, "Z", CYRILLIC }, + /* 0x275D(&@$B'](B) -> 0x5B(-L[) */ + { "']", NULL, "[", CYRILLIC }, + /* 0x275E(&@$B'^(B) -> 0x5C(-L\) */ + { "'^", NULL, "\\", CYRILLIC }, + /* 0x275F(&@$B'_(B) -> 0x5D(-L]) */ + { "'_", NULL, "]", CYRILLIC }, + /* 0x2760(&@$B'`(B) -> 0x5E(-L^) */ + { "'`", NULL, "^", CYRILLIC }, + /* 0x2761(&@$B'a(B) -> 0x5F(-L_) */ + { "'a", NULL, "_", CYRILLIC }, + /* 0x2762(&@$B'b(B) -> 0x60(-L`) */ + { "'b", NULL, "`", CYRILLIC }, + /* 0x2763(&@$B'c(B) -> 0x61(-La) */ + { "'c", NULL, "a", CYRILLIC }, + /* 0x2764(&@$B'd(B) -> 0x62(-Lb) */ + { "'d", NULL, "b", CYRILLIC }, + /* 0x2765(&@$B'e(B) -> 0x63(-Lc) */ + { "'e", NULL, "c", CYRILLIC }, + /* 0x2766(&@$B'f(B) -> 0x64(-Ld) */ + { "'f", NULL, "d", CYRILLIC }, + /* 0x2767(&@$B'g(B) -> 0x65(-Le) */ + { "'g", NULL, "e", CYRILLIC }, + /* 0x2768(&@$B'h(B) -> 0x66(-Lf) */ + { "'h", NULL, "f", CYRILLIC }, + /* 0x2769(&@$B'i(B) -> 0x67(-Lg) */ + { "'i", NULL, "g", CYRILLIC }, + /* 0x276A(&@$B'j(B) -> 0x68(-Lh) */ + { "'j", NULL, "h", CYRILLIC }, + /* 0x276B(&@$B'k(B) -> 0x69(-Li) */ + { "'k", NULL, "i", CYRILLIC }, + /* 0x276C(&@$B'l(B) -> 0x6A(-Lj) */ + { "'l", NULL, "j", CYRILLIC }, + /* 0x276D(&@$B'm(B) -> 0x6B(-Lk) */ + { "'m", NULL, "k", CYRILLIC }, + /* 0x276E(&@$B'n(B) -> 0x6C(-Ll) */ + { "'n", 
NULL, "l", CYRILLIC }, + /* 0x276F(&@$B'o(B) -> 0x6D(-Lm) */ + { "'o", NULL, "m", CYRILLIC }, + /* 0x2770(&@$B'p(B) -> 0x6E(-Ln) */ + { "'p", NULL, "n", CYRILLIC }, + /* 0x2771(&@$B'q(B) -> 0x6F(-Lo) */ + { "'q", NULL, "o", CYRILLIC }, + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable utable_jisx0208 = { unify_jisx0208, NULL }; + +static convtab unify_n_jisx0201roman[] = { + /* 0x5C((J\(B) -X 0x5C(\) */ + { "\\", NULL, "\\", ASCII }, + /* 0x7E((J~(B) -X 0x7E(~) */ + { "~", NULL, "~", ASCII }, + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable utable_n_jisx0201roman = { unify_n_jisx0201roman, NULL }; + +static convtab unify_n_iso646[] = { + /* 0x23((@#(B) -X 0x23(#) */ + { "#", NULL, "#", ASCII }, + /* 0x24((@$(B) -X 0x24($) */ + { "$", NULL, "$", ASCII }, + /* 0x40((@@(B) -X 0x40(@) */ + { "@", NULL, "@", ASCII }, + /* 0x5B((@[(B) -X 0x5B([) */ + { "[", NULL, "[", ASCII }, + /* 0x5C((@\(B) -X 0x5C(\) */ + { "\\", NULL, "\\", ASCII }, + /* 0x5D((@](B) -X 0x5D(]) */ + { "]", NULL, "]", ASCII }, + /* 0x5E((@^(B) -X 0x5E(^) */ + { "^", NULL, "^", ASCII }, + /* 0x60((@`(B) -X 0x60(`) */ + { "`", NULL, "`", ASCII }, + /* 0x7B((@{(B) -X 0x7B({) */ + { "{", NULL, "{", ASCII }, + /* 0x7C((@|(B) -X 0x7C(|) */ + { "|", NULL, "|", ASCII }, + /* 0x7D((@}(B) -X 0x7D(}) */ + { "}", NULL, "}", ASCII }, + /* 0x7E((@~(B) -X 0x7E(~) */ + { "~", NULL, "~", ASCII }, + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable utable_n_iso646 = { unify_n_iso646, NULL }; + +static convtab eliminate_wrong_jisx0208_78[] = { + /* empty rows */ + /* 8-15 KU 0x2821($@(!(B)-0x2F7E($@/~(B) -> 0x222E($B".(B) */ + { "(!", "/~", "\".", JISX0208KANJI }, + /* 84-94 KU 0x7421($@t!(B)-0x7E7E($@~~(B) -> 0x222E($B".(B) */ + { "t!", "~~", "\".", JISX0208KANJI }, + + /* sequences of empty columns */ + /* 2 KU 0x222F($@"/(B)-0x227E($@"~(B) -> 0x222E($B".(B) */ + { "\"/", "\"~", "\".", JISX0208KANJI }, + /* 3 KU 0x2321($@#!(B)-0x232F($@#/(B) -> 0x222E($B".(B) */ + { "#!", "#/", "\".", JISX0208KANJI }, + /* 3 KU 
0x233A($@#:(B)-0x2340($@#@(B) -> 0x222E($B".(B) */ + { "#:", "#@", "\".", JISX0208KANJI }, + /* 3 KU 0x235B($@#[(B)-0x2360($@#`(B) -> 0x222E($B".(B) */ + { "#[", "#`", "\".", JISX0208KANJI }, + /* 3 KU 0x237B($@#{(B)-0x237E($@#~(B) -> 0x222E($B".(B) */ + { "#{", "#~", "\".", JISX0208KANJI }, + /* 4 KU 0x2474($@$t(B)-0x247E($@$~(B) -> 0x222E($B".(B) */ + { "$t", "$~", "\".", JISX0208KANJI }, + /* 5 KU 0x2577($@%w(B)-0x257E($@%~(B) -> 0x222E($B".(B) */ + { "%w", "%~", "\".", JISX0208KANJI }, + /* 6 KU 0x2639($@&9(B)-0x2640($@&@(B) -> 0x222E($B".(B) */ + { "&9", "&@", "\".", JISX0208KANJI }, + /* 6 KU 0x2659($@&Y(B)-0x267E($@&~(B) -> 0x222E($B".(B) */ + { "&Y", "&~", "\".", JISX0208KANJI }, + /* 7 KU 0x2742($@'B(B)-0x2750($@'P(B) -> 0x222E($B".(B) */ + { "'B", "'P", "\".", JISX0208KANJI }, + /* 7 KU 0x2772($@'r(B)-0x277E($@'~(B) -> 0x222E($B".(B) */ + { "'r", "'~", "\".", JISX0208KANJI }, + /* 47 KU 0x4F54($@OT(B)-0x4F7E($@O~(B) -> 0x222E($B".(B) */ + { "OT", "O~", "\".", JISX0208KANJI }, + + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_jisx0208_78 = { eliminate_wrong_jisx0208_78, NULL }; + +static convtab eliminate_wrong_jisx0208_83[] = { + /* empty rows */ + /* 9-15 KU 0x2921($B)!(B)-0x2F7E($B/~(B) -> 0x222E($B".(B) */ + { ")!", "/~", "\".", JISX0208KANJI }, + /* 85-94 KU 0x7521($Bu!(B)-0x7E7E($B~~(B) -> 0x222E($B".(B) */ + { "u!", "~~", "\".", JISX0208KANJI }, + + /* sequences of empty columns */ + /* 2 KU 0x222F($B"/(B)-0x2239($B"9(B) -> 0x222E($B".(B) */ + { "\"/", "\"9", "\".", JISX0208KANJI }, + /* 2 KU 0x2242($B"B(B)-0x2249($B"I(B) -> 0x222E($B".(B) */ + { "\"B", "\"I", "\".", JISX0208KANJI }, + /* 2 KU 0x2251($B"Q(B)-0x225B($B"[(B) -> 0x222E($B".(B) */ + { "\"Q", "\"[", "\".", JISX0208KANJI }, + /* 2 KU 0x226B($B"k(B)-0x2271($B"q(B) -> 0x222E($B".(B) */ + { "\"k", "\"q", "\".", JISX0208KANJI }, + /* 2 KU 0x227A($B"z(B)-0x227D($B"}(B) -> 0x222E($B".(B) */ + { "\"z", "\"}", "\".", JISX0208KANJI }, + /* 3 KU 0x2321($B#!(B)-0x232F($B#/(B) -> 
0x222E($B".(B) */ + { "#!", "#/", "\".", JISX0208KANJI }, + /* 3 KU 0x233A($B#:(B)-0x2340($B#@(B) -> 0x222E($B".(B) */ + { "#:", "#@", "\".", JISX0208KANJI }, + /* 3 KU 0x235B($B#[(B)-0x2360($B#`(B) -> 0x222E($B".(B) */ + { "#[", "#`", "\".", JISX0208KANJI }, + /* 3 KU 0x237B($B#{(B)-0x237E($B#~(B) -> 0x222E($B".(B) */ + { "#{", "#~", "\".", JISX0208KANJI }, + /* 4 KU 0x2474($B$t(B)-0x247E($B$~(B) -> 0x222E($B".(B) */ + { "$t", "$~", "\".", JISX0208KANJI }, + /* 5 KU 0x2577($B%w(B)-0x257E($B%~(B) -> 0x222E($B".(B) */ + { "%w", "%~", "\".", JISX0208KANJI }, + /* 6 KU 0x2639($B&9(B)-0x2640($B&@(B) -> 0x222E($B".(B) */ + { "&9", "&@", "\".", JISX0208KANJI }, + /* 6 KU 0x2659($B&Y(B)-0x267E($B&~(B) -> 0x222E($B".(B) */ + { "&Y", "&~", "\".", JISX0208KANJI }, + /* 7 KU 0x2742($B'B(B)-0x2750($B'P(B) -> 0x222E($B".(B) */ + { "'B", "'P", "\".", JISX0208KANJI }, + /* 7 KU 0x2772($B'r(B)-0x277E($B'~(B) -> 0x222E($B".(B) */ + { "'r", "'~", "\".", JISX0208KANJI }, + /* 8 KU 0x2841($B(A(B)-0x287E($B(~(B) -> 0x222E($B".(B) */ + { "(A", "(~", "\".", JISX0208KANJI }, + /* 47 KU 0x4F54($BOT(B)-0x4F7E($BO~(B) -> 0x222E($B".(B) */ + { "OT", "O~", "\".", JISX0208KANJI }, + /* 84 KU 0x7425($Bt%(B)-0x747E($Bt~(B) -> 0x222E($B".(B) */ + { "t%", "t~", "\".", JISX0208KANJI }, + + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_jisx0208_83 = { eliminate_wrong_jisx0208_83, NULL }; + +static convtab eliminate_wrong_jisx0208_90[] = { + /* empty rows */ + /* 9-15 KU 0x2921(&@$B)!(B)-0x2F7E(&@$B/~(B) -> 0x222E($B".(B) */ + { ")!", "/~", "\".", JISX0208KANJI }, + /* 85-94 KU 0x7521(&@$Bu!(B)-0x7E7E(&@$B~~(B) -> 0x222E($B".(B) */ + { "u!", "~~", "\".", JISX0208KANJI }, + + /* sequences of empty columns */ + /* 2 KU 0x222F(&@$B"/(B)-0x2239(&@$B"9(B) -> 0x222E($B".(B) */ + { "\"/", "\"9", "\".", JISX0208KANJI }, + /* 2 KU 0x2242(&@$B"B(B)-0x2249(&@$B"I(B) -> 0x222E($B".(B) */ + { "\"B", "\"I", "\".", JISX0208KANJI }, + /* 2 KU 0x2251(&@$B"Q(B)-0x225B(&@$B"[(B) -> 0x222E($B".(B) */ + { 
"\"Q", "\"[", "\".", JISX0208KANJI }, + /* 2 KU 0x226B(&@$B"k(B)-0x2271(&@$B"q(B) -> 0x222E($B".(B) */ + { "\"k", "\"q", "\".", JISX0208KANJI }, + /* 2 KU 0x227A(&@$B"z(B)-0x227D(&@$B"}(B) -> 0x222E($B".(B) */ + { "\"z", "\"}", "\".", JISX0208KANJI }, + /* 3 KU 0x2321(&@$B#!(B)-0x232F(&@$B#/(B) -> 0x222E($B".(B) */ + { "#!", "#/", "\".", JISX0208KANJI }, + /* 3 KU 0x233A(&@$B#:(B)-0x2340(&@$B#@(B) -> 0x222E($B".(B) */ + { "#:", "#@", "\".", JISX0208KANJI }, + /* 3 KU 0x235B(&@$B#[(B)-0x2360(&@$B#`(B) -> 0x222E($B".(B) */ + { "#[", "#`", "\".", JISX0208KANJI }, + /* 3 KU 0x237B(&@$B#{(B)-0x237E(&@$B#~(B) -> 0x222E($B".(B) */ + { "#{", "#~", "\".", JISX0208KANJI }, + /* 4 KU 0x2474(&@$B$t(B)-0x247E(&@$B$~(B) -> 0x222E($B".(B) */ + { "$t", "$~", "\".", JISX0208KANJI }, + /* 5 KU 0x2577(&@$B%w(B)-0x257E(&@$B%~(B) -> 0x222E($B".(B) */ + { "%w", "%~", "\".", JISX0208KANJI }, + /* 6 KU 0x2639(&@$B&9(B)-0x2640(&@$B&@(B) -> 0x222E($B".(B) */ + { "&9", "&@", "\".", JISX0208KANJI }, + /* 6 KU 0x2659(&@$B&Y(B)-0x267E(&@$B&~(B) -> 0x222E($B".(B) */ + { "&Y", "&~", "\".", JISX0208KANJI }, + /* 7 KU 0x2742(&@$B'B(B)-0x2750(&@$B'P(B) -> 0x222E($B".(B) */ + { "'B", "'P", "\".", JISX0208KANJI }, + /* 7 KU 0x2772(&@$B'r(B)-0x277E(&@$B'~(B) -> 0x222E($B".(B) */ + { "'r", "'~", "\".", JISX0208KANJI }, + /* 8 KU 0x2841(&@$B(A(B)-0x287E(&@$B(~(B) -> 0x222E($B".(B) */ + { "(A", "(~", "\".", JISX0208KANJI }, + /* 47 KU 0x4F54(&@$BOT(B)-0x4F7E(&@$BO~(B) -> 0x222E($B".(B) */ + { "OT", "O~", "\".", JISX0208KANJI }, + /* 84 KU 0x7427(&@$Bt'(B)-0x747E(&@$Bt~(B) -> 0x222E($B".(B) */ + { "t'", "t~", "\".", JISX0208KANJI }, + + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_jisx0208_90 = { eliminate_wrong_jisx0208_90, NULL }; + +static convtab eliminate_wrong_jisx0212[] = { + /* empty rows */ + /* 1 KU 0x2121($(D!!(B)-0x217E($(D!~(B) -> 0x222E($B".(B) */ + { "!!", "!~", "\".", JISX0208KANJI }, + /* 3-5 KU 0x2321($(D#!(B)-0x257E($(D%~(B) -> 0x222E($B".(B) */ + { "#!", "%~", "\".", 
JISX0208KANJI }, + /* 8 KU 0x2821($(D(!(B)-0x287E($(D(~(B) -> 0x222E($B".(B) */ + { "(!", "(~", "\".", JISX0208KANJI }, + /* 12-15 KU 0x2C21($(D,!(B)-0x2F7E($(D/~(B) -> 0x222E($B".(B) */ + { ",!", "/~", "\".", JISX0208KANJI }, + /* 78-94 KU 0x6E21($(Dn!(B)-0x7E7E($(D~~(B) -> 0x222E($B".(B) */ + { "n!", "~~", "\".", JISX0208KANJI }, + + /* sequences of empty columns */ + /* 2 KU 0x2221($(D"!(B)-0x222E($(D".(B) -> 0x222E($B".(B) */ + { "\"!", "\".", "\".", JISX0208KANJI }, + /* 2 KU 0x223A($(D":(B)-0x2241($(D"A(B) -> 0x222E($B".(B) */ + { "\":", "\"A", "\".", JISX0208KANJI }, + /* 2 KU 0x2245($(D"E(B)-0x226A($(D"j(B) -> 0x222E($B".(B) */ + { "\"E", "\"j", "\".", JISX0208KANJI }, + /* 2 KU 0x2272($(D"r(B)-0x227E($(D"~(B) -> 0x222E($B".(B) */ + { "\"r", "\"~", "\".", JISX0208KANJI }, + /* 6 KU 0x2621($(D&!(B)-0x2660($(D&`(B) -> 0x222E($B".(B) */ + { "&!", "&`", "\".", JISX0208KANJI }, + /* 6 KU 0x2666($(D&f(B) -> 0x222E($B".(B) */ + { "&f", NULL, "\".", JISX0208KANJI }, + /* 6 KU 0x2668($(D&h(B) -> 0x222E($B".(B) */ + { "&h", NULL, "\".", JISX0208KANJI }, + /* 6 KU 0x266B($(D&k(B) -> 0x222E($B".(B) */ + { "&k", NULL, "\".", JISX0208KANJI }, + /* 6 KU 0x266D($(D&m(B)-0x2670($(D&p(B) -> 0x222E($B".(B) */ + { "&m", "&p", "\".", JISX0208KANJI }, + /* 6 KU 0x267D($(D&}(B)-0x267E($(D&~(B) -> 0x222E($B".(B) */ + { "&}", "&~", "\".", JISX0208KANJI }, + /* 7 KU 0x2721($(D'!(B)-0x2741($(D'A(B) -> 0x222E($B".(B) */ + { "'!", "'A", "\".", JISX0208KANJI }, + /* 7 KU 0x274F($(D'O(B)-0x2771($(D'q(B) -> 0x222E($B".(B) */ + { "'O", "'q", "\".", JISX0208KANJI }, + /* 9 KU 0x2923($(D)#(B) -> 0x222E($B".(B) */ + { ")#", NULL, "\".", JISX0208KANJI }, + /* 9 KU 0x2925($(D)%(B) -> 0x222E($B".(B) */ + { ")%", NULL, "\".", JISX0208KANJI }, + /* 9 KU 0x2927($(D)'(B) -> 0x222E($B".(B) */ + { ")'", NULL, "\".", JISX0208KANJI }, + /* 9 KU 0x292A($(D)*(B) -> 0x222E($B".(B) */ + { ")*", NULL, "\".", JISX0208KANJI }, + /* 9 KU 0x292E($(D).(B) -> 0x222E($B".(B) */ + { ").", NULL, "\".", JISX0208KANJI 
}, + /* 9 KU 0x2931($(D)1(B)-0x2940($(D)@(B) -> 0x222E($B".(B) */ + { ")1", ")@", "\".", JISX0208KANJI }, + /* 9 KU 0x2951($(D)Q(B)-0x297E($(D)~(B) -> 0x222E($B".(B) */ + { ")Q", ")~", "\".", JISX0208KANJI }, + /* 10 KU 0x2A39($(D*9(B) -> 0x222E($B".(B) */ + { "*9", NULL, "\".", JISX0208KANJI }, + /* 10 KU 0x2A78($(D*x(B)-0x2A7E($(D*~(B) -> 0x222E($B".(B) */ + { "*x", "*~", "\".", JISX0208KANJI }, + /* 11 KU 0x2B3C($(D+<(B) -> 0x222E($B".(B) */ + { "+<", NULL, "\".", JISX0208KANJI }, + /* 11 KU 0x2B44($(D+D(B) -> 0x222E($B".(B) */ + { "+D", NULL, "\".", JISX0208KANJI }, + /* 11 KU 0x2B78($(D+x(B)-0x2B7E($(D+~(B) -> 0x222E($B".(B) */ + { "+x", "+~", "\".", JISX0208KANJI }, + /* 77 KU 0x6D64($(Dmd(B)-0x6D7E($(Dm~(B) -> 0x222E($B".(B) */ + { "md", "m~", "\".", JISX0208KANJI }, + + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_jisx0212 = { eliminate_wrong_jisx0212, NULL }; + +static convtab eliminate_wrong_jisx0213_1[] = { + /* no empty row */ + + /* sequences of empty columns */ + /* 4 KU 0x247C($(O$|(B)-0x247E($(O$~(B) -> 0x222E($B".(B) */ + { "$|", "$~", "\".", JISX0208KANJI }, + /* 8 KU 0x285F($(O(_(B)-0x2866($(O(f(B) -> 0x222E($B".(B) */ + { "(_", "(f", "\".", JISX0208KANJI }, + /* 12 KU 0x2C74($(O,t(B)-0x2C7C($(O,|(B) -> 0x222E($B".(B) */ + { ",t", ",|", "\".", JISX0208KANJI }, + /* 13 KU 0x2D58($(O-X(B)-0x2D5E($(O-^(B) -> 0x222E($B".(B) */ + { "-X", "-^", "\".", JISX0208KANJI }, + /* 13 KU 0x2D70($(O-p(B)-0x2D72($(O-r(B) -> 0x222E($B".(B) */ + { "-p", "-r", "\".", JISX0208KANJI }, + /* 13 KU 0x2D74($(O-t(B)-0x2D77($(O-w(B) -> 0x222E($B".(B) */ + { "-t", "-w", "\".", JISX0208KANJI }, + /* 13 KU 0x2D7A($(O-z(B)-0x2D7C($(O-|(B) -> 0x222E($B".(B) */ + { "-z", "-|", "\".", JISX0208KANJI }, + /* 14 KU 0x2E21($(O.!(B) -> 0x222E($B".(B) */ + { ".!", NULL, "\".", JISX0208KANJI }, + /* 15 KU 0x2F7E($(O/~(B) -> 0x222E($B".(B) */ + { "/~", NULL, "\".", JISX0208KANJI }, + /* 47 KU 0x4F54($(OOT(B) -> 0x222E($B".(B) */ + { "OT", NULL, "\".", JISX0208KANJI }, + /* 47 
KU 0x4F7E($(OO~(B) -> 0x222E($B".(B) */ + { "O~", NULL, "\".", JISX0208KANJI }, + /* 84 KU 0x7427($(Ot'(B) -> 0x222E($B".(B) */ + { "t'", NULL, "\".", JISX0208KANJI }, + /* 94 KU 0x7E7A($(O~z(B)-0x7E7E($(O~~(B) -> 0x222E($B".(B) */ + { "~z", "~~", "\".", JISX0208KANJI }, + + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_jisx0213_1 = { eliminate_wrong_jisx0213_1, NULL }; + +static convtab eliminate_wrong_jisx0213_2[] = { + /* empty rows */ + /* 2 KU 0x2221($(P"!(B)-0x227E($(P"~(B) -> 0x222E($B".(B) */ + { "\"!", "\"~", "\".", JISX0208KANJI }, + /* 6-7 KU 0x2621($(P&!(B)-0x277E($(P'~(B) -> 0x222E($B".(B) */ + { "&!", "'~", "\".", JISX0208KANJI }, + /* 9-11 KU 0x2921($(P)!(B)-0x2B7E($(P+~(B) -> 0x222E($B".(B) */ + { ")!", "+~", "\".", JISX0208KANJI }, + /* 16-77 KU 0x3021($(P0!(B)-0x6D7E($(Pm~(B) -> 0x222E($B".(B) */ + { "0!", "m~", "\".", JISX0208KANJI }, + + /* sequences of empty columns */ + /* 94 KU 0x7E77($(P~w(B)-0x7E7E($(P~~(B) -> 0x222E($B".(B) */ + { "~w", "~~", "\".", JISX0208KANJI }, + + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_jisx0213_2 = { eliminate_wrong_jisx0213_2, NULL }; + +static convtab eliminate_wrong_sjis[] = { +#if SJIS0213 + /* JIS X 0213:2000 plane 1 for SJIS0213 */ + + /* no empty row */ + + /* sequences of empty columns */ + /* 4 KU 0x82FA($(O$|(B)-0x82FC($(O$~(B) -> 0x222E($B".(B) */ + { "\202\372", "\202\374", "\201\254", SJIS }, + /* 8 KU 0x84DD($(O(_(B)-0x84E4($(O(f(B) -> 0x222E($B".(B) */ + { "\204\335", "\204\344", "\201\254", SJIS }, + /* 12 KU 0x86F2($(O,t(B)-0x86FA($(O,|(B) -> 0x222E($B".(B) */ + { "\206\362", "\206\372", "\201\254", SJIS }, + /* 13 KU 0x8777($(O-X(B)-0x877D($(O-^(B) -> 0x222E($B".(B) */ + { "\207\167", "\207\175", "\201\254", SJIS }, + /* 13 KU 0x8790($(O-p(B)-0x8792($(O-r(B) -> 0x222E($B".(B) */ + { "\207\220", "\207\222", "\201\254", SJIS }, + /* 13 KU 0x8794($(O-t(B)-0x8797($(O-w(B) -> 0x222E($B".(B) */ + { "\207\224", "\207\227", "\201\254", SJIS }, + /* 13 KU 
0x879A($(O-z(B)-0x879C($(O-|(B) -> 0x222E($B".(B) */ + { "\207\232", "\207\234", "\201\254", SJIS }, + /* 14 KU 0x879F($(O.!(B) -> 0x222E($B".(B) */ + { "\207\237", NULL, "\201\254", SJIS }, + /* 15 KU 0x889E($(O/~(B) -> 0x222E($B".(B) */ + { "\210\236", NULL, "\201\254", SJIS }, + /* 47 KU 0x9873($(OOT(B) -> 0x222E($B".(B) */ + { "\230\163", NULL, "\201\254", SJIS }, + /* 47 KU 0x989E($(OO~(B) -> 0x222E($B".(B) */ + { "\230\236", NULL, "\201\254", SJIS }, + /* 84 KU 0xEAA5($(Ot'(B) -> 0x222E($B".(B) */ + { "\352\245", NULL, "\201\254", SJIS }, + /* 94 KU 0xEFF8($(O~z(B)-0xEFFC($(O~~(B) -> 0x222E($B".(B) */ + { "\357\370", "\357\374", "\201\254", SJIS }, + + /* JIS X 0213:2000 plane 2 for SJIS0213 */ + /* In SJIS0213, JIS X 0213:2000 occupies from 96 to 120 KU */ + + /* no empty row */ + + /* sequences of empty columns */ + /* 94 KU 0xFCF5($(P~w(B)-0xFCFC($(P~~(B) -> 0x222E($B".(B) */ + { "\374\365", "\374\374", "\201\254", SJIS }, +#else /* SJIS0213 */ + /* JIS X 0208:1990 for SJIS */ + /* 2 KU 0x81AD(&@$B"/(B)-0x81B7(&@$B"9(B) -> 0x81AC($B".(B) */ + { "\201\255", "\201\267", "\201\254", SJIS }, + /* 2 KU 0x81C0(&@$B"B(B)-0x81C7(&@$B"I(B) -> 0x81AC($B".(B) */ + { "\201\300", "\201\307", "\201\254", SJIS }, + /* 2 KU 0x81CF(&@$B"Q(B)-0x81D9(&@$B"[(B) -> 0x81AC($B".(B) */ + { "\201\317", "\201\331", "\201\254", SJIS }, + /* 2 KU 0x81E9(&@$B"k(B)-0x81EF(&@$B"q(B) -> 0x81AC($B".(B) */ + { "\201\351", "\201\357", "\201\254", SJIS }, + /* 2 KU 0x81F8(&@$B"z(B)-0x81FB(&@$B"}(B) -> 0x81AC($B".(B) */ + { "\201\370", "\201\373", "\201\254", SJIS }, + /* 3 KU 0x8240(&@$B#!(B)-0x824E(&@$B#/(B) -> 0x81AC($B".(B) */ + { "\202\100", "\202\116", "\201\254", SJIS }, + /* 3 KU 0x8259(&@$B#:(B)-0x825F(&@$B#@(B) -> 0x81AC($B".(B) */ + { "\202\131", "\202\137", "\201\254", SJIS }, + /* 3 KU 0x827A(&@$B#[(B)-0x8280(&@$B#`(B) -> 0x81AC($B".(B) */ + { "\202\172", "\202\200", "\201\254", SJIS }, + /* 3 KU 0x829B(&@$B#{(B)-0x829E(&@$B#~(B) -> 0x81AC($B".(B) */ + { "\202\233", "\202\236",
"\201\254", SJIS }, + /* 4 KU 0x82F2(&@$B$t(B)-0x82FC(&@$B$~(B) -> 0x81AC($B".(B) */ + { "\202\362", "\202\374", "\201\254", SJIS }, + /* 5 KU 0x8397(&@$B%w(B)-0x839E(&@$B%~(B) -> 0x81AC($B".(B) */ + { "\203\227", "\203\236", "\201\254", SJIS }, + /* 6 KU 0x83B7(&@$B&9(B)-0x83BE(&@$B&@(B) -> 0x81AC($B".(B) */ + { "\203\267", "\203\276", "\201\254", SJIS }, + /* 6 KU 0x83D7(&@$B&Y(B)-0x83FC(&@$B&~(B) -> 0x81AC($B".(B) */ + { "\203\327", "\203\374", "\201\254", SJIS }, + /* 7 KU 0x8461(&@$B'B(B)-0x846F(&@$B'P(B) -> 0x81AC($B".(B) */ + { "\204\141", "\204\157", "\201\254", SJIS }, + /* 7 KU 0x8492(&@$B'r(B)-0x849E(&@$B'~(B) -> 0x81AC($B".(B) */ + { "\204\222", "\204\236", "\201\254", SJIS }, + /* 8 KU 0x84BF(&@$B(A(B)-0x84FC(&@$B(~(B) -> 0x81AC($B".(B) */ + { "\204\277", "\204\374", "\201\254", SJIS }, + /* 9-14 KU 0x8540(&@$B)!(B)-0x87FC(&@$B.~(B) -> 0x81AC($B".(B) */ + { "\205\100", "\207\374", "\201\254", SJIS }, + /* 15 KU 0x8840(&@$B/!(B)-0x889E(&@$B/~(B) -> 0x81AC($B".(B) */ + { "\210\100", "\210\236", "\201\254", SJIS }, + /* 47 KU 0x9873(&@$BOT(B)-0x989E(&@$BO~(B) -> 0x81AC($B".(B) */ + { "\230\163", "\230\236", "\201\254", SJIS }, + /* 84 KU 0xEAA5(&@$Bt'(B)-0xEAFC(&@$Bt~(B) -> 0x81AC($B".(B) */ + { "\352\245", "\352\374", "\201\254", SJIS }, + + /* + * SJIS uses area from 85 KU to 120 KU for GAIJI, but current less + * doesn't allow GAIJI. 
+ */ + /* 85-94 KU 0xEB40(&@$Bu!(B)-0xEFFC(&@$B~~(B) -> 0x81AC($B".(B) */ + { "\353\100", "\357\374", "\201\254", SJIS }, + /* 95-120 KU 0xF040(none)-0xFCFC(none) -> 0x81AC($B".(B) */ + { "\360\100", "\374\374", "\201\254", SJIS }, +#endif /* SJIS0213 */ + + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_sjis = { eliminate_wrong_sjis, NULL }; + +static convtab eliminate_wrong_ujis[] = { +#if UJIS0213 + /* JIS X 0213:2000 plane 1 for UJIS0213 */ + + /* no empty row */ + + /* sequences of empty columns */ + /* 4 KU 0xA4FC($(O$|(B)-0xA4FE($(O$~(B) -> 0xA2AE($B".(B) */ + { "\244\374", "\244\376", "\242\256", UJIS }, + /* 8 KU 0xA8DF($(O(_(B)-0xA8E6($(O(f(B) -> 0xA2AE($B".(B) */ + { "\250\337", "\250\346", "\242\256", UJIS }, + /* 12 KU 0xACF4($(O,t(B)-0xACFC($(O,|(B) -> 0xA2AE($B".(B) */ + { "\254\364", "\254\374", "\242\256", UJIS }, + /* 13 KU 0xADD8($(O-X(B)-0xADDE($(O-^(B) -> 0xA2AE($B".(B) */ + { "\255\330", "\255\336", "\242\256", UJIS }, + /* 13 KU 0xADF0($(O-p(B)-0xADF2($(O-r(B) -> 0xA2AE($B".(B) */ + { "\255\360", "\255\362", "\242\256", UJIS }, + /* 13 KU 0xADF4($(O-t(B)-0xADF7($(O-w(B) -> 0xA2AE($B".(B) */ + { "\255\364", "\255\367", "\242\256", UJIS }, + /* 13 KU 0xADFA($(O-z(B)-0xADFC($(O-|(B) -> 0xA2AE($B".(B) */ + { "\255\372", "\255\374", "\242\256", UJIS }, + /* 14 KU 0xAEA1($(O.!(B) -> 0xA2AE($B".(B) */ + { "\256\241", NULL, "\242\256", UJIS }, + /* 15 KU 0xAFFE($(O/~(B) -> 0xA2AE($B".(B) */ + { "\257\376", NULL, "\242\256", UJIS }, + /* 47 KU 0xCFD4($(OOT(B) -> 0xA2AE($B".(B) */ + { "\317\324", NULL, "\242\256", UJIS }, + /* 47 KU 0xCFFE($(OO~(B) -> 0xA2AE($B".(B) */ + { "\317\376", NULL, "\242\256", UJIS }, + /* 84 KU 0xF4A7($(Ot'(B) -> 0xA2AE($B".(B) */ + { "\364\247", NULL, "\242\256", UJIS }, + /* 94 KU 0xFEFA($(O~z(B)-0xFEFE($(O~~(B) -> 0xA2AE($B".(B) */ + { "\376\372", "\376\376", "\242\256", UJIS }, + + /* + * In UJIS0213, the G2 space is shared by JIS X 0213:2000 plane 2 and + * JIS X 0212:1990.
The latter has some empty rows and some empty + * columns in particular rows. JIS X 0213:2000 plane 2 uses + * those empty rows. So, in total, UJIS0213 has no empty row. + */ + + /* JIS X 0212:1990 for UJIS0213 */ + /* Empty columns in particular rows are defined after the #endif below */ + + /* JIS X 0213:2000 plane 2 for UJIS0213 */ + /* sequences of empty columns */ + /* 94 KU 0xFEF7($(P~w(B)-0xFEFE($(P~~(B) -> 0xA2AE($B".(B) */ + { "\217\376\367", "\217\376\376", "\242\256", UJIS }, +#else /* UJIS0213 */ + /* UJIS uses JIS X 0208 1983 */ + + /* empty rows */ + /* 9-15 KU 0xA9A1($B)!(B)-0xAFFE($B/~(B) -> 0xA2AE($B".(B) */ + { "\251\241", "\257\376", "\242\256", UJIS }, + /* + * UJIS uses area from 85 KU to 94 KU for GAIJI, but current less + * doesn't allow GAIJI. + */ + /* 85-94 KU 0xF5A1($Bu!(B)-0xFEFE($B~~(B) -> 0xA2AE($B".(B) */ + { "\365\241", "\376\376", "\242\256", UJIS }, + + /* sequences of empty columns */ + /* 2 KU 0xA2AF($B"/(B)-0xA2B9($B"9(B) -> 0xA2AE($B".(B) */ + { "\242\257", "\242\271", "\242\256", UJIS }, + /* 2 KU 0xA2C2($B"B(B)-0xA2C9($B"I(B) -> 0xA2AE($B".(B) */ + { "\242\302", "\242\311", "\242\256", UJIS }, + /* 2 KU 0xA2D1($B"Q(B)-0xA2DB($B"[(B) -> 0xA2AE($B".(B) */ + { "\242\321", "\242\333", "\242\256", UJIS }, + /* 2 KU 0xA2EB($B"k(B)-0xA2F1($B"q(B) -> 0xA2AE($B".(B) */ + { "\242\353", "\242\361", "\242\256", UJIS }, + /* 2 KU 0xA2FA($B"z(B)-0xA2FD($B"}(B) -> 0xA2AE($B".(B) */ + { "\242\372", "\242\375", "\242\256", UJIS }, + /* 3 KU 0xA3A1($B#!(B)-0xA3AF($B#/(B) -> 0xA2AE($B".(B) */ + { "\243\241", "\243\257", "\242\256", UJIS }, + /* 3 KU 0xA3BA($B#:(B)-0xA3C0($B#@(B) -> 0xA2AE($B".(B) */ + { "\243\272", "\243\300", "\242\256", UJIS }, + /* 3 KU 0xA3DB($B#[(B)-0xA3E0($B#`(B) -> 0xA2AE($B".(B) */ + { "\243\333", "\243\340", "\242\256", UJIS }, + /* 3 KU 0xA3FB($B#{(B)-0xA3FE($B#~(B) -> 0xA2AE($B".(B) */ + { "\243\373", "\243\376", "\242\256", UJIS }, + /* 4 KU 0xA4F4($B$t(B)-0xA4FE($B$~(B) -> 0xA2AE($B".(B) */ + { "\244\364", "\244\376",
"\242\256", UJIS }, + /* 5 KU 0xA5F7($B%w(B)-0xA5FE($B%~(B) -> 0xA2AE($B".(B) */ + { "\245\367", "\245\376", "\242\256", UJIS }, + /* 6 KU 0xA6B9($B&9(B)-0xA6C0($B&@(B) -> 0xA2AE($B".(B) */ + { "\246\271", "\246\300", "\242\256", UJIS }, + /* 6 KU 0xA6D9($B&Y(B)-0xA6FE($B&~(B) -> 0xA2AE($B".(B) */ + { "\246\331", "\246\376", "\242\256", UJIS }, + /* 7 KU 0xA7C2($B'B(B)-0xA7D0($B'P(B) -> 0xA2AE($B".(B) */ + { "\247\302", "\247\320", "\242\256", UJIS }, + /* 7 KU 0xA7F2($B'r(B)-0xA7FE($B'~(B) -> 0xA2AE($B".(B) */ + { "\247\362", "\247\376", "\242\256", UJIS }, + /* 8 KU 0xA8C1($B(A(B)-0xA8FE($B(~(B) -> 0xA2AE($B".(B) */ + { "\250\301", "\250\376", "\242\256", UJIS }, + /* 47 KU 0xCFD4($BOT(B)-0xCFFE($BO~(B) -> 0xA2AE($B".(B) */ + { "\317\324", "\317\376", "\242\256", UJIS }, + /* 84 KU 0xF4A5($Bt%(B)-0xF4FE($Bt~(B) -> 0xA2AE($B".(B) */ + { "\364\245", "\364\376", "\242\256", UJIS }, + + /* JIS X 0212:1990 for UJIS */ + /* + * Here, we define only empty rows. Empty columns in + * particular rows are defined after the #endif below + */ + /* empty rows */ + /* 1 KU 0xA1A1($(D!!(B)-0xA1FE($(D!~(B) -> 0xA2AE($B".(B) */ + { "\217\241\241", "\217\241\376", "\242\256", UJIS }, + /* 3-5 KU 0xA3A1($(D#!(B)-0xA5FE($(D%~(B) -> 0xA2AE($B".(B) */ + { "\217\243\241", "\217\245\376", "\242\256", UJIS }, + /* 8 KU 0xA8A1($(D(!(B)-0xA8FE($(D(~(B) -> 0xA2AE($B".(B) */ + { "\217\250\241", "\217\250\376", "\242\256", UJIS }, + /* 12-15 KU 0xACA1($(D,!(B)-0xAFFE($(D/~(B) -> 0xA2AE($B".(B) */ + { "\217\254\241", "\217\257\376", "\242\256", UJIS }, + /* 78-94 KU 0xEEA1($(Dn!(B)-0xFEFE($(D~~(B) -> 0xA2AE($B".(B) */ + { "\217\356\241", "\217\376\376", "\242\256", UJIS }, +#endif /* UJIS0213 */ + /* JIS X 0212:1990 */ + /* + * Here, we define only empty columns in particular rows + * Empty rows are defined before the #endif above + */ + /* sequences of empty columns */ + /* 2 KU 0xA2A1($(D"!(B)-0xA2AE($(D".(B) -> 0xA2AE($B".(B) */ + { "\217\242\241", "\217\242\256", "\242\256", UJIS }, + /* 2 KU
0xA2BA($(D":(B)-0xA2C1($(D"A(B) -> 0xA2AE($B".(B) */ + { "\217\242\272", "\217\242\301", "\242\256", UJIS }, + /* 2 KU 0xA2C5($(D"E(B)-0xA2EA($(D"j(B) -> 0xA2AE($B".(B) */ + { "\217\242\305", "\217\242\352", "\242\256", UJIS }, + /* 2 KU 0xA2F2($(D"r(B)-0xA2FE($(D"~(B) -> 0xA2AE($B".(B) */ + { "\217\242\362", "\217\242\376", "\242\256", UJIS }, + /* 6 KU 0xA6A1($(D&!(B)-0xA6E0($(D&`(B) -> 0xA2AE($B".(B) */ + { "\217\246\241", "\217\246\340", "\242\256", UJIS }, + /* 6 KU 0xA6E6($(D&f(B) -> 0xA2AE($B".(B) */ + { "\217\246\346", NULL, "\242\256", UJIS }, + /* 6 KU 0xA6E8($(D&h(B) -> 0xA2AE($B".(B) */ + { "\217\246\350", NULL, "\242\256", UJIS }, + /* 6 KU 0xA6EB($(D&k(B) -> 0xA2AE($B".(B) */ + { "\217\246\353", NULL, "\242\256", UJIS }, + /* 6 KU 0xA6ED($(D&m(B)-0xA6F0($(D&p(B) -> 0xA2AE($B".(B) */ + { "\217\246\355", "\217\246\360", "\242\256", UJIS }, + /* 6 KU 0xA6FD($(D&}(B)-0xA6FE($(D&~(B) -> 0xA2AE($B".(B) */ + { "\217\246\375", "\217\246\376", "\242\256", UJIS }, + /* 7 KU 0xA7A1($(D'!(B)-0xA7C1($(D'A(B) -> 0xA2AE($B".(B) */ + { "\217\247\241", "\217\247\301", "\242\256", UJIS }, + /* 7 KU 0xA7CF($(D'O(B)-0xA7F1($(D'q(B) -> 0xA2AE($B".(B) */ + { "\217\247\317", "\217\247\361", "\242\256", UJIS }, + /* 9 KU 0xA9A3($(D)#(B) -> 0xA2AE($B".(B) */ + { "\217\251\243", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9A5($(D)%(B) -> 0xA2AE($B".(B) */ + { "\217\251\245", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9A7($(D)'(B) -> 0xA2AE($B".(B) */ + { "\217\251\247", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9AA($(D)*(B) -> 0xA2AE($B".(B) */ + { "\217\251\252", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9AE($(D).(B) -> 0xA2AE($B".(B) */ + { "\217\251\256", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9B1($(D)1(B)-0xA9C0($(D)@(B) -> 0xA2AE($B".(B) */ + { "\217\251\261", "\217\251\300", "\242\256", UJIS }, + /* 9 KU 0xA9D1($(D)Q(B)-0xA9FE($(D)~(B) -> 0xA2AE($B".(B) */ + { "\217\251\321", "\217\251\376", "\242\256", UJIS }, + /* 10 KU 0xAAB9($(D*9(B) -> 0xA2AE($B".(B) */ + { "\217\252\271", NULL, 
"\242\256", UJIS }, + /* 10 KU 0xAAF8($(D*x(B)-0xAAFE($(D*~(B) -> 0xA2AE($B".(B) */ + { "\217\252\370", "\217\252\376", "\242\256", UJIS }, + /* 11 KU 0xABBC($(D+<(B) -> 0xA2AE($B".(B) */ + { "\217\253\274", NULL, "\242\256", UJIS }, + /* 11 KU 0xABC4($(D+D(B) -> 0xA2AE($B".(B) */ + { "\217\253\304", NULL, "\242\256", UJIS }, + /* 11 KU 0xABF8($(D+x(B)-0xABFE($(D+~(B) -> 0xA2AE($B".(B) */ + { "\217\253\370", "\217\253\376", "\242\256", UJIS }, + /* 77 KU 0xEDE4($(Dmd(B)-0xEDFE($(Dm~(B) -> 0xA2AE($B".(B) */ + { "\217\355\344", "\217\355\376", "\242\256", UJIS }, + + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_ujis = { eliminate_wrong_ujis, NULL }; + +/* Return 1 if cs is one of the 94-character sets listed below (ISO 646 variants and similar), else 0. */ +static int iso646p(cs) +CHARSET cs; +{ + if (CS2TYPE(cs) != TYPE_94_CHARSET) + return 0; + switch (CS2CHARSET(cs)) { + case TYPE_94_CHARSET | FT2CS('@'): /* ISO 646 IRV 1983 */ + case TYPE_94_CHARSET | FT2CS('A'): /* BSI 4730 United Kingdom */ + case TYPE_94_CHARSET | FT2CS('C'): /* NATS Standard Swedish/Finnish */ + case TYPE_94_CHARSET | FT2CS('G'): /* ISO 646 Swedish */ + /* (SEN 850200 Ann. B) */ + case TYPE_94_CHARSET | FT2CS('H'): /* ISO 646 Swedish Name */ + /* (SEN 850200 Ann. 
C) */ + case JISX0201ROMAN: /* JIS X 0201-1976 Roman */ + case TYPE_94_CHARSET | FT2CS('K'): /* ISO 646 German (DIN 66083) */ + case TYPE_94_CHARSET | FT2CS('L'): /* ISO 646 Portuguese (ECMA) */ + case TYPE_94_CHARSET | FT2CS('R'): /* French */ + case TYPE_94_CHARSET | FT2CS('T'): /* China */ + case TYPE_94_CHARSET | FT2CS('Y'): /* Italian */ + case TYPE_94_CHARSET | FT2CS('Z'): /* Spanish */ + case TYPE_94_CHARSET | FT2CS('`'): /* NS 4551 Version 1 */ + case TYPE_94_CHARSET | FT2CS('a'): /* NS 4551 Version 2 */ + case TYPE_94_CHARSET | FT2CS('f'): /* NF Z 62-010-1982 */ + case TYPE_94_CHARSET | FT2CS('g'): /* IBM Portuguese */ + case TYPE_94_CHARSET | FT2CS('h'): /* IBM Spanish */ + case TYPE_94_CHARSET | FT2CS('i'): /* MS Z 7795/3 [Hungary] */ + case TYPE_94_CHARSET | FT2CS('n'): /* JIS C 6229-1984 OCR-B [Japan] */ + case TYPE_94_CHARSET | FT2CS('u'): /* CCITT Recommendation T.61 */ + /* Teletex Primary Set */ + case TYPE_94_CHARSET | FT2CS('w'): /* CSA Z 243.4-1985 Alternate */ + /* Primary Set No.1 [Canada] */ + case TYPE_94_CHARSET | FT2CS('x'): /* CSA Z 243.4-1985 Alternate */ + /* Primary Set No.2 [Canada] */ + case TYPE_94_CHARSET | FT2CS('z'): /* JUS I.B1.002 [Yugoslavia] */ + return 1; + default: + return 0; + } +} +/* quote_it() below is disabled by the surrounding #if 0 / #endif pair and is not compiled. */ +#if 0 +static char * +quote_it(src, cs, search_type) + char *src; + CHARSET *cs; + int search_type; +{ + static char *buf = NULL; + static int size = 0; + int len = strlen_cs(src, cs) * 2; + char *dst; + + if (len + 1 > size) + { + size = (len + 1 + 255) / 256 * 256; + if (buf) + free(buf); + buf = (char *) ecalloc(size, sizeof(char)); + } + dst = buf; + while (*src != '\0') + { +#if MSB_ENABLE + if (CSISASCII(*cs) || CSISWRONG(*cs)) + *dst++ = *src++; + else + *dst++ = *src++ | 0x80; + cs++; +#else + if (!CSISASCII(*cs++) && !(search_type & SRCH_NO_REGEX)) + { + switch (*src) { + /* Basic Regular Expressions */ + case '[': + case ']': + case '.': + case '*': + case '\\': + case '^': + case '$': +#if (HAVE_POSIX_REGCOMP_CS || 
HAVE_POSIX_REGCOMP) && defined(REG_EXTENDED) + /* Extended Regular Expressions */ + case '+': + case '?': + case '|': + case '(': + case ')': + case '{': + case '}': +#endif +#if HAVE_RE_COMP + /* No Extended Regular Expressions */ +#endif +#if HAVE_REGCMP + /* Extended Regular Expressions */ + case '+': + case '(': + case ')': + case '{': + case '}': +#endif +#if HAVE_V8_REGCOMP_CS || HAVE_V8_REGCOMP + /* Extended Regular Expressions */ + case '+': + case '?': + case '|': + case '(': + case ')': +#endif + *dst++ = '\\'; + /* fall through */ + default: + *dst++ = *src++; + break; + } + } else + *dst++ = *src++; +#endif + } + *dst = '\0'; + return (buf); +} +#endif + +/* + * convert JIS C 6226-1978 into JIS X 0208:1990 + */ +void jis78to90(str) +char* str; +{ + convtab* ptab; + + /* convert JIS C 6226-1978 into JIS X 0208:1990 */ + ptab = find_convtab(&ctable_jisx0208_78_90, str); + if (ptab) { + str[0] = ptab->output[0]; + str[1] = ptab->output[1]; + } +} +/* Copy one character from istr/ics to ostr/ocs, re-tagging JIS C 6226-1978 and JIS X 0208-1983 as JIS X 0208:1990 and ISO 646 variants as ASCII; flag is not referenced in the body. */ +void chconvert_cs(istr, ics, ostr, ocs, flag) +char* istr; +CHARSET* ics; +char* ostr; +CHARSET* ocs; +int flag; /* quote regexp pattern */ +{ + int i; + convtab* ptab; + + if (istr[0] == NULCH && CSISNULLCS(ics[0])) { + ostr[0] = NULCH; + ocs[0] = NULLCS; + return; + } + /* convert codes into some traditional character sets */ + if (CS2CHARSET(*ics) == JISX0208_78KANJI) { + /* convert JIS C 6226-1978 into JIS X 0208:1990 */ + ptab = find_convtab(&ctable_jisx0208_78_90, istr); + if (ptab) { + ostr[0] = ptab->output[0]; + ostr[1] = ptab->output[1]; + ocs[0] = ptab->charset; + ocs[1] = ptab->charset | REST_MASK; + } else { + ostr[0] = istr[0]; + ostr[1] = istr[1]; + ocs[0] = JISX0208_90KANJI; + ocs[1] = JISX0208_90KANJI | REST_MASK; + } + ostr[2] = NULCH; + ocs[2] = NULLCS; + } else if (CS2CHARSET(*ics) == JISX0208KANJI) { + /* convert JIS X 0208-1983 into JIS X 0208:1990 */ + ostr[0] = istr[0]; + ostr[1] = istr[1]; + ocs[0] = JISX0208_90KANJI; + ocs[1] = JISX0208_90KANJI | REST_MASK; + + /* + * Difference between 
1983 and 1990 are two added characters, + * 0x7425 and 0x7426. So, here is nothing to do. + */ + ostr[2] = NULCH; + ocs[2] = NULLCS; + } else if (CS2CHARSET(*ics) == JISX0201ROMAN) { + /* convert JIS X 0201:1976 into ASCII */ + ptab = find_convtab(&utable_n_jisx0201roman, istr); + ostr[0] = istr[0]; + /* ocs[0] must be set on both paths: a table hit keeps the input */ + /* charset (assumed intent -- TODO confirm), a miss becomes ASCII */ + ocs[0] = ptab ? ics[0] : ASCII; + ostr[1] = NULCH; + ocs[1] = NULLCS; + } else if (iso646p(*ics)) { + /* convert domestic ISO 646 into ASCII */ + ptab = find_convtab(&utable_n_iso646, istr); + ostr[0] = istr[0]; + /* same rule as the JIS X 0201 Roman branch: a table hit keeps the */ + /* input charset (assumed intent -- TODO confirm), a miss is ASCII */ + ocs[0] = ptab ? ics[0] : ASCII; + ostr[1] = NULCH; + ocs[1] = NULLCS; + } else { + /* copy input to output */ + i = 0; + do { + ostr[i] = istr[i]; + ocs[i] = ics[i]; + i++; + } while (CSISREST(ics[i])); + ostr[i] = NULCH; + ocs[i] = NULLCS; + } +} +/* Convert one character with chconvert_cs(), then remap JIS X 0208:1990 codes found in utable_jisx0208. */ +void chunify_cs(istr, ics, ostr, ocs) +char* istr; +CHARSET* ics; +char* ostr; +CHARSET* ocs; +{ + int i; + convtab* ptab; + /* chconvert_cs() is defined with five parameters; its flag is unused, so pass 0 */ + chconvert_cs(istr, ics, ostr, ocs, 0); + /* unify codes */ + if (CS2CHARSET(*ocs) == JISX0208_90KANJI) { + /* + * convert ASCII, GREEK and CYRILLIC character in + * JIS X 0208-1990 into ASCII, ISO 8859-7 and ISO 8859-5 + * respectively. 
+ */ + ptab = find_convtab(&utable_jisx0208, ostr); + if (ptab) { + int len = strlen(ptab->output); + assert(len <= (int)strlen(ostr)); + ostr[0] = ptab->output[0]; + ocs[0] = ptab->charset; + for (i = 1; i < len; i++) { + ostr[i] = ptab->output[i]; + ocs[i] = ptab->charset | REST_MASK; + } + ostr[i] = NULCH; + ocs[i] = NULLCS; + } + } +} +/* Compare one character of str1/cs1 with one of str2/cs2 after chunify_cs(); returns 0 when equal, otherwise the MAKECV() difference. A NULL charset array means ASCII. */ +int chcmp_cs(str1, cs1, str2, cs2) +char* str1; +CHARSET* cs1; +char* str2; +CHARSET* cs2; +{ + char buf1[32]; + CHARSET bcs1[32]; + char buf2[32]; + CHARSET bcs2[32]; + + /* if there is no character set, compare them as ASCII */ + if (cs1 == NULL && cs2 == NULL) + return *str1 - *str2; + if (cs1 == NULL) + return -chcmp_cs(str2, cs2, str1, cs1); /* operands are swapped, so flip the sign */ + if (cs2 == NULL) + return MAKECV(*str1, *cs1) - MAKECV(*str2, ASCII); + + /* unify both of inputs */ + chunify_cs(str1, cs1, buf1, bcs1); + str1 = buf1; + cs1 = bcs1; + chunify_cs(str2, cs2, buf2, bcs2); + str2 = buf2; + cs2 = bcs2; + /* compare them */ + if ((*str1 == NULCH && CSISNULLCS(*cs1)) || + (*str2 == NULCH && CSISNULLCS(*cs2))) + return MAKECV(*str1, *cs1) - MAKECV(*str2, *cs2); + do { + if (*str1 != *str2 || *cs1 != *cs2) { + return MAKECV(*str1, *cs1) - MAKECV(*str2, *cs2); + } + str1++; + cs1++; + str2++; + cs2++; + } while (CSISREST(*cs1)); + return 0; +} +/* Return 0 for the terminator or for a code listed in its charset's elimination table, otherwise 1. */ +int chisvalid_cs(istr, ics) +char* istr; +CHARSET* ics; +{ + int i; + convtab* ptab; + + if (istr[0] == NULCH && CSISNULLCS(ics[0])) + return 0; + + /* check wrong codes if it is some traditional character set */ + if (CS2CHARSET(*ics) == JISX0208_78KANJI) { + ptab = find_convtab(&etable_jisx0208_78, istr); + if (ptab) + return 0; + else + return 1; + } else if (CS2CHARSET(*ics) == JISX0208KANJI) { + ptab = find_convtab(&etable_jisx0208_83, istr); + if (ptab) + return 0; + else + return 1; + } else if (CS2CHARSET(*ics) == JISX0208_90KANJI) { + /* eliminate wrong codes */ + ptab = find_convtab(&etable_jisx0208_90, istr); + if (ptab) + return 0; + else + return 1; + } else if (CS2CHARSET(*ics) == JISX0212KANJISUP) { + 
ptab = find_convtab(&etable_jisx0212, istr); + if (ptab) + return 0; + else + return 1; + } else if (CS2CHARSET(*ics) == JISX0213KANJI1) { + ptab = find_convtab(&etable_jisx0213_1, istr); + if (ptab) + return 0; + else + return 1; + } else if (CS2CHARSET(*ics) == JISX0213KANJI2) { + ptab = find_convtab(&etable_jisx0213_2, istr); + if (ptab) + return 0; + else + return 1; + } else if (CS2CHARSET(*ics) == SJIS) { + /* eliminate wrong codes */ + ptab = find_convtab(&etable_sjis, istr); + if (ptab) + return 0; + else + return 1; + } else if (CS2CHARSET(*ics) == UJIS) { + /* eliminate wrong codes */ + ptab = find_convtab(&etable_ujis, istr); + if (ptab) + return 0; + else + return 1; + /* TODO: G2 */ + } + return 1; +} + +#endif Index: less/version.c diff -u less/version.c:1.1.1.19 less/version.c:1.99 --- less/version.c:1.1.1.19 Sun Oct 29 02:19:12 2000 +++ less/version.c Tue Dec 5 07:27:45 2000 @@ -7,6 +7,12 @@ * For more information about less, or for information on how to * contact the author, see the README file. */ +/* + * Copyright (c) 1997-2000 Kazushi (Jam) Marukawa + * All rights of japanized routines are reserved. + * + * You may distribute under the terms of the Less License. + */ /* @@ -600,6 +606,87 @@ v357 7/6/00 Support sigprocmask. ----------------------------------------------------------------- v358 7/8/00 Fix problems with #stop in lesskey file. + +----------------------------------------------------------------- + Other patches by jam@pobox.com +kanji 7/18/91 Support Kanji as one of the charset. +zcat 4/21/94 Add easy uncompressing feature by invoking zcat. +iso 8/11/94 Support ISO2022 encoding and special Japanese KANJI encodings + as one of the charset. +iso2 9/21/94 Fix a variety of bugs of iso patch. +iso2p1 10/22/94 Fix a variety of bugs. +iso202 12/1/96 Chang to fit less version 330 and fixed some bugs. +iso203 12/5/96 Fix line termination bug. +iso204 12/15/96 Change all internal data structure. Add CHARSET and + "CHARSET charset[]" in line.c. 
+iso205 12/16/96 Add CODESET and structure of CODESET in multi.c and + made buffering_multi() recursively. +iso206 12/21/96 Support multi bytes characters in command buffer. + And add rotate-right command for rotating the code set in the + right plane. +iso207 12/22/96 Support search of multi bytes characters. +iso208 12/23/96 Encapsulate multi bytes buffering data. And change + the structure of CHARSET. +iso209 1/5/97 Fix bugs of KANJI search and sophisticate SJIS routines. +iso210 1/12/97 Fix BS handling and add priority argument of KANJI code. +iso211 1/19/97 Add and change comments. +iso212 1/21/97 Fix a bug about UJIS_REGEXP search. +iso213 1/22/97 Fix a bug about 8-bit error message. +iso214 1/24/97 Fix a bug about handling of BS character. +iso215 1/25/97 Change to invert -Z option if SJIS_PRE is defined +iso216 3/9/97 Fix a bug about buffering of KANJI supplement. +iso217 4/13/97 Fix a bug about initialization of priority table. +iso218 4/13/97 Change search routines to check the CHARSET at any time. +iso219 5/5/97 Fix a typo and some codes which are warned. +iso220 5/5/97 Changed the name of UJIS_REGEXP macro to EIGHTBIT_REGEXP. +iso221 5/6/97 Changed to use USE_CHARSET macro well. +iso222 12/2/97 Support private character sets and empty character set. +iso223 12/8/97 Fix some typos and warnings. +iso224 12/10/97 Fix a converting bug about WRONGCS. +iso225 2/8/98 Use appropriate matching when looking for a charset. +iso226 2/9/98 Add -K option to choose CHARSET easy. +iso227 2/15/98 Fix some bugs in code conversion routines. +iso228 2/16/98 Change to ignore all user defined characters in SJIS and UJIS. +iso229 3/5/98 Display blank characters instead of delete them when a part of + a multi column character needed to display. +iso230 3/6/98 Support regular expression library with character set ability. +iso231 3/7/98 Fixed typo. +iso232 3/8/98 Change a configure script. +iso233 3/10/98 Fixed typo and made multi.h. 
+iso234 3/12/98 Removed prewind_multi and pdone_multi because it depend on + less. Add init_multi and clear_multi instead of them. +iso235 3/13/98 Add unify.c for chcmp_cs function. +iso236 3/14/98 Fixed MSB_ENABLE bugs. +iso237 3/16/98 Add unification among JIS X 0208, ASCII, Cyrillic and Greek. +iso238 3/17/98 Add NULLCS to represent a terminator. + Changed a character set for control characters to WRONGCS. + Add chunify_cs and chconvert_cs as external function. +iso239 3/20/98 Fixed a bug in match() and add assertion in chunify_cs(). +iso240 3/25/98 Corrected all cmdbuf and cmdcs buffers' handling. + Fixed a control character handling bug. + Changed to remove padded codes from search pattern. +iso241 4/2/98 Fixed small bugs in search.c. +iso242 5/18/98 Fixed a buffering problem of search. +iso243 7/1/98 Add elimination of wrong characters for JIS C 6226-1978, + JIS X 0208-1983 and JIS X 0208:1990/1997. +iso244 7/2/98 Add elimination of wrong characters for SJIS and UJIS. +iso245 7/2/98 Fix a bug about elimination for SJIS. +iso246 8/8/98 Add one locale for Win32, eliminate all MSB_ENABLE stuff + from unify.c, and fix eliminating table for JIS C 6226-1978. +iso247 8/8/98 Add -W option. And change the point of putting a mark. Now + multi.c call checking function, then mark wrong characters. +iso248 8/12/98 Fix a problem of outputting WRONGCS. Add checking table + for JIS X 0212:1990. +iso249 10/29/00 Joined with less-358. Fixed some bugs caused by join. +iso250 11/21/00 Support JIS X 0213:2000. Added support of cygwin. + Thanks to nayuta-san. +iso251 11/22/00 Support SJIS and UJIS using JIS X 0213:2000. +iso252 11/24/00 Fixed a problem to output JIS X 0212:1990 using jis style. +iso253 12/2/00 Fixed a problem to output SJIS. Thanks to nayuta-san. + Fixed assertion problem in search.c. Thanks to SAKAKI + Kiyotake, Tanaka Akira, and Yuichi SATO. +iso254 12/5/00 Fixed a problem to output JIS X 0213:2000 plane 2 into SJIS. + Thanks to Shinya Hanataka. 
*/ -char version[] = "358"; +char version[] = "358+iso254";