Index: less/README.iso diff -u less/README.iso:1.20 less/README.iso:1.21 --- less/README.iso:1.20 Sun Sep 4 19:35:32 2005 +++ less/README.iso Sat Feb 18 09:57:24 2006 @@ -12,39 +12,18 @@ - Understand all following codes. - ISO 2022 level 3. - a special sequence, ESC 2/4[$] 2/12[,] FE, which is used in MULE. - - LESS-JIS, UJIS and SJIS. - - LESS-JIS may contain JIS C 6226-1978, JIS X 0208-1983, - JIS X 0208:1990/1997, JIS X 0212:1990, JIS X 0213:2000, - JIS X 0201:1976/1997 left/right planes, and ASCII. - - UJIS may contain one of JIS C 6226-1978, JIS X 0208-1983, - JIS X 0208:1990/1997, or JIS X 0213:2000 plane 1, one of - JIS X 0201:1976/1997 left plane or ASCII, JIS X 0201:1976/1997 - right plane, and both JIS X 0212:1990 and JIS X 0213:2000 - plane 2. - - SJIS may contain one of JIS C 6226-1978, JIS X 0208-1983, - JIS X 0208:1990/1997, or JIS X 0213:2000 plane 1, one of - JIS X 0201:1976/1997 left plane or ASCII, JIS X 0201:1976/1997 - right plane, and JIS X 0213:2000 plane 2. + - JIS C 6226-1978, JIS X 0208-1983, JIS X 0208:1990/1997, JIS X 0212:1990, + JIS X 0213:2000/2004, JIS X 0201:1976/1997 left/right planes, and ASCII. + - UJIS + - SJIS - Output following codes. - iso8 (CTEXT, which is defined in X11) - iso7 with only GZDM4, GZD4, G1DM4, G1DM6, G1D4, G1D6, SI and SO escape sequences. - - LESS-JIS, UJIS and SJIS. - - LESS-JIS may contain JIS X 0208-1983 with an assumption - that its face is the same as JIS X 0213:2000 plane 1, - JIS X 0201:1976/1997 left/right planes and ASCII. - It does not contain JIS X 0212:1990 or JIS X 0213:2000 plane 2, - so those are marked as wrong character on the fly. - If you need more restricted JIS sequence, please use iso7 or iso8. - - UJIS may contain one of JIS C 6226-1978, JIS X 0208-1983, - JIS X 0208:1990/1997, or JIS X 0213:2000 plane 1, one of - JIS X 0201:1976/1997 left plane or ASCII, JIS X 0201:1976/1997 - right plane, and both JIS X 0212:1990 and JIS X 0213:2000 - plane 2. 
- - SJIS may contain one of JIS C 6226-1978, JIS X 0208-1983, - JIS X 0208:1990/1997, or JIS X 0213:2000 plane 1, one of - JIS X 0201:1976/1997 left plane or ASCII, JIS X 0201:1976/1997 - right plane, and JIS X 0213:2000 plane 2. + - JIS C 6226-1978, JIS X 0208-1983, JIS X 0208:1990/1997, JIS X 0212:1990, + JIS X 0213:2000/2004, JIS X 0201:1976/1997 left/right planes, and ASCII. + - UJIS + - SJIS - Automatic decompression of compressed files. **Problems** Index: less/README.iso.jp diff -u less/README.iso.jp:1.17 less/README.iso.jp:1.18 --- less/README.iso.jp:1.17 Wed Nov 22 00:01:27 2000 +++ less/README.iso.jp Sat Feb 18 09:57:24 2006 @@ -16,38 +16,19 @@ - ISO 2022$B$NJ}$,MxMQ$G$-$^$9!#(B - - SJIS$B$O!"(BJIS C 6226-1978$B!"(BJIS X 0208-1983$B!"(BJIS X 0208:1990/1997$B!"(B - JIS X 0213:2000$B$N(B1$BLL$N$&$A$N$I$l$+!"(BJIS X 0201:1976/1997$B:8LL(B - $B$+(BASCII$B$N$I$A$i$+!"(BJIS X 0201:1976/1997$B1&LL!"$=$7$F(B - JIS X 0213:2000$B$N(B2$BLL$,MxMQ$G$-$^$9!#(B + - JIS C 6226-1978$B!"(BJIS X 0208-1983$B!"(BJIS X 0208:1990/1997$B!"(BJIS X 0212:1990$B!"(B + JIS X 0213:2000/2004$B!"(BJIS X 0201:1976/1997$B:8(B/$B1&LL!"$=$7$F(BASCII$B$,MxMQ(B + $B$G$-$^$9!#(B + - UJIS + - SJIS - $B=PNO$H$7$FMxMQ$G$-$kJ8;z%3!<%I$NDj$7$?>e$G$N(BJIS X 0208-1983$B$H!"(B - JIS X 0201:1976/1997$B:8(B/$B1&LL!"(BASCII$B$rMxMQ$7$F=PNO$7$^(B - $B$9!#(BJIS X 0212:1990$B$H(BJIS X 0213:2000$B$N(B2$BLL$O=PNO$G$-$J(B - $B$$$?$a!"=PNO;~$K@5$7$/$J$$J8;z$H$7$F%^!<%/$7$FI=<($5(B - $B$l$^$9!#$b$7!"F~NODL$j$N=PNO$,M_$7$$>l9g$O!"(Biso7$B$d(B - iso8$B$rMxMQ$7$F=PNO$7$F2<$5$$!#(B - - UJIS$B$O!"(BJIS C 6226-1978$B!"(BJIS X 0208-1983$B!"(BJIS X 0208:1990/1997$B!"(B - JIS X 0213:2000$B$N(B1$BLL$N$&$A$N$I$l$+!"(BJIS X 0201:1976/1997$B:8LL(B - $B$+(BASCII$B$N$I$A$i$+!"(BJIS X 0201:1976/1997$B1&LL!"$=$7$F(B - JIS X 0212:1990$B$H(BJIS X 0213:2000$B$N(B2$BLL$NN>J}$,MxMQ$7$F(B - $B=PNO$7$^$9!#(B - - SJIS$B$O!"(BJIS C 6226-1978$B!"(BJIS X 0208-1983$B!"(BJIS X 0208:1990/1997$B!"(B - JIS X 
0213:2000$B$N(B1$BLL$N$&$A$N$I$l$+!"(BJIS X 0201:1976/1997$B:8LL(B - $B$+(BASCII$B$N$I$A$i$+!"(BJIS X 0201:1976/1997$B1&LL!"$=$7$F(B - JIS X 0213:2000$B$N(B2$BLL$,MxMQ$7$F=PNO$7$^$9!#(B + - JIS C 6226-1978$B!"(BJIS X 0208-1983$B!"(BJIS X 0208:1990/1997$B!"(BJIS X 0212:1990$B!"(B + JIS X 0213:2000/2004$B!"(BJIS X 0201:1976/1997$B:8(B/$B1&LL!"$=$7$F(BASCII$B$,MxMQ(B + $B$G$-$^$9!#(B + - UJIS + - SJIS - compress$B$d(Bgzip$B$5$l$?%U%!%$%k$r4JC1$KD/$a$k$3$H$,$G$-$k!#(B **$BLdBjE@(B** Index: less/charset.c diff -u less/charset.c:1.67 less/charset.c:1.70 --- less/charset.c:1.67 Wed Aug 31 12:22:41 2005 +++ less/charset.c Sat Sep 17 16:50:39 2005 @@ -36,86 +36,170 @@ char *name; int *p_flag; char *desc; - CODESET left; - CODESET right; - CODESET output; + SETCHARSET scs; + ENCSET input; + ENCSET inputr; + ENCSET output; } charsets[] = { { "ascii", NULL, "8bcccbcc18b95.b", - noconv, none, noconv }, + SCSASCII, ESNOCONV, ESNONE, ESNOCONV }, { "dos", NULL, "8bcccbcc12bc5b223.b", - noconv, noconv, noconv }, + SCSASCII, ESNOCONV, ESNOCONV, ESNOCONV }, { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b.", - noconv, noconv, noconv }, + SCSASCII, ESNOCONV, ESNOCONV, ESNOCONV }, { "IBM-1047", NULL, "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b", - noconv, noconv, noconv }, + SCSASCII, ESNOCONV, ESNOCONV, ESNOCONV }, { "iso8859", NULL, "8bcccbcc18b95.33b.", - noconv, noconv, noconv }, + SCSASCII, ESNOCONV, ESNOCONV, ESNOCONV }, { "koi8-r", NULL, "8bcccbcc18b95.b128.", - noconv, noconv, noconv }, + SCSASCII, ESNOCONV, ESNOCONV, ESNOCONV }, { "next", NULL, "8bcccbcc18b95.bb125.bb", - noconv, noconv, noconv }, - { "utf-8", &utf_mode, "8bcccbcc18b.", - noconv, noconv, noconv }, + SCSASCII, ESNOCONV, ESNOCONV, ESNOCONV }, #if ISO { "iso7", NULL, "8bcccb4c11bc4b96.b", - iso7, noconv, iso7 }, + SCSASCII | SCSALLJIS | SCSOTHERISO, + ESISO7, ESNONE, ESISO7 }, { "iso8", NULL, "8bcccb4c11bc4b95.15b2.16b.", - iso7, iso8, iso8 }, + SCSASCII | SCSALLJIS 
| SCSOTHERISO, + ESISO7, ESISO8, ESISO8 }, # if JAPANESE - /* read all KANJI code sets */ - { "japanese", NULL, "8bcccb4c11bc4b95.b127.b", - jis, japanese, jis }, + /* read JIS - recognize all JIS */ + { "jis-iso7", NULL, "8bcccb4c11bc4b95.b", + SCSASCII | SCSALLJIS | SCSOTHERISO, + ESISO7, ESNONE, ESISO7 }, + { "jis-jis83", NULL, "8bcccb4c11bc4b95.b", + /* except plane 2 and supplement */ + SCSASCII | SCSALLJISTRAD, + ESISO7, ESNONE, ESJIS83 }, + { "jis-ujis", NULL, "8bcccb4c11bc4b95.b", + SCSASCII | SCSALLJIS, + ESISO7, ESNONE, ESUJIS }, + { "jis-sjis", NULL, "8bcccb4c11bc4b95.b", + /* recognize all JIS except supplement */ + SCSASCII | SCSALLSJIS, + ESISO7, ESNONE, ESSJIS }, + + /* read UJIS - recognize all JIS */ + { "ujis-ujis", NULL, "8bcccbcc18b95.15b2.17b94.b", + SCSASCII | SCSALLJIS, + ESNOCONV, ESUJIS, ESUJIS }, + { "ujis-iso7", NULL, "8bcccbcc18b95.15b2.17b94.b", + SCSASCII | SCSALLJIS, + ESNOCONV, ESUJIS, ESISO7 }, + { "ujis-jis83", NULL, "8bcccbcc18b95.15b2.17b94.b", + /* except plane 2 and supplement */ + SCSASCII | SCSALLJISTRAD, + ESNOCONV, ESUJIS, ESJIS83 }, + { "ujis-sjis", NULL, "8bcccbcc18b95.15b2.17b94.b", + /* recognize all JIS except supplement */ + SCSASCII | SCSALLSJIS, + ESNOCONV, ESUJIS, ESSJIS }, + + /* read SJIS - recognize all JIS except supplement */ + { "sjis-sjis", NULL, "8bcccbcc18b95.b125.3b", + SCSASCII | SCSALLSJIS, + ESNOCONV, ESSJIS, ESSJIS }, + { "sjis-iso7", NULL, "8bcccbcc18b95.b125.3b", + SCSASCII | SCSALLSJIS, + ESNOCONV, ESSJIS, ESISO7 }, + { "sjis-jis83", NULL, "8bcccbcc18b95.b125.3b", + /* except plane 2 and supplement */ + SCSASCII | SCSALLJISTRAD, + ESNOCONV, ESSJIS, ESJIS83 }, + { "sjis-ujis", NULL, "8bcccbcc18b95.b125.3b", + SCSASCII | SCSALLSJIS, + ESNOCONV, ESSJIS, ESUJIS }, + + /* read all - recognize all JIS and ISO */ + { "japaneseiso7-iso7", NULL, "8bcccb4c11bc4b223.b", + SCSASCII | SCSALLJIS | SCSOTHERISO, + ESISO7, ESALLJA, ESISO7 }, + + /* read all KANJI code sets - recognize all JIS */ { 
"japanese-iso7", NULL, "8bcccb4c11bc4b95.b127.b", - iso7, japanese, iso7 }, - { "japanese-jis", NULL, "8bcccb4c11bc4b95.b127.b", - jis, japanese, jis }, + SCSASCII | SCSALLJIS, + ESISO7, ESALLJA, ESISO7 }, + { "japanese-jis83", NULL, "8bcccb4c11bc4b95.b127.b", + /* except plane 2 and supplement */ + SCSASCII | SCSALLJISTRAD, + ESISO7, ESALLJA, ESJIS83 }, { "japanese-ujis", NULL, "8bcccb4c11bc4b95.b127.b", - jis, japanese, ujis }, - { "japanese-euc", NULL, "8bcccb4c11bc4b95.b127.b", - jis, japanese, ujis }, + SCSASCII | SCSALLJIS, + ESISO7, ESALLJA, ESUJIS }, { "japanese-sjis", NULL, "8bcccb4c11bc4b95.b127.b", - jis, japanese, sjis }, - /* read JIS */ - { "jis", NULL, "8bcccb4c11bc4b95.b", - jis, none, jis }, - { "jis-ujis", NULL, "8bcccb4c11bc4b95.15b2.17b94.b", - jis, ujis, ujis }, - { "jis-euc", NULL, "8bcccb4c11bc4b95.15b2.17b94.b", - jis, ujis, ujis }, - { "jis-sjis", NULL, "8bcccb4c11bc4b95.b125.3b", - jis, sjis, sjis }, - /* read UJIS */ - { "ujis", NULL, "8bcccbcc18b95.15b2.17b94.b", - noconv, ujis, ujis }, - { "euc", NULL, "8bcccbcc18b95.15b2.17b94.b", - noconv, ujis, ujis }, - { "ujis-iso7", NULL, "8bcccb4c11bc4b96.14b2.17b94.b", - iso7, ujis, iso7 }, - { "euc-iso7", NULL, "8bcccb4c11bc4b96.14b2.17b94.b", - iso7, ujis, iso7 }, - { "ujis-jis", NULL, "8bcccb4c11bc4b95.15b2.17b94.b", - jis, ujis, jis }, - { "euc-jis", NULL, "8bcccb4c11bc4b95.15b2.17b94.b", - jis, ujis, jis }, - /* disallow UJIS's katakana to improve the encoding detection */ - { "ujis-sjis", NULL, "8bcccbcc18b95.15b.18b94.b", - noconv, ujis, sjis }, - { "euc-sjis", NULL, "8bcccbcc18b95.15b.18b94.b", - noconv, ujis, sjis }, - /* read SJIS */ - { "sjis", NULL, "8bcccbcc18b95.b125.3b", - noconv, sjis, sjis }, - { "sjis-iso7", NULL, "8bcccb4c11bc4b221.b", - iso7, sjis, iso7 }, - { "sjis-jis", NULL, "8bcccb4c11bc4b95.b125.3b", - jis, sjis, jis }, - { "sjis-ujis", NULL, "8bcccbcc18b95.b125.3b", - noconv, sjis, ujis }, - { "sjis-euc", NULL, "8bcccbcc18b95.b125.3b", - noconv, sjis, ujis }, + /* 
recognize all JIS except supplement */ + SCSASCII | SCSALLSJIS, + ESISO7, ESALLJA, ESSJIS }, + + /* read all KANJI before 1983 */ + { "japanese83-iso7", NULL, "8bcccb4c11bc4b95.b127.b", + SCSASCII | SCSJISX0201_1976 | SCSJISC6226_1978 | + SCSJISX0208_1983, + ESISO7, ESALLJA, ESISO7 }, + { "japanese83-jis83", NULL, "8bcccb4c11bc4b95.b127.b", + SCSASCII | SCSJISX0201_1976 | SCSJISC6226_1978 | + SCSJISX0208_1983, + ESISO7, ESALLJA, ESJIS83 }, + { "japanese83-ujis", NULL, "8bcccb4c11bc4b95.b127.b", + SCSASCII | SCSJISX0201_1976 | SCSJISC6226_1978 | + SCSJISX0208_1983, + ESISO7, ESALLJA, ESUJIS }, + { "japanese83-sjis", NULL, "8bcccb4c11bc4b95.b127.b", + SCSASCII | SCSJISX0201_1976 | SCSJISC6226_1978 | + SCSJISX0208_1983, + ESISO7, ESALLJA, ESSJIS }, + + /* read all KANJI before 1990 */ + { "japanese90-iso7", NULL, "8bcccb4c11bc4b95.b127.b", + SCSASCII | SCSJISX0201_1976 | SCSJISC6226_1978 | + SCSJISX0208_1983 | SCSJISX0208_1990 | SCSJISX0212_1990, + ESISO7, ESALLJA, ESISO7 }, + { "japanese90-jis83", NULL, "8bcccb4c11bc4b95.b127.b", + /* except supplement */ + SCSASCII | SCSJISX0201_1976 | SCSJISC6226_1978 | + SCSJISX0208_1983 | SCSJISX0208_1990, + ESISO7, ESALLJA, ESJIS83 }, + { "japanese90-ujis", NULL, "8bcccb4c11bc4b95.b127.b", + SCSASCII | SCSJISX0201_1976 | SCSJISC6226_1978 | + SCSJISX0208_1983 | SCSJISX0208_1990 | SCSJISX0212_1990, + ESISO7, ESALLJA, ESUJIS }, + { "japanese90-sjis", NULL, "8bcccb4c11bc4b95.b127.b", + /* except supplement */ + SCSASCII | SCSJISX0201_1976 | SCSJISC6226_1978 | + SCSJISX0208_1983 | SCSJISX0208_1990, + ESISO7, ESALLJA, ESSJIS }, + + /* read all KANJI before 2000 */ + { "japanese00-iso7", NULL, "8bcccb4c11bc4b95.b127.b", + SCSASCII | SCSJISX0201_1976 | SCSJISC6226_1978 | + SCSJISX0208_1983 | SCSJISX0208_1990 | SCSJISX0213_2000 | + SCSJISX0213_2ND | SCSJISX0212_1990, + ESISO7, ESALLJA, ESISO7 }, + { "japanese00-jis83", NULL, "8bcccb4c11bc4b95.b127.b", + /* except plane 2 and supplement */ + SCSASCII | SCSJISX0201_1976 | SCSJISC6226_1978 
| + SCSJISX0208_1983 | SCSJISX0208_1990 | SCSJISX0213_2000, + ESISO7, ESALLJA, ESJIS83 }, + { "japanese00-ujis", NULL, "8bcccb4c11bc4b95.b127.b", + SCSASCII | SCSJISX0201_1976 | SCSJISC6226_1978 | + SCSJISX0208_1983 | SCSJISX0208_1990 | SCSJISX0213_2000 | + SCSJISX0213_2ND | SCSJISX0212_1990, + ESISO7, ESALLJA, ESUJIS }, + { "japanese00-sjis", NULL, "8bcccb4c11bc4b95.b127.b", + /* except supplement */ + SCSASCII | SCSJISX0201_1976 | SCSJISC6226_1978 | + SCSJISX0208_1983 | SCSJISX0208_1990 | SCSJISX0213_2000 | + SCSJISX0213_2ND, + ESISO7, ESALLJA, ESSJIS }, # endif + { "utf-8", NULL, "8bcccbcc18b.", + SCSUTF8, ESUTF8, ESUTF8, ESUTF8 }, +#else + { "utf-8", &utf_mode, "8bcccbcc18b.", + SCSUTF8, ESNOCONV, ESNOCONV, ESNOCONV }, #endif - { NULL, NULL, NULL, noconv, noconv, noconv } + { NULL, NULL, NULL, + SCSASCII, ESNOCONV, ESNOCONV, ESNOCONV }, }; #if HAVE_LOCALE && ISO @@ -174,6 +258,22 @@ } cs_aliases[] = { { "latin1", "iso8859" }, { "latin9", "iso8859" }, +#if JAPANESE + { "japaneseiso7", "japaneseiso7-iso7" }, + { "japanese", "japanese-iso7" }, + { "japanese-euc", "japanese-ujis" }, + { "jis", "jis-iso7" }, + { "jis-euc", "jis-ujis" }, + { "ujis", "ujis-ujis" }, + { "euc", "ujis-ujis" }, + { "euc-iso7", "ujis-iso7" }, + { "ujis-jis", "ujis-iso7" }, + { "euc-jis", "ujis-iso7" }, + { "euc-sjis", "ujis-sjis" }, + { "sjis-euc", "sjis-ujis" }, + { "sjis", "sjis-sjis" }, + { "sjis-jis", "sjis-iso7" }, +#endif { NULL, NULL } }; @@ -238,31 +338,31 @@ } /* - * Return the CODESET of left plane of named charset. + * Return the ENCSET of left plane of named charset. */ - public CODESET + public ENCSET left_codeset_of_charset(name) register char *name; { struct charset *p = search_charset(name); if (p) - return (p->left); - return (noconv); + return (p->input); + return (ESNOCONV); } /* - * Return the CODESET of right plane of named charset. + * Return the ENCSET of right plane of named charset. 
*/ - public CODESET + public ENCSET right_codeset_of_charset(name) register char *name; { struct charset *p = search_charset(name); if (p) - return (p->right); - return (none); + return (p->inputr); + return (ESNOCONV); } /* @@ -363,7 +463,7 @@ if (p->p_flag != NULL) *(p->p_flag) = 1; #if ISO - init_def_codesets(p->left, p->right, p->output); + init_def_scs_es(p->scs, p->input, p->inputr, p->output); #endif return (1); } @@ -429,7 +529,7 @@ chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR; } #if ISO - init_def_codesets(noconv, noconv, noconv); + init_def_scs_es(SCSASCII, ESNOCONV, ESNOCONV, ESNOCONV); #endif } #endif Index: less/configure diff -u less/configure:1.11 less/configure:1.12 --- less/configure:1.11 Wed Aug 31 00:09:14 2005 +++ less/configure Mon Sep 19 10:03:07 2005 @@ -843,7 +843,6 @@ --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --disable-msb Disable the MSB of non ASCII characters - --disable-jisx0213 Disable the JIS X 0213 mapping for SJIS and UJIS --disable-largefile omit support for large files Optional Packages: @@ -1329,12 +1328,6 @@ enableval="$enable_msb" msb_enable=$enableval fi; -jisx0213_enable=unknown -# Check whether --enable-jisx0213 or --disable-jisx0213 was given. -if test "${enable_jisx0213+set}" = set; then - enableval="$enable_jisx0213" - jisx0213_enable=$enableval -fi; # Checks for programs. 
ac_ext=c @@ -6620,30 +6613,6 @@ fi;; esac -case $jisx0213_enable in -unknown|yes) - echo "$as_me:$LINENO: result: enable the JIS X 0213 mapping for SJIS and UJIS" >&5 -echo "${ECHO_T}enable the JIS X 0213 mapping for SJIS and UJIS" >&6 - cat >>confdefs.h <<\_ACEOF -#define SJIS0213 1 -_ACEOF - - cat >>confdefs.h <<\_ACEOF -#define UJIS0213 1 -_ACEOF -;; -no) echo "$as_me:$LINENO: result: disable the JIS X 0213 mapping for SJIS and UJIS" >&5 -echo "${ECHO_T}disable the JIS X 0213 mapping for SJIS and UJIS" >&6 - cat >>confdefs.h <<\_ACEOF -#define SJIS0213 0 -_ACEOF - - cat >>confdefs.h <<\_ACEOF -#define UJIS0213 0 -_ACEOF -;; -esac - # Check whether --with-editor or --without-editor was given. if test "${with_editor+set}" = set; then Index: less/configure.ac diff -u less/configure.ac:1.2 less/configure.ac:1.3 --- less/configure.ac:1.2 Wed Aug 31 00:09:14 2005 +++ less/configure.ac Mon Sep 19 10:03:07 2005 @@ -17,8 +17,6 @@ AC_ARG_WITH(cs-regex, [ --with-cs-regex Regular expression library with character set detection], have_regex_cs=$withval, have_regex_cs=no) msb_enable=unknown AC_ARG_ENABLE(msb, [ --disable-msb Disable the MSB of non ASCII characters], msb_enable=$enableval) -jisx0213_enable=unknown -AC_ARG_ENABLE(jisx0213, [ --disable-jisx0213 Disable the JIS X 0213 mapping for SJIS and UJIS], jisx0213_enable=$enableval) # Checks for programs. 
AC_PROG_CC @@ -420,16 +418,6 @@ fi;; esac -case $jisx0213_enable in -unknown|yes) - AC_MSG_RESULT(enable the JIS X 0213 mapping for SJIS and UJIS) - AC_DEFINE(SJIS0213, 1) - AC_DEFINE(UJIS0213, 1);; -no) AC_MSG_RESULT(disable the JIS X 0213 mapping for SJIS and UJIS) - AC_DEFINE(SJIS0213, 0) - AC_DEFINE(UJIS0213, 0);; -esac - AC_ARG_WITH(editor, [ --with-editor=PROGRAM use PROGRAM as the default editor [vi]], AC_DEFINE_UNQUOTED(EDIT_PGM, "$withval"), AC_DEFINE(EDIT_PGM, "vi")) Index: less/defines.h.in diff -u less/defines.h.in:1.35 less/defines.h.in:1.36 --- less/defines.h.in:1.35 Wed Aug 31 00:09:14 2005 +++ less/defines.h.in Mon Sep 19 10:03:07 2005 @@ -428,24 +428,12 @@ /* Define as the return type of signal handlers (`int' or `void'). */ #undef RETSIGTYPE -/* - * Define SJIS0213 if you want to enable a JIS X 0213:2000 mapping of - * SJIS encoding. - */ -#undef SJIS0213 - /* Define to 1 if the `S_IS*' macros in <sys/stat.h> do not work properly. */ #undef STAT_MACROS_BROKEN /* Define to 1 if you have the ANSI C header files. */ #undef STDC_HEADERS -/* - * Define UJIS0213 if you want to enable a JIS X 0213:2000 mapping of - * UJIS encoding. - */ -#undef UJIS0213 - /* Define to 1 if you can safely include both <sys/time.h> and <time.h>. 
*/ #undef TIME_WITH_SYS_TIME Index: less/funcs.h diff -u less/funcs.h:1.40 less/funcs.h:1.41 --- less/funcs.h:1.40 Sun Sep 4 17:41:18 2005 +++ less/funcs.h Wed Sep 14 06:43:54 2005 @@ -50,8 +50,8 @@ public void ch_close (); public int ch_getflags (); public void ch_dump (); - public CODESET left_codeset_of_charset (); - public CODESET right_codeset_of_charset (); + public ENCSET left_codeset_of_charset (); + public ENCSET right_codeset_of_charset (); public void init_planeset (); public void init_charset (); public int binary_char (); Index: less/multi.c diff -u less/multi.c:1.135 less/multi.c:1.142 --- less/multi.c:1.135 Mon Sep 5 17:50:06 2005 +++ less/multi.c Sat Feb 18 09:38:02 2006 @@ -61,7 +61,7 @@ /* - * Kanji convetion + * Macro for character detection */ #define ISJIS(c) (0x21 <= (c) && (c) <= 0x7e) #define ISUJIS(c) (0xa1 <= (c) && (c) <= 0xfe) @@ -78,6 +78,20 @@ #define ISSJISKANJI1(c) ((0x81 <= (c) && (c) <= 0x9f) || \ (0xe0 <= (c) && (c) <= 0xfc)) #define ISSJISKANA(c) (0xa1 <= (c) && (c) <= 0xdf) +#define ISUTF8_HEAD(c) (0xc0 <= (c) && (c) < 0xfe) +#define ISUTF8_REST(c) (((c) & 0xc0) == 0x80) +#define ISUTF8_1(c) ((c) <= 0x7f) +#define ISUTF8_2(c1,c2) (((c1) & 0xe0) == 0xc0 && ISUTF8_REST(c2)) +#define ISUTF8_3(c1,c2,c3) (((c1) & 0xf0) == 0xe0 && ISUTF8_REST(c2) && \ + ISUTF8_REST(c3)) +#define ISUTF8_4(c1,c2,c3,c4) (((c1) & 0xf8) == 0xf0 && ISUTF8_REST(c2) && \ + ISUTF8_REST(c3) && ISUTF8_REST(c4)) +#define ISUTF8_5(c1,c2,c3,c4,c5) \ + (((c1) & 0xfc) == 0xf8 && ISUTF8_REST(c2) && ISUTF8_REST(c3) && \ + ISUTF8_REST(c4) && ISUTF8_REST(c5)) +#define ISUTF8_6(c1,c2,c3,c4,c5,c6) \ + (((c1) & 0xfe) == 0xfc && ISUTF8_REST(c2) && ISUTF8_REST(c3) && \ + ISUTF8_REST(c4) && ISUTF8_REST(c5) && ISUTF8_REST(c6)) #endif @@ -107,8 +121,9 @@ }; -static CODESET def_left = iso7; /* Default code set of left plane */ -static CODESET def_right = iso8; /* Default code set of right plane */ +static SETCHARSET def_scs = SCSASCII | SCSOTHERISO; +static ENCSET def_input = 
ESISO7; /* Default character set of left plane */ +static ENCSET def_inputr = ESISO8; /* Default character set of right plane */ static int def_gs[4] = { ASCII, /* Default g0 plane status */ WRONGCS, /* Default g1 plane status */ @@ -116,9 +131,9 @@ WRONGCS /* Default g3 plane status */ }; -static CODESET output = iso8; /* Code set for output */ +static ENCSET output = ESISO8; /* Character set for output */ #if JAPANESE -static CODESET def_priority = ujis; /* Which code was given priority. */ +static J_PRIORITY def_priority = PUJIS; /* Which code was given priority. */ #endif typedef POSITION m_position; @@ -148,11 +163,12 @@ struct multibuf { struct { - CODESET left; - CODESET right; + SETCHARSET scs; + ENCSET input; + ENCSET inputr; } io; - CODESET orig_io_right; + ENCSET orig_io_right; int rotation_io_right; enum escape_sequence eseq; @@ -162,11 +178,11 @@ int cs; /* Current character set */ struct m_status* ms; #if JAPANESE - CODESET priority; /* Which code was given priority. */ + J_PRIORITY priority; /* Which code was given priority. */ int sequence_counter; /* Special counter for detect UJIS KANJI. */ #endif - int icharset; /* Last non ASCII character set of input */ + CHARSET icharset; /* Last non ASCII character set of input */ /* * Small buffers to hold all parsing bytes of multi-byte characters. 
@@ -211,6 +227,9 @@ #define INBUF0(mp) ((mp)->inbuf[(mp)->startpos%sizeof((mp)->inbuf)]) #define INBUF1(mp) ((mp)->inbuf[((mp)->startpos+1)%sizeof((mp)->inbuf)]) #define INBUF2(mp) ((mp)->inbuf[((mp)->startpos+2)%sizeof((mp)->inbuf)]) +#define INBUF3(mp) ((mp)->inbuf[((mp)->startpos+3)%sizeof((mp)->inbuf)]) +#define INBUF4(mp) ((mp)->inbuf[((mp)->startpos+4)%sizeof((mp)->inbuf)]) +#define INBUF5(mp) ((mp)->inbuf[((mp)->startpos+5)%sizeof((mp)->inbuf)]) #define INBUFI(mp,i) ((mp)->inbuf[(i)%sizeof((mp)->inbuf)]) static int code_length(mp, cs) @@ -227,12 +246,16 @@ #if JAPANESE switch (CS2CHARSET(cs)) { case UJIS: + case UJIS2000: + case UJIS2004: c = INBUF0(mp); if (ISUJISKANJI1(c)) return 2; if (ISUJISKANA1(c)) return 2; if (ISUJISKANJISUP1(c)) return 3; return 1; case SJIS: + case SJIS2000: + case SJIS2004: c = INBUF0(mp); if (ISSJISKANJI1(c)) return 2; if (ISSJISKANA(c)) return 1; @@ -332,7 +355,11 @@ case JISX0213KANJI1: case JISX0213KANJI2: case UJIS: + case UJIS2000: + case UJIS2004: case SJIS: + case SJIS2000: + case SJIS2004: put_wrongmark(mp); break; case GB2312: @@ -341,10 +368,7 @@ break; } } else { - int i; - - i = code_length(mp, mp->multics[mp->intindex]); - while (--i >= 0) { + while (mp->startpos <= mp->lastpos) { wrongcs1(mp); } } @@ -434,247 +458,439 @@ * less ignore the undefined codes */ wrongchar(mp); - mp->startpos = pos; - multi_reparse(mp); } } #if JAPANESE /* - * Internalize input stream. - * We recognized input data as using UJIS coding set. + * Internalize input stream encoded by UJIS encoding scheme. + * + * Return 1 if input is recognized well. + * Return 0 if input is rejected. */ -static void internalize_ujis(mp) +static int internalize_ujis(mp) MULBUF *mp; { if (mp->lastpos - mp->startpos + 1 == 1) { - /* do nothing */ + /* do nothing. 
return 1 to get next byte */ + return 1; } else if (mp->lastpos - mp->startpos + 1 == 2) { - if (ISUJISKANA(INBUF0(mp), INBUF1(mp))) { - mp->multiint[mp->intindex] = INBUF1(mp) & 0x7f; + int c0 = INBUF0(mp); + int c1 = INBUF1(mp); + if (ISUJISKANA(c0, c1)) { + mp->cs = JISX0201KANA; + mp->icharset = UJIS; + mp->multiint[mp->intindex] = c1 & 0x7f; mp->multics[mp->intindex] = mp->cs; mp->intindex += 1; mp->startpos = mp->lastpos + 1; - } else if (ISUJISKANJI(INBUF0(mp), INBUF1(mp))) { - mp->multiint[mp->intindex] = INBUF0(mp); - mp->multics[mp->intindex] = UJIS; - mp->multiint[mp->intindex + 1] = INBUF1(mp); - mp->multics[mp->intindex + 1] = REST_MASK | UJIS; + return 1; + } else if (ISUJISKANJI(c0, c1)) { + if (mp->io.scs & SCSJISX0213_2004) { + mp->icharset = UJIS2004; + mp->cs = JISX02132004KANJI1; + } else if (mp->io.scs & SCSJISX0213_2000) { + mp->icharset = UJIS2000; + mp->cs = JISX0213KANJI1; + } else { + mp->icharset = UJIS; + mp->cs = JISX0208KANJI; + } + mp->multiint[mp->intindex] = c0; + mp->multics[mp->intindex] = mp->icharset; + mp->multiint[mp->intindex + 1] = c1; + mp->multics[mp->intindex + 1] = REST_MASK | mp->icharset; - /* - * Eliminate some wrong codes - */ + /* Check character whether it has defined glyph or not */ if (chisvalid_cs(&mp->multiint[mp->intindex], &mp->multics[mp->intindex])) { - /* JIS X 0208:1997 */ - mp->multiint[mp->intindex] &= 0x7f; + /* defined */ + mp->multiint[mp->intindex] = c0 & 0x7f; mp->multics[mp->intindex] = mp->cs; - mp->multiint[mp->intindex + 1] &= 0x7f; + mp->multiint[mp->intindex + 1] = c1 & 0x7f; mp->multics[mp->intindex + 1] = REST_MASK | mp->cs; mp->intindex += 2; mp->startpos = mp->lastpos + 1; } else { - /* - * less ignore the undefined codes - */ + /* undefined. less ignore them */ wrongchar(mp); - mp->startpos = mp->lastpos + 1; - multi_reparse(mp); } + /* data are recognized as kanji or wrong data, so return 1 */ + return 1; + } else if (ISUJISKANJISUP(c0, c1, 0xa1)) { + /* do nothing. 
return 1 to get next byte */ + return 1; } - } else if (mp->lastpos - mp->startpos + 1 == 3 && - ISUJISKANJISUP(INBUF0(mp), INBUF1(mp), INBUF2(mp))) { - mp->multiint[mp->intindex] = INBUF0(mp); - mp->multics[mp->intindex] = UJIS; - mp->multiint[mp->intindex + 1] = INBUF1(mp); - mp->multics[mp->intindex + 1] = REST_MASK | UJIS; - mp->multiint[mp->intindex + 2] = INBUF2(mp); - mp->multics[mp->intindex + 2] = REST_MASK | UJIS; + } else if (mp->lastpos - mp->startpos + 1 == 3) { + int c0 = INBUF0(mp); + int c1 = INBUF1(mp); + int c2 = INBUF2(mp); + if (ISUJISKANJISUP(c0, c1, c2)) { + mp->cs = JISX0212KANJISUP; + mp->icharset = UJIS; + mp->multiint[mp->intindex] = c0; + mp->multics[mp->intindex] = UJIS; + mp->multiint[mp->intindex + 1] = c1; + mp->multics[mp->intindex + 1] = REST_MASK | UJIS; + mp->multiint[mp->intindex + 2] = c2; + mp->multics[mp->intindex + 2] = REST_MASK | UJIS; - /* - * Eliminate some wrong codes - */ - if (chisvalid_cs(&mp->multiint[mp->intindex], - &mp->multics[mp->intindex])) { - register int c1; - static unsigned char table[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -#if UJIS0213 - 0, 0x21, 0, 0x23, 0x24, 0x25, 0, 0, - 0x28, 0, 0, 0, 0x2C, 0x2D, 0x2E, 0x2F, -#else - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -#endif - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -#if UJIS0213 - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0 -#else - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -#endif - }; - c1 = mp->multiint[mp->intindex + 1] & 0x7f; - if (table[c1] != 0) { - /* JIS X 0213:2000 plane 2 */ - if (output == jis) { - /* JIS cannot output JIS X 0213:2000 plane 2 */ - wrongcs1(mp); - multi_reparse(mp); + /* Check character 
whether it has defined glyph or not */ + if (chisvalid_cs(&mp->multiint[mp->intindex], + &mp->multics[mp->intindex])) { + /* defined */ + static unsigned char table_ujis[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0x21, 0, 0x23, 0x24, 0x25, 0, 0, + 0x28, 0, 0, 0, 0x2C, 0x2D, 0x2E, 0x2F, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0 + }; + c1 &= 0x7f; + if (table_ujis[c1] != 0) { + /* JIS X 0213:2000 plane 2 */ + if (output & ESJIS83) { + /* JIS cannot output JIS X 0213:2000 plane 2 */ + wrongchar(mp); + } else { + mp->cs = JISX0213KANJI2; + mp->multiint[mp->intindex] = c1; + mp->multics[mp->intindex] = mp->cs; + mp->multiint[mp->intindex + 1] = c2 & 0x7f; + mp->multics[mp->intindex + 1] = REST_MASK | mp->cs; + mp->intindex += 2; + mp->startpos = mp->lastpos + 1; + } } else { - mp->multiint[mp->intindex] = c1; - mp->multics[mp->intindex] = - JISX0213KANJI2; - mp->multiint[mp->intindex + 1] = - mp->multiint[mp->intindex + 2] & 0x7f; - mp->multics[mp->intindex + 1] = - REST_MASK | JISX0213KANJI2; - mp->intindex += 2; - mp->startpos = mp->lastpos + 1; + /* JIS X 0212:1990 */ + if (output & (ESSJIS | ESJIS83)) { + /* SJIS cannot output JIS X 0212:1990 */ + wrongchar(mp); + } else { + mp->multiint[mp->intindex] = c1; + mp->multics[mp->intindex] = mp->cs; + mp->multiint[mp->intindex + 1] = c2 & 0x7f; + mp->multics[mp->intindex + 1] = REST_MASK | mp->cs; + mp->intindex += 2; + mp->startpos = mp->lastpos + 1; + } } } else { - /* JIS X 0212:1990 */ - if (output == sjis || output == jis) { - /* SJIS cannot output JIS X 0212:1990 */ - wrongcs1(mp); - multi_reparse(mp); - } else { - mp->multiint[mp->intindex] = c1; + /* undefined. 
less ignore them */ + wrongchar(mp); + } + /* data are recognized as kanji or wrong data, so return 1 */ + return 1; + } + } + /* return 0 because this data sequence is not matched to UJIS */ + return 0; +} + +/* + * Internalize input stream encoded by SJIS encoding scheme. + * + * Return 1 if input is recognized well. + * Return 0 if input is rejected. + */ +static int internalize_sjis(mp) +MULBUF *mp; +{ + if (mp->lastpos - mp->startpos + 1 == 1) { + int c0 = INBUF(mp); + if (ISSJISKANA(c0)) { + mp->cs = JISX0201KANA; + mp->icharset = SJIS; + mp->multiint[mp->intindex] = c0 & 0x7f; + mp->multics[mp->intindex] = mp->cs; + mp->intindex += 1; + mp->startpos = mp->lastpos + 1; + return 1; + } else { + /* do nothing. return 1 to get next byte */ + return 1; + } + } else if (mp->lastpos - mp->startpos + 1 == 2) { + int c0 = INBUF0(mp); + int c1 = INBUF1(mp); + if (ISSJISKANJI(c0, c1)) { + if (mp->io.scs & SCSJISX0213_2004) { + mp->icharset = SJIS2004; + mp->cs = JISX02132004KANJI1; + } else if (mp->io.scs & SCSJISX0213_2000) { + mp->icharset = SJIS2000; + mp->cs = JISX0213KANJI1; + } else { + mp->icharset = SJIS; + mp->cs = JISX0208KANJI; + } + + mp->multiint[mp->intindex] = c0; + mp->multics[mp->intindex] = mp->icharset; + mp->multiint[mp->intindex + 1] = c1; + mp->multics[mp->intindex + 1] = REST_MASK | mp->icharset; + + /* + * Check the correctness of SJIS encoded characters and + * convert them into internal representation. 
+ */ + if (chisvalid_cs(&mp->multiint[mp->intindex], + &mp->multics[mp->intindex])) { + int c2, c3; + static unsigned char table_sjis[] = { + 0, 0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, + 0x2F, 0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, + 0x3F, 0x41, 0x43, 0x45, 0x47, 0x49, 0x4B, 0x4D, + 0x4F, 0x51, 0x53, 0x55, 0x57, 0x59, 0x5B, 0x5D, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0x5F, 0x61, 0x63, 0x65, 0x67, 0x69, 0x6B, 0x6D, + 0x6F, 0x71, 0x73, 0x75, 0x77, 0x79, 0x7B, 0x7D, + 0x80, 0xA3, 0x81, 0xAD, 0x82, 0xEF, 0xF1, 0xF3, + 0xF5, 0xF7, 0xF9, 0xFB, 0xFD, 0, 0, 0 + }; + + c0 = table_sjis[c0 & 0x7f]; + c2 = c1 - ((unsigned char)c1 >= 0x80 ? 1 : 0); + c1 = c0; + c3 = c2 >= 0x9e; + if (c1 < 0x80) { + /* JIS X 0213:2000 plane 1 or JIS X 0208:1997 */ + mp->multiint[mp->intindex] = + (c1 + (c3 ? 1 : 0)); mp->multics[mp->intindex] = mp->cs; mp->multiint[mp->intindex + 1] = - mp->multiint[mp->intindex + 2] & 0x7f; + (c2 - (c3 ? 0x9e - 0x21 : 0x40 - 0x21)); mp->multics[mp->intindex + 1] = REST_MASK | mp->cs; mp->intindex += 2; mp->startpos = mp->lastpos + 1; + } else { + /* JIS X 0213:2000 plane 2 */ + if (output & ESJIS83) { + /* JIS cannot output JIS X 0213:2000 plane 2 */ + wrongchar(mp); + } else { + mp->cs = JISX0213KANJI2; + if (c1 > 0xA0) { + /* row 3-4, 13-14, and 79-94 */ + mp->multiint[mp->intindex] = + ((c1 & 0x7f) + (c3 ? 1 : 0)); + } else if (c1 == 0x80) { + /* row 1 or 8 */ + mp->multiint[mp->intindex] = + c3 ? 0x28 : 0x21; + } else if (c1 == 0x81) { + /* row 5 or 12 */ + mp->multiint[mp->intindex] = + c3 ? 0x2C : 0x25; + } else { + /* row 15 or 78 */ + mp->multiint[mp->intindex] = + c3 ? 0x6E : 0x2F; + } + mp->multics[mp->intindex] = JISX0213KANJI2; + mp->multiint[mp->intindex + 1] = + (c2 - (c3 ? 
0x9e - 0x21 : 0x40 - 0x21)); + mp->multics[mp->intindex + 1] = + REST_MASK | JISX0213KANJI2; + mp->intindex += 2; + mp->startpos = mp->lastpos + 1; + } } + } else { + /* undefined. less ignore them */ + wrongchar(mp); } - } else { - wrongchar(mp); - mp->startpos = mp->lastpos + 1; - multi_reparse(mp); + /* data are recognized as kanji or wrong data, so return 1 */ + return 1; } - } else { - wrongcs1(mp); - multi_reparse(mp); } + /* return 0 because this data sequence is not matched to UJIS */ + return 0; } /* - * Check and normalize all SJIS codes + * Internalize input stream encoded by UTF8 encoding scheme. + * + * Return 1 if input is recognized well. + * Return 0 if input is rejected. */ -static void internalize_sjis(mp) +static int internalize_utf8(mp) MULBUF *mp; { if (mp->lastpos - mp->startpos + 1 == 1) { - if (!ISSJISKANA(INBUF(mp))) { - wrongcs1(mp); - } else { - mp->multiint[mp->intindex] = INBUF(mp) & 0x7f; + /* do nothing. return 1 to get next byte */ + return 1; + } else if (mp->lastpos - mp->startpos + 1 == 2) { + int c0 = INBUF0(mp); + int c1 = INBUF1(mp); + if (ISUTF8_2(c0, c1)) { + mp->cs = UTF8; + mp->icharset = UTF8; + mp->multiint[mp->intindex] = c0; mp->multics[mp->intindex] = mp->cs; - mp->intindex += 1; - mp->startpos = mp->lastpos + 1; - } - } else if (mp->lastpos - mp->startpos + 1 == 2 && - ISSJISKANJI(INBUF0(mp), INBUF1(mp))) { - mp->multiint[mp->intindex] = INBUF0(mp); - mp->multics[mp->intindex] = SJIS; - mp->multiint[mp->intindex + 1] = INBUF1(mp); - mp->multics[mp->intindex + 1] = REST_MASK | SJIS; - - /* - * Check the correctness of SJIS encoded characters and - * convert them into internal representation. 
- */ - if (chisvalid_cs(&mp->multiint[mp->intindex], - &mp->multics[mp->intindex])) { - register int c1, c2, c3; - static unsigned char table[] = { - 0, 0x21, 0x23, 0x25, 0x27, 0x29, 0x2B, 0x2D, - 0x2F, 0x31, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, - 0x3F, 0x41, 0x43, 0x45, 0x47, 0x49, 0x4B, 0x4D, - 0x4F, 0x51, 0x53, 0x55, 0x57, 0x59, 0x5B, 0x5D, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0x5F, 0x61, 0x63, 0x65, 0x67, 0x69, 0x6B, 0x6D, -#if SJIS0213 - 0x6F, 0x71, 0x73, 0x75, 0x77, 0x79, 0x7B, 0x7D, - 0x80, 0xA3, 0x81, 0xAD, 0x82, 0xEF, 0xF1, 0xF3, - 0xF5, 0xF7, 0xF9, 0xFB, 0xFD, 0, 0, 0 -#else - 0x6F, 0x71, 0x73, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -#endif - }; + mp->multiint[mp->intindex + 1] = c1; + mp->multics[mp->intindex + 1] = REST_MASK | mp->cs; + if (output & ESUTF8) { + mp->intindex += 2; + mp->startpos = mp->lastpos + 1; + return 1; + } else { + mp->intindex += 2; + mp->startpos = mp->lastpos + 1; + return 1; + } + } else if (ISUJISKANJI(c0, c1)) { + if (mp->io.scs & SCSJISX0213_2004) { + mp->icharset = UJIS2004; + mp->cs = JISX02132004KANJI1; + } else if (mp->io.scs & SCSJISX0213_2000) { + mp->icharset = UJIS2000; + mp->cs = JISX0213KANJI1; + } else { + mp->icharset = UJIS; + mp->cs = JISX0208KANJI; + } + mp->multiint[mp->intindex] = c0; + mp->multics[mp->intindex] = mp->icharset; + mp->multiint[mp->intindex + 1] = c1; + mp->multics[mp->intindex + 1] = REST_MASK | mp->icharset; - c1 = table[INBUF0(mp) & 0x7f]; - c2 = INBUF(mp) - ((unsigned char)INBUF(mp) >= 0x80 ? 1 : 0); - c3 = c2 >= 0x9e; - if (c1 < 0x80) { - /* JIS X 0213:2000 plane 1 or JIS X 0208:1997 */ - mp->multiint[mp->intindex] = - (c1 + (c3 ? 
1 : 0)); + /* Check character whether it has defined glyph or not */ + if (chisvalid_cs(&mp->multiint[mp->intindex], + &mp->multics[mp->intindex])) { + /* defined */ + mp->multiint[mp->intindex] = c0 & 0x7f; mp->multics[mp->intindex] = mp->cs; - mp->multiint[mp->intindex + 1] = - (c2 - (c3 ? 0x9e - 0x21 : 0x40 - 0x21)); - mp->multics[mp->intindex + 1] = - REST_MASK | mp->cs; + mp->multiint[mp->intindex + 1] = c1 & 0x7f; + mp->multics[mp->intindex + 1] = REST_MASK | mp->cs; mp->intindex += 2; mp->startpos = mp->lastpos + 1; } else { - /* JIS X 0213:2000 plane 2 */ - if (output == jis) { - /* JIS cannot output JIS X 0213:2000 plane 2 */ - wrongcs1(mp); - multi_reparse(mp); - } else { - if (c1 > 0xA0) { - /* row 3-4, 13-14, and 79-94 */ - mp->multiint[mp->intindex] = - ((c1 & 0x7f) + (c3 ? 1 : 0)); - } else if (c1 == 0x80) { - /* row 1 or 8 */ - mp->multiint[mp->intindex] = - c3 ? 0x28 : 0x21; - } else if (c1 == 0x81) { - /* row 5 or 12 */ - mp->multiint[mp->intindex] = - c3 ? 0x2C : 0x25; - } else { - /* row 15 or 78 */ - mp->multiint[mp->intindex] = - c3 ? 0x6E : 0x2F; - } - mp->multics[mp->intindex] = JISX0213KANJI2; - mp->multiint[mp->intindex + 1] = - (c2 - (c3 ? 0x9e - 0x21 : 0x40 - 0x21)); - mp->multics[mp->intindex + 1] = - REST_MASK | JISX0213KANJI2; - mp->intindex += 2; - mp->startpos = mp->lastpos + 1; - } + /* undefined. less ignore them */ + wrongchar(mp); } - } else { - /* - * Less ignores undefined characters after marking - * them as wrong characters. - */ - wrongchar(mp); + /* data are recognized as kanji or wrong data, so return 1 */ + return 1; + } else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1)) { + /* do nothing. 
return 1 to get next byte */ + return 1; + } + } else if (mp->lastpos - mp->startpos + 1 == 3) { + int c0 = INBUF0(mp); + int c1 = INBUF1(mp); + int c2 = INBUF2(mp); + if (ISUTF8_3(c0, c1, c2)) { + mp->cs = UTF8; + mp->icharset = UTF8; + mp->multiint[mp->intindex] = c0; + mp->multics[mp->intindex] = mp->cs; + mp->multiint[mp->intindex + 1] = c1; + mp->multics[mp->intindex + 1] = REST_MASK | mp->cs; + mp->multiint[mp->intindex + 2] = c2; + mp->multics[mp->intindex + 2] = REST_MASK | mp->cs; + mp->intindex += 3; mp->startpos = mp->lastpos + 1; - multi_reparse(mp); + /* data are recognized as kanji or wrong data, so return 1 */ + return 1; + } else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1) && ISUTF8_REST(c2)) { + /* do nothing. return 1 to get next byte */ + return 1; + } + } else if (mp->lastpos - mp->startpos + 1 == 4) { + int c0 = INBUF0(mp); + int c1 = INBUF1(mp); + int c2 = INBUF2(mp); + int c3 = INBUF3(mp); + if (ISUTF8_4(c0, c1, c2, c3)) { + mp->cs = UTF8; + mp->icharset = UTF8; + mp->multiint[mp->intindex] = c0; + mp->multics[mp->intindex] = mp->cs; + mp->multiint[mp->intindex + 1] = c1; + mp->multics[mp->intindex + 1] = REST_MASK | mp->cs; + mp->multiint[mp->intindex + 2] = c2; + mp->multics[mp->intindex + 2] = REST_MASK | mp->cs; + mp->multiint[mp->intindex + 3] = c3; + mp->multics[mp->intindex + 3] = REST_MASK | mp->cs; + mp->intindex += 4; + mp->startpos = mp->lastpos + 1; + /* data are recognized as kanji or wrong data, so return 1 */ + return 1; + } else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1) && ISUTF8_REST(c2) && + ISUTF8_REST(c3)) { + /* do nothing. 
return 1 to get next byte */ + return 1; + } + } else if (mp->lastpos - mp->startpos + 1 == 5) { + int c0 = INBUF0(mp); + int c1 = INBUF1(mp); + int c2 = INBUF2(mp); + int c3 = INBUF3(mp); + int c4 = INBUF4(mp); + if (ISUTF8_5(c0, c1, c2, c3, c4)) { + mp->cs = UTF8; + mp->icharset = UTF8; + mp->multiint[mp->intindex] = c0; + mp->multics[mp->intindex] = mp->cs; + mp->multiint[mp->intindex + 1] = c1; + mp->multics[mp->intindex + 1] = REST_MASK | mp->cs; + mp->multiint[mp->intindex + 2] = c2; + mp->multics[mp->intindex + 2] = REST_MASK | mp->cs; + mp->multiint[mp->intindex + 3] = c3; + mp->multics[mp->intindex + 3] = REST_MASK | mp->cs; + mp->multiint[mp->intindex + 4] = c4; + mp->multics[mp->intindex + 4] = REST_MASK | mp->cs; + mp->intindex += 5; + mp->startpos = mp->lastpos + 1; + /* data are recognized as kanji or wrong data, so return 1 */ + return 1; + } else if (ISUTF8_HEAD(c0) && ISUTF8_REST(c1) && ISUTF8_REST(c2) && + ISUTF8_REST(c3) && ISUTF8_REST(c4)) { + /* do nothing. return 1 to get next byte */ + return 1; + } + } else if (mp->lastpos - mp->startpos + 1 == 6) { + int c0 = INBUF0(mp); + int c1 = INBUF1(mp); + int c2 = INBUF2(mp); + int c3 = INBUF3(mp); + int c4 = INBUF4(mp); + int c5 = INBUF5(mp); + if (ISUTF8_6(c0, c1, c2, c3, c4, c5)) { + mp->cs = UTF8; + mp->icharset = UTF8; + mp->multiint[mp->intindex] = c0; + mp->multics[mp->intindex] = mp->cs; + mp->multiint[mp->intindex + 1] = c1; + mp->multics[mp->intindex + 1] = REST_MASK | mp->cs; + mp->multiint[mp->intindex + 2] = c2; + mp->multics[mp->intindex + 2] = REST_MASK | mp->cs; + mp->multiint[mp->intindex + 3] = c3; + mp->multics[mp->intindex + 3] = REST_MASK | mp->cs; + mp->multiint[mp->intindex + 4] = c4; + mp->multics[mp->intindex + 4] = REST_MASK | mp->cs; + mp->multiint[mp->intindex + 5] = c5; + mp->multics[mp->intindex + 5] = REST_MASK | mp->cs; + mp->intindex += 6; + mp->startpos = mp->lastpos + 1; + /* data are recognized as kanji or wrong data, so return 1 */ + return 1; } - } else { - 
wrongcs1(mp); - multi_reparse(mp); } + /* return 0 because this data sequence is not matched to UTF8 */ + return 0; } + #endif static void internalize(mp) @@ -683,8 +899,8 @@ int c = INBUF(mp); if (mp->lastpos - mp->startpos + 1 == 1) { - if ((c <= 0x7f && mp->io.left == noconv) || - (c >= 0x80 && mp->io.right == noconv)) { + if ((c <= 0x7f && mp->io.input == ESNOCONV) || + (c >= 0x80 && mp->io.inputr == ESNOCONV)) { #if JAPANESE mp->sequence_counter = 0; #endif @@ -694,7 +910,7 @@ noconv1(mp); } return; - } else if (c >= 0x80 && mp->io.right == none) { + } else if (c >= 0x80 && mp->io.inputr == ESNONE) { #if JAPANESE mp->sequence_counter = 0; #endif @@ -710,7 +926,7 @@ wrongcs1(mp); return; } else if (c <= 0x7f || - (mp->io.right == iso8 && (0xa0 <= c && c <= 0xff))) { + ((mp->io.inputr & ESISO8) && (0xa0 <= c && c <= 0xff))) { #if JAPANESE mp->sequence_counter = 0; #endif @@ -722,19 +938,20 @@ * Check cs that fit for output code set. */ /* JIS cannot output JISX0212, JISX0213_2, or ISO2022 */ - if (output == jis && mp->cs != ASCII && + if ((output & ESJIS83) && mp->cs != ASCII && mp->cs != JISX0201KANA && mp->cs != JISX0201ROMAN && mp->cs != JISX0208_78KANJI && mp->cs != JISX0208KANJI && mp->cs != JISX0208_90KANJI && - mp->cs != JISX0213KANJI1) { + mp->cs != JISX0213KANJI1 && + mp->cs != JISX02132004KANJI1) { wrongcs1(mp); multi_reparse(mp); return; } /* UJIS cannot output regular ISO2022 except JIS */ - if (output == ujis && mp->cs != ASCII && + if ((output & ESUJIS) && mp->cs != ASCII && mp->cs != JISX0201KANA && mp->cs != JISX0201ROMAN && mp->cs != JISX0208_78KANJI && @@ -742,20 +959,22 @@ mp->cs != JISX0208_90KANJI && mp->cs != JISX0212KANJISUP && mp->cs != JISX0213KANJI1 && - mp->cs != JISX0213KANJI2) { + mp->cs != JISX0213KANJI2 && + mp->cs != JISX02132004KANJI1) { wrongcs1(mp); multi_reparse(mp); return; } /* SJIS cannot output JISX0212 or ISO2022 */ - if (output == sjis && mp->cs != ASCII && + if ((output & ESSJIS) && mp->cs != ASCII && mp->cs != 
JISX0201KANA && mp->cs != JISX0201ROMAN && mp->cs != JISX0208_78KANJI && mp->cs != JISX0208KANJI && mp->cs != JISX0208_90KANJI && mp->cs != JISX0213KANJI1 && - mp->cs != JISX0213KANJI2) { + mp->cs != JISX0213KANJI2 && + mp->cs != JISX02132004KANJI1) { wrongcs1(mp); multi_reparse(mp); return; @@ -773,8 +992,8 @@ return; } #if JAPANESE - if (mp->priority == sjis && ISSJISKANA(c)) { - if (mp->io.right == japanese) { + if (mp->priority == PSJIS && ISSJISKANA(c)) { + if (mp->io.inputr & ESUJIS) { mp->sequence_counter++; if (mp->sequence_counter % 2 == 1 && INBUF0(mp) != 0xa4) /* ???? */ @@ -785,16 +1004,13 @@ /* * It looks like a sequence of UJIS * hiragana. Thus we give priority - * to not sjis. + * to not PSJIS. */ - mp->priority = ujis; + mp->priority = PUJIS; } - mp->cs = JISX0201KANA; - mp->icharset = SJIS; internalize_sjis(mp); return; - } else if (mp->io.right == ujis || mp->io.right == sjis || - mp->io.right == japanese) { + } else if (mp->io.inputr & (ESUJIS | ESSJIS)) { mp->sequence_counter = 0; return; } @@ -813,7 +1029,7 @@ return; } else if (mp->cs != ASCII && (c <= 0x7f || - (mp->io.right == iso8 && 0xa0 <= c && c <= 0xff))) { + ((mp->io.inputr & ESISO8) && 0xa0 <= c && c <= 0xff))) { if (mp->cs != FINDCS(mp, c)) { wrongcs1(mp); multi_reparse(mp); @@ -828,78 +1044,72 @@ } #if JAPANESE if (mp->lastpos - mp->startpos + 1 == 2) { - int c0 = INBUF0(mp); - if (mp->priority == sjis && ISSJISKANJI(c0, c)) { -#if UJIS0213 - mp->cs = JISX0213KANJI1; -#else - mp->cs = JISX0208KANJI; -#endif - mp->icharset = SJIS; - internalize_sjis(mp); - return; - } else if (mp->priority == ujis) { - if (ISUJISKANA(c0, c)) { - mp->cs = JISX0201KANA; - mp->icharset = UJIS; - internalize_ujis(mp); + if (mp->priority == PSJIS) { + if (internalize_sjis(mp)) { return; - } else if (ISUJISKANJI(c0, c)) { -#if UJIS0213 - mp->cs = JISX0213KANJI1; -#else - mp->cs = JISX0208KANJI; -#endif - mp->icharset = UJIS; - internalize_ujis(mp); + } + } else if (mp->priority == PUJIS) { + if 
(internalize_ujis(mp)) { return; - } else if (ISUJISKANJISUP(c0, c, 0xa1)) { + } + } else if (mp->priority == PUTF8) { + if (internalize_utf8(mp)) { return; } } - if ((mp->io.right == sjis || mp->io.right == japanese) && - ISSJISKANJI(c0, c)) { -#if UJIS0213 - mp->cs = JISX0213KANJI1; -#else - mp->cs = JISX0208KANJI; -#endif - mp->priority = sjis; - mp->icharset = SJIS; - internalize_sjis(mp); - return; - } else if ((mp->io.right == ujis || mp->io.right == japanese)) { - if (ISUJISKANA(c0, c)) { - mp->cs = JISX0201KANA; - mp->priority = ujis; - mp->icharset = UJIS; - internalize_ujis(mp); + if (mp->io.inputr & ESUJIS) { + if (internalize_ujis(mp)) { + mp->priority = PUJIS; + return; + } + } + if (mp->io.inputr & ESUTF8) { + if (internalize_utf8(mp)) { + mp->priority = PUTF8; + return; + } + } + if (mp->io.inputr & ESSJIS) { + if (internalize_sjis(mp)) { + mp->priority = PSJIS; + return; + } + } + } else if (mp->lastpos - mp->startpos + 1 == 3) { + if (mp->io.inputr & ESUJIS) { + if (internalize_ujis(mp)) { + mp->priority = PUJIS; + return; + } + } + if (mp->io.inputr & ESUJIS) { + if (internalize_utf8(mp)) { + mp->priority = PUTF8; + return; + } + } + } else if (mp->lastpos - mp->startpos + 1 == 4) { + if (mp->io.inputr & ESUJIS) { + if (internalize_utf8(mp)) { + mp->priority = PUTF8; + return; + } + } + } else if (mp->lastpos - mp->startpos + 1 == 5) { + if (mp->io.inputr & ESUJIS) { + if (internalize_utf8(mp)) { + mp->priority = PUTF8; + return; + } + } + } else if (mp->lastpos - mp->startpos + 1 == 6) { + if (mp->io.inputr & ESUJIS) { + if (internalize_utf8(mp)) { + mp->priority = PUTF8; return; - } else if (ISUJISKANJI(c0, c)) { -#if UJIS0213 - mp->cs = JISX0213KANJI1; -#else - mp->cs = JISX0208KANJI; -#endif - mp->priority = ujis; - mp->icharset = UJIS; - internalize_ujis(mp); - return; - } else if (ISUJISKANJISUP(c0, c, 0xa1)) - { - return; } } - } else if (mp->lastpos - mp->startpos + 1 == 3 && - (mp->priority == ujis || - mp->io.right == ujis || 
mp->io.right == japanese) && - ISUJISKANJISUP(INBUF0(mp), INBUF1(mp), c)) { - mp->cs = JISX0212KANJISUP; - mp->priority = ujis; - mp->icharset = UJIS; - internalize_ujis(mp); - return; } #endif wrongcs1(mp); @@ -915,33 +1125,46 @@ int type; int *plane; { - if (mp->io.left == jis) { - /* - * If the target code system is traditional jis, - * allow only JIS C6226-1978, JIS X0208-1983, JIS X0208-1990, - * JIS X0213-2000, JIS X0212-1990, ASCII, - * JIS X0201 right, and JIS X0201 left. - */ - if ((type == TYPE_94N_CHARSET && - (c == '@' || c == 'B' || c == 'D' || - c == 'O' || c == 'P')) || - (type == TYPE_94_CHARSET && - (c == 'B' || c == 'I' || c == 'J'))) { - *plane = (mp->ms->irr ? IRR2CS(mp->ms->irr) : 0) | TYPE2CS(type) | FT2CS(c); - mp->ms->irr = 0; - mp->eseq = NOESC; - return (0); + if (type == TYPE_94_CHARSET) { + switch (c) { + case 'B': /* ASCII */ + goto ok; + case 'I': /* JIS X 0201 right half (Katakana) */ + case 'J': /* JIS X 0201 left half (Roman) */ + if (mp->io.scs & SCSJISX0201_1976) goto ok; + } + } else if (type == TYPE_94N_CHARSET) { + switch (c) { + case '@': /* JIS C 6226-1978 */ + if (mp->io.scs & SCSJISC6226_1978) goto ok; + break; + case 'B': /* JIS X 0208-1983, JIS X 0208:1990, or JIS X 0208:1997 */ + if (mp->io.scs & (SCSJISX0208_1983 | SCSJISX0208_1990)) goto ok; + break; + case 'D': /* JIS X 0212:1990 */ + if (mp->io.scs & SCSJISX0212_1990) goto ok; + break; + case 'O': /* JIS X 0213:2000 plane 1 */ + if (mp->io.scs & SCSJISX0213_2000) goto ok; + break; + case 'P': /* JIS X 0213:2000 plane 2 or JIS X 0213:2004 plane 2 */ + if (mp->io.scs & (SCSJISX0213_2000 | SCSJISX0213_2004)) goto ok; + break; + case 'Q': /* JIS X 0213:2004 plane 1 */ + if (mp->io.scs & SCSJISX0213_2004) goto ok; + break; } - } else if (0x30 <= c && c <= 0x7e) { - /* - * Otherwise, accept all. - */ - *plane = (mp->ms->irr ? 
IRR2CS(mp->ms->irr) : 0) | TYPE2CS(type) | FT2CS(c); - mp->ms->irr = 0; - mp->eseq = NOESC; - return (0); + } + if ((mp->io.scs & SCSOTHERISO) && 0x30 <= c && c <= 0x7e) { + /* accepting all other ISO, so OK */ + goto ok; } return (-1); +ok: + *plane = (mp->ms->irr ? IRR2CS(mp->ms->irr) : 0) | TYPE2CS(type) | FT2CS(c); + mp->ms->irr = 0; + mp->eseq = NOESC; + return (0); } static int check_irr(mp, c) @@ -1007,11 +1230,11 @@ case 'O': mp->ms->sg = 3; mp->eseq = NOESC; /*SS3*/break; case 'n': mp->ms->gl = 2; mp->eseq = NOESC; break; case 'o': mp->ms->gl = 3; mp->eseq = NOESC; break; - case '|': if (mp->io.right != iso8) goto wrong; + case '|': if (!(mp->io.inputr & ESISO8)) goto wrong; mp->ms->gr = 3; mp->eseq = NOESC; break; - case '}': if (mp->io.right != iso8) goto wrong; + case '}': if (!(mp->io.inputr & ESISO8)) goto wrong; mp->ms->gr = 2; mp->eseq = NOESC; break; - case '~': if (mp->io.right != iso8) goto wrong; + case '~': if (!(mp->io.inputr & ESISO8)) goto wrong; mp->ms->gr = 1; mp->eseq = NOESC; break; default: goto wrong; } @@ -1084,14 +1307,14 @@ /* * This sequence is wrong if we buffered some data. 
*/ - if (mp->lastpos != mp->startpos) { + if (mp->lastpos > mp->startpos) { switch (c) { case 0033: case 0016: case 0017: case 0031: goto wrong; case 0216: - case 0217: if (mp->io.right == iso8) goto wrong; + case 0217: if (mp->io.inputr & ESISO8) goto wrong; default: goto wrongone; } } @@ -1103,9 +1326,9 @@ case 0016: mp->ms->gl = 1; mp->eseq = NOESC; break; case 0017: mp->ms->gl = 0; mp->eseq = NOESC; break; case 0031: mp->ms->sg = 2; mp->eseq = NOESC; /*SS2*/ break; - case 0216: if (mp->io.right != iso8) goto wrongone; + case 0216: if (!(mp->io.inputr & ESISO8)) goto wrongone; mp->ms->sg = 2; mp->eseq = NOESC; /*SS2*/ break; - case 0217: if (mp->io.right != iso8) goto wrongone; + case 0217: if (!(mp->io.inputr & ESISO8)) goto wrongone; mp->ms->sg = 3; mp->eseq = NOESC; /*SS3*/ break; default: goto wrongone; } @@ -1197,21 +1420,23 @@ return 0; } -void init_def_codesets(left, right, out) -CODESET left; -CODESET right; -CODESET out; -{ - def_left = left; - def_right = right; +void init_def_scs_es(scs, input, inputr, out) +SETCHARSET scs; +ENCSET input; +ENCSET inputr; +ENCSET out; +{ + def_scs = scs; + def_input = input; + def_inputr = inputr; output = out; } void init_def_priority(pri) -CODESET pri; +J_PRIORITY pri; { #if JAPANESE - assert(pri == sjis || pri == ujis); + assert(pri == PUJIS || pri == PSJIS || pri == PUTF8); def_priority = pri; #endif } @@ -1220,34 +1445,36 @@ MULBUF *mp; { #if JAPANESE - if (mp->io.right == sjis) - mp->priority = sjis; - else if (mp->io.right == ujis) - mp->priority = ujis; - else if (mp->io.right == japanese) + if ((mp->io.inputr & ESSJIS) && (mp->io.inputr & ESUJIS)) mp->priority = def_priority; + else if (mp->io.inputr & ESUJIS) + mp->priority = PUJIS; + else if (mp->io.inputr & ESUTF8) + mp->priority = PUTF8; + else if (mp->io.inputr & ESSJIS) + mp->priority = PSJIS; else - mp->priority = noconv; + mp->priority = PNONE; mp->sequence_counter = 0; #endif } -CODESET get_priority(mp) +J_PRIORITY get_priority(mp) MULBUF *mp; { #if 
JAPANESE return (mp->priority); #else - return (noconv); + return (PNONE); #endif } void set_priority(mp, pri) MULBUF *mp; -CODESET pri; +J_PRIORITY pri; { #if JAPANESE - assert(pri == sjis || pri == ujis || pri == noconv); + assert(pri == PSJIS || pri == PUJIS || pri == PUTF8 || pri == PNONE); mp->priority = pri; #endif } @@ -1255,9 +1482,10 @@ MULBUF *new_multibuf() { MULBUF *mp = (MULBUF*) ecalloc(1, sizeof(MULBUF)); - mp->io.left = def_left; - mp->io.right = def_right; - mp->orig_io_right = def_right; + mp->io.scs = def_scs; + mp->io.input = def_input; + mp->io.inputr = def_inputr; + mp->orig_io_right = def_inputr; mp->rotation_io_right = 0; mp->eseq = NOESC; mp->ms = (struct m_status*) ecalloc(1, sizeof(struct m_status)); @@ -1312,7 +1540,7 @@ { m_position last_startpos = mp->startpos; - if (mp->io.left == jis || mp->io.left == iso7 || mp->io.right == iso8) { + if (mp->io.input & (ESJIS83 | ESISO7 | ESISO8)) { if (check_escape_sequence(mp) == 0) { return; /* going process well */ } @@ -1359,15 +1587,15 @@ #if JAPANESE /* * Quick japanese code hack. - * Check whether character is SJIS KANA or no. + * Check whether character is SJIS KANA or not. * If it is SJIS KANA, it means our prediction was failed. * Now going to fall back to SJIS KANA mode. 
*/ - if ((mp->priority == sjis || - mp->io.right == sjis || mp->io.right == japanese) && + if ((mp->priority == PSJIS || (mp->io.inputr & ESSJIS)) && + CSISWRONG(mp->multics[mp->intindex - 1]) && ISSJISKANA(mp->multiint[mp->intindex - 1])) { mp->cs = JISX0201KANA; - mp->priority = sjis; + mp->priority = PSJIS; mp->icharset = SJIS; mp->multiint[mp->intindex - 1] &= 0x7f; mp->multics[mp->intindex - 1] = mp->cs; @@ -1549,13 +1777,13 @@ multi_parse(mp, -1, NULL_POSITION, NULL); } -void set_codesets(mp, left, right) +void set_codesets(mp, input, inputr) MULBUF *mp; -CODESET left; -CODESET right; +ENCSET input; +ENCSET inputr; { - mp->io.left = left; - mp->io.right = right; + mp->io.input = input; + mp->io.inputr = inputr; } /* @@ -1574,7 +1802,11 @@ * Code set */ case SJIS: return ("SJIS"); + case SJIS2000: return ("SJIS2000"); + case SJIS2004: return ("SJIS2004"); case UJIS: return ("UJIS"); + case UJIS2000: return ("UJIS2000"); + case UJIS2004: return ("UJIS2004"); #endif /* * Character set @@ -1599,6 +1831,7 @@ case JISX0212KANJISUP: return ("JIS-KANJISUP"); case JISX0213KANJI1: return ("JISX0213KANJI1"); case JISX0213KANJI2: return ("JISX0213KANJI2"); + case JISX02132004KANJI1:return ("JISX0213:2004KANJI1"); } switch (CS2TYPE(mp->icharset)) { @@ -1645,13 +1878,17 @@ p[0] = '\033'; len = 1; - if ((output == iso7 || output == iso8) && CS2IRR(charset) > 0) + if ((output & (ESISO7 | ESISO8)) && CS2IRR(charset) > 0) { p[len] = '&'; p[len + 1] = IRR2CODE(CS2IRR(charset)); p[len + 2] = '\033'; len += 3; } + /* + * Call 94 or 94N character set to G0 plane. + * Call 96 or 96N character set to G1 plane. + */ switch (CS2TYPE(charset)) { case TYPE_94_CHARSET: @@ -1689,7 +1926,12 @@ len += 3; break; } - if (output != iso8) + /* + * If output is not ESISO8, use SO and SI to call G1 to GL. + * Otherwise, we use GR directly, so no need to call G1 + * since G1 is called GR already. 
+ */ + if (!(output & ESISO8)) { switch (CS2TYPE(charset)) { @@ -1731,7 +1973,7 @@ register unsigned char *p; static char buffer2[2]; - if (output == iso8 && c != 0 && + if ((output & ESISO8) && c != 0 && (CS2TYPE(cs) == TYPE_96_CHARSET || CS2TYPE(cs) == TYPE_96N_CHARSET)) c |= 0x80; @@ -1816,6 +2058,13 @@ assert(cvindex == 2); cvindex = 0; cs = JISX0208KANJI; + } else if (cs == JISX02132004KANJI1) + { + if (cvindex == 1) + return (nullcvbuffer); + assert(cvindex == 2); + cvindex = 0; + cs = JISX0208KANJI; } else { assert(0); @@ -1945,22 +2194,14 @@ cs == JISX0208_90KANJI || cs == JISX0213KANJI1) { register int c1, c2, c3; - static unsigned char table[] = { + static unsigned char table_sjis[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -#if SJIS0213 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, -#else - 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0, -#endif 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, -#if SJIS0213 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, -#else - 0xE8, 0xE9, 0xEA, 0, 0, 0, 0, 0, -#endif }; if (cvindex == 1) @@ -1971,7 +2212,7 @@ c3 = cvbuffer[0] & 0x7f; c1 = c3 & 1; c2 = (cvbuffer[1] & 0x7f) + (c1 ? 0x40 - 0x21 : 0x9e - 0x21); - c1 = table[c3 / 2 + c1]; + c1 = table_sjis[c3 / 2 + c1]; cvbuffer[0] = c1; cvbuffer[1] = c2 + (c2 >= 0x7f ? 
1 : 0); cvindex = 0; @@ -2012,6 +2253,53 @@ } #endif +static char *convert_to_utf8(c, cs) +int c; +int cs; +{ + if (c == 0) + { + cvindex = 0; + return (nullcvbuffer); + } + + cvbuffer[cvindex++] = c; + cvbuffer[cvindex] = '\0'; + + if (CSISWRONG(cs)) + { + cs = ASCII; + } + + cs = CS2CHARSET(cs); + + assert(0); + if (cs == ASCII || cs == JISX0201ROMAN) + { + assert(cvindex == 1); + cvindex = 0; + return (cvbuffer); + } else if (cs == JISX0201KANA) + { + assert(cvindex == 1); + cvbuffer[0] |= 0x80; + cvindex = 0; + return (cvbuffer); + } else if (cs == JISX0208_78KANJI || cs == JISX0208KANJI || + cs == JISX0208_90KANJI || cs == JISX0213KANJI1) + { + cvindex = 0; + return (cvbuffer); + } else if (cs == JISX0213KANJI2) + { + cvindex = 0; + return (cvbuffer); + } + assert(0); + cvindex = 0; + return (cvbuffer); +} + char *outchar(c, cs) int c; CHARSET cs; @@ -2022,16 +2310,18 @@ cs = ASCII; } - if (output == iso7 || output == iso8) + if (output & (ESISO7 | ESISO8)) return (convert_to_iso(c, cs)); - if (output == jis) + if (output & ESJIS83) return (convert_to_jis(c, cs)); #if JAPANESE - if (output == ujis) + if (output & ESUJIS) return (convert_to_ujis(c, cs)); - if (output == sjis) + if (output & ESSJIS) return (convert_to_sjis(c, cs)); #endif + if (output & ESUTF8) + return (convert_to_utf8(c, cs)); cvbuffer[0] = c; cvbuffer[1] = '\0'; return (cvbuffer); @@ -2084,13 +2374,13 @@ mp->rotation_io_right++; mp->rotation_io_right %= 7; switch (mp->rotation_io_right) { - case 0: p = "original"; mp->io.right = mp->orig_io_right; break; - case 1: p = "japanese"; mp->io.right = japanese; break; - case 2: p = "ujis"; mp->io.right = ujis; break; - case 3: p = "sjis"; mp->io.right = sjis; break; - case 4: p = "iso8"; mp->io.right = iso8; break; - case 5: p = "noconv"; mp->io.right = noconv; break; - case 6: p = "none"; mp->io.right = none; break; + case 0: p = "original"; mp->io.inputr = mp->orig_io_right; break; + case 1: p = "japanese"; mp->io.inputr = ESUJIS | ESSJIS; break; 
+ case 2: p = "ujis"; mp->io.inputr = ESUJIS; break; + case 3: p = "sjis"; mp->io.inputr = ESSJIS; break; + case 4: p = "iso8"; mp->io.inputr = ESISO8; break; + case 5: p = "noconv"; mp->io.inputr = ESNOCONV; break; + case 6: p = "none"; mp->io.inputr = ESNONE; break; default: assert(0); break; } init_priority(mp); Index: less/multi.h diff -u less/multi.h:1.25 less/multi.h:1.29 --- less/multi.h:1.25 Tue Sep 6 07:17:23 2005 +++ less/multi.h Sat Feb 18 09:57:24 2006 @@ -134,7 +134,7 @@ #define CYRILLIC (TYPE_96_CHARSET | FT2CS('L')) #define LATIN5 (TYPE_96_CHARSET | FT2CS('M')) /* - * JISX0208_78KANJI means JIS C 6226-1978 (called JIS X 0208-1978) + * JISX0208_78KANJI means JIS C 6226-1978 * JISX0208KANJI means JIS X 0208-1983 (same as JIS C 6226-1983) * This is similar to JIS C 6226-1978. Several characters are moved * or exchanged in code space. Conversion table is available in unify.c. @@ -188,12 +188,30 @@ * UJIS2004 contains ASCII, JIS X 0201:1976, JIS X 0213:2004, * and JIS X 0212:1990 */ -#define SJIS (IRR2CS(1) | TYPE_94N_CHARSET | FT_MASK) -#define SJIS2000 (IRR2CS(2) | TYPE_94N_CHARSET | FT_MASK) -#define SJIS2004 (IRR2CS(3) | TYPE_94N_CHARSET | FT_MASK) -#define UJIS (IRR2CS(1) | TYPE_94N_CHARSET | (FT_MASK-1)) -#define UJIS2000 (IRR2CS(2) | TYPE_94N_CHARSET | (FT_MASK-1)) -#define UJIS2004 (IRR2CS(3) | TYPE_94N_CHARSET | (FT_MASK-1)) +#define SJIS (IRR2CS(0) | TYPE_94N_CHARSET | FT_MASK) +#define SJIS2000 (IRR2CS(1) | TYPE_94N_CHARSET | FT_MASK) +#define SJIS2004 (IRR2CS(2) | TYPE_94N_CHARSET | FT_MASK) +#define UJIS (IRR2CS(0) | TYPE_94N_CHARSET | (FT_MASK-1)) +#define UJIS2000 (IRR2CS(1) | TYPE_94N_CHARSET | (FT_MASK-1)) +#define UJIS2004 (IRR2CS(2) | TYPE_94N_CHARSET | (FT_MASK-1)) + +#define UTF8 (IRR2CS(0) | TYPE_94N_CHARSET | (FT_MASK-2)) + +/* + * Make SJIS/UJIS character set from mp. + * + * SJIS and UJIS are using only fixed number of plane sets. Therefore, + * it is impossible to use JIS X 0208:1990 and JIS X 0213:2004 at the + * same time. 
SJIS use only one of them. And, it is declared by + * MULBUF->io.right. This function constructs appropriate SJIS + * character set number from it. + * + * Usage: sjiscs = MAKESUJISCS(mp, SJIS); + * ujiscs = MAKESUJISCS(mp, UJIS); + */ +#define MAKESUJISCS(mp,su) \ + ((su)| (((mp)->io.right&CJISX0213_2004)?IRR2CS(2):\ + (((mp)->io.right&CJISX0213_2000)?IRR2CS(1):0))) #endif #endif @@ -228,53 +246,101 @@ /* - * Definition of code sets. The code set is not character set. - * It is only means of code, and we use these value when we - * decide what input data are. + * Definition of SETCHARSET. + * + * SETCHARSET represents a set of character sets. This is used to + * specify character sets less accepts. + * + * Although, ISO 2022 can accept any character sets, the output device + * cannot represents all. Therefore, we add less ability to specify + * character sets that a user want to use. + * + * SCSASCII is a value to specify ASCII character set. + * SCSJISX0201_1976..SCSJISX0213_2004 specify Japanese character sets. + * All of these are character sets are defined in Japan. However, + * Japanese terminal devices can display only few of them. So, we + * decide to give users the ability to specify character sets that + * their terminal device can display. + * SCSOTHERISO is used to allow all other ISO 2022 character sets. + * There are too many character sets in the world. And the number + * of them is increasing. Therefore, we also decide to give users + * the ability to try all of them. 
;-) + */ +typedef int SETCHARSET; +#define SCSASCII 0x0000 +#define SCSJISX0201_1976 0x0001 +#define SCSJISC6226_1978 0x0002 +#define SCSJISX0208_1983 0x0004 +#define SCSJISX0208_1990 0x0008 +#define SCSJISX0212_1990 0x0010 +#define SCSJISX0213_2000 0x0020 +#define SCSJISX0213_2004 0x0040 +#define SCSJISX0213_2ND 0x0080 /* 2nd plane of JIS X 0213:2000 and */ + /* JIS X 0213:2004 */ +#define SCSOTHERISO 0x0100 +#define SCSUTF8 0x0200 +/* + * SCSALLJIS - everything + * SCSALLJISTRAD - everything except JIS X 0213 plane 2 and JIS X 0212. + * SCSALLSJIS - everything except JIS X 0212 + */ +#define SCSALLJIS (SCSJISX0201_1976|SCSJISC6226_1978|SCSJISX0208_1983|\ + SCSJISX0208_1990|SCSJISX0213_2000|SCSJISX0213_2004|\ + SCSJISX0213_2ND|SCSJISX0212_1990) +#define SCSALLJISTRAD (SCSJISX0201_1976|SCSJISC6226_1978|SCSJISX0208_1983|\ + SCSJISX0208_1990|SCSJISX0213_2000|SCSJISX0213_2004) +#define SCSALLSJIS (SCSJISX0201_1976|SCSJISC6226_1978|SCSJISX0208_1983|\ + SCSJISX0208_1990|SCSJISX0213_2000|SCSJISX0213_2004|\ + SCSJISX0213_2ND) + +/* + * Definition of ENCSET. + * + * ENCSET represents a set of encoding schemes less accepts. ENCSET is + * used as a triplet like { input, inputr, output }. "input" represents + * a set of encoding schemes for input stream left plane (0x00..0x7f). + * "inputr" represents a set of encoding schemes for input stream right + * plane (0x80..0xff). "output" represents an encoding scheme for output + * stream. + * + * ESNONE has to be used exclusively to specify no-data. This is used + * as only "inputr" to specify no right plane (0x80..0xff) data. + * ESNOCONV has to be used exclusively to specify no-conversion. + * ESISO7 and ESISO8 specify ISO style encoding techniques. ESISO7 can + * be used as "input" or "output". ESISO8 can be used as "inputr" or + * "output". + * ESJIS83, ESSJIS, and ESUJIS specify Japanese encoding techniques. + * Note: As input, users can use any combination of these values. 
+ * However, as output, users need to use only one of them. + * Note: If ESJIS83 is used as "output", less output all KANJI + * character set using only JIS X 0208-1983 character set (ESC$B) with + * a hope that user's terminal device is using glyph of JIS X 0213:2004 + * plane 1 character set as its default glyph. It is hard to update + * terminal device to understand JIS X 0213:2004 completely, but it is + * easy to change the glyph. + * ESUTF8 specifies encoding technique and character set. This have to + * be used exclusively as output. + */ +typedef int ENCSET; +#define ESNONE 0x0000 +#define ESNOCONV 0x0001 +#define ESISO7 0x0002 +#define ESISO8 0x0004 +#define ESJIS83 0x0008 +#define ESSJIS 0x0010 +#define ESUJIS 0x0020 +#define ESUTF8 0x0040 +#define ESALLJA (ESSJIS|ESUJIS|ESUTF8) + +/* + * J_PRIORITY: priority to select either UJIS or SJIS as encoding scheme. */ typedef enum { - /* code sets for left, right and output plane */ - noconv, /* A code set which doesn't need converting */ - /* code sets for left and output plane */ - jis, /* A subset of ISO 2022 */ - /* - * It may contain JIS C 6226-1978, JIS X 0208-1983, - * JIS X 0208:1990/1997, JIS X 0212:1990, - * JIS X 0213:2000/2004, JIS X 0201:1976/1997 left/right - * planes, and ASCII. - * - * If less is specified to use "jis" as its encoding scheme - * for input stream, less accepts all above character sets. - * e.g. jis-ujis or jis-sjis in JLESSCHARSET. - * - * If less is specified to use "jis" as its encoding scheme - * for output stream, less outputs all characters in - * JIS C 6226-1978 as JIS X 0208-1983 with conversion - * and all other characters in JIS X 0208:1990/1997, - * and JIS X 0213:2000/2004 plane 1 using JIS X 0208-1983 - * (ESC$B) encoding scheme without any conversion. - * Less doesn't convert here with a hope that an output - * device may use JIS X 0213:2004 plane 1 character set - * as its glyph. - * e.g. iso7-jis or ujis-jis in JLESSCHARSET. 
- * - * In addition, less rejects JIS X 0212:1990 and JIS X - * 0213:2000 plane 2 if "jis" is specified as its encoding - * scheme for output stream. - * e.g. jis or ujis-jis in JLESSCHARSET. - * - * If you need to use JIS X 0213:2004 or any other character - * sets as the output, please use iso7 or iso8. - */ - iso7, /* A code set which is extented by iso2022 */ - /* code sets for only right plane */ - none, /* No code set */ - japanese, /* Both of UJIS and SJIS */ - /* code sets for right and output plane */ - ujis, /* Japanese code set named UJIS */ - sjis, /* Japanese code set named SJIS */ - iso8 /* A code set which is extented by iso2022 */ -} CODESET; + PUJIS, + PSJIS, + PUTF8, + PNONE +} J_PRIORITY; /* * A structure used as a return value in multi_parse(). @@ -297,10 +363,10 @@ * in multi.c */ extern int set_planeset (); -extern void init_def_codesets (); +extern void init_def_scs_es (); extern void init_def_priority (); extern void init_priority (); -extern CODESET get_priority (); +extern J_PRIORITY get_priority (); extern void set_priority (); extern MULBUF * new_multibuf (); extern void clear_multibuf (); Index: less/optfunc.c diff -u less/optfunc.c:1.26 less/optfunc.c:1.28 --- less/optfunc.c:1.26 Tue Aug 30 22:45:49 2005 +++ less/optfunc.c Wed Sep 14 06:43:54 2005 @@ -386,29 +386,29 @@ { case INIT: if (opt_Z_var == OPT_ON) - init_def_priority(sjis); + init_def_priority(PSJIS); else if (opt_Z_var == OPT_OFF) - init_def_priority(ujis); + init_def_priority(PUJIS); break; case QUERY: break; case TOGGLE: switch (get_priority(get_mulbuf(curr_ifile))) { - case sjis: + case PSJIS: opt_Z_var = OPT_OFF; break; - case ujis: + case PUJIS: opt_Z_var = OPT_ON; break; - case noconv: + case PNONE: default: opt_Z_var = OPT_ONPLUS; /* we use this to mean error */ return; } if (opt_Z_var == OPT_ON) - init_def_priority(sjis); + init_def_priority(PSJIS); else if (opt_Z_var == OPT_OFF) - init_def_priority(ujis); + init_def_priority(PUJIS); 
init_priority(get_mulbuf(curr_ifile)); break; } Index: less/opttbl.c diff -u less/opttbl.c:1.29 less/opttbl.c:1.31 --- less/opttbl.c:1.29 Wed Aug 31 00:09:15 2005 +++ less/opttbl.c Wed Sep 14 06:43:54 2005 @@ -483,9 +483,9 @@ } #if JAPANESE if (opt_Z_var == OPT_ON) - init_def_priority(sjis); + init_def_priority(PSJIS); else if (opt_Z_var == OPT_OFF) - init_def_priority(ujis); + init_def_priority(PUJIS); #endif } Index: less/unify.c diff -u less/unify.c:1.31 less/unify.c:1.32 --- less/unify.c:1.31 Sun Sep 4 23:36:22 2005 +++ less/unify.c Fri Sep 9 17:12:30 2005 @@ -1015,51 +1015,34 @@ }; static convtable etable_jisx0213_2 = { eliminate_wrong_jisx0213_2, NULL }; -static convtab eliminate_wrong_sjis[] = { -#if SJIS0213 - /* JIS X 0213:2000 plane 1 for SJIS0213 */ - +static convtab eliminate_wrong_jisx02132004_1[] = { /* no empty row */ /* sequences of empty columns */ - /* 4 KU 0x82FA($(O$|(B)-0x82FC($(O$~(B) -> 0x222E($B".(B) */ - { "\202\372", "\202\374", "\201\254", SJIS }, - /* 8 KU 0x84DD($(O(_(B)-0x84E4($(O(f(B) -> 0x222E($B".(B) */ - { "\204\335", "\204\344", "\201\254", SJIS }, - /* 8 KU 0x84FB($(O(}(B)-0x84FC($(O(~(B) -> 0x222E($B".(B) */ - { "\204\373", "\204\374", "\201\254", SJIS }, - /* 12 KU 0x86F2($(O,t(B)-0x86FA($(O,|(B) -> 0x222E($B".(B) */ - { "\206\362", "\206\372", "\201\254", SJIS }, - /* 13 KU 0x8777($(O-X(B)-0x877D($(O-^(B) -> 0x222E($B".(B) */ - { "\207\167", "\207\175", "\201\254", SJIS }, - /* 13 KU 0x8790($(O-p(B)-0x8792($(O-r(B) -> 0x222E($B".(B) */ - { "\207\220", "\207\222", "\201\254", SJIS }, - /* 13 KU 0x8794($(O-t(B)-0x8797($(O-w(B) -> 0x222E($B".(B) */ - { "\207\224", "\207\227", "\201\254", SJIS }, - /* 13 KU 0x879A($(O-z(B)-0x879C($(O-|(B) -> 0x222E($B".(B) */ - { "\207\232", "\207\234", "\201\254", SJIS }, - /* 14 KU 0x879F($(O.!(B) -> 0x222E($B".(B) */ - { "\207\237", NULL, "\201\254", SJIS }, - /* 15 KU 0x889E($(O/~(B) -> 0x222E($B".(B) */ - { "\210\236", NULL, "\201\254", SJIS }, - /* 47 KU 0x9873($(OOT(B) -> 0x222E($B".(B) */ 
- { "\230\163", NULL, "\201\254", SJIS }, - /* 47 KU 0x989E($(OO~(B) -> 0x222E($B".(B) */ - { "\230\236", NULL, "\201\254", SJIS }, - /* 84 KU 0xEAA5($(Ot'(B) -> 0x222E($B".(B) */ - { "\352\245", NULL, "\201\254", SJIS }, - /* 94 KU 0xEFF8($(O~z(B)-0xEFFC($(O~~(B) -> 0x222E($B".(B) */ - { "\357\370", "\357\374", "\201\254", SJIS }, - - /* JIS X 0213:2000 plane 2 for SJIS0213 */ - /* In SJIS0213, JIS X 0213:2000 occupies from 96 to 120 KU */ + /* 4 KU 0x247C($(Q$|(B)-0x247E($(Q$~(B) -> 0x222E($B".(B) */ + { "$|", "$~", "\".", JISX0208KANJI }, + /* 8 KU 0x285F($(Q(_(B)-0x2866($(Q(f(B) -> 0x222E($B".(B) */ + { "(_", "(f", "\".", JISX0208KANJI }, + /* 8 KU 0x287D($(Q(}(B)-0x287E($(Q(~(B) -> 0x222E($B".(B) */ + { "(}", "(~", "\".", JISX0208KANJI }, + /* 12 KU 0x2C74($(Q,t(B)-0x2C7C($(Q,|(B) -> 0x222E($B".(B) */ + { ",t", ",|", "\".", JISX0208KANJI }, + /* 13 KU 0x2D58($(Q-X(B)-0x2D5E($(Q-^(B) -> 0x222E($B".(B) */ + { "-X", "-^", "\".", JISX0208KANJI }, + /* 13 KU 0x2D70($(Q-p(B)-0x2D72($(Q-r(B) -> 0x222E($B".(B) */ + { "-p", "-r", "\".", JISX0208KANJI }, + /* 13 KU 0x2D74($(Q-t(B)-0x2D77($(Q-w(B) -> 0x222E($B".(B) */ + { "-t", "-w", "\".", JISX0208KANJI }, + /* 13 KU 0x2D7A($(Q-z(B)-0x2D7C($(Q-|(B) -> 0x222E($B".(B) */ + { "-z", "-|", "\".", JISX0208KANJI }, - /* no empty row */ + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_jisx02132004_1 = { eliminate_wrong_jisx02132004_1, NULL }; +static convtable etable_jisx02132004_2 = { eliminate_wrong_jisx0213_2, NULL }; - /* sequences of empty columns */ - /* 94 KU 0xFCF5($(P~w(B)-0xFCFC($(P~~(B) -> 0x222E($B".(B) */ - { "\374\365", "\374\374", "\201\254", SJIS }, -#else /* SJIS0213 */ +static convtab eliminate_wrong_sjis[] = { /* JIS X 0208:1990 for SJIS */ /* 2 KU 0x81AD(&@$B"/(B)-0x81B7(&@$B"9(B) -> 0x81AC($B".(B) */ { "\201\255", "\201\267", "\201\254", SJIS }, @@ -1110,64 +1093,99 @@ { "\353\100", "\357\374", "\201\254", SJIS }, /* 95-120 KU 0xF040(none)-0xFC9E(none) -> 0x81AC($B".(B) */ { "\360\100", 
"\374\374", "\201\254", SJIS }, -#endif /* SJIS0213 */ /* NULL */ { 0, 0, 0, 0 } }; static convtable etable_sjis = { eliminate_wrong_sjis, NULL }; -static convtab eliminate_wrong_ujis[] = { -#if UJIS0213 - /* JIS X 0213:2000 plane 1 for UJIS0213 */ +static convtab eliminate_wrong_sjis2000[] = { + /* JIS X 0213:2000 plane 1 for SJIS2000 */ /* no empty row */ /* sequences of empty columns */ - /* 4 KU 0xA4FC($(O$|(B)-0xA4FE($(O$~(B) -> 0xA2AE($B".(B) */ - { "\244\374", "\244\376", "\242\256", UJIS }, - /* 8 KU 0xA8DF($(O(_(B)-0xA8E6($(O(f(B) -> 0xA2AE($B".(B) */ - { "\250\337", "\250\346", "\242\256", UJIS }, - /* 8 KU 0xA8FD($(O(}(B)-0xA8FE($(O(~(B) -> 0xA2AE($B".(B) */ - { "\250\375", "\250\376", "\242\256", UJIS }, - /* 12 KU 0xACF4($(O,t(B)-0xACFC($(O,|(B) -> 0xA2AE($B".(B) */ - { "\254\364", "\254\374", "\242\256", UJIS }, - /* 13 KU 0xADD8($(O-X(B)-0xADDE($(O-^(B) -> 0xA2AE($B".(B) */ - { "\255\330", "\255\336", "\242\256", UJIS }, - /* 13 KU 0xADF0($(O-p(B)-0xADF2($(O-r(B) -> 0xA2AE($B".(B) */ - { "\255\360", "\255\362", "\242\256", UJIS }, - /* 13 KU 0xADF4($(O-t(B)-0xADF7($(O-w(B) -> 0xA2AE($B".(B) */ - { "\255\364", "\255\367", "\242\256", UJIS }, - /* 13 KU 0xADFA($(O-z(B)-0xADFC($(O-|(B) -> 0xA2AE($B".(B) */ - { "\255\372", "\255\374", "\242\256", UJIS }, - /* 14 KU 0xAEA1($(O.!(B) -> 0xA2AE($B".(B) */ - { "\256\241", NULL, "\242\256", UJIS }, - /* 15 KU 0xAFFE($(O/~(B) -> 0xA2AE($B".(B) */ - { "\257\376", NULL, "\242\256", UJIS }, - /* 47 KU 0xCFD4($(OOT(B) -> 0xA2AE($B".(B) */ - { "\317\324", NULL, "\242\256", UJIS }, - /* 47 KU 0xCFFE($(OO~(B) -> 0xA2AE($B".(B) */ - { "\317\376", NULL, "\242\256", UJIS }, - /* 84 KU 0xF4A7($(Ot'(B) -> 0xA2AE($B".(B) */ - { "\364\247", NULL, "\242\256", UJIS }, - /* 94 KU 0xFEFA($(O~z(B)-0xFEFE($(O~~(B) -> 0xA2AE($B".(B) */ - { "\376\372", "\376\376", "\242\256", UJIS }, + /* 4 KU 0x82FA($(O$|(B)-0x82FC($(O$~(B) -> 0x222E($B".(B) */ + { "\202\372", "\202\374", "\201\254", SJIS }, + /* 8 KU 
0x84DD($(O(_(B)-0x84E4($(O(f(B) -> 0x222E($B".(B) */ + { "\204\335", "\204\344", "\201\254", SJIS }, + /* 8 KU 0x84FB($(O(}(B)-0x84FC($(O(~(B) -> 0x222E($B".(B) */ + { "\204\373", "\204\374", "\201\254", SJIS }, + /* 12 KU 0x86F2($(O,t(B)-0x86FA($(O,|(B) -> 0x222E($B".(B) */ + { "\206\362", "\206\372", "\201\254", SJIS }, + /* 13 KU 0x8777($(O-X(B)-0x877D($(O-^(B) -> 0x222E($B".(B) */ + { "\207\167", "\207\175", "\201\254", SJIS }, + /* 13 KU 0x8790($(O-p(B)-0x8792($(O-r(B) -> 0x222E($B".(B) */ + { "\207\220", "\207\222", "\201\254", SJIS }, + /* 13 KU 0x8794($(O-t(B)-0x8797($(O-w(B) -> 0x222E($B".(B) */ + { "\207\224", "\207\227", "\201\254", SJIS }, + /* 13 KU 0x879A($(O-z(B)-0x879C($(O-|(B) -> 0x222E($B".(B) */ + { "\207\232", "\207\234", "\201\254", SJIS }, + /* 14 KU 0x879F($(O.!(B) -> 0x222E($B".(B) */ + { "\207\237", NULL, "\201\254", SJIS }, + /* 15 KU 0x889E($(O/~(B) -> 0x222E($B".(B) */ + { "\210\236", NULL, "\201\254", SJIS }, + /* 47 KU 0x9873($(OOT(B) -> 0x222E($B".(B) */ + { "\230\163", NULL, "\201\254", SJIS }, + /* 47 KU 0x989E($(OO~(B) -> 0x222E($B".(B) */ + { "\230\236", NULL, "\201\254", SJIS }, + /* 84 KU 0xEAA5($(Ot'(B) -> 0x222E($B".(B) */ + { "\352\245", NULL, "\201\254", SJIS }, + /* 94 KU 0xEFF8($(O~z(B)-0xEFFC($(O~~(B) -> 0x222E($B".(B) */ + { "\357\370", "\357\374", "\201\254", SJIS }, - /* - * UJIS0213 shares G2 space by JIS X 0213:2000 plane 2 and - * JIS X 0212:1990. later has some empty rows and some empty - * columns in particular rows. JIS X 0213:2000 plane 2 shares - * those empty rows. So, totally UJIS0213 has no empty row. 
- */ + /* JIS X 0213:2000 plane 2 for SJIS2000 */ + /* In SJIS2000, JIS X 0213:2000 occupies from 96 to 120 KU */ - /* JIS X 0212:1990 for UJIS0213 */ - /* Empty columns in particular rows are defined after below #endif */ + /* no empty row */ - /* JIS X 0213:2000 plane 2 for UJIS0213 */ /* sequences of empty columns */ - /* 94 KU 0xFEF7($(P~w(B)-0xFEFE($(P~~(B) -> 0xA2AE($B".(B) */ - { "\217\376\367", "\217\376\376", "\242\256", UJIS }, -#else /* UJIS0213 */ + /* 94 KU 0xFCF5($(P~w(B)-0xFCFC($(P~~(B) -> 0x222E($B".(B) */ + { "\374\365", "\374\374", "\201\254", SJIS }, + + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_sjis2000 = { eliminate_wrong_sjis2000, NULL }; + +static convtab eliminate_wrong_sjis2004[] = { + /* JIS X 0213:2004 plane 1 for SJIS2004 */ + + /* no empty row */ + + /* sequences of empty columns */ + /* 4 KU 0x82FA($(Q$|(B)-0x82FC($(Q$~(B) -> 0x222E($B".(B) */ + { "\202\372", "\202\374", "\201\254", SJIS }, + /* 8 KU 0x84DD($(Q(_(B)-0x84E4($(Q(f(B) -> 0x222E($B".(B) */ + { "\204\335", "\204\344", "\201\254", SJIS }, + /* 8 KU 0x84FB($(Q(}(B)-0x84FC($(Q(~(B) -> 0x222E($B".(B) */ + { "\204\373", "\204\374", "\201\254", SJIS }, + /* 12 KU 0x86F2($(Q,t(B)-0x86FA($(Q,|(B) -> 0x222E($B".(B) */ + { "\206\362", "\206\372", "\201\254", SJIS }, + /* 13 KU 0x8777($(Q-X(B)-0x877D($(Q-^(B) -> 0x222E($B".(B) */ + { "\207\167", "\207\175", "\201\254", SJIS }, + /* 13 KU 0x8790($(Q-p(B)-0x8792($(Q-r(B) -> 0x222E($B".(B) */ + { "\207\220", "\207\222", "\201\254", SJIS }, + /* 13 KU 0x8794($(Q-t(B)-0x8797($(Q-w(B) -> 0x222E($B".(B) */ + { "\207\224", "\207\227", "\201\254", SJIS }, + /* 13 KU 0x879A($(Q-z(B)-0x879C($(Q-|(B) -> 0x222E($B".(B) */ + { "\207\232", "\207\234", "\201\254", SJIS }, + + /* JIS X 0213:2004 plane 2 for SJIS2004 */ + /* In SJIS2004, JIS X 0213:2004 occupies from 96 to 120 KU */ + + /* no empty row */ + + /* sequences of empty columns */ + /* 94 KU 0xFCF5($(P~w(B)-0xFCFC($(P~~(B) -> 0x222E($B".(B) */ + { "\374\365", "\374\374", 
"\201\254", SJIS }, + + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_sjis2004 = { eliminate_wrong_sjis2004, NULL }; + +static convtab eliminate_wrong_ujis[] = { /* UJIS uses JIS X 0208 1983 */ /* empty rows */ @@ -1234,7 +1252,7 @@ { "\217\254\241", "\217\257\376", "\242\256", UJIS }, /* 78-94 KU 0xEEA1($(Dn!(B)-0xFEFE($(D~~(B) -> 0xA2AE($B".(B) */ { "\217\356\241", "\217\376\376", "\242\256", UJIS }, -#endif /* UJIS0213 */ + /* JIS X 0212:1990 */ /* * Here, we defines only empty columns in particular rows @@ -1297,6 +1315,204 @@ }; static convtable etable_ujis = { eliminate_wrong_ujis, NULL }; +static convtab eliminate_wrong_ujis2000[] = { + /* JIS X 0213:2000 plane 1 for UJIS2000 */ + + /* no empty row */ + + /* sequences of empty columns */ + /* 4 KU 0xA4FC($(O$|(B)-0xA4FE($(O$~(B) -> 0xA2AE($B".(B) */ + { "\244\374", "\244\376", "\242\256", UJIS }, + /* 8 KU 0xA8DF($(O(_(B)-0xA8E6($(O(f(B) -> 0xA2AE($B".(B) */ + { "\250\337", "\250\346", "\242\256", UJIS }, + /* 8 KU 0xA8FD($(O(}(B)-0xA8FE($(O(~(B) -> 0xA2AE($B".(B) */ + { "\250\375", "\250\376", "\242\256", UJIS }, + /* 12 KU 0xACF4($(O,t(B)-0xACFC($(O,|(B) -> 0xA2AE($B".(B) */ + { "\254\364", "\254\374", "\242\256", UJIS }, + /* 13 KU 0xADD8($(O-X(B)-0xADDE($(O-^(B) -> 0xA2AE($B".(B) */ + { "\255\330", "\255\336", "\242\256", UJIS }, + /* 13 KU 0xADF0($(O-p(B)-0xADF2($(O-r(B) -> 0xA2AE($B".(B) */ + { "\255\360", "\255\362", "\242\256", UJIS }, + /* 13 KU 0xADF4($(O-t(B)-0xADF7($(O-w(B) -> 0xA2AE($B".(B) */ + { "\255\364", "\255\367", "\242\256", UJIS }, + /* 13 KU 0xADFA($(O-z(B)-0xADFC($(O-|(B) -> 0xA2AE($B".(B) */ + { "\255\372", "\255\374", "\242\256", UJIS }, + /* 14 KU 0xAEA1($(O.!(B) -> 0xA2AE($B".(B) */ + { "\256\241", NULL, "\242\256", UJIS }, + /* 15 KU 0xAFFE($(O/~(B) -> 0xA2AE($B".(B) */ + { "\257\376", NULL, "\242\256", UJIS }, + /* 47 KU 0xCFD4($(OOT(B) -> 0xA2AE($B".(B) */ + { "\317\324", NULL, "\242\256", UJIS }, + /* 47 KU 0xCFFE($(OO~(B) -> 0xA2AE($B".(B) */ + { "\317\376", NULL, 
"\242\256", UJIS }, + /* 84 KU 0xF4A7($(Ot'(B) -> 0xA2AE($B".(B) */ + { "\364\247", NULL, "\242\256", UJIS }, + /* 94 KU 0xFEFA($(O~z(B)-0xFEFE($(O~~(B) -> 0xA2AE($B".(B) */ + { "\376\372", "\376\376", "\242\256", UJIS }, + + /* + * UJIS2000 shares G2 space by JIS X 0213:2000 plane 2 and + * JIS X 0212:1990. later has some empty rows and some empty + * columns in particular rows. JIS X 0213:2000 plane 2 shares + * those empty rows. So, totally UJIS2000 has no empty row. + */ + + /* JIS X 0213:2000 plane 2 for UJIS2000 */ + /* sequences of empty columns */ + /* 94 KU 0xFEF7($(P~w(B)-0xFEFE($(P~~(B) -> 0xA2AE($B".(B) */ + { "\217\376\367", "\217\376\376", "\242\256", UJIS }, + + /* JIS X 0212:1990 for UJIS2000 */ + /* sequences of empty columns */ + /* 2 KU 0xA2A1($(D"!(B)-0xA2AE($(D".(B) -> 0xA2AE($B".(B) */ + { "\217\242\241", "\217\242\256", "\242\256", UJIS }, + /* 2 KU 0xA2BA($(D":(B)-0xA2C1($(D"A(B) -> 0xA2AE($B".(B) */ + { "\217\242\272", "\217\242\301", "\242\256", UJIS }, + /* 2 KU 0xA2C5($(D"E(B)-0xA2EA($(D"j(B) -> 0xA2AE($B".(B) */ + { "\217\242\305", "\217\242\352", "\242\256", UJIS }, + /* 2 KU 0xA2F2($(D"r(B)-0xA2FE($(D"~(B) -> 0xA2AE($B".(B) */ + { "\217\242\362", "\217\242\376", "\242\256", UJIS }, + /* 6 KU 0xA6A1($(D&!(B)-0xA6E0($(D&`(B) -> 0xA2AE($B".(B) */ + { "\217\246\241", "\217\246\340", "\242\256", UJIS }, + /* 6 KU 0xA6E6($(D&f(B) -> 0xA2AE($B".(B) */ + { "\217\246\346", NULL, "\242\256", UJIS }, + /* 6 KU 0xA6E8($(D&h(B) -> 0xA2AE($B".(B) */ + { "\217\246\350", NULL, "\242\256", UJIS }, + /* 6 KU 0xA6EB($(D&k(B) -> 0xA2AE($B".(B) */ + { "\217\246\353", NULL, "\242\256", UJIS }, + /* 6 KU 0xA6ED($(D&m(B)-0xA6F0($(D&p(B) -> 0xA2AE($B".(B) */ + { "\217\246\355", "\217\246\360", "\242\256", UJIS }, + /* 6 KU 0xA6FD($(D&}(B)-0xA6FE($(D&~(B) -> 0xA2AE($B".(B) */ + { "\217\246\375", "\217\246\376", "\242\256", UJIS }, + /* 7 KU 0xA7A1($(D'!(B)-0xA7C1($(D'A(B) -> 0xA2AE($B".(B) */ + { "\217\247\241", "\217\247\301", "\242\256", UJIS }, + /* 7 KU 
0xA7CF($(D'O(B)-0xA7F1($(D'q(B) -> 0xA2AE($B".(B) */ + { "\217\247\317", "\217\247\361", "\242\256", UJIS }, + /* 9 KU 0xA9A3($(D)#(B) -> 0xA2AE($B".(B) */ + { "\217\251\243", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9A5($(D)%(B) -> 0xA2AE($B".(B) */ + { "\217\251\245", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9A7($(D)'(B) -> 0xA2AE($B".(B) */ + { "\217\251\247", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9AA($(D)*(B) -> 0xA2AE($B".(B) */ + { "\217\251\252", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9AE($(D).(B) -> 0xA2AE($B".(B) */ + { "\217\251\256", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9B1($(D)1(B)-0xA9C0($(D)@(B) -> 0xA2AE($B".(B) */ + { "\217\251\261", "\217\251\300", "\242\256", UJIS }, + /* 9 KU 0xA9D1($(D)Q(B)-0xA9FE($(D)~(B) -> 0xA2AE($B".(B) */ + { "\217\251\321", "\217\251\376", "\242\256", UJIS }, + /* 10 KU 0xAAB9($(D*9(B) -> 0xA2AE($B".(B) */ + { "\217\252\271", NULL, "\242\256", UJIS }, + /* 10 KU 0xAAF8($(D*x(B)-0xAAFE($(D*~(B) -> 0xA2AE($B".(B) */ + { "\217\252\370", "\217\252\376", "\242\256", UJIS }, + /* 11 KU 0xABBC($(D+<(B) -> 0xA2AE($B".(B) */ + { "\217\253\274", NULL, "\242\256", UJIS }, + /* 11 KU 0xABC4($(D+D(B) -> 0xA2AE($B".(B) */ + { "\217\253\304", NULL, "\242\256", UJIS }, + /* 11 KU 0xABF8($(D+x(B)-0xABFE($(D+~(B) -> 0xA2AE($B".(B) */ + { "\217\253\370", "\217\253\376", "\242\256", UJIS }, + /* 77 KU 0xEDE4($(Dmd(B)-0xEDFE($(Dm~(B) -> 0xA2AE($B".(B) */ + { "\217\355\344", "\217\355\376", "\242\256", UJIS }, + + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_ujis2000 = { eliminate_wrong_ujis2000, NULL }; + +static convtab eliminate_wrong_ujis2004[] = { + /* JIS X 0213:2004 plane 1 for UJIS2004 */ + + /* no empty row */ + + /* sequences of empty columns */ + /* 4 KU 0xA4FC($(Q$|(B)-0xA4FE($(Q$~(B) -> 0xA2AE($B".(B) */ + { "\244\374", "\244\376", "\242\256", UJIS }, + /* 8 KU 0xA8DF($(Q(_(B)-0xA8E6($(Q(f(B) -> 0xA2AE($B".(B) */ + { "\250\337", "\250\346", "\242\256", UJIS }, + /* 8 KU 0xA8FD($(Q(}(B)-0xA8FE($(Q(~(B) -> 0xA2AE($B".(B) 
*/ + { "\250\375", "\250\376", "\242\256", UJIS }, + /* 12 KU 0xACF4($(Q,t(B)-0xACFC($(Q,|(B) -> 0xA2AE($B".(B) */ + { "\254\364", "\254\374", "\242\256", UJIS }, + /* 13 KU 0xADD8($(Q-X(B)-0xADDE($(Q-^(B) -> 0xA2AE($B".(B) */ + { "\255\330", "\255\336", "\242\256", UJIS }, + /* 13 KU 0xADF0($(Q-p(B)-0xADF2($(Q-r(B) -> 0xA2AE($B".(B) */ + { "\255\360", "\255\362", "\242\256", UJIS }, + /* 13 KU 0xADF4($(Q-t(B)-0xADF7($(Q-w(B) -> 0xA2AE($B".(B) */ + { "\255\364", "\255\367", "\242\256", UJIS }, + /* 13 KU 0xADFA($(Q-z(B)-0xADFC($(Q-|(B) -> 0xA2AE($B".(B) */ + { "\255\372", "\255\374", "\242\256", UJIS }, + + /* + * UJIS2004 shares G2 space by JIS X 0213:2004 plane 2 and + * JIS X 0212:1990. later has some empty rows and some empty + * columns in particular rows. JIS X 0213:2004 plane 2 shares + * those empty rows. So, totally UJIS2004 has no empty row. + */ + + /* JIS X 0213:2004 plane 2 for UJIS2004 */ + /* sequences of empty columns */ + /* 94 KU 0xFEF7($(P~w(B)-0xFEFE($(P~~(B) -> 0xA2AE($B".(B) */ + { "\217\376\367", "\217\376\376", "\242\256", UJIS }, + + /* JIS X 0212:1990 for UJIS2004 */ + /* sequences of empty columns */ + /* 2 KU 0xA2A1($(D"!(B)-0xA2AE($(D".(B) -> 0xA2AE($B".(B) */ + { "\217\242\241", "\217\242\256", "\242\256", UJIS }, + /* 2 KU 0xA2BA($(D":(B)-0xA2C1($(D"A(B) -> 0xA2AE($B".(B) */ + { "\217\242\272", "\217\242\301", "\242\256", UJIS }, + /* 2 KU 0xA2C5($(D"E(B)-0xA2EA($(D"j(B) -> 0xA2AE($B".(B) */ + { "\217\242\305", "\217\242\352", "\242\256", UJIS }, + /* 2 KU 0xA2F2($(D"r(B)-0xA2FE($(D"~(B) -> 0xA2AE($B".(B) */ + { "\217\242\362", "\217\242\376", "\242\256", UJIS }, + /* 6 KU 0xA6A1($(D&!(B)-0xA6E0($(D&`(B) -> 0xA2AE($B".(B) */ + { "\217\246\241", "\217\246\340", "\242\256", UJIS }, + /* 6 KU 0xA6E6($(D&f(B) -> 0xA2AE($B".(B) */ + { "\217\246\346", NULL, "\242\256", UJIS }, + /* 6 KU 0xA6E8($(D&h(B) -> 0xA2AE($B".(B) */ + { "\217\246\350", NULL, "\242\256", UJIS }, + /* 6 KU 0xA6EB($(D&k(B) -> 0xA2AE($B".(B) */ + { "\217\246\353", NULL, 
"\242\256", UJIS }, + /* 6 KU 0xA6ED($(D&m(B)-0xA6F0($(D&p(B) -> 0xA2AE($B".(B) */ + { "\217\246\355", "\217\246\360", "\242\256", UJIS }, + /* 6 KU 0xA6FD($(D&}(B)-0xA6FE($(D&~(B) -> 0xA2AE($B".(B) */ + { "\217\246\375", "\217\246\376", "\242\256", UJIS }, + /* 7 KU 0xA7A1($(D'!(B)-0xA7C1($(D'A(B) -> 0xA2AE($B".(B) */ + { "\217\247\241", "\217\247\301", "\242\256", UJIS }, + /* 7 KU 0xA7CF($(D'O(B)-0xA7F1($(D'q(B) -> 0xA2AE($B".(B) */ + { "\217\247\317", "\217\247\361", "\242\256", UJIS }, + /* 9 KU 0xA9A3($(D)#(B) -> 0xA2AE($B".(B) */ + { "\217\251\243", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9A5($(D)%(B) -> 0xA2AE($B".(B) */ + { "\217\251\245", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9A7($(D)'(B) -> 0xA2AE($B".(B) */ + { "\217\251\247", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9AA($(D)*(B) -> 0xA2AE($B".(B) */ + { "\217\251\252", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9AE($(D).(B) -> 0xA2AE($B".(B) */ + { "\217\251\256", NULL, "\242\256", UJIS }, + /* 9 KU 0xA9B1($(D)1(B)-0xA9C0($(D)@(B) -> 0xA2AE($B".(B) */ + { "\217\251\261", "\217\251\300", "\242\256", UJIS }, + /* 9 KU 0xA9D1($(D)Q(B)-0xA9FE($(D)~(B) -> 0xA2AE($B".(B) */ + { "\217\251\321", "\217\251\376", "\242\256", UJIS }, + /* 10 KU 0xAAB9($(D*9(B) -> 0xA2AE($B".(B) */ + { "\217\252\271", NULL, "\242\256", UJIS }, + /* 10 KU 0xAAF8($(D*x(B)-0xAAFE($(D*~(B) -> 0xA2AE($B".(B) */ + { "\217\252\370", "\217\252\376", "\242\256", UJIS }, + /* 11 KU 0xABBC($(D+<(B) -> 0xA2AE($B".(B) */ + { "\217\253\274", NULL, "\242\256", UJIS }, + /* 11 KU 0xABC4($(D+D(B) -> 0xA2AE($B".(B) */ + { "\217\253\304", NULL, "\242\256", UJIS }, + /* 11 KU 0xABF8($(D+x(B)-0xABFE($(D+~(B) -> 0xA2AE($B".(B) */ + { "\217\253\370", "\217\253\376", "\242\256", UJIS }, + /* 77 KU 0xEDE4($(Dmd(B)-0xEDFE($(Dm~(B) -> 0xA2AE($B".(B) */ + { "\217\355\344", "\217\355\376", "\242\256", UJIS }, + + /* NULL */ + { 0, 0, 0, 0 } +}; +static convtable etable_ujis2004 = { eliminate_wrong_ujis2004, NULL }; + static int iso646p(cs) CHARSET cs; @@ -1470,18 
+1686,27 @@ } ostr[2] = NULCH; ocs[2] = NULLCS; - } else if (CS2CHARSET(*ics) == JISX0208KANJI) { - /* convert JIS X 0208-1983 into JIS X 0208:1990 */ + } else if (CS2CHARSET(*ics) == JISX0208KANJI || + CS2CHARSET(*ics) == JISX0213KANJI1 || + CS2CHARSET(*ics) == JISX02132004KANJI1) { + /* + * JIS X 0208:1990 has two additional characters from JIS X + * 0208-1983. It's addition. So, no need to change code. + * We simply treat JIS X 0208-1983 as JIS X 0208:1990 + * + * JIS X 0213:2000 has several additional characters from JIS + * X 0208:1990. Again, it's just addition. We ignore them + * and treat JIS X 0213:2000 as JIS X 0208:1990 + * + * JIS X 0213:2004 has 10 additional characters from JIS X + * 0213:2000. Again, it's just addition. We ignore them + * and treat JIS X 0213:2004 as JIS X 0208:1990 + */ ostr[0] = istr[0]; ostr[1] = istr[1]; + ostr[2] = NULCH; ocs[0] = JISX0208_90KANJI; ocs[1] = JISX0208_90KANJI | REST_MASK; - - /* - * Difference betwen 1983 and 1990 are two added characters, - * 0x7425 and 0x7426. So, here is nothing to do. 
- */ - ostr[2] = NULCH; ocs[2] = NULLCS; } else if (CS2CHARSET(*ics) == JISX0201ROMAN) { /* convert JIS X 0201:1976 into ASCII */ @@ -1637,6 +1862,18 @@ return 0; else return 1; + } else if (CS2CHARSET(*ics) == JISX02132004KANJI1) { + ptab = find_convtab(&etable_jisx02132004_1, istr); + if (ptab) + return 0; + else + return 1; + } else if (CS2CHARSET(*ics) == JISX02132004KANJI2) { + ptab = find_convtab(&etable_jisx02132004_2, istr); + if (ptab) + return 0; + else + return 1; } else if (CS2CHARSET(*ics) == SJIS) { /* eliminate wrong codes */ ptab = find_convtab(&etable_sjis, istr); @@ -1644,6 +1881,20 @@ return 0; else return 1; + } else if (CS2CHARSET(*ics) == SJIS2000) { + /* eliminate wrong codes */ + ptab = find_convtab(&etable_sjis2000, istr); + if (ptab) + return 0; + else + return 1; + } else if (CS2CHARSET(*ics) == SJIS2004) { + /* eliminate wrong codes */ + ptab = find_convtab(&etable_sjis2004, istr); + if (ptab) + return 0; + else + return 1; } else if (CS2CHARSET(*ics) == UJIS) { /* eliminate wrong codes */ ptab = find_convtab(&etable_ujis, istr); @@ -1652,6 +1903,20 @@ else return 1; /* TODO: G2 */ + } else if (CS2CHARSET(*ics) == UJIS2000) { + /* eliminate wrong codes */ + ptab = find_convtab(&etable_ujis2000, istr); + if (ptab) + return 0; + else + return 1; + } else if (CS2CHARSET(*ics) == UJIS2004) { + /* eliminate wrong codes */ + ptab = find_convtab(&etable_ujis2004, istr); + if (ptab) + return 0; + else + return 1; } return 1; } Index: less/version.c diff -u less/version.c:1.107 less/version.c:1.109 --- less/version.c:1.107 Tue Sep 6 07:18:27 2005 +++ less/version.c Sat Feb 18 09:57:24 2006 @@ -748,9 +748,14 @@ even if it is not fit in first line. Less moves entire text to second line. iso258 9/4/05 Joined with less-382. -iso259 9/6/05 Changed an algorithm to detect the gap of parsing input stream. +iso259 9/6/05 Changed the algorithm to detect the gap of parsing input stream. This fixed a problem on long JIS/English text. Fixed '\r' problem. 
+iso260 9/19/05 Changed the algorithm handling input and output character sets. Now jless uses two variables: one represents the character sets supported for the input stream, and the other represents the encoding scheme for the output stream. Changed to support JIS X 0213:2004. */ -char version[] = "382+iso259"; +char version[] = "382+iso260";