Index: ChangeLog =================================================================== RCS file: /cvsroot/chasen/ChangeLog,v retrieving revision 1.146 retrieving revision 1.147 diff -u -r1.146 -r1.147 --- ChangeLog 13 Aug 2003 13:55:19 -0000 1.146 +++ ChangeLog 21 Aug 2003 01:14:44 -0000 1.147 @@ -1,3 +1,7 @@ +2003-08-21 TAKAOKA Kazuma + + * lib/tokenizer.c (cha_tok_parse): Fix for SPACE_POS. + 2003-08-13 TAKAOKA Kazuma * lib/iotool.c (cha_read_registry): Read the registry for the Index: lib/parse.c =================================================================== RCS file: /cvsroot/chasen/lib/parse.c,v retrieving revision 1.57 retrieving revision 1.58 diff -u -r1.57 -r1.58 --- lib/parse.c 30 Jul 2003 16:06:57 -0000 1.57 +++ lib/parse.c 21 Aug 2003 01:14:44 -0000 1.58 @@ -27,7 +27,7 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: parse.c,v 1.57 2003/07/30 16:06:57 kazuma-t Exp $ + * $Id: parse.c,v 1.58 2003/08/21 01:14:44 kazuma-t Exp $ */ #include "chalib.h" @@ -434,7 +434,7 @@ int mrph_idx, i; /* skip annotations and white space */ - while (cha_tok_anno_type(Cha_tokenizer, cursor) != 0 ) + while (cha_tok_anno_type(Cha_tokenizer, cursor) >= 0) cursor += cha_tok_char_type_len(Cha_tokenizer, cursor); if (cursor >= len) break; Index: lib/tokenizer.c =================================================================== RCS file: /cvsroot/chasen/lib/tokenizer.c,v retrieving revision 1.14 retrieving revision 1.16 diff -u -r1.14 -r1.16 --- lib/tokenizer.c 8 Jul 2003 17:08:49 -0000 1.14 +++ lib/tokenizer.c 21 Aug 2003 01:35:07 -0000 1.16 @@ -27,7 +27,7 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: tokenizer.c,v 1.14 2003/07/08 17:08:49 kazuma-t Exp $ + * $Id: tokenizer.c,v 1.16 2003/08/21 01:35:07 kazuma-t Exp $ */ #include @@ -154,6 +154,7 @@ int cursor, head; int state, state0; anno_info *anno = NULL; + int i; tok->string = str; tok->string_len = len; @@ -165,7 +166,8 @@ } memset(tok->_char_type, 0, sizeof(int) * len); - memset(tok->_anno_type, 0, sizeof(int) * len); + for (i = 0; i < len; i++) + tok->_anno_type[i] = -1; state0 = state = 0; /* NOSTATE */ for (cursor = head = 0; cursor < len; @@ -248,10 +250,9 @@ * This function returns the type of annotation, if there is * the begining of annotation on cursor. The type is more than 0. * - * And it returns less than 0 value, if the character on cursor is - * white space. + * And it returns 0, if the character on cursor is white space. * - * Otherwise it returns 0. + * Otherwise it returns less than 0. */ int cha_tok_anno_type(chasen_tok_t *tok, int cursor) @@ -329,7 +330,7 @@ ja_char_type_parse(chasen_tok_t *tok, int state, int *state0, int cursor) { if (state == JA_SPACE) { - tok->_anno_type[cursor] = -1; + tok->_anno_type[cursor] = 0; } else if ((state == HALF_LATIN) || (state == FULL_LATIN)) { ; /* do nothing */ @@ -350,7 +351,7 @@ en_char_type_parse(chasen_tok_t *tok, int state, int *state0, int cursor) { if (state == EN_SPACE) { - tok->_anno_type[cursor] = -1; + tok->_anno_type[cursor] = 0; } else if (state == EN_OTHER) { *state0 = EN_NOSTATE; }