/* * International Union of Pure and Applied Chemistry (IUPAC) * International Chemical Identifier (InChI) * Version 1 * Software version 1.01 * July 21, 2006 * Developed at NIST */ /* The code in this #include file reads InChI AuxInfo */ /****************************************************************************/ #define MIN_BOND_LENGTH (1.0e-6) #define INCHI_LINE_LEN 512 /*1024*/ /*256*/ #define INCHI_LINE_ADD 384 /*128*/ /*64*/ /* Note: (INCHI_LINE_LEN - INCHI_LINE_ADD) > (length of the longest item: szCoord) = 33 */ /*****************************************************************************/ #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #define AB_MAX_WELL_DEFINED_PARITY inchi_max(INCHI_PARITY_ODD, INCHI_PARITY_EVEN) /* 1, 2 => well defined parities, uncluding 'unknown' */ #define AB_MIN_WELL_DEFINED_PARITY inchi_min(INCHI_PARITY_ODD, INCHI_PARITY_EVEN) /* min(INCHI_PARITY_ODD, INCHI_PARITY_EVEN) */ #define ATOM_PARITY_WELL_DEF(X) (AB_MIN_WELL_DEFINED_PARITY <= (X) && (X) <= AB_MAX_WELL_DEFINED_PARITY) #define inchi_NUMH2(AT,CUR_AT) ((AT[CUR_AT].num_iso_H[0]>0?AT[CUR_AT].num_iso_H[0]:0) +AT[CUR_AT].num_iso_H[1]+AT[CUR_AT].num_iso_H[2]+AT[CUR_AT].num_iso_H[3]) #define SB_PARITY_FLAG 0x38 /* disconnected structure has undef. 
parity */ #define SB_PARITY_SHFT 3 #define SB_PARITY_MASK 0x07 #define SB_PARITY_1(X) (X & SB_PARITY_MASK) /* refers to connected structure */ #define SB_PARITY_2(X) (((X) >> SB_PARITY_SHFT) & SB_PARITY_MASK) /* refers to connected structure */ int str_fgetc( INCHI_FILE *f ); char *str_fgets( char *szLine, int len, INCHI_FILE *f ); int my_fgets( char *szLine, int len, INCHI_FILE *f, int *bTooLongLine ); #endif #ifdef INCHI_LIBRARY void FreeInchi_Atom( inchi_Atom **at ); inchi_Atom *CreateInchi_Atom( int num_atoms ); void FreeInchi_Input( inchi_Input *inp_at_data ); S_SHORT *is_in_the_slist( S_SHORT *pathAtom, S_SHORT nNextAtom, int nPathLen ); int is_element_a_metal( char szEl[] ); char *str_fgetsTab( char *szLine, int len, INCHI_FILE *f ); int my_fgetsTab( char *szLine, int len, INCHI_FILE *f, int *bTooLongLine ); int my_fgetsTab1( char *szLine, int len, INCHI_FILE *f, int *bTooLongLine ); #endif #ifndef INCHI_MAIN void FreeInchi_Stereo0D( inchi_Stereo0D **stereo0D ); inchi_Stereo0D *CreateInchi_Stereo0D( int num_stereo0D ); int Extract0DParities( inp_ATOM *at, int nNumAtoms, inchi_Stereo0D *stereo0D, int num_stereo0D, char *pStrErr, int *err ); #endif #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) /*******************************************************************/ int str_fgetc( INCHI_FILE *f ) { #ifdef INCHI_LIBRARY if ( f->nPtr < f->nUsedLength ) { return (int)f->pStr[f->nPtr++]; } return EOF; #else return fgetc( f ); #endif } /*******************************************************************/ char *str_fgets( char *szLine, int len, INCHI_FILE *f ) { int length=0, c=0; if ( -- len < 0 ) { return NULL; } while ( length < len && EOF != (c = str_fgetc( f )) ) { szLine[length++] = (char)c; if ( c == '\n' ) break; } if ( !length && EOF == c ) { return NULL; } szLine[length] = '\0'; return szLine; } /*******************************************************************/ int my_fgets( char *szLine, int len, INCHI_FILE *f, int *bTooLongLine ) { int length; char 
*p; do { p = str_fgets( szLine, len-1, f ); if ( !p ) { *bTooLongLine = 0; return -1; /* end of file or cannot read */ } szLine[len-1] = '\0'; /* *bTooLongLine = !strchr( szLine, '\n' ); */ p = strchr( szLine, '\n' ); *bTooLongLine = ( !p && ((int)strlen(szLine)) == len-2 ); LtrimRtrim( szLine, &length ); } while ( !length ); return length; } #endif #ifdef INCHI_LIBRARY /******************************************************************************************************/ void FreeInchi_Atom( inchi_Atom **at ) { if ( at && *at ) { inchi_free( *at ); *at = NULL; } } /******************************************************************************************************/ inchi_Atom *CreateInchi_Atom( int num_atoms ) { inchi_Atom *p = (inchi_Atom* ) inchi_calloc(num_atoms, sizeof(inchi_Atom) ); return p; } /******************************************************************************************************/ void FreeInchi_Input( inchi_Input *inp_at_data ) { FreeInchi_Atom( &inp_at_data->atom ); FreeInchi_Stereo0D( &inp_at_data->stereo0D ); memset( inp_at_data, 0, sizeof(*inp_at_data) ); } /*************************************************************************/ S_SHORT *is_in_the_slist( S_SHORT *pathAtom, S_SHORT nNextAtom, int nPathLen ) { for ( ; nPathLen && *pathAtom != nNextAtom; nPathLen--, pathAtom++ ) ; return nPathLen? 
pathAtom : NULL; } /************************************************/ int is_element_a_metal( char szEl[] ) { static const char szMetals[] = "K;V;Y;W;U;" "Li;Be;Na;Mg;Al;Ca;Sc;Ti;Cr;Mn;Fe;Co;Ni;Cu;Zn;Ga;Rb;Sr;Zr;" "Nb;Mo;Tc;Ru;Rh;Pd;Ag;Cd;In;Sn;Sb;Cs;Ba;La;Ce;Pr;Nd;Pm;Sm;" "Eu;Gd;Tb;Dy;Ho;Er;Tm;Yb;Lu;Hf;Ta;Re;Os;Ir;Pt;Au;Hg;Tl;Pb;" "Bi;Po;Fr;Ra;Ac;Th;Pa;Np;Pu;Am;Cm;Bk;Cf;Es;Fm;Md;No;Lr;Rf;"; const int len = strlen(szEl); const char *p; if ( 0 < len && len <= 2 && isalpha( UCINT szEl[0] ) && isupper( szEl[0] ) && (p = strstr(szMetals, szEl) ) && p[len] == ';' ) { return 1; /*return AtType_Metal;*/ } return 0; } /*******************************************************************/ /* read up to len or tab or LF; if empty read next until finds non-empty line */ /* remove leading and trailing white spaces; keep zero termination */ /*******************************************************************/ char *str_fgetsTab( char *szLine, int len, INCHI_FILE *f ) { int length=0, c=0; if ( --len < 0 ) { return NULL; } while ( length < len && EOF != (c = str_fgetc( f )) ) { if ( c == '\t' ) c = '\n'; szLine[length++] = (char)c; if ( c == '\n' ) break; } if ( !length && EOF == c ) { return NULL; } szLine[length] = '\0'; return szLine; } /*******************************************************************/ /* read up to len or tab or LF; if empty read next until finds non-empty line */ /* remove leading and trailing white spaces; keep zero termination */ /*******************************************************************/ int my_fgetsTab( char *szLine, int len, INCHI_FILE *f, int *bTooLongLine ) { int length; char *p; do { p = str_fgetsTab( szLine, len-1, f ); if ( !p ) { *bTooLongLine = 0; return -1; /* end of file or cannot read */ } szLine[len-1] = '\0'; /* *bTooLongLine = !strchr( szLine, '\n' ); */ p = strchr( szLine, '\n' ); *bTooLongLine = ( !p && ((int)strlen(szLine)) == len-2 ); LtrimRtrim( szLine, &length ); } while ( !length ); return length; } 
/*******************************************************************/ int my_fgetsTab1( char *szLine, int len, INCHI_FILE *f, int *bTooLongLine ) { int length; char *p; /*do {*/ p = str_fgetsTab( szLine, len-1, f ); if ( !p ) { *bTooLongLine = 0; return -1; /* end of file or cannot read */ } szLine[len-1] = '\0'; /* *bTooLongLine = !strchr( szLine, '\n' ); */ p = strchr( szLine, '\n' ); *bTooLongLine = ( !p && ((int)strlen(szLine)) == len-2 ); LtrimRtrim( szLine, &length ); /*} while ( !length );*/ return length; } #endif #ifndef INCHI_MAIN /******************************************************************************************************/ inchi_Stereo0D *CreateInchi_Stereo0D( int num_stereo0D ) { return (inchi_Stereo0D* ) inchi_calloc(num_stereo0D, sizeof(inchi_Stereo0D) ); } /******************************************************************************************************/ void FreeInchi_Stereo0D( inchi_Stereo0D **stereo0D ) { if ( stereo0D && *stereo0D ) { inchi_free( *stereo0D ); *stereo0D = NULL; } } #endif #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #ifdef INCHI_LIBRARY #define INChITo_Atom ll_INChIToInchi_Atom #else #define INChITo_Atom ee_INChIToIbChI_Atom #define FindToken e_FindToken #define LoadLine e_LoadLine #endif #define AT_NUM_BONDS(AT) (AT).num_bonds #define ATOM_NUMBER AT_NUM #define IN_NEIGH_LIST is_in_the_slist #define INPUT_FILE INCHI_FILE #define Create_Atom CreateInchi_Atom #define AT_BONDS_VAL(AT,I) AT[I].num_iso_H[0] #define ISOLATED_ATOM (-15) #define NUM_ISO_Hk(AT,I,K) AT[I].num_iso_H[K+1] #define IS_METAL_ATOM(AT,I) is_element_a_metal( AT[I].elname ) #else #define inchi_Atom inp_ATOM #define AT_NUM_BONDS(AT) (AT).valence #define ATOM_NUMBER AT_NUMB #define IN_NEIGH_LIST is_in_the_list #define inchi_NUMH2(AT,N) NUMH(AT,N) #define INChITo_Atom cc_INChIToInpAtom #define INPUT_FILE FILE #define Create_Atom CreateInpAtom #define AT_BONDS_VAL(AT,I) AT[I].chem_bonds_valence #define ISOLATED_ATOM 15 #define NUM_ISO_Hk(AT,I,K) 
AT[I].num_iso_H[K] #define IS_METAL_ATOM(AT,I) is_el_a_metal( AT[I].el_number ) #endif /*****************************************************************************/ /* local prototypes */ char *FindToken( INPUT_FILE *inp_molfile, int *bTooLongLine, const char *sToken, int lToken, char *szLine, int nLenLine, char *p, int *res ); char *LoadLine( INPUT_FILE *inp_molfile, int *bTooLongLine, int *bItemIsOver, char **s, char *szLine, int nLenLine, int nMinLen2Load, char *p, int *res ); int INChITo_Atom(INPUT_FILE *inp_molfile, MOL_COORD **szCoord, inchi_Stereo0D **stereo0D, int *num_stereo0D, int bDoNotAddH, INPUT_TYPE nInputType, inchi_Atom **at, int max_num_at, int *num_dimensions, int *num_bonds, char *pSdfLabel, char *pSdfValue, long *Id, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr ); #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) /*****************************************************************************/ int INChIToInchi_Atom ( INCHI_FILE *inp_molfile, inchi_Stereo0D **stereo0D, int *num_stereo0D, int bDoNotAddH, INPUT_TYPE nInputType, inchi_Atom **at, int max_num_at, int *num_dimensions, int *num_bonds, char *pSdfLabel, char *pSdfValue, long *Id, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr ); int INChIToInchi_Atom ( INCHI_FILE *inp_molfile, inchi_Stereo0D **stereo0D, int *num_stereo0D, int bDoNotAddH, INPUT_TYPE nInputType, inchi_Atom **at, int max_num_at, int *num_dimensions, int *num_bonds, char *pSdfLabel, char *pSdfValue, long *Id, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr ) { return INChITo_Atom ( inp_molfile, NULL, stereo0D, num_stereo0D, bDoNotAddH, nInputType, at, max_num_at, num_dimensions, num_bonds, pSdfLabel, pSdfValue, Id, pInpAtomFlags, err, pStrErr ); } #else /*****************************************************************************/ int INChIToInpAtom ( FILE *inp_molfile, MOL_COORD **szCoord, int bDoNotAddH, INPUT_TYPE nInputType, inp_ATOM **at, int max_num_at, int *num_dimensions, int *num_bonds, char 
*pSdfLabel, char *pSdfValue, long *Id, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr ); int INChIToInpAtom ( FILE *inp_molfile, MOL_COORD **szCoord, int bDoNotAddH, INPUT_TYPE nInputType, inp_ATOM **at, int max_num_at, int *num_dimensions, int *num_bonds, char *pSdfLabel, char *pSdfValue, long *Id, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr ) { return INChITo_Atom ( inp_molfile, szCoord, NULL, NULL, bDoNotAddH, nInputType, at, max_num_at, num_dimensions, num_bonds, pSdfLabel, pSdfValue, Id, pInpAtomFlags, err, pStrErr ); } #endif /*****************************************************************************/ char *FindToken( INPUT_FILE *inp_molfile, int *bTooLongLine, const char *sToken, int lToken, char *szLine, int nLenLine, char *p, int *res ) { char *q; int res2; while ( !(q = strstr( p, sToken ) ) ) { if ( (q = strrchr( p, '/' )) && (q + lToken > szLine + *res) ) { *res -= q - szLine; /* res = the length of the szLine to be left in */ memmove( szLine, q, *res + 1); } else { *res = 0; } if ( !*bTooLongLine || 0 > (res2 = my_fgetsTab1( szLine + *res, nLenLine - *res - 1, inp_molfile, bTooLongLine ) ) ) { /* the line is over or end of file */ return NULL; } else { *res += res2; p = szLine; } } return q + lToken; } /*****************************************************************************/ char *LoadLine( INPUT_FILE *inp_molfile, int *bTooLongLine, int *bItemIsOver, char **s, char *szLine, int nLenLine, int nMinLen2Load, char *p, int *res ) { int pos = p - szLine, res2; if ( !*bItemIsOver && nLenLine - (*res - pos) > nMinLen2Load ) { /* load the next portion if possible */ if ( pos ) { *res -= pos; memmove( szLine, p, *res+1 ); p = szLine; if ( *s ) { *s -= pos; } pos = 0; } res2 = my_fgetsTab1( szLine + *res, nLenLine - *res - 1, inp_molfile, bTooLongLine ); if ( res2 > 0 ) { *bItemIsOver = ( (*s = strchr( p + *res, '/') ) || !*bTooLongLine ); *res += res2; } else { *bItemIsOver = 1; } } return p; } 
/*****************************************************************************/ int INChITo_Atom(INPUT_FILE *inp_molfile, MOL_COORD **szCoord, inchi_Stereo0D **stereo0D, int *num_stereo0D, int bDoNotAddH, INPUT_TYPE nInputType, inchi_Atom **at, int max_num_at, int *num_dimensions, int *num_bonds, char *pSdfLabel, char *pSdfValue, long *Id, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr ) { int num_atoms = 0, bFindNext = 0, len, bHeaderRead, bItemIsOver, bErrorMsg, bRestoreInfo; int bFatal = 0, num_struct = 0; int i, k, k2, res, bond_type, bond_stereo1, bond_stereo2, bond_char, neigh, bond_parity, bond_parityNM; int bTooLongLine, res2, bTooLongLine2, pos, hlen, hk; long longID; char szLine[INCHI_LINE_LEN], szNextLine[INCHI_LINE_ADD], *p, *q, *s, parity; int b2D=0, b3D=0, b23D, nNumBonds = 0, bNonZeroXYZ, bNonMetal; int len_stereo0D = 0, max_len_stereo0D = 0; inchi_Stereo0D *atom_stereo0D = NULL; inchi_Atom *atom = NULL; MOL_COORD *pszCoord = NULL; INCHI_MODE InpAtomFlags = 0; /* 0 or FLAG_INP_AT_NONCHIRAL or FLAG_INP_AT_CHIRAL */ static const char szIsoH[] = "hdt"; /* plain tags */ static const char sStructHdrPln[] = "Structure:"; static const char sStructHdrPlnNoLblVal[] = " is missing"; static char sStructHdrPlnAuxStart[64] =""; /*"$1.1Beta/";*/ static int lenStructHdrPlnAuxStart = 0; static const char sStructHdrPlnRevAt[] = "/rA:"; static const char sStructHdrPlnRevBn[] = "/rB:"; static const char sStructHdrPlnRevXYZ[] = "/rC:"; const char *sToken; int lToken; if ( !lenStructHdrPlnAuxStart ) { lenStructHdrPlnAuxStart = sprintf( sStructHdrPlnAuxStart, "AuxInfo=" ); } if ( at ) { if ( *at && max_num_at ) { memset( *at, 0, max_num_at * sizeof(**at) ); } #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) if ( stereo0D && num_stereo0D ) { if ( *stereo0D && *num_stereo0D ) { max_len_stereo0D = *num_stereo0D; memset( *stereo0D, 0, max_len_stereo0D * sizeof( **stereo0D )); } else { max_len_stereo0D = 0; } } #else if ( szCoord && *szCoord ) { inchi_free( *szCoord ); 
*szCoord = NULL; } #endif } else { bFindNext = 1; } bHeaderRead = bErrorMsg = bRestoreInfo = 0; *num_dimensions = *num_bonds = 0; /*************************************************************/ /* extract reversibility info from plain text INChI format */ /*************************************************************/ if ( nInputType == INPUT_INCHI_PLAIN ) { bHeaderRead = hk = 0; while ( 0 < (res = my_fgetsTab( szLine, sizeof(szLine)-1, inp_molfile, &bTooLongLine ) ) ) { /********************* find and interpret structure header ************/ if ( !bTooLongLine && (hlen=sizeof(sStructHdrPln)-1, !memcmp(szLine, sStructHdrPln, hlen)) ) { p = szLine + hlen; longID = 0; num_atoms = 0; /* structure number */ longID = strtol( p, &q, 10 ); if ( q && q[0] == '.' && q[1] == ' ' ) { p = q+2; } p = p + strspn( p, " \n\r" ); if ( pSdfLabel ) { pSdfLabel[0] = '\0'; } if ( pSdfValue ) { pSdfValue[0] = '\0'; } if ( *p ) { /* has label name */ /*p ++;*/ if ( q = strchr( p, '=' ) ) { /* '=' separates label name from the value */ len = inchi_min( q-p+1, MAX_SDF_HEADER-1); if ( pSdfLabel ) { mystrncpy( pSdfLabel, p, len ); LtrimRtrim( pSdfLabel, &len ); } p = q+1; q = p + (int)strlen( p ); if ( q-p > 0 ) { len = inchi_min( q-p+1, MAX_SDF_VALUE-1); if ( pSdfValue ) { mystrncpy( pSdfValue, p, len ); } p = q; } } else if ( q = strstr( p, sStructHdrPlnNoLblVal ) ) { len = inchi_min( q-p+1, MAX_SDF_HEADER-1); if ( pSdfLabel ) { mystrncpy( pSdfLabel, p, len ); } p = q+1; } } if ( Id ) *Id = longID; bHeaderRead = 1; bErrorMsg = bRestoreInfo = 0; } else if ( !memcmp( szLine, sStructHdrPlnAuxStart, lenStructHdrPlnAuxStart) ) { /* found the header of the AuxInfo, read AuxInfo head of the line */ if ( !bHeaderRead ) { longID = 0; if ( Id ) *Id = longID; if ( pSdfLabel ) { pSdfLabel[0] = '\0'; } if ( pSdfValue ) { pSdfValue[0] = '\0'; } } bHeaderRead = 0; /* check for empty "AuxInfo=ver//" */ p = strchr( szLine + lenStructHdrPlnAuxStart, '/' ); if ( p && p[1] == '/' && (!p[2] || '\n' == p[2]) ) 
{ goto bypass_end_of_INChI_plain; } /***************** search for atoms block (plain) **********************/ p = szLine; sToken = sStructHdrPlnRevAt; lToken = sizeof(sStructHdrPlnRevAt)-1; /* search for sToken in the line; load next segments of the line if sToken has not found */ p = FindToken( inp_molfile, &bTooLongLine, sToken, lToken, szLine, sizeof(szLine), p, &res ); if ( !p ) { *err = INCHI_INP_ERROR_ERR; num_atoms = INCHI_INP_ERROR_RET; MOLFILE_ERR_SET (*err, 0, "Missing atom data"); goto bypass_end_of_INChI_plain; } else { /* atoms block started */ i = 0; res2 = bTooLongLine2 = -1; bItemIsOver = (s = strchr( p, '/') ) || !bTooLongLine; while ( 1 ) { p = LoadLine( inp_molfile, &bTooLongLine, &bItemIsOver, &s, szLine, sizeof(szLine), INCHI_LINE_ADD, p, &res ); if ( !i ) { /* allocate atom */ num_atoms = strtol( p, &q, 10 ); if ( !num_atoms || !q || !*q ) { num_atoms = 0; /* no atom data */ goto bypass_end_of_INChI_plain; } p = q; /* Molfile chirality flag */ switch( *p ) { case 'c': InpAtomFlags |= FLAG_INP_AT_CHIRAL; p ++; break; case 'n': InpAtomFlags |= FLAG_INP_AT_NONCHIRAL; p ++; break; } if ( at && *at ) { if ( num_atoms > max_num_at ) { inchi_free( *at ); *at = NULL; } else { memset( *at, 0, max_num_at * sizeof( **at ) ); atom = *at; } } if ( !at || !*at ) { atom = Create_Atom( num_atoms+1 ); if ( !atom ) { num_atoms = INCHI_INP_FATAL_RET; /* was -1; error */ *err = INCHI_INP_FATAL_ERR; MOLFILE_ERR_SET (*err, 0, "Out of RAM"); goto bypass_end_of_INChI_plain; } } if ( stereo0D && *stereo0D ) { if ( num_atoms > max_len_stereo0D ) { FreeInchi_Stereo0D( stereo0D ); } else { memset( *stereo0D, 0, max_len_stereo0D * sizeof( **stereo0D ) ); atom_stereo0D = *stereo0D; } } if ( !stereo0D || !*stereo0D ) { max_len_stereo0D = num_atoms+1; atom_stereo0D = CreateInchi_Stereo0D( max_len_stereo0D ); if ( !atom_stereo0D ) { num_atoms = INCHI_INP_FATAL_RET; /* fatal error: cannot allocate */ *err = INCHI_INP_FATAL_ERR; MOLFILE_ERR_SET (*err, 0, "Out of RAM"); goto 
bypass_end_of_INChI_plain; } } } /* element, first char */ if ( !isalpha( UCINT *p ) || !isupper( UCINT *p ) || i >= num_atoms ) { break; /* end of atoms block */ } atom[i].elname[0] = *p ++; /* element, second char */ if ( isalpha( UCINT *p ) && islower( UCINT *p ) ) { atom[i].elname[1] = *p ++; } #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #else atom[i].el_number = get_periodic_table_number( atom[i].elname ); #endif /* bonds' valence + number of non-isotopic H */ if ( isdigit( UCINT *p ) ) { AT_BONDS_VAL(atom,i) = (char)strtol( p, &q, 10 ); if ( !AT_BONDS_VAL(atom,i) ) AT_BONDS_VAL(atom,i) = ISOLATED_ATOM; /* same convention as in MOLfile, found zero bonds valence */ p = q; } /* charge */ atom[i].charge = (*p == '+')? 1 : (*p == '-')? -1 : 0; if ( atom[i].charge ) { p ++; if ( isdigit( UCINT *p ) ) { atom[i].charge *= (S_CHAR)(strtol( p, &q, 10 ) & CHAR_MASK); p = q; } } /* radical */ if ( *p == '.' ) { p ++; if ( isdigit( UCINT *p ) ) { atom[i].radical = (S_CHAR)strtol( p, &q, 10 ); p = q; } } /* isotopic mass */ if ( *p == 'i' ) { p ++; if ( isdigit( UCINT *p ) ) { int mw = strtol( p, &q, 10 ); p = q; #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) atom[i].isotopic_mass = mw; #else mw -= get_atw_from_elnum( atom[i].el_number ); if ( mw >= 0 ) mw ++; atom[i].iso_atw_diff = mw; #endif } } /* parity */ switch( *p ) { case 'o': parity = INCHI_PARITY_ODD; p ++; break; case 'e': parity = INCHI_PARITY_EVEN; p ++; break; case 'u': parity = INCHI_PARITY_UNKNOWN; p ++; break; case '?': parity = INCHI_PARITY_UNDEFINED; p ++; break; default: parity = 0; break; } if ( parity ) { atom_stereo0D[len_stereo0D].central_atom = i; atom_stereo0D[len_stereo0D].parity = parity; atom_stereo0D[len_stereo0D].type = INCHI_StereoType_Tetrahedral; len_stereo0D ++; } /* isotopic h, d, t */ for ( k = 0; k < NUM_H_ISOTOPES; k ++ ) { if ( *p == szIsoH[k] ) { NUM_ISO_Hk(atom,i,k) = 1; p ++; if ( isdigit( UCINT *p ) ) { NUM_ISO_Hk(atom,i,k) = (char)strtol( p, &q, 10 ); p = q; } } } 
i ++; } if ( !bItemIsOver || i != num_atoms || s && p != s ) { num_atoms = INCHI_INP_ERROR_RET; /* error */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong number of atoms"); goto bypass_end_of_INChI_plain; } } /***************** search for bonds block (plain) and read it *****************/ /*p = szLine;*/ sToken = sStructHdrPlnRevBn; lToken = sizeof(sStructHdrPlnRevBn)-1; /* search for sToken in the line; load next segments of the line if sToken has not found */ p = FindToken( inp_molfile, &bTooLongLine, sToken, lToken, szLine, sizeof(szLine), p, &res ); if ( !p ) { num_atoms = INCHI_INP_ERROR_RET; /* error */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Missing bonds data"); goto bypass_end_of_INChI_plain; } else { /* bonds block started */ i = 1; res2 = bTooLongLine2 = -1; bItemIsOver = (s = strchr( p, '/') ) || !bTooLongLine; if ( 1 == num_atoms ) { /* needed because the next '/' may be still out of szLine */ p = LoadLine( inp_molfile, &bTooLongLine, &bItemIsOver, &s, szLine, sizeof(szLine), INCHI_LINE_ADD, p, &res ); } while ( i < num_atoms ) { p = LoadLine( inp_molfile, &bTooLongLine, &bItemIsOver, &s, szLine, sizeof(szLine), INCHI_LINE_ADD, p, &res ); if ( i >= num_atoms || s && p >= s ) { break; /* end of bonds (plain) */ } /* bond, first char */ if ( *p == ';' ) { p ++; i ++; continue; } if ( !isalpha( UCINT *p ) ) { num_atoms = INCHI_INP_ERROR_RET; /* error */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong bonds data"); goto bypass_end_of_INChI_plain; } bond_char = *p ++; /* bond parity */ switch( *p ) { case '-': bond_parity = INCHI_PARITY_ODD; p ++; break; case '+': bond_parity = INCHI_PARITY_EVEN; p ++; break; case 'u': bond_parity = INCHI_PARITY_UNKNOWN; p ++; break; case '?': bond_parity = INCHI_PARITY_UNDEFINED; p ++; break; default: bond_parity = 0; break; } if ( bond_parity ) { switch( *p ) { case '-': bond_parityNM = INCHI_PARITY_ODD; p ++; break; case '+': bond_parityNM = INCHI_PARITY_EVEN; p ++; break; case 
'u': bond_parityNM = INCHI_PARITY_UNKNOWN; p ++; break; case '?': bond_parityNM = INCHI_PARITY_UNDEFINED; p ++; break; default: bond_parityNM = 0; break; } } else { bond_parityNM = 0; } /* neighbor of the current atom */ if ( !isdigit( UCINT *p ) ) { num_atoms = INCHI_INP_ERROR_RET; /* error */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong bonds data"); goto bypass_end_of_INChI_plain; } neigh = (int)strtol( p, &q, 10 )-1; if ( i >= num_atoms || neigh >= num_atoms ) { num_atoms = INCHI_INP_ERROR_RET; /* error */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom"); goto bypass_end_of_INChI_plain; } p = q; bond_stereo1 = bond_stereo2 = 0; /* bond type & 2D stereo */ switch( bond_char ) { case 'v': bond_type = INCHI_BOND_TYPE_SINGLE; bond_stereo1 = INCHI_BOND_STEREO_SINGLE_1EITHER; bond_stereo2 = INCHI_BOND_STEREO_SINGLE_2EITHER; break; case 'V': bond_type = INCHI_BOND_TYPE_SINGLE; bond_stereo1 = INCHI_BOND_STEREO_SINGLE_2EITHER; bond_stereo2 = INCHI_BOND_STEREO_SINGLE_1EITHER; break; case 'w': bond_type = INCHI_BOND_TYPE_DOUBLE; bond_stereo1 = bond_stereo2 = INCHI_BOND_STEREO_DOUBLE_EITHER; break; case 's': bond_type = INCHI_BOND_TYPE_SINGLE; break; case 'd': bond_type = INCHI_BOND_TYPE_DOUBLE; break; case 't': bond_type = INCHI_BOND_TYPE_TRIPLE; break; case 'a': bond_type = INCHI_BOND_TYPE_ALTERN; break; case 'p': bond_type = INCHI_BOND_TYPE_SINGLE; bond_stereo1 = INCHI_BOND_STEREO_SINGLE_1UP; bond_stereo2 = INCHI_BOND_STEREO_SINGLE_2UP; break; case 'P': bond_type = INCHI_BOND_TYPE_SINGLE; bond_stereo1 = INCHI_BOND_STEREO_SINGLE_2UP; bond_stereo2 = INCHI_BOND_STEREO_SINGLE_1UP; break; case 'n': bond_type = INCHI_BOND_TYPE_SINGLE; bond_stereo1 = INCHI_BOND_STEREO_SINGLE_1DOWN; bond_stereo2 = INCHI_BOND_STEREO_SINGLE_2DOWN; break; case 'N': bond_type = INCHI_BOND_TYPE_SINGLE; bond_stereo1 = INCHI_BOND_STEREO_SINGLE_2DOWN; bond_stereo2 = INCHI_BOND_STEREO_SINGLE_1DOWN; break; default: num_atoms = INCHI_INP_ERROR_RET; /* error 
*/ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong bond type"); goto bypass_end_of_INChI_plain; } k = AT_NUM_BONDS(atom[i]) ++; atom[i].bond_type[k] = bond_type; atom[i].bond_stereo[k] = bond_stereo1; atom[i].neighbor[k] = (ATOM_NUMBER)neigh; k2 = AT_NUM_BONDS(atom[neigh]) ++; atom[neigh].bond_type[k2] = bond_type; atom[neigh].bond_stereo[k2] = bond_stereo2; atom[neigh].neighbor[k2] = (ATOM_NUMBER)i; bond_parity |= (bond_parityNM << SB_PARITY_SHFT); if ( bond_parity ) { if ( max_len_stereo0D <= len_stereo0D ) { /* realloc atom_Stereo0D */ inchi_Stereo0D *new_atom_stereo0D = CreateInchi_Stereo0D( max_len_stereo0D+num_atoms ); if ( !new_atom_stereo0D ) { num_atoms = INCHI_INP_FATAL_RET; /* fatal error: cannot allocate */ *err = INCHI_INP_FATAL_ERR; MOLFILE_ERR_SET (*err, 0, "Out of RAM"); goto bypass_end_of_INChI_plain; } memcpy( new_atom_stereo0D, atom_stereo0D, len_stereo0D * sizeof(*atom_stereo0D) ); FreeInchi_Stereo0D( &atom_stereo0D ); atom_stereo0D = new_atom_stereo0D; max_len_stereo0D += num_atoms; } /* (a) i may be allene endpoint and neigh = allene middle point or (b) i may be allene middle point and neigh = allene endpoint !!!!! CURRENTLY ONLY (b) IS ALLOWED !!!!! 
*/ atom_stereo0D[len_stereo0D].neighbor[1] = neigh; /* neigh < i */ atom_stereo0D[len_stereo0D].neighbor[2] = i; atom_stereo0D[len_stereo0D].parity = bond_parity; atom_stereo0D[len_stereo0D].type = INCHI_StereoType_DoubleBond; /* incl allenes & cumulenes */ len_stereo0D ++; } } if ( !bItemIsOver || i != num_atoms || s && p != s ) { num_atoms = INCHI_INP_ERROR_RET; /* error */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong number of bonds"); goto bypass_end_of_INChI_plain; } } /***************** search for coordinates block (plain) **********************/ /*p = szLine;*/ sToken = sStructHdrPlnRevXYZ; lToken = sizeof(sStructHdrPlnRevXYZ)-1; /* search for sToken in the line; load next segments of the line if sToken has not found */ p = FindToken( inp_molfile, &bTooLongLine, sToken, lToken, szLine, sizeof(szLine), p, &res ); if ( !p ) { num_atoms = INCHI_INP_ERROR_RET; /* error */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Missing atom coordinates data"); goto bypass_end_of_INChI_plain; } else { /* coordinates block started */ if ( pszCoord = (MOL_COORD*)inchi_malloc(inchi_max(num_atoms,1) * sizeof(MOL_COORD)) ) { memset( pszCoord, ' ', inchi_max(num_atoms,1) * sizeof(MOL_COORD)); } else { num_atoms = INCHI_INP_FATAL_RET; /* allocation error */ *err = INCHI_INP_FATAL_ERR; MOLFILE_ERR_SET (*err, 0, "Out of RAM"); goto bypass_end_of_INChI_plain; } i = 0; res2 = bTooLongLine2 = -1; bItemIsOver = (s = strchr( p, '/') ) || !bTooLongLine; while ( i < num_atoms ) { p = LoadLine( inp_molfile, &bTooLongLine, &bItemIsOver, &s, szLine, sizeof(szLine), INCHI_LINE_ADD, p, &res ); if ( i >= num_atoms || s && p >= s ) { break; /* end of bonds (plain) */ } /* coord, first char */ if ( *p == ';' ) { for ( k = 0; k < NUM_COORD; k ++ ) { pszCoord[i][LEN_COORD*k + 4] = '0'; } p ++; i ++; continue; } for ( k = 0; k < 3; k ++ ) { double xyz; bNonZeroXYZ = 0; if ( *p == ';' ) { pszCoord[i][LEN_COORD*k + 4] = '0'; xyz = 0.0; } else if ( *p == ',' ) { /* empty */ 
pszCoord[i][LEN_COORD*k + 4] = '0'; xyz = 0.0; p ++; } else { xyz = strtod( p, &q ); bNonZeroXYZ = fabs(xyz) > MIN_BOND_LENGTH; if ( q != NULL ) { memcpy( pszCoord[i]+LEN_COORD*k, p, q-p ); if ( *q == ',' ) q ++; p = q; } else { pszCoord[i][LEN_COORD*k + 4] = '0'; } } switch( k ) { case 0: atom[i].x = xyz; b2D |= bNonZeroXYZ; break; case 1: atom[i].y = xyz; b2D |= bNonZeroXYZ; break; case 2: b3D |= bNonZeroXYZ; atom[i].z = xyz; break; } } if ( *p == ';' ) { p ++; /* end of this triple of coordinates */ i ++; } else { num_atoms = INCHI_INP_ERROR_RET; /* error in input data: atoms, bonds & coord must be present together */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong atom coordinates data"); goto bypass_end_of_INChI_plain; } } if ( !bItemIsOver || s && p != s || i != num_atoms ) { num_atoms = INCHI_INP_ERROR_RET; /* error */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong number of coordinates"); goto bypass_end_of_INChI_plain; } } /* end of coordinates */ /* set special valences and implicit H (xml) */ b23D = b2D | b3D; b2D = b3D = 0; if ( at ) { if ( !*at ) { int a1, a2, n1, n2, valence; int chem_bonds_valence; int nX=0, nY=0, nZ=0, nXYZ; *at = atom; /* special valences */ for ( bNonMetal = 0; bNonMetal < 1; bNonMetal ++ ) { for ( a1 = 0; a1 < num_atoms; a1 ++ ) { int num_bond_type[MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE + 1]; #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #else int bHasMetalNeighbor=0; #endif memset( num_bond_type, 0, sizeof(num_bond_type) ); valence = AT_BONDS_VAL(atom, a1); /* save atom valence if available */ AT_BONDS_VAL(atom, a1) = 0; #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #else atom[a1].orig_at_number = a1+1; #endif nX = nY = nZ = 0; for ( n1 = 0; n1 < AT_NUM_BONDS(atom[a1]); n1 ++ ) { bond_type = atom[a1].bond_type[n1] - MIN_INPUT_BOND_TYPE; if ( bond_type < 0 || bond_type > MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE ) { bond_type = 0; MOLFILE_ERR_SET (*err, 0, "Unknown bond type in InChI 
aux assigned as a single bond"); } num_bond_type[ bond_type ] ++; nNumBonds ++; if ( b23D ) { neigh = atom[a1].neighbor[n1]; nX |= (fabs(atom[a1].x - atom[neigh].x) > MIN_BOND_LENGTH); nY |= (fabs(atom[a1].y - atom[neigh].y) > MIN_BOND_LENGTH); nZ |= (fabs(atom[a1].z - atom[neigh].z) > MIN_BOND_LENGTH); } } chem_bonds_valence = 0; for ( n1 = 0; MIN_INPUT_BOND_TYPE + n1 <= 3 && MIN_INPUT_BOND_TYPE + n1 <= MAX_INPUT_BOND_TYPE; n1 ++ ) { chem_bonds_valence += (MIN_INPUT_BOND_TYPE + n1) * num_bond_type[n1]; } if ( MIN_INPUT_BOND_TYPE <= INCHI_BOND_TYPE_ALTERN && INCHI_BOND_TYPE_ALTERN <= MAX_INPUT_BOND_TYPE && ( n2 = num_bond_type[INCHI_BOND_TYPE_ALTERN-MIN_INPUT_BOND_TYPE] ) ) { /* accept input aromatic bonds for now */ switch ( n2 ) { case 2: chem_bonds_valence += 3; /* =A- */ break; case 3: chem_bonds_valence += 4; /* =A< */ break; default: /* if 1 or >= 4 aromatic bonds then replace such bonds with single bonds */ for ( n1 = 0; n1 < AT_NUM_BONDS(atom[a1]); n1 ++ ) { if ( atom[a1].bond_type[n1] == INCHI_BOND_TYPE_ALTERN ) { ATOM_NUMBER *p1; a2 = atom[a1].neighbor[n1]; p1 = IN_NEIGH_LIST( atom[a2].neighbor, (ATOM_NUMBER)a1, AT_NUM_BONDS(atom[a2]) ); if ( p1 ) { atom[a1].bond_type[n1] = atom[a2].bond_type[p1-atom[a2].neighbor] = INCHI_BOND_TYPE_SINGLE; } else { *err = -2; /* Program error */ MOLFILE_ERR_SET (*err, 0, "Program error interpreting InChI aux"); num_atoms = 0; goto bypass_end_of_INChI_plain; /* no structure */ } } } chem_bonds_valence += n2; *err |= 32; /* Unrecognized aromatic bond(s) replaced with single */ MOLFILE_ERR_SET (*err, 0, "Atom has 1 or more than 3 aromatic bonds"); break; } } #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) /************************************************************************************* * * Set number of hydrogen atoms */ { int num_iso_H; num_iso_H = atom[a1].num_iso_H[1] + atom[a1].num_iso_H[2] + atom[a1].num_iso_H[3]; if ( valence == ISOLATED_ATOM ) { atom[a1].num_iso_H[0] = 0; } else if ( valence && valence >= 
chem_bonds_valence ) { atom[a1].num_iso_H[0] = valence - chem_bonds_valence; } else if ( valence || bDoNotAddH ) { atom[a1].num_iso_H[0] = 0; } else if ( !bDoNotAddH ) { atom[a1].num_iso_H[0] = -1; /* auto add H */ } } #else /* added 2006-07-19 to process aromatic bonds same way as from molfile */ if ( n2 && !valence ) { int num_H = NUMH(atom, a1); /* only isotopic */ int chem_valence = chem_bonds_valence; int bUnusualValenceArom = detect_unusual_el_valence( (int)atom[a1].el_number, atom[a1].charge, atom[a1].radical, chem_valence, num_H, atom[a1].valence ); int bUnusualValenceNoArom = detect_unusual_el_valence( (int)atom[a1].el_number, atom[a1].charge, atom[a1].radical, chem_valence-1, num_H, atom[a1].valence ); #if ( CHECK_AROMBOND2ALT == 1 ) if ( bUnusualValenceArom && !bUnusualValenceNoArom && 0 == nBondsValToMetal( atom, a1) ) #else if ( bUnusualValenceArom && !bUnusualValenceNoArom ) #endif { /* typically NH in 5-member aromatic ring */ chem_bonds_valence --; } } else if ( n2 && valence ) { /* atom has aromatic bonds AND the chemical valence is known */ int num_H = NUMH(atom, a1); int chem_valence = chem_bonds_valence + num_H; if ( valence == chem_valence-1 ) { /* typically NH in 5-member aromatic ring */ chem_bonds_valence --; } } atom[a1].chem_bonds_valence = chem_bonds_valence; atom[a1].num_H = get_num_H( atom[a1].elname, atom[a1].num_H, atom[a1].num_iso_H, atom[a1].charge, atom[a1].radical, atom[a1].chem_bonds_valence, valence, 0, bDoNotAddH, bHasMetalNeighbor ); #endif } } nNumBonds /= 2; if ( b23D && nNumBonds ) { nXYZ = nX+nY+nZ; b2D = (nXYZ > 0); b3D = (nXYZ == 3); *num_dimensions = b3D? 3 : b2D? 
2 : 0; *num_bonds = nNumBonds; } /*======= 0D parities =================================*/ #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) if ( len_stereo0D > 0 && atom_stereo0D && stereo0D ) { *stereo0D = atom_stereo0D; *num_stereo0D = len_stereo0D; } else { FreeInchi_Stereo0D( &atom_stereo0D ); *num_stereo0D = len_stereo0D = 0; } #endif for ( i = 0; i < len_stereo0D; i ++ ) { ATOM_NUMBER *p1, *p2; int sb_ord_from_a1 = -1, sb_ord_from_a2 = -1, bEnd1 = 0, bEnd2 = 0; switch( atom_stereo0D[i].type ) { case INCHI_StereoType_Tetrahedral: a1 = atom_stereo0D[i].central_atom; if ( atom_stereo0D[i].parity && (AT_NUM_BONDS(atom[a1]) == 3 || AT_NUM_BONDS(atom[a1]) == 4) ) { int ii, kk = 0; if ( AT_NUM_BONDS(atom[a1]) == 3 ) { atom_stereo0D[i].neighbor[kk++] = a1; } for ( ii = 0; ii < AT_NUM_BONDS(atom[a1]); ii ++ ) { atom_stereo0D[i].neighbor[kk++] = atom[a1].neighbor[ii]; } } break; case INCHI_StereoType_DoubleBond: #define MAX_CHAIN_LEN 20 a1 = atom_stereo0D[i].neighbor[1]; a2 = atom_stereo0D[i].neighbor[2]; p1 = IN_NEIGH_LIST( atom[a1].neighbor, (ATOM_NUMBER)a2, AT_NUM_BONDS(atom[a1]) ); p2 = IN_NEIGH_LIST( atom[a2].neighbor, (ATOM_NUMBER)a1, AT_NUM_BONDS(atom[a2]) ); if ( !p1 || !p2 ) { atom_stereo0D[i].type = INCHI_StereoType_None; atom_stereo0D[i].central_atom = NO_ATOM; atom_stereo0D[i].neighbor[0] = atom_stereo0D[i].neighbor[3] = -1; *err |= 64; /* Error in cumulene stereo */ MOLFILE_ERR_SET (*err, 0, "0D stereobond not recognized"); break; } /* streobond, allene, or cumulene */ sb_ord_from_a1 = p1 - atom[a1].neighbor; sb_ord_from_a2 = p2 - atom[a2].neighbor; if ( AT_NUM_BONDS(atom[a1]) == 2 && atom[a1].bond_type[0] + atom[a1].bond_type[1] == 2*INCHI_BOND_TYPE_DOUBLE && 0 == inchi_NUMH2(atom, a1) && (AT_NUM_BONDS(atom[a2]) != 2 || atom[a2].bond_type[0] + atom[a2].bond_type[1] != 2*INCHI_BOND_TYPE_DOUBLE ) ) { bEnd2 = 1; /* a2 is the end-atom, a1 is middle atom */ } if ( AT_NUM_BONDS(atom[a2]) == 2 && atom[a2].bond_type[0] + atom[a2].bond_type[1] == 
2*INCHI_BOND_TYPE_DOUBLE && 0 == inchi_NUMH2(atom, a2) && (AT_NUM_BONDS(atom[a1]) != 2 || atom[a1].bond_type[0] + atom[a1].bond_type[1] != 2*INCHI_BOND_TYPE_DOUBLE ) ) { bEnd1 = 1; /* a1 is the end-atom, a2 is middle atom */ } if ( bEnd2 + bEnd1 == 1 ) { /* allene or cumulene */ ATOM_NUMBER chain[MAX_CHAIN_LEN+1], prev, cur, next; if ( bEnd2 && !bEnd1 ) { cur = a1; a1 = a2; a2 = cur; sb_ord_from_a1 = sb_ord_from_a2; } sb_ord_from_a2 = -1; cur = a1; next = a2; len = 0; chain[len++] = cur; chain[len++] = next; while ( len < MAX_CHAIN_LEN ) { /* arbitrary very high upper limit to prevent infinite loop */ prev = cur; cur = next; /* follow double bond path && avoid going back */ if ( AT_NUM_BONDS(atom[cur]) == 2 && atom[cur].bond_type[0]+atom[cur].bond_type[1] == 2*INCHI_BOND_TYPE_DOUBLE && 0 == inchi_NUMH2(atom, cur) ) { next = atom[cur].neighbor[atom[cur].neighbor[0] == prev]; chain[len++] = next; } else { break; } } if ( len > 2 && (p2 = IN_NEIGH_LIST( atom[cur].neighbor, (ATOM_NUMBER)prev, AT_NUM_BONDS(atom[cur]))) ) { sb_ord_from_a2 = p2 - atom[cur].neighbor; a2 = cur; /* by design we need to pick up the first non-stereo-bond-neighbor as "sn"-atom */ atom_stereo0D[i].neighbor[0] = atom[a1].neighbor[sb_ord_from_a1 == 0]; atom_stereo0D[i].neighbor[1] = a1; atom_stereo0D[i].neighbor[2] = a2; atom_stereo0D[i].neighbor[3] = atom[a2].neighbor[sb_ord_from_a2 == 0]; if ( len % 2 ) { atom_stereo0D[i].central_atom = chain[len/2]; atom_stereo0D[i].type = INCHI_StereoType_Allene; } else { atom_stereo0D[i].central_atom = NO_ATOM; } } else { /* error */ atom_stereo0D[i].type = INCHI_StereoType_None; atom_stereo0D[i].central_atom = NO_ATOM; atom_stereo0D[i].neighbor[0] = atom_stereo0D[i].neighbor[3] = -1; *err |= 64; /* Error in cumulene stereo */ MOLFILE_ERR_SET (*err, 0, "Cumulene stereo not recognized (0D)"); } #undef MAX_CHAIN_LEN } else { /****** a normal possibly stereogenic bond -- not an allene or cumulene *******/ /* by design we need to pick up the first 
non-stereo-bond-neighbor as "sn"-atom */ sb_ord_from_a1 = p1 - atom[a1].neighbor; sb_ord_from_a2 = p2 - atom[a2].neighbor; atom_stereo0D[i].neighbor[0] = atom[a1].neighbor[p1 == atom[a1].neighbor]; atom_stereo0D[i].neighbor[3] = atom[a2].neighbor[p2 == atom[a2].neighbor]; atom_stereo0D[i].central_atom = NO_ATOM; } if ( atom_stereo0D[i].type != INCHI_StereoType_None && sb_ord_from_a1 >= 0 && sb_ord_from_a2 >= 0 && ATOM_PARITY_WELL_DEF( SB_PARITY_2(atom_stereo0D[i].parity) ) ) { /* Detected well-defined disconnected stereo * locate first non-metal neighbors */ int a, n, j, /* k,*/ sb_ord, cur_neigh, min_neigh; for ( k = 0; k < 2; k ++ ) { a = k? atom_stereo0D[i].neighbor[2] : atom_stereo0D[i].neighbor[1]; sb_ord = k? sb_ord_from_a2 : sb_ord_from_a1; min_neigh = num_atoms; for ( n = j = 0; j < AT_NUM_BONDS(atom[a]); j ++ ) { cur_neigh = atom[a].neighbor[j]; if ( j != sb_ord && !IS_METAL_ATOM(atom, cur_neigh) ) { min_neigh = inchi_min( cur_neigh, min_neigh ); } } if ( min_neigh < num_atoms ) { atom_stereo0D[i].neighbor[k?3:0] = min_neigh; } else { MOLFILE_ERR_SET (*err, 0, "Cannot find non-metal stereobond neighor (0D)"); } } } break; } } /* end of 0D parities extraction */ /*exit_cycle:;*/ } #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #else /* transfer atom_stereo0D[] to atom[] */ if ( len_stereo0D ) { Extract0DParities( atom, num_atoms, atom_stereo0D, len_stereo0D, pStrErr, err ); } #endif if ( pInpAtomFlags ) { /* save chirality flag */ *pInpAtomFlags |= InpAtomFlags; } } else if ( atom ) { inchi_free( atom ); atom = NULL; } #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #else #if( FIX_READ_AUX_MEM_LEAK == 1 ) /* 2005-08-04 avoid memory leak */ if ( atom_stereo0D && !(stereo0D && *stereo0D == atom_stereo0D) ) { FreeInchi_Stereo0D( &atom_stereo0D ); } #endif if ( szCoord ) { *szCoord = pszCoord; pszCoord = NULL; } else #endif if ( pszCoord ) { inchi_free( pszCoord ); pszCoord = NULL; } goto bypass_end_of_INChI_plain; /*return num_atoms;*/ } } if ( 
atom_stereo0D ) { FreeInchi_Stereo0D( &atom_stereo0D ); } /* end of struct. reading cycle */ if ( res <= 0 ) { if ( *err == INCHI_INP_ERROR_ERR ) { return num_atoms; } *err = INCHI_INP_EOF_ERR; return INCHI_INP_EOF_RET; /* no more data */ } bypass_end_of_INChI_plain: /* cleanup */ if ( num_atoms == INCHI_INP_ERROR_RET && atom_stereo0D ) { if ( stereo0D && *stereo0D == atom_stereo0D ) { *stereo0D = NULL; *num_stereo0D = 0; } FreeInchi_Stereo0D( &atom_stereo0D ); } while ( bTooLongLine && 0 < my_fgetsTab1( szLine, sizeof(szLine)-1, inp_molfile, &bTooLongLine ) ) { ; } #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) /* cleanup */ if ( !*at ) { if ( atom ) { inchi_free( atom ); atom = NULL; } if ( pszCoord ) { inchi_free( pszCoord ); pszCoord = NULL; } } #endif return num_atoms; } /***********************************************************/ /* extract reversibility info from xml text INChI format */ /* */ /* OBSOLETE CODE because InChI output in XML */ /* does not exist anymore. Unsupported. 
*/ /* */ /***********************************************************/ if ( nInputType == INPUT_INCHI_XML ) { /* xml tags */ static const char sStructHdrXml[] = " max_num_at ) { inchi_free( *at ); *at = NULL; } else { memset( *at, 0, max_num_at * sizeof( **at ) ); atom = *at; } } if ( !at || !*at ) { atom = Create_Atom( num_atoms+1 ); if ( !atom ) { num_atoms = INCHI_INP_FATAL_RET; /* fatal error: cannot allocate */ *err = INCHI_INP_FATAL_ERR; MOLFILE_ERR_SET (*err, 0, "Out of RAM"); goto bypass_end_of_INChI; } } if ( stereo0D && *stereo0D ) { if ( num_atoms > max_len_stereo0D ) { FreeInchi_Stereo0D( stereo0D ); } else { memset( *stereo0D, 0, max_len_stereo0D * sizeof( **stereo0D ) ); atom_stereo0D = *stereo0D; } } if ( !stereo0D || !*stereo0D ) { max_len_stereo0D = num_atoms+1; atom_stereo0D = CreateInchi_Stereo0D( max_len_stereo0D ); if ( !atom_stereo0D ) { num_atoms = INCHI_INP_FATAL_RET; /* fatal error: cannot allocate */ *err = INCHI_INP_FATAL_ERR; MOLFILE_ERR_SET (*err, 0, "Out of RAM"); goto bypass_end_of_INChI; } } i = 0; bItemIsOver = 0; res2 = bTooLongLine2 = -1; /* read all atoms xml */ while ( i < num_atoms ) { pos = p - szLine; if ( !bItemIsOver && (int)sizeof(szLine)-res + pos > (int)sizeof(szNextLine) ) { /* load next line if possible */ res2 = my_fgets( szNextLine, sizeof(szNextLine)-1, inp_molfile, &bTooLongLine2 ); if ( res2 > 0 && memcmp(szNextLine, sStructRevXmlRevAtEnd, sizeof(sStructRevXmlRevAtEnd)-1) ) { if ( pos ) { res -= pos; /* number of chars left to process in szLine */ memmove( szLine, p, res*sizeof(szLine[0]) ); /* move them to the start of the line */ } memcpy( szLine+res, szNextLine, (res2+1)*sizeof(szNextLine[0]) ); res += res2; szLine[res] = '\0'; bTooLongLine = bTooLongLine2; p = szLine; } else { bItemIsOver = 1; } } /* element, first char */ if ( !isalpha( UCINT *p ) || !isupper( UCINT *p ) || i >= num_atoms ) { bHeaderRead = 0; /* wrong atom data */ num_atoms = INCHI_INP_ERROR_RET; /* was 0, error */ *err = INCHI_INP_ERROR_ERR; 
/* 40 */ MOLFILE_ERR_SET (*err, 0, "Wrong atoms data"); goto bypass_end_of_INChI; } atom[i].elname[0] = *p ++; /* element, second char */ if ( isalpha( UCINT *p ) && islower( UCINT *p ) ) { atom[i].elname[1] = *p ++; } #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #else atom[i].el_number = get_periodic_table_number( atom[i].elname ); #endif /* bonds' valence */ if ( isdigit( UCINT *p ) ) { AT_BONDS_VAL(atom,i) = (char)strtol( p, &q, 10 ); if ( !AT_BONDS_VAL(atom,i) ) AT_BONDS_VAL(atom,i) = ISOLATED_ATOM; /* same convention as in MOLfile, found zero bonds valence */ p = q; } /* charge */ atom[i].charge = (*p == '+')? 1 : (*p == '-')? -1 : 0; if ( atom[i].charge ) { p ++; if ( isdigit( UCINT *p ) ) { atom[i].charge *= (S_CHAR)(strtol( p, &q, 10 ) & CHAR_MASK); p = q; } } /* radical */ if ( *p == '.' ) { p ++; if ( isdigit( UCINT *p ) ) { atom[i].radical = (S_CHAR)strtol( p, &q, 10 ); p = q; } } /* isotopic mass */ if ( *p == 'i' ) { p ++; if ( isdigit( UCINT *p ) ) { int mw = strtol( p, &q, 10 ); p = q; #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) atom[i].isotopic_mass = mw; #else mw -= get_atw_from_elnum( atom[i].el_number ); if ( mw >= 0 ) mw ++; atom[i].iso_atw_diff = mw; #endif } } /* parity */ switch( *p ) { case 'o': parity = INCHI_PARITY_ODD; p ++; break; case 'e': parity = INCHI_PARITY_EVEN; p ++; break; case 'u': parity = INCHI_PARITY_UNKNOWN; p ++; break; case '?': parity = INCHI_PARITY_UNDEFINED; p ++; break; default: parity = 0; break; } if ( parity ) { atom_stereo0D[len_stereo0D].central_atom = i; atom_stereo0D[len_stereo0D].parity = parity; atom_stereo0D[len_stereo0D].type = INCHI_StereoType_Tetrahedral; len_stereo0D ++; } /* isotopic h, d, t */ for ( k = 0; k < NUM_H_ISOTOPES; k ++ ) { if ( *p == szIsoH[k] ) { NUM_ISO_Hk(atom,i,k) = 1; p ++; if ( isdigit( UCINT *p ) ) { NUM_ISO_Hk(atom,i,k) = (char)strtol( p, &q, 10 ); p = q; } } } i ++; } if ( !bItemIsOver || p - szLine != res || i != num_atoms ) { num_atoms = INCHI_INP_ERROR_RET; /* 
error */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong number of atoms"); goto bypass_end_of_INChI; } /********************** bonds xml ****************************/ res = my_fgets( szLine, sizeof(szLine)-1, inp_molfile, &bTooLongLine ); if ( res <= 0 ) { num_atoms = 0; /* no data */ goto bypass_end_of_INChI; } if ( memcmp(szLine, sStructRevXmlRevBn, sizeof(sStructRevXmlRevBn)-1) ) { bHeaderRead = 0; /* invalid reversibility info; look for another header */ continue; } /* read (the head of) the xml bonds line */ res = my_fgets( szLine, sizeof(szLine)-1, inp_molfile, &bTooLongLine ); if ( res <= 0 ) { num_atoms = INCHI_INP_ERROR_RET; /* was 0; error: no data -- eof? */ *err = INCHI_INP_ERROR_ERR; goto bypass_end_of_INChI; } i = 1; bItemIsOver = 0; res2 = bTooLongLine2 = -1; p = szLine; if ( !memcmp(szLine, sStructRevXmlRevBnEnd, sizeof(sStructRevXmlRevBnEnd)-1) ) { /* empty bonds section */ res = 0; bItemIsOver = 1; } /* read all bonds (xml), starting from atom 1 (not 0) */ while ( i < num_atoms ) { pos = p - szLine; if ( !bItemIsOver && (int)sizeof(szLine)-res + pos > (int)sizeof(szNextLine) ) { /* load next line if possible */ res2 = my_fgets( szNextLine, sizeof(szNextLine)-1, inp_molfile, &bTooLongLine2 ); if ( res2 > 0 && memcmp(szNextLine, sStructRevXmlRevBnEnd, sizeof(sStructRevXmlRevBnEnd)-1) ) { if ( pos ) { res -= pos; /* number of chars left to process in szLine */ memmove( szLine, p, res*sizeof(szLine[0]) ); /* move them to the start of the line */ } memcpy( szLine+res, szNextLine, (res2+1)*sizeof(szNextLine[0]) ); res += res2; szLine[res] = '\0'; bTooLongLine = bTooLongLine2; p = szLine; } else { bItemIsOver = 1; } } if ( i >= num_atoms ) { break; } /* bond, first char */ if ( *p == ';' ) { p ++; i ++; continue; } if ( !isalpha( UCINT *p ) ) { num_atoms = INCHI_INP_ERROR_RET; /* error in input data */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong bonds data"); goto bypass_end_of_INChI; } bond_char = *p ++; /* bond parity */ 
switch( *p ) { case '-': bond_parity = INCHI_PARITY_ODD; p ++; break; case '+': bond_parity = INCHI_PARITY_EVEN; p ++; break; case 'u': bond_parity = INCHI_PARITY_UNKNOWN; p ++; break; case '?': bond_parity = INCHI_PARITY_UNDEFINED; p ++; break; default: bond_parity = 0; break; } if ( bond_parity ) { switch( *p ) { case '-': bond_parityNM = INCHI_PARITY_ODD; p ++; break; case '+': bond_parityNM = INCHI_PARITY_EVEN; p ++; break; case 'u': bond_parityNM = INCHI_PARITY_UNKNOWN; p ++; break; case '?': bond_parityNM = INCHI_PARITY_UNDEFINED; p ++; break; default: bond_parityNM = 0; break; } } else { bond_parityNM = 0; } /* neighbor of the current atom */ if ( !isdigit( UCINT *p ) ) { num_atoms = INCHI_INP_ERROR_RET; /* error in input data */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong bonds data"); goto bypass_end_of_INChI; } neigh = (int)strtol( p, &q, 10 )-1; if ( i >= num_atoms || neigh >= num_atoms ) { num_atoms = INCHI_INP_ERROR_RET; /* error in input data */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom"); goto bypass_end_of_INChI; } p = q; bond_stereo1 = bond_stereo2 = 0; /* bond type & 2D stereo */ switch( bond_char ) { case 'v': bond_type = INCHI_BOND_TYPE_SINGLE; bond_stereo1 = INCHI_BOND_STEREO_SINGLE_1EITHER; bond_stereo2 = INCHI_BOND_STEREO_SINGLE_2EITHER; break; case 'V': bond_type = INCHI_BOND_TYPE_SINGLE; bond_stereo1 = INCHI_BOND_STEREO_SINGLE_2EITHER; bond_stereo2 = INCHI_BOND_STEREO_SINGLE_1EITHER; break; case 'w': bond_type = INCHI_BOND_TYPE_DOUBLE; bond_stereo1 = bond_stereo2 = INCHI_BOND_STEREO_DOUBLE_EITHER; break; case 's': bond_type = INCHI_BOND_TYPE_SINGLE; break; case 'd': bond_type = INCHI_BOND_TYPE_DOUBLE; break; case 't': bond_type = INCHI_BOND_TYPE_TRIPLE; break; case 'a': bond_type = INCHI_BOND_TYPE_ALTERN; break; case 'p': bond_type = INCHI_BOND_TYPE_SINGLE; bond_stereo1 = INCHI_BOND_STEREO_SINGLE_1UP; bond_stereo2 = INCHI_BOND_STEREO_SINGLE_2UP; break; case 'P': bond_type = 
INCHI_BOND_TYPE_SINGLE; bond_stereo1 = INCHI_BOND_STEREO_SINGLE_2UP; bond_stereo2 = INCHI_BOND_STEREO_SINGLE_1UP; break; case 'n': bond_type = INCHI_BOND_TYPE_SINGLE; bond_stereo1 = INCHI_BOND_STEREO_SINGLE_1DOWN; bond_stereo2 = INCHI_BOND_STEREO_SINGLE_2DOWN; break; case 'N': bond_type = INCHI_BOND_TYPE_SINGLE; bond_stereo1 = INCHI_BOND_STEREO_SINGLE_2DOWN; bond_stereo2 = INCHI_BOND_STEREO_SINGLE_1DOWN; break; default: num_atoms = INCHI_INP_ERROR_RET; /* error */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong bond type"); goto bypass_end_of_INChI; } k = AT_NUM_BONDS(atom[i]) ++; atom[i].bond_type[k] = bond_type; atom[i].bond_stereo[k] = bond_stereo1; atom[i].neighbor[k] = (ATOM_NUMBER)neigh; k2 = AT_NUM_BONDS(atom[neigh]) ++; atom[neigh].bond_type[k2] = bond_type; atom[neigh].bond_stereo[k2] = bond_stereo2; atom[neigh].neighbor[k2] = (ATOM_NUMBER)i; bond_parity |= (bond_parityNM << SB_PARITY_SHFT); if ( bond_parity ) { if ( max_len_stereo0D <= len_stereo0D ) { /* realloc atom_Stereo0D */ inchi_Stereo0D *new_atom_stereo0D = CreateInchi_Stereo0D( max_len_stereo0D+num_atoms ); if ( !new_atom_stereo0D ) { num_atoms = INCHI_INP_FATAL_RET; /* fatal error: cannot allocate */ *err = INCHI_INP_FATAL_ERR; MOLFILE_ERR_SET (*err, 0, "Out of RAM"); goto bypass_end_of_INChI; } memcpy( new_atom_stereo0D, atom_stereo0D, len_stereo0D * sizeof(*atom_stereo0D) ); FreeInchi_Stereo0D( &atom_stereo0D ); atom_stereo0D = new_atom_stereo0D; max_len_stereo0D += num_atoms; } /* (a) i may be allene endpoint and neigh = allene middle point or (b) i may be allene middle point and neigh = allene endpoint !!!!! CURRENTLY ONLY (b) IS ALLOWED !!!!! 
*/ atom_stereo0D[len_stereo0D].neighbor[1] = neigh; /* neigh < i */ atom_stereo0D[len_stereo0D].neighbor[2] = i; atom_stereo0D[len_stereo0D].parity = bond_parity; atom_stereo0D[len_stereo0D].type = INCHI_StereoType_DoubleBond; /* incl allenes & cumulenes */ len_stereo0D ++; } } if ( !bItemIsOver || p - szLine != res || i != num_atoms ) { num_atoms = INCHI_INP_ERROR_RET; /* error in input data */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong number of bonds"); goto bypass_end_of_INChI; } /********************** coordinates xml ****************************/ if ( pszCoord = (MOL_COORD*)inchi_malloc(inchi_max(num_atoms,1) * sizeof(MOL_COORD)) ) { memset( pszCoord, ' ', inchi_max(num_atoms,1) * sizeof(MOL_COORD)); res = my_fgets( szLine, sizeof(szLine)-1, inp_molfile, &bTooLongLine ); if ( res <= 0 || /* compare the header */ memcmp(szLine, sStructRevXmlRevXYZ, sizeof(sStructRevXmlRevXYZ)-1) || /* read (the head of) the coordinates (xml) line */ 0 >= (res = my_fgets( szLine, sizeof(szLine)-1, inp_molfile, &bTooLongLine ))) { num_atoms = INCHI_INP_ERROR_RET; /* error in input data: atoms, bonds & coord must be present together */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Missing atom coordinates data"); goto bypass_end_of_INChI; } i = 0; bItemIsOver = 0; res2 = bTooLongLine2 = -1; p = szLine; if ( !memcmp(szLine, sStructRevXmlRevXYZEnd, sizeof(sStructRevXmlRevXYZEnd)-1) ) { /* empty bonds section */ res = 0; bItemIsOver = 1; } /* read all coordinates (xml), starting from atom 1 (not 0) */ while ( i < num_atoms ) { pos = p - szLine; if ( !bItemIsOver && (int)sizeof(szLine)-res + pos > (int)sizeof(szNextLine) ) { /* load next line if possible */ res2 = my_fgets( szNextLine, sizeof(szNextLine)-1, inp_molfile, &bTooLongLine2 ); if ( res2 > 0 && memcmp(szNextLine, sStructRevXmlRevXYZEnd, sizeof(sStructRevXmlRevXYZEnd)-1) ) { if ( pos ) { res -= pos; /* number of chars left to process in szLine */ memmove( szLine, p, res*sizeof(szLine[0]) ); /* move 
them to the start of the line */ } memcpy( szLine+res, szNextLine, (res2+1)*sizeof(szNextLine[0]) ); res += res2; szLine[res] = '\0'; bTooLongLine = bTooLongLine2; p = szLine; } else { bItemIsOver = 1; } } /* coord, first char */ if ( *p == ';' ) { for ( k = 0; k < NUM_COORD; k ++ ) { pszCoord[i][LEN_COORD*k + 4] = '0'; } p ++; i ++; continue; } for ( k = 0; k < 3; k ++ ) { double xyz; bNonZeroXYZ = 0; if ( *p == ';' ) { pszCoord[i][LEN_COORD*k + 4] = '0'; xyz = 0.0; } else if ( *p == ',' ) { /* empty */ pszCoord[i][LEN_COORD*k + 4] = '0'; xyz = 0.0; p ++; } else { xyz = strtod( p, &q ); bNonZeroXYZ = fabs(xyz) > MIN_BOND_LENGTH; if ( q != NULL ) { memcpy( pszCoord[i]+LEN_COORD*k, p, q-p ); if ( *q == ',' ) q ++; p = q; } else { pszCoord[i][LEN_COORD*k + 4] = '0'; } } switch( k ) { case 0: atom[i].x = xyz; b2D |= bNonZeroXYZ; break; case 1: atom[i].y = xyz; b2D |= bNonZeroXYZ; break; case 2: b3D |= bNonZeroXYZ; atom[i].z = xyz; break; } } if ( *p == ';' ) { p ++; /* end of this triple of coordinates */ i ++; } else { num_atoms = INCHI_INP_ERROR_RET; /* error in input data: atoms, bonds & coord must be present together */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong atom coordinates data"); goto bypass_end_of_INChI; } } if ( !bItemIsOver || p - szLine != res || i != num_atoms ) { num_atoms = INCHI_INP_ERROR_RET; /* error in input data: atoms, bonds & coord must be present together */ *err = INCHI_INP_ERROR_ERR; MOLFILE_ERR_SET (*err, 0, "Wrong number of coordinates"); goto bypass_end_of_INChI; } } else { /* allocation failed */ num_atoms = INCHI_INP_FATAL_RET; *err = INCHI_INP_FATAL_ERR; MOLFILE_ERR_SET (*err, 0, "Out of RAM"); goto bypass_end_of_INChI; } /* set special valences and implicit H (xml) */ b23D = b2D | b3D; b2D = b3D = 0; if ( at ) { if ( !*at ) { int a1, a2, n1, n2, valence; int chem_bonds_valence; int nX=0, nY=0, nZ=0, nXYZ; *at = atom; /* special valences */ for ( bNonMetal = 0; bNonMetal < 1 /*2*/; bNonMetal ++ ) { for ( a1 = 0; a1 < 
num_atoms; a1 ++ ) { int num_bond_type[MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE + 1]; #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #else int bHasMetalNeighbor=0; #endif memset( num_bond_type, 0, sizeof(num_bond_type) ); valence = AT_BONDS_VAL(atom, a1); /* save atom valence if available */ AT_BONDS_VAL(atom, a1) = 0; #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #else atom[a1].orig_at_number = a1+1; #endif nX = nY = nZ = 0; for ( n1 = 0; n1 < AT_NUM_BONDS(atom[a1]); n1 ++ ) { bond_type = atom[a1].bond_type[n1] - MIN_INPUT_BOND_TYPE; if ( bond_type < 0 || bond_type > MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE ) { bond_type = 0; /* cannot happen */ MOLFILE_ERR_SET (*err, 0, "Unknown bond type in InChI aux assigned as a single bond"); } num_bond_type[ bond_type ] ++; nNumBonds ++; if ( b23D ) { neigh = atom[a1].neighbor[n1]; nX |= (fabs(atom[a1].x - atom[neigh].x) > MIN_BOND_LENGTH); nY |= (fabs(atom[a1].y - atom[neigh].y) > MIN_BOND_LENGTH); nZ |= (fabs(atom[a1].z - atom[neigh].z) > MIN_BOND_LENGTH); } } chem_bonds_valence = 0; for ( n1 = 0; MIN_INPUT_BOND_TYPE + n1 <= 3 && MIN_INPUT_BOND_TYPE + n1 <= MAX_INPUT_BOND_TYPE; n1 ++ ) { chem_bonds_valence += (MIN_INPUT_BOND_TYPE + n1) * num_bond_type[n1]; } if ( MIN_INPUT_BOND_TYPE <= INCHI_BOND_TYPE_ALTERN && INCHI_BOND_TYPE_ALTERN <= MAX_INPUT_BOND_TYPE && ( n2 = num_bond_type[INCHI_BOND_TYPE_ALTERN-MIN_INPUT_BOND_TYPE] ) ) { /* accept input aromatic bonds for now */ switch ( n2 ) { case 2: chem_bonds_valence += 3; /* =A- */ break; case 3: chem_bonds_valence += 4; /* =A< */ break; default: /* if 1 or >= 4 aromatic bonds then replace such bonds with single bonds */ for ( n1 = 0; n1 < AT_NUM_BONDS(atom[a1]); n1 ++ ) { if ( atom[a1].bond_type[n1] == INCHI_BOND_TYPE_ALTERN ) { ATOM_NUMBER *p1; a2 = atom[a1].neighbor[n1]; p1 = IN_NEIGH_LIST( atom[a2].neighbor, (ATOM_NUMBER)a1, AT_NUM_BONDS(atom[a2]) ); if ( p1 ) { atom[a1].bond_type[n1] = atom[a2].bond_type[p1-atom[a2].neighbor] = INCHI_BOND_TYPE_SINGLE; } 
else { *err = -2; /* Program error */ MOLFILE_ERR_SET (*err, 0, "Program error interpreting InChI aux"); num_atoms = 0; goto bypass_end_of_INChI; /* no structure */ } } } chem_bonds_valence += n2; *err |= 32; /* Unrecognized aromatic bond(s) replaced with single */ MOLFILE_ERR_SET (*err, 0, "Atom has 1 or more than 3 aromatic bonds"); break; } } #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) /************************************************************************************* * * Set number of hydrogen atoms */ { int num_iso_H; num_iso_H = atom[a1].num_iso_H[1] + atom[a1].num_iso_H[2] + atom[a1].num_iso_H[3]; if ( valence == ISOLATED_ATOM ) { atom[a1].num_iso_H[0] = 0; } else if ( valence && valence >= chem_bonds_valence ) { atom[a1].num_iso_H[0] = valence - chem_bonds_valence; } else if ( valence || bDoNotAddH ) { atom[a1].num_iso_H[0] = 0; } else if ( !bDoNotAddH ) { atom[a1].num_iso_H[0] = -1; /* auto add H */ } } #else /* added 2006-07-19 to process aromatic bonds same way as from molfile */ if ( n2 && !valence ) { int num_H = NUMH(atom, a1); /* only isotopic */ int chem_valence = chem_bonds_valence; int bUnusualValenceArom = detect_unusual_el_valence( (int)atom[a1].el_number, atom[a1].charge, atom[a1].radical, chem_valence, num_H, atom[a1].valence ); int bUnusualValenceNoArom = detect_unusual_el_valence( (int)atom[a1].el_number, atom[a1].charge, atom[a1].radical, chem_valence-1, num_H, atom[a1].valence ); #if ( CHECK_AROMBOND2ALT == 1 ) if ( bUnusualValenceArom && !bUnusualValenceNoArom && 0 == nBondsValToMetal( atom, a1) ) #else if ( bUnusualValenceArom && !bUnusualValenceNoArom ) #endif { /* typically NH in 5-member aromatic ring */ chem_bonds_valence --; } } else if ( n2 && valence ) { /* atom has aromatic bonds AND the chemical valence is known */ int num_H = NUMH(atom, a1); int chem_valence = chem_bonds_valence + num_H; if ( valence == chem_valence-1 ) { /* typically NH in 5-member aromatic ring */ chem_bonds_valence --; } } 
atom[a1].chem_bonds_valence = chem_bonds_valence; atom[a1].num_H = get_num_H( atom[a1].elname, atom[a1].num_H, atom[a1].num_iso_H, atom[a1].charge, atom[a1].radical, atom[a1].chem_bonds_valence, valence, 0, bDoNotAddH, bHasMetalNeighbor ); #endif } } nNumBonds /= 2; if ( b23D && nNumBonds ) { nXYZ = nX+nY+nZ; b2D = (nXYZ > 0); b3D = (nXYZ == 3); *num_dimensions = b3D? 3 : b2D? 2 : 0; *num_bonds = nNumBonds; } /*======= 0D parities =================================*/ #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) if ( len_stereo0D > 0 && atom_stereo0D && stereo0D ) { *stereo0D = atom_stereo0D; *num_stereo0D = len_stereo0D; } else { FreeInchi_Stereo0D( &atom_stereo0D ); *num_stereo0D = len_stereo0D = 0; } #endif for ( i = 0; i < len_stereo0D; i ++ ) { ATOM_NUMBER *p1, *p2; int sb_ord_from_a1 = -1, sb_ord_from_a2 = -1, bEnd1 = 0, bEnd2 = 0; switch( atom_stereo0D[i].type ) { case INCHI_StereoType_Tetrahedral: a1 = atom_stereo0D[i].central_atom; if ( atom_stereo0D[i].parity && (AT_NUM_BONDS(atom[a1]) == 3 || AT_NUM_BONDS(atom[a1]) == 4) ) { int ii, kk = 0; if ( AT_NUM_BONDS(atom[a1]) == 3 ) { atom_stereo0D[i].neighbor[kk++] = a1; } for ( ii = 0; ii < AT_NUM_BONDS(atom[a1]); ii ++ ) { atom_stereo0D[i].neighbor[kk++] = atom[a1].neighbor[ii]; } } break; case INCHI_StereoType_DoubleBond: #define MAX_CHAIN_LEN 20 a1 = atom_stereo0D[i].neighbor[1]; a2 = atom_stereo0D[i].neighbor[2]; p1 = IN_NEIGH_LIST( atom[a1].neighbor, (ATOM_NUMBER)a2, AT_NUM_BONDS(atom[a1]) ); p2 = IN_NEIGH_LIST( atom[a2].neighbor, (ATOM_NUMBER)a1, AT_NUM_BONDS(atom[a2]) ); if ( !p1 || !p2 ) { atom_stereo0D[i].type = INCHI_StereoType_None; atom_stereo0D[i].central_atom = NO_ATOM; atom_stereo0D[i].neighbor[0] = atom_stereo0D[i].neighbor[3] = -1; *err |= 64; /* Error in cumulene stereo */ MOLFILE_ERR_SET (*err, 0, "0D stereobond not recognized"); break; } /* streobond, allene, or cumulene */ sb_ord_from_a1 = p1 - atom[a1].neighbor; sb_ord_from_a2 = p2 - atom[a2].neighbor; if ( AT_NUM_BONDS(atom[a1]) == 
2 && atom[a1].bond_type[0] + atom[a1].bond_type[1] == 2*INCHI_BOND_TYPE_DOUBLE && 0 == inchi_NUMH2(atom, a1) && (AT_NUM_BONDS(atom[a2]) != 2 || atom[a2].bond_type[0] + atom[a2].bond_type[1] != 2*INCHI_BOND_TYPE_DOUBLE ) ) { bEnd2 = 1; /* a2 is the end-atom, a1 is middle atom */ } if ( AT_NUM_BONDS(atom[a2]) == 2 && atom[a2].bond_type[0] + atom[a2].bond_type[1] == 2*INCHI_BOND_TYPE_DOUBLE && 0 == inchi_NUMH2(atom, a2) && (AT_NUM_BONDS(atom[a1]) != 2 || atom[a1].bond_type[0] + atom[a1].bond_type[1] != 2*INCHI_BOND_TYPE_DOUBLE ) ) { bEnd1 = 1; /* a1 is the end-atom, a2 is middle atom */ } if ( bEnd2 + bEnd1 == 1 ) { /* allene or cumulene */ ATOM_NUMBER chain[MAX_CHAIN_LEN+1], prev, cur, next; if ( bEnd2 && !bEnd1 ) { cur = a1; a1 = a2; a2 = cur; sb_ord_from_a1 = sb_ord_from_a2; } sb_ord_from_a2 = -1; cur = a1; next = a2; len = 0; chain[len++] = cur; chain[len++] = next; while ( len < MAX_CHAIN_LEN ) { /* arbitrary very high upper limit to prevent infinite loop */ prev = cur; cur = next; /* follow double bond path && avoid going back */ if ( AT_NUM_BONDS(atom[cur]) == 2 && atom[cur].bond_type[0]+atom[cur].bond_type[1] == 2*INCHI_BOND_TYPE_DOUBLE && 0 == inchi_NUMH2(atom, cur) ) { next = atom[cur].neighbor[atom[cur].neighbor[0] == prev]; chain[len++] = next; } else { break; } } if ( len > 2 && (p2 = IN_NEIGH_LIST( atom[cur].neighbor, (ATOM_NUMBER)prev, AT_NUM_BONDS(atom[cur]))) ) { sb_ord_from_a2 = p2 - atom[cur].neighbor; a2 = cur; /* by design we need to pick up the first non-stereo-bond-neighbor as "sn"-atom */ atom_stereo0D[i].neighbor[0] = atom[a1].neighbor[sb_ord_from_a1 == 0]; atom_stereo0D[i].neighbor[1] = a1; atom_stereo0D[i].neighbor[2] = a2; atom_stereo0D[i].neighbor[3] = atom[a2].neighbor[sb_ord_from_a2 == 0]; if ( len % 2 ) { atom_stereo0D[i].central_atom = chain[len/2]; atom_stereo0D[i].type = INCHI_StereoType_Allene; } else { atom_stereo0D[i].central_atom = NO_ATOM; } } else { /* error */ atom_stereo0D[i].type = INCHI_StereoType_None; 
atom_stereo0D[i].central_atom = NO_ATOM; atom_stereo0D[i].neighbor[0] = atom_stereo0D[i].neighbor[3] = -1; *err |= 64; /* Error in cumulene stereo */ MOLFILE_ERR_SET (*err, 0, "Cumulene stereo not recognized (0D)"); } #undef MAX_CHAIN_LEN } else { /****** a normal possibly stereogenic bond -- not an allene or cumulene *******/ /* by design we need to pick up the first non-stereo-bond-neighbor as "sn"-atom */ sb_ord_from_a1 = p1 - atom[a1].neighbor; sb_ord_from_a2 = p2 - atom[a2].neighbor; atom_stereo0D[i].neighbor[0] = atom[a1].neighbor[p1 == atom[a1].neighbor]; atom_stereo0D[i].neighbor[3] = atom[a2].neighbor[p2 == atom[a2].neighbor]; atom_stereo0D[i].central_atom = NO_ATOM; } if ( atom_stereo0D[i].type != INCHI_StereoType_None && sb_ord_from_a1 >= 0 && sb_ord_from_a2 >= 0 && ATOM_PARITY_WELL_DEF( SB_PARITY_2(atom_stereo0D[i].parity) ) ) { /* Detected well-defined disconnected stereo * locate first non-metal neighbors */ int a, n, j, /* k,*/ sb_ord, cur_neigh, min_neigh; for ( k = 0; k < 2; k ++ ) { a = k? atom_stereo0D[i].neighbor[2] : atom_stereo0D[i].neighbor[1]; sb_ord = k? 
sb_ord_from_a2 : sb_ord_from_a1; min_neigh = num_atoms; for ( n = j = 0; j < AT_NUM_BONDS(atom[a]); j ++ ) { cur_neigh = atom[a].neighbor[j]; if ( j != sb_ord && !IS_METAL_ATOM(atom, cur_neigh) ) { min_neigh = inchi_min( cur_neigh, min_neigh ); } } if ( min_neigh < num_atoms ) { atom_stereo0D[i].neighbor[k?3:0] = min_neigh; } else { MOLFILE_ERR_SET (*err, 0, "Cannot find non-metal stereobond neighor (0D)"); } } } break; } } /* end of 0D parities extraction */ /*exit_cycle:;*/ } #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #else /* transfer atom_stereo0D[] to atom[] */ if ( len_stereo0D ) { Extract0DParities( atom, num_atoms, atom_stereo0D, len_stereo0D, pStrErr, err ); } #endif if ( pInpAtomFlags ) { /* save chirality flag */ *pInpAtomFlags |= InpAtomFlags; } } else if ( atom ) { inchi_free( atom ); atom = NULL; } #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #else if ( szCoord ) { *szCoord = pszCoord; pszCoord = NULL; } else #endif if ( pszCoord ) { inchi_free( pszCoord ); } goto bypass_end_of_INChI; /*return num_atoms;*/ } } if ( atom_stereo0D ) { FreeInchi_Stereo0D( &atom_stereo0D ); } /* end of struct. reading cycle, code never used? 
*/ if ( res <= 0 ) { if ( *err == INCHI_INP_ERROR_ERR ) { return num_atoms; } *err = INCHI_INP_EOF_ERR; return INCHI_INP_EOF_RET; /* no more data */ } bypass_end_of_INChI: /* cleanup */ if ( num_atoms == INCHI_INP_ERROR_RET && atom_stereo0D ) { if ( stereo0D && *stereo0D == atom_stereo0D ) { *stereo0D = NULL; *num_stereo0D = 0; } FreeInchi_Stereo0D( &atom_stereo0D ); } if ( !memcmp(szLine, sStructHdrXmlEnd, sizeof(sStructHdrXmlEnd)-1) ) num_struct --; if ( !memcmp(szLine, sStructHdrXml, sizeof(sStructHdrXml)-1) ) num_struct ++; while ( num_struct > 0 && 0 < my_fgets( szLine, sizeof(szLine)-1, inp_molfile, &bTooLongLine ) ) { if ( !memcmp(szLine, sStructHdrXmlEnd, sizeof(sStructHdrXmlEnd)-1) ) num_struct --; else if ( !memcmp(szLine, sStructHdrXml, sizeof(sStructHdrXml)-1) ) num_struct ++; } return num_atoms; } return num_atoms; #undef AT_NUM_BONDS #undef ATOM_NUMBER #undef IN_NEIGH_LIST #undef inchi_NUMH2 #if( defined(INCHI_LIBRARY) || defined(INCHI_MAIN) ) #else #undef inchi_Atom #endif #undef AT_NUM_BONDS #undef ATOM_NUMBER #undef IN_NEIGH_LIST #undef inchi_NUMH2 #undef INChITo_Atom #undef MoreParms #undef INPUT_FILE #undef Create_Atom #undef AT_BONDS_VAL #undef ISOLATED_ATOM #undef NUM_ISO_Hk #undef IS_METAL_ATOM } #ifdef INCHI_MAIN /**********************************************************************************/ int INChIToInchi_Input( INCHI_FILE *inp_molfile, inchi_Input *orig_at_data, int bMergeAllInputStructures, int bDoNotAddH, INPUT_TYPE nInputType, char *pSdfLabel, char *pSdfValue, long *lSdfId, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr ) { /* inp_ATOM *at = NULL; */ int num_dimensions_new; int num_inp_bonds_new; int num_inp_atoms_new; int num_inp_0D_new; inchi_Atom *at_new = NULL; inchi_Atom *at_old = NULL; inchi_Stereo0D *stereo0D_new = NULL; inchi_Stereo0D *stereo0D_old = NULL; int nNumAtoms = 0, nNumStereo0D = 0; MOL_COORD *szCoordNew = NULL; MOL_COORD *szCoordOld = NULL; int i, j; if ( pStrErr ) { pStrErr[0] = '\0'; } /*FreeOrigAtData( 
orig_at_data );*/ if ( lSdfId ) *lSdfId = 0; do { at_old = orig_at_data? orig_at_data->atom : NULL; /* save pointer to the previous allocation */ stereo0D_old = orig_at_data? orig_at_data->stereo0D : NULL; szCoordOld = NULL; num_inp_atoms_new = INChIToInchi_Atom( inp_molfile, orig_at_data? &stereo0D_new:NULL, &num_inp_0D_new, bDoNotAddH, nInputType, orig_at_data? &at_new:NULL, MAX_ATOMS, &num_dimensions_new, &num_inp_bonds_new, pSdfLabel, pSdfValue, lSdfId, pInpAtomFlags, err, pStrErr ); if ( num_inp_atoms_new <= 0 && !*err ) { MOLFILE_ERR_SET (*err, 0, "Empty structure"); *err = 98; } else if ( orig_at_data && !num_inp_atoms_new && 10 < *err && *err < 20 && orig_at_data->num_atoms > 0 && bMergeAllInputStructures ) { *err = 0; /* end of file */ break; } else if ( num_inp_atoms_new > 0 && orig_at_data ) { /* merge pOrigDataTmp + orig_at_data => pOrigDataTmp; */ nNumAtoms = num_inp_atoms_new + orig_at_data->num_atoms; nNumStereo0D = num_inp_0D_new + orig_at_data->num_stereo0D; if ( nNumAtoms >= MAX_ATOMS ) { MOLFILE_ERR_SET (*err, 0, "Too many atoms"); *err = 70; orig_at_data->num_atoms = -1; } else if ( !at_old ) { /* the first structure */ orig_at_data->atom = at_new; at_new = NULL; orig_at_data->num_atoms = num_inp_atoms_new; num_inp_atoms_new = 0; orig_at_data->stereo0D = stereo0D_new; stereo0D_new = NULL; orig_at_data->num_stereo0D = num_inp_0D_new; num_inp_0D_new = 0; } else if ( orig_at_data->atom = CreateInchi_Atom( nNumAtoms ) ) { /* switch at_new <--> orig_at_data->at; */ if ( orig_at_data->num_atoms ) { memcpy( orig_at_data->atom, at_old, orig_at_data->num_atoms * sizeof(orig_at_data->atom[0]) ); /* adjust numbering in the newly read structure */ for ( i = 0; i < num_inp_atoms_new; i ++ ) { for ( j = 0; j < at_new[i].num_bonds; j ++ ) { at_new[i].neighbor[j] += orig_at_data->num_atoms; } } } FreeInchi_Atom( &at_old ); /* copy newly read structure */ memcpy( orig_at_data->atom + orig_at_data->num_atoms, at_new, num_inp_atoms_new * 
sizeof(orig_at_data->atom[0]) ); /* cpy newly read 0D stereo */ if ( num_inp_0D_new > 0 && stereo0D_new ) { if ( orig_at_data->stereo0D = CreateInchi_Stereo0D( nNumStereo0D ) ) { memcpy( orig_at_data->stereo0D, stereo0D_old, orig_at_data->num_stereo0D * sizeof(orig_at_data->stereo0D[0]) ); /* adjust numbering in the newly read structure */ for ( i = 0; i < num_inp_0D_new; i ++ ) { if ( stereo0D_new[i].central_atom >= 0 ) { stereo0D_new[i].central_atom += orig_at_data->num_atoms; } for ( j = 0; j < 4; j ++ ) { stereo0D_new[i].neighbor[j] += orig_at_data->num_atoms; } } FreeInchi_Stereo0D( &stereo0D_old ); memcpy( orig_at_data->stereo0D+orig_at_data->num_stereo0D, stereo0D_new, num_inp_0D_new * sizeof(orig_at_data->stereo0D[0]) ); } else { num_inp_0D_new = 0; MOLFILE_ERR_SET (*err, 0, "Out of RAM"); *err = -1; } } else { num_inp_0D_new = 0; } /* update lengths */ orig_at_data->num_atoms += num_inp_atoms_new; orig_at_data->num_stereo0D += num_inp_0D_new; } else { MOLFILE_ERR_SET (*err, 0, "Out of RAM"); *err = -1; } } else if ( num_inp_atoms_new > 0 ) { nNumAtoms += num_inp_atoms_new; } FreeInchi_Atom( &at_new ); num_inp_atoms_new = 0; FreeInchi_Stereo0D( &stereo0D_new ); num_inp_0D_new = 0; } while ( !*err && bMergeAllInputStructures ); /* if ( !*err ) { orig_at_data->num_components = MarkDisconnectedComponents( orig_at_data ); if ( orig_at_data->num_components == 0 ) { MOLFILE_ERR_SET (*err, 0, "No components found"); *err = 99; } if ( orig_at_data->num_components < 0 ) { MOLFILE_ERR_SET (*err, 0, "Too many components"); *err = 99; } } */ if ( szCoordNew ) { inchi_free( szCoordNew ); } if ( at_new ) { inchi_free( at_new ); } /* if ( !*err ) { if ( ReconcileAllCmlBondParities( orig_at_data->atom, orig_at_data->num_atoms ) ) { MOLFILE_ERR_SET (*err, 0, "Cannot reconcile stereobond parities"); if (!orig_at_data->num_atoms) { *err = 1; } } } */ if ( *err ) { FreeInchi_Input( orig_at_data ); } if ( *err && !(10 < *err && *err < 20) && pStrErr && !pStrErr[0] ) { 
MOLFILE_ERR_SET (*err, 0, "Unknown error"); /* */
    }
    return orig_at_data? orig_at_data->num_atoms : nNumAtoms;
}
#endif
/* The statements above are the tail of INChIToInchi_Input(), compiled only under INCHI_MAIN. */

#ifndef INCHI_MAIN

/* NOTE(review): presumably "extr_ct.h" supplies its own definitions of these two macros,
   hence the #undef before the #include - confirm against that header. */
#undef AB_MAX_WELL_DEFINED_PARITY
#undef AB_MIN_WELL_DEFINED_PARITY
#include "extr_ct.h"
/****************************************************************************************/
/*
 * Extract0DParities
 *
 * Transfers the 0D stereo descriptors stereo0D[0..num_stereo0D-1] into the
 * per-atom stereo fields of at[0..nNumAtoms-1]:
 *   - double bond / allene descriptors fill sb_parity[], sb_ord[], sn_ord[] and
 *     sn_orig_at_num[] of both end atoms (first free slot of each);
 *   - tetrahedral descriptors fill p_parity and p_orig_at_num[] of the central atom.
 *
 * Each descriptor is validated first (parity value, atom indices in range,
 * connectivity).  A descriptor that fails validation is reported through
 * MOLFILE_ERR_SET as "Wrong 0D stereo descriptor(s):" followed by "#<1-based index>"
 * and is skipped; processing continues with the next descriptor.
 *
 * After the loop FixUnkn0DStereoBonds() is called and, under INCHI_LIBRARY,
 * ReconcileAllCmlBondParities(); a non-zero result of the latter is appended
 * to pStrErr as text.
 *
 * Returns: always 0.
 */
int Extract0DParities( inp_ATOM *at, int nNumAtoms, inchi_Stereo0D *stereo0D,
                       int num_stereo0D, char *pStrErr, int *err )
{
    if ( stereo0D && num_stereo0D > 0 ) {
        int i0D, a2, k, k_prev, type, j, j1, j2, len, parity, parityNM;
        /* sb_ord_*: position of the stereo bond in the end atom's neighbor list;
           sn_ord_*: position of the reference neighbor (i1n / i2n) in that list; -1 = not found */
        int sb_ord_from_i1, sb_ord_from_i2, sn_ord_from_i1, sn_ord_from_i2;
        /* i1, i2: stereo bond / cumulene end atoms; i1n, i2n: their reference neighbors */
        AT_NUMB i1n, i2n, i1, i2;
        for ( i0D = 0; i0D < num_stereo0D; i0D ++ ) {
            /* low 3 bits: parity; bits 3..5: a second parity value
               (NOTE(review): presumably the one for the metal-disconnected structure - confirm) */
            parity = (stereo0D[i0D].parity & SB_PARITY_MASK);
            parityNM = (stereo0D[i0D].parity & SB_PARITY_FLAG) >> SB_PARITY_SHFT;
            /* reject "no parity" and any value that is not one of the four known parities
               (&& binds tighter than ||) */
            if ( parity == INCHI_PARITY_NONE ||
                 parity != INCHI_PARITY_ODD && parity != INCHI_PARITY_EVEN &&
                 parity != INCHI_PARITY_UNKNOWN && parity != INCHI_PARITY_UNDEFINED ) {
                char szTemp[16];
                sprintf( szTemp, "#%d", i0D+1 );
                MOLFILE_ERR_SET (*err, 0, "Wrong 0D stereo descriptor(s):");
                MOLFILE_ERR_SET (*err, 0, szTemp);
                continue; /* warning */
            }
            type = stereo0D[i0D].type;
            a2 = stereo0D[i0D].central_atom; /* central atom or -1 */
            /* j is both the neighbor loop counter and the status of this descriptor:
               4 = all checks passed so far; anything else (initial -1, early loop exit,
               negative error flags set below) = descriptor rejected */
            j = -1;
            len = 0;
            sb_ord_from_i1 = sb_ord_from_i2 = sn_ord_from_i1 = sn_ord_from_i2 = -1;
            i1n = i2n = i1 = i2 = MAX_ATOMS+1;

            /* tetrahedral / allene need a central atom in range; a double bond needs a2 == NO_ATOM */
            if ( (type == INCHI_StereoType_Tetrahedral ||
                 type == INCHI_StereoType_Allene ) &&
                 0 <= a2 && a2 < nNumAtoms ||
                 type == INCHI_StereoType_DoubleBond &&
                 a2 == NO_ATOM) {
                /* test the quadruplet */
                for ( j = 0, k_prev = -1; j < 4; j ++, k_prev = k ) {
                    k = stereo0D[i0D].neighbor[j];
                    /* out-of-range index, or the same atom twice in a row */
                    if ( k < 0 || k >= nNumAtoms || k_prev == k )
                        break;
                    /* tetrahedral atom connectivity test: an entry equal to the central
                       atom itself is accepted without the adjacency check */
                    if ( type == INCHI_StereoType_Tetrahedral &&
                         k != a2 &&
                         !is_in_the_list( at[a2].neighbor, (AT_NUMB)k, at[a2].valence) ) {
                        break;
                    }
                    /* Double bond, Cumulene and allene are tested in the next if() */
                }
            }
            /* find in the adjacency lists the double bond neighbor that leads to the opposite atom */
            if ( j == 4 && (type == INCHI_StereoType_Allene ||
                            type == INCHI_StereoType_DoubleBond) ) {
                /* p1: i2 within at[i1].neighbor; p2: i1 (or the previous chain atom) within
                   at[i2].neighbor; q1, q2: i1n within at[i1].neighbor, i2n within at[i2].neighbor */
                AT_NUMB *p1 = NULL, *p2 = NULL, *q1 = NULL, *q2 = NULL;
                /* descriptor layout: neighbor[0]=i1n, [1]=i1, [2]=i2, [3]=i2n */
                i1n = (AT_NUMB)stereo0D[i0D].neighbor[0];
                i1 = (AT_NUMB)stereo0D[i0D].neighbor[1];
                i2 = (AT_NUMB)stereo0D[i0D].neighbor[2];
                i2n = (AT_NUMB)stereo0D[i0D].neighbor[3];
                /* find q1 and q2 */
                if ( !(q1 = is_in_the_list( at[i1].neighbor, i1n, at[i1].valence)) ||
                     !(q2 = is_in_the_list( at[i2].neighbor, i2n, at[i2].valence)) ) {
                    j = -2; /* error flag */
                } else
                /* allene or cumulene; follow double bonds from i1 to i2 */
                if ( !(p1 = is_in_the_list( at[i1].neighbor, i2, at[i1].valence)) ) {
                    /* at[i1] and at[i2] are not connected: can be only allene or cumulene */
                    AT_NUMB prev, cur, next;
                    int num_dbond, i, next_ord, half_len;
                    cur = next = i1;
                    len = half_len = 0;
                    while ( len < 20 ) { /* arbitrary very high upper limit to prevent infinite loop */
                        prev = cur;
                        cur = next;
                        for ( i = 0, num_dbond = 0; i < at[cur].valence; i ++ ) {
                            /* follow double bond path && avoid going back */
                            if ( at[cur].bond_type[i] == BOND_TYPE_DOUBLE &&
                                 prev != at[cur].neighbor[i] ) {
                                next = at[cur].neighbor[i];
                                next_ord = i;
                                num_dbond ++;
                            }
                        }
                        /* continue only along an unbranched chain that does not return to i1 */
                        if ( num_dbond == 1 && next != i1 ) {
                            len ++;
                            if ( len == 1 ) {
                                /* first step: the bond leaving i1 */
                                sb_ord_from_i1 = next_ord;
                            }
                            if ( type == INCHI_StereoType_Allene && next == (AT_NUMB)a2 ) {
                                /* number of steps from i1 to the central atom */
                                half_len = len;
                            }
                        } else {
                            break;
                        }
                    }
                    /* accept only if the walk stopped at i2 with no further double bond, took
                       more than one step, and (allene) the central atom is exactly half-way */
                    if ( cur == i2 && prev != cur && 0 == num_dbond && len > 1 &&
                         (p2 = is_in_the_list( at[i2].neighbor, prev, at[i2].valence)) &&
                         (type != INCHI_StereoType_Allene || len == 2*half_len )) {
                        sb_ord_from_i2 = p2 - at[i2].neighbor;
                        sn_ord_from_i1 = q1 - at[i1].neighbor;
                        sn_ord_from_i2 = q2 - at[i2].neighbor;
                    } else {
                        j = -5; /* error flag */
                    }
                } else
                /* allene must have been already processed, otherwise error */
                if ( type == INCHI_StereoType_Allene ) {
                    /* error: atoms #1 and #2 of allene are connected */
                    j = -3; /* error flag */
                } else
                /* double bond only; the bond type is not checked because at the end of the normalization it may happen to be alternating */
                if ( type == INCHI_StereoType_DoubleBond &&
                     (p2 = is_in_the_list( at[i2].neighbor, i1, at[i2].valence) ) ) {
                    sb_ord_from_i1 = p1 - at[i1].neighbor;
                    sb_ord_from_i2 = p2 - at[i2].neighbor;
                    sn_ord_from_i1 = q1 - at[i1].neighbor;
                    sn_ord_from_i2 = q2 - at[i2].neighbor;
                } else {
                    j = -4; /* error flag */
                }
            }
            /* any value other than 4 means the descriptor failed one of the checks above
               (this also covers descriptor types that never entered the neighbor loop) */
            if ( j != 4 ) {
                char szTemp[16];
                sprintf( szTemp, "#%d", i0D+1 );
                MOLFILE_ERR_SET (*err, 0, "Wrong 0D stereo descriptor(s):");
                MOLFILE_ERR_SET (*err, 0, szTemp);
                continue; /* error */
            }

            switch ( type ) {
            case INCHI_StereoType_None:
                continue;
            case INCHI_StereoType_DoubleBond:
            case INCHI_StereoType_Allene:
                /* find the first unused (zero) stereo bond slot on each end atom */
                for ( j1 = 0; j1 < MAX_NUM_STEREO_BONDS && at[i1].sb_parity[j1]; j1 ++ )
                    ;
                for ( j2 = 0; j2 < MAX_NUM_STEREO_BONDS && at[i2].sb_parity[j2]; j2 ++ )
                    ;
                /* if either atom has no free slot the descriptor is dropped without a message */
                if ( j1 < MAX_NUM_STEREO_BONDS && j2 < MAX_NUM_STEREO_BONDS &&
                     sb_ord_from_i1 >= 0 && sb_ord_from_i2 >= 0 &&
                     sn_ord_from_i1 >= 0 && sn_ord_from_i2 >= 0) {

                    /* The bond parity is stored as a pair of values, one per end atom:
                       ODD -> (ODD, EVEN), EVEN -> (ODD, ODD); UNKNOWN/UNDEFINED are copied to both.
                       NOTE(review): presumably the bond parity is recovered from the combination
                       of the two end values - confirm against the consumer of sb_parity[]. */
                    switch( parity ) {
                    case INCHI_PARITY_ODD:
                        at[i1].sb_parity[j1] = AB_PARITY_ODD;
                        at[i2].sb_parity[j2] = AB_PARITY_EVEN;
                        break;
                    case INCHI_PARITY_EVEN:
                        at[i1].sb_parity[j1] = AB_PARITY_ODD;
                        at[i2].sb_parity[j2] = AB_PARITY_ODD;
                        break;
                    case INCHI_PARITY_UNKNOWN:
                        at[i1].sb_parity[j1] = AB_PARITY_UNKN;
                        at[i2].sb_parity[j2] = AB_PARITY_UNKN;
                        break;
                    case INCHI_PARITY_UNDEFINED:
                        at[i1].sb_parity[j1] = AB_PARITY_UNDF;
                        at[i2].sb_parity[j2] = AB_PARITY_UNDF;
                        break;
                    default:
                        at[i1].sb_parity[j1] = AB_PARITY_NONE;
                        at[i2].sb_parity[j2] = AB_PARITY_NONE;
                    }
                    /* same mapping for the second parity value, OR-ed into bits 3..5 */
                    switch( parityNM ) {
                    case INCHI_PARITY_ODD:
                        at[i1].sb_parity[j1] |= AB_PARITY_ODD << SB_PARITY_SHFT;
                        at[i2].sb_parity[j2] |= AB_PARITY_EVEN << SB_PARITY_SHFT;
                        break;
                    case INCHI_PARITY_EVEN:
                        at[i1].sb_parity[j1] |= AB_PARITY_ODD << SB_PARITY_SHFT;
                        at[i2].sb_parity[j2] |= AB_PARITY_ODD << SB_PARITY_SHFT;
                        break;
                    case INCHI_PARITY_UNKNOWN:
                        at[i1].sb_parity[j1] |= AB_PARITY_UNKN << SB_PARITY_SHFT;
                        at[i2].sb_parity[j2] |= AB_PARITY_UNKN << SB_PARITY_SHFT;
                        break;
                    case INCHI_PARITY_UNDEFINED:
                        at[i1].sb_parity[j1] |= AB_PARITY_UNDF << SB_PARITY_SHFT;
                        at[i2].sb_parity[j2] |= AB_PARITY_UNDF << SB_PARITY_SHFT;
                        break;
                    default:
                        break;
                    }
                    /* record bond / reference-neighbor positions and the neighbors' original numbers */
                    at[i1].sb_ord[j1] = sb_ord_from_i1;
                    at[i1].sn_ord[j1] = sn_ord_from_i1;
                    at[i1].sn_orig_at_num[j1] = at[i1n].orig_at_number;

                    at[i2].sb_ord[j2] = sb_ord_from_i2;
                    at[i2].sn_ord[j2] = sn_ord_from_i2;
                    at[i2].sn_orig_at_num[j2] = at[i2n].orig_at_number;
                }
                break;
            case INCHI_StereoType_Tetrahedral:
                switch( parity ) {
                case INCHI_PARITY_ODD:
                    at[a2].p_parity = AB_PARITY_ODD;
                    break;
                case INCHI_PARITY_EVEN:
                    at[a2].p_parity = AB_PARITY_EVEN;
                    break;
                case INCHI_PARITY_UNKNOWN:
                    at[a2].p_parity = AB_PARITY_UNKN;
                    break;
                case INCHI_PARITY_UNDEFINED:
                    at[a2].p_parity = AB_PARITY_UNDF;
                    break;
                default:
                    /* this 'continue' applies to the enclosing for() loop: next descriptor */
                    continue;
                }
                /* store the original numbers of the four neighbors in descriptor order */
                for ( j = 0; j < 4; j ++ ) {
                    k = stereo0D[i0D].neighbor[j];
                    at[a2].p_orig_at_num[j] = at[k].orig_at_number;
                }
                break;
            default:
                break;
            }
        }
        /* take care of Unknown stereobonds: */
        /* copy their Unknown stereo descriptors to at->bond_stereo (2005-03-01) */
        FixUnkn0DStereoBonds(at, nNumAtoms);

#ifdef INCHI_LIBRARY
        /* a non-zero result is reported as text only; neither *err nor the return value changes */
        if ( k = ReconcileAllCmlBondParities( at, nNumAtoms, 0 ) ) {
            char szErrCode[16];
            sprintf( szErrCode, "%d", k);
            AddMOLfileError( pStrErr, "0D Parities Reconciliation failed:" );
            AddMOLfileError( pStrErr, szErrCode );
        }
#endif
    }
    return 0;
}
#endif