/*254:*/ #line 9958 "annoyance-filter.w" #define REVDATE "2004-08-04" #define Xfile string("X-Annoyance-Filter") \ #define pseudoCountsWord " COUNTS " \ #define fastDictionaryVersionNumber 1 #define fastDictionaryVoidLink static_cast (-1) #define fastDictionarySignature "AFfd" #define fastDictionaryFloatingTest (1.0/111) \ #define messageSentinel "From " \ #define ChIx(c) (static_cast ((c) ) &0xFF) \ #define HTMLCommentBegin "" \ #define POP_MAX_MESSAGE 512 #define POP_BUFFER ((POP_MAX_MESSAGE) +2) \ #define Annotate(c) (annotations.test(c) ) \ #line 9959 "annoyance-filter.w" /*232:*/ #line 9066 "annoyance-filter.w" #include "config.h" /*238:*/ #line 9205 "annoyance-filter.w" #ifdef WIN32 #undef HAVE_MMAP #endif /*:238*/ #line 9069 "annoyance-filter.w" /*233:*/ #line 9095 "annoyance-filter.w" #include #include #include #include #include #include #ifdef HAVE_FDSTREAM_COMPATIBILITY #include "fdstream.hpp" #endif #ifdef HAVE_NEW_STRSTREAM #include "mystrstream_new.h" #else #include "mystrstream.h" #endif #include #include #include #include #include #include #include #include #include #include #include using namespace std; /*:233*/ #line 9071 "annoyance-filter.w" /*234:*/ #line 9127 "annoyance-filter.w" #include #include #include #include #include #include /*:234*/ #line 9072 "annoyance-filter.w" /*235:*/ #line 9141 "annoyance-filter.w" #ifdef HAVE_STAT #include #endif #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_DIRENT_H #include #endif #ifdef HAVE_MMAP #include #endif /*:235*/ #line 9073 "annoyance-filter.w" #ifdef WIN32 #define __GNU_LIBRARY__ #undef __GETOPT_H__ #endif #include #include "statlib.h" /*237:*/ #line 9183 "annoyance-filter.w" #if defined(HAVE_GNUPLOT) && defined(HAVE_NETPBM) && defined(HAVE_SYSTEM) #define HAVE_PLOT_UTILITIES #endif #if defined(HAVE_DIRENT_H) && defined(HAVE_STAT) #define HAVE_DIRECTORY_TRAVERSAL #endif #if defined(HAVE_PDFTOTEXT) && defined(HAVE_POPEN) && (defined(HAVE_MKSTEMP) || defined(HAVE_TMPNAM)) #define HAVE_PDF_DECODER #endif /*:237*/ #line 9082 "annoyance-filter.w" /*236:*/ #line 9161 "annoyance-filter.w" #if defined(HAVE_SOCKET) && defined(HAVE_SIGNAL) #define POP3_PROXY_SERVER #endif #ifdef POP3_PROXY_SERVER #include #include #include #include #include #include #include #endif /*:236*/ #line 9084 "annoyance-filter.w" /*:232*/ #line 9961 "annoyance-filter.w" /*226:*/ #line 8926 "annoyance-filter.w" static unsigned int messageCount[2]; static list messageTranscript; static queue parserDiagnostics; static bool saveParserDiagnostics= false; /*:226*//*241:*/ #line 9244 "annoyance-filter.w" static bool verbose= false; #ifdef TYPE_LOG static ofstream typeLog("/tmp/typelog.txt"); #endif static string pDiagFilename= ""; static string transcriptFilename= ""; static bool pTokenTrace= false; static unsigned int maxTokenLength= 64,minTokenLength= 1; static unsigned int streamMaxTokenLength= 64,streamMinTokenLength= 5; static bool singleDictionaryRead= true; static unsigned int phraseMin= 1,phraseMax= 1; static unsigned int phraseLimit= 48; static unsigned int autoPrune= 0; static bool popProxyTrace= false; static bool sloppyheaders= false; /*:241*//*250:*/ #line 9892 "annoyance-filter.w" #define ISOch(x) (static_cast ((x) & 0xFF)) #define isISOspace(x) (isascii(ISOch(x)) && isspace(ISOch(x))) #define isISOalpha(x) ((isoalpha[ISOch(x) / 8] & (0x80 >> (ISOch(x) % 8))) != 0) #define isISOupper(x) ((isoupper[ISOch(x) / 8] & (0x80 >> (ISOch(x) % 8))) != 0) #define isISOlower(x) ((isolower[ISOch(x) / 8] & (0x80 >> (ISOch(x) % 8))) != 0) #define toISOupper(x) (isISOlower(x) ? (isascii(((unsigned char) (x))) ? \ toupper(x) : (((ISOch(x) != 0xDF) && \ (ISOch(x) != 0xFF)) ? \ (ISOch(x) - 0x20) : (x))) : (x)) #define toISOlower(x) (isISOupper(x) ? (isascii(ISOch(x)) ? \ tolower(x) : (ISOch(x) + 0x20)) \ : (x)) /*:250*//*251:*/ #line 9911 "annoyance-filter.w" const unsigned char isoalpha[32]= { 0,0,0,0,0,0,0,0,127,255,255,224,127,255,255,224,0,0,0,0,0,0,0,0,255,255, 254,255,255,255,254,255 }; const unsigned char isoupper[32]= { 0,0,0,0,0,0,0,0,127,255,255,224,0,0,0,0,0,0,0,0,0,0,0,0,255,255,254,254, 0,0,0,0 }; const unsigned char isolower[32]= { 0,0,0,0,0,0,0,0,0,0,0,0,127,255,255,224,0,0,0,0,0,0,0,0,0,0,0,1,255,255, 254,255 }; /*:251*/ #line 9962 "annoyance-filter.w" /*10:*/ #line 1257 "annoyance-filter.w" class dictionaryWord{ public: static const unsigned int nCategories= 2; enum mailCategory{Mail= 0,Junk= 1,Unknown}; string text; unsigned int occurrences[nCategories]; double junkProbability; dictionaryWord(string s= ""){ set(s); } void set(string s= "",unsigned int s_Mail= 0,unsigned int s_Junk= 0, double jProb= -1){ text= s; occurrences[Mail]= s_Mail; occurrences[Junk]= s_Junk; junkProbability= jProb; } string get(void)const{ return text; } unsigned int n_mail(void)const{ return occurrences[Mail]; } unsigned int n_junk(void)const{ return occurrences[Junk]; } unsigned int n_occurrences(void)const{ unsigned int o= 0; for(unsigned int i= 0;i { public: unsigned int memoryRequired; void add(dictionaryWord w,dictionaryWord::mailCategory category); void include(dictionaryWord&w); void exportCSV(ostream&os= cout); void importCSV(istream&is= cin); void computeJunkProbability(unsigned int nMailMessages,unsigned int nJunkMessages, double mailBias= 2,unsigned int minOccurrences= 5); void purge(unsigned int occurrences= 0); void resetCat(dictionaryWord::mailCategory category); void printStatistics(ostream&os= cout)const; #ifdef HAVE_PLOT_UTILITIES void plotProbabilityHistogram(string fileName,unsigned int nBins= 20)const; #endif void exportToBinaryFile(ostream&os); void importFromBinaryFile(istream&is); unsigned int estimateMemoryRequirement(void)const{ return memoryRequired; } dictionary():memoryRequired(0){ } }; /*:19*//*32:*/ #line 2055 "annoyance-filter.w" class fastDictionary{ private: static const u_int16_t byteOrderMark= 0xFEFF; static const u_int16_t doubleSize= sizeof(double); static const u_int16_t versionNumber= fastDictionaryVersionNumber; unsigned char*dblock; u_int32_t totalSize; u_int32_t hashTableOffset; u_int32_t hashTableBuckets; u_int32_t wordTableSize; u_int32_t*hashTable; unsigned char*wordTable; #ifdef HAVE_MMAP char*dp; int fileHandle; long fileLength; #endif void regen(void)const{ cerr<<"You should re-generate the fast dictionary on this machine."< &v, vector ::size_type off, const void*buf,const unsigned int bufl){ const unsigned char*bp= static_cast (buf); for(unsigned int i= 0;i *tlist; mailFolder*mf; public: MIMEdecoder(istream*i= NULL,mailFolder*m= NULL,string pb= "",list *tl= NULL){ set(i,m,pb,tl); resetDecodeErrors(); tlist= NULL; } virtual~MIMEdecoder(){ }; void set(istream*i= NULL,mailFolder*m= NULL, string pb= "",list *tl= NULL){ is= i; mf= m; partBoundary= pb; inputLine= ""; ip= 0; encodedLineCount= 0; lookAhead= false; atEnd= false; eofHit= false; tlist= tl; } virtual string name(void)const= 0; virtual void resetDecodeErrors(void){ nDecodeErrors= 0; } virtual unsigned int getDecodeErrors(void)const{ return nDecodeErrors; } virtual string getTerminatorSentinel(void)const{ return endBoundary; } virtual bool isEndOfFile(void)const{ return eofHit; } virtual unsigned int getEncodedLineCount(void)const{ return encodedLineCount; } virtual int getDecodedChar(void)= 0; virtual bool getDecodedLine(string&s); virtual void saveDecodedStream(ostream&os); virtual void saveDecodedStream(const string fname); protected: virtual bool getNextEncodedLine(void); }; /*:40*//*46:*/ #line 2714 "annoyance-filter.w" class identityMIMEdecoder:public MIMEdecoder{ public: string name(void)const{ return"Identity"; } int getDecodedChar(void){ while(!atEnd){ if(ip decodedBytes; public: base64MIMEdecoder(){ initialiseDecodingTable(); } string name(void)const{ return"Base64"; } int getDecodedChar(void); static string decodeEscapedText(const string s,mailFolder*m= NULL); }; /*:48*//*58:*/ #line 3023 "annoyance-filter.w" class quotedPrintableMIMEdecoder:public MIMEdecoder{ public: quotedPrintableMIMEdecoder(){ atEndOfLine= false; } string name(void)const{ return"Quoted-Printable"; } int getDecodedChar(void); static string decodeEscapedText(const string s,mailFolder*m= NULL); protected: bool atEndOfLine; int getNextChar(void); static int hex_to_nybble(const int ch); }; /*:58*//*68:*/ #line 3287 "annoyance-filter.w" class MBCSdecoder{ protected: const string*src; string::size_type p; mailFolder*mf; public: MBCSdecoder(mailFolder*m= NULL):src(NULL),p(0),mf(NULL){ } virtual~MBCSdecoder(){ } virtual string name(void)= 0; virtual void setSource(const string&s){ src= &s; p= 0; } virtual void setMailFolder(mailFolder*m= NULL){ mf= m; } virtual void reset(void){ } virtual int getNextDecodedChar(void)= 0; virtual int getNextEncodedByte(void){ if(p>=src->length()){ return-1; } return((*src)[p++])&0xFF; } protected: virtual int getNextNBytes(const unsigned int n); virtual int getNext2Bytes(void){ return getNextNBytes(2); } virtual int getNext3Bytes(void){ return getNextNBytes(3); } virtual int getNext4Bytes(void){ return getNextNBytes(4); } virtual void discardLine(void){ p= src->length(); } virtual void reportDecoderDiagnostic(const string s)const; virtual void reportDecoderDiagnostic(const ostringstream&os)const; }; /*:68*//*71:*/ #line 3402 "annoyance-filter.w" class EUC_MBCSdecoder:public MBCSdecoder{ public: virtual string name(void){ return"EUC"; } virtual int getNextDecodedChar(void); }; /*:71*//*73:*/ #line 3463 "annoyance-filter.w" class Big5_MBCSdecoder:public MBCSdecoder{ public: virtual string name(void){ return"Big5"; } virtual int getNextDecodedChar(void); }; /*:73*//*75:*/ #line 3509 "annoyance-filter.w" class Shift_JIS_MBCSdecoder:public MBCSdecoder{ protected: string pending; public: Shift_JIS_MBCSdecoder():pending(""){ } virtual~Shift_JIS_MBCSdecoder(){ } virtual string name(void){ return"Shift_JIS"; } virtual int getNextDecodedChar(void); }; /*:75*//*80:*/ #line 3633 "annoyance-filter.w" class Unicode_MBCSdecoder:public MBCSdecoder{ public: virtual string name(void){ return"Unicode"; } virtual int getNextDecodedChar(void)= 0; }; /*:80*//*81:*/ #line 3651 "annoyance-filter.w" class UCS_2_Unicode_MBCSdecoder:public Unicode_MBCSdecoder{ protected: bool bigEndian; public: UCS_2_Unicode_MBCSdecoder(bool isBigEndian= true){ setBigEndian(isBigEndian); } void setBigEndian(bool isBigEndian= true){ bigEndian= isBigEndian; } virtual string name(void){ return"UCS_2_Unicode"; } virtual int getNextDecodedChar(void); }; /*:81*//*83:*/ #line 3702 "annoyance-filter.w" class UTF_8_Unicode_MBCSdecoder:public Unicode_MBCSdecoder{ public: virtual string name(void){ return"UTF_8_Unicode"; } virtual int getNextDecodedChar(void); }; /*:83*//*85:*/ #line 3786 "annoyance-filter.w" class UTF_16_Unicode_MBCSdecoder:public Unicode_MBCSdecoder{ protected: bool bigEndian; int getNextUTF_16Word(void){ int c1= getNextEncodedByte(); if(c1<0){ return c1; } int c2= getNextEncodedByte(); if(c2<0){ ostringstream os; os<setSource(s); } virtual void setPrefixSuffix(string pre= "",string suf= ""){ prefix= pre; suffix= suf; } virtual string getNextDecodedChar(void); virtual string decodeLine(const string&s); }; /*:88*//*91:*/ #line 3969 "annoyance-filter.w" class GB2312_MBCSinterpreter:public MBCSinterpreter{ public: GB2312_MBCSinterpreter(){ setPrefixSuffix(" "," "); } virtual string name(void){ return"GB2312"; } }; /*:91*//*92:*/ #line 3986 "annoyance-filter.w" class Big5_MBCSinterpreter:public MBCSinterpreter{ public: Big5_MBCSinterpreter(){ setPrefixSuffix(" "," "); } virtual string name(void){ return"Big5"; } }; /*:92*//*93:*/ #line 4003 "annoyance-filter.w" class Shift_JIS_MBCSinterpreter:public MBCSinterpreter{ public: Shift_JIS_MBCSinterpreter(){ setPrefixSuffix(" "," "); } virtual string name(void){ return"Shift_JIS"; } string getNextDecodedChar(void); }; /*:93*//*95:*/ #line 4053 "annoyance-filter.w" class KR_MBCSinterpreter:public MBCSinterpreter{ public: virtual string name(void){ return"KR"; } }; /*:95*//*96:*/ #line 4066 "annoyance-filter.w" class Unicode_MBCSinterpreter:public MBCSinterpreter{ public: Unicode_MBCSinterpreter(){ setPrefixSuffix(" "," "); } virtual string name(void){ return"Unicode"; } string getNextDecodedChar(void); }; /*:96*//*98:*/ #line 4136 "annoyance-filter.w" class applicationStringParser{ protected: bool error,eof; mailFolder*mf; virtual unsigned char get8(void); virtual void get8n(unsigned char*buf,const int n){ for(int i= 0;(!eof)&&(i *> fontMap; map fontGlyphCount; map fontInfoBits; queue strings; bool initialised; bool textOnly; public: flashTextExtractor(mailFolder*f= NULL): flashStream(f),initialised(false),textOnly(false){ } ~flashTextExtractor(){ close(); } virtual string name(void)const{ return"Flash"; } void setTextOnly(const bool tf){ textOnly= tf; } bool getTextOnly(void)const{ return textOnly; } bool nextString(string&s); virtual void close(void){ while(!fontMap.empty()){ delete fontMap.begin()->second; fontMap.erase(fontMap.begin()); } fontGlyphCount.clear(); fontInfoBits.clear(); while(!strings.empty()){ strings.pop(); } initialised= textOnly= false; flashStream::close(); } }; /*:114*//*125:*/ #line 5296 "annoyance-filter.w" #ifdef HAVE_PDF_DECODER class pdfTextExtractor:public applicationStringParser{ protected: bool initialised; #ifdef HAVE_FDSTREAM_COMPATIBILITY fdistream is; #else ifstream is; #endif FILE*ip; #ifdef HAVE_MKSTEMP char tempfn[256]; #else char tempfn[L_tmpnam+2]; #endif public: pdfTextExtractor(mailFolder*f= NULL): applicationStringParser(f), initialised(false), ip(NULL){ } ~pdfTextExtractor(){ close(); } virtual string name(void)const{ return"PDF"; } bool nextString(string&s); virtual void close(void){ if(ip!=NULL){ #ifndef HAVE_FDSTREAM_COMPATIBILITY is.close(); #endif pclose(ip); remove(tempfn); ip= NULL; } applicationStringParser::close(); initialised= false; } }; #endif /*:125*//*129:*/ #line 5436 "annoyance-filter.w" /*132:*/ #line 5824 "annoyance-filter.w" #ifdef HAVE_POPEN #if (defined HAVE_GUNZIP) || (defined HAVE_GZCAT) || (defined HAVE_GZIP) #define COMPRESSED_FILES static const char Compressed_file_type[]= ".gz"; static const char Uncompress_command[]= #if(defined HAVE_GUNZIP) "gunzip -c" #elif(defined HAVE_GZCAT) "gzcat" #elif(defined HAVE_GZIP) "gzip -cd" #endif ; #elif (defined HAVE_ZCAT) || (defined HAVE_UNCOMPRESS) || (defined HAVE_COMPRESS) #define COMPRESSED_FILES static const char Compressed_file_type[]= ".Z"; static const char Uncompress_command[]= #if(defined HAVE_ZCAT) "zcat" #elif(defined HAVE_UNCOMPRESS) "uncompress -c" #elif(defined HAVE_COMPRESS) "compress -cd" #endif ; #endif #endif /*:132*/ #line 5437 "annoyance-filter.w" class mailFolder{ public: istream*is; dictionaryWord::mailCategory category; unsigned int nLines; unsigned int nMessages; bool newMessage; bool expectingNewMessage; bool lastLineBlank; bool BSDfolder; bool inHeader; string lookAheadLine; bool lookedAhead; ifstream isc; #if defined(COMPRESSED_FILES) && defined(HAVE_FDSTREAM_COMPATIBILITY) fdistream iscc; #endif string fromLine; string messageID; string lastFromLine; string lastMessageID; #if defined(COMPRESSED_FILES) || defined(HAVE_DIRECTORY_TRAVERSAL) FILE*ip; #endif #ifdef HAVE_DIRECTORY_TRAVERSAL bool dirFolder; DIR*dh; string dirName,cfName; string pathSeparator; #ifdef HAVE_FDSTREAM_COMPATIBILITY fdistream ifcdir; #endif ifstream ifdir; istringstream nullstream; #endif string bodyContentType; string bodyContentTypeCharset; string bodyContentTypeName; string bodyContentTransferEncoding; string partBoundary; bool multiPart; bool inPartHeader; unsigned int partHeaderLines; stack partBoundaryStack; string mimeContentType; string mimeContentTypeCharset; string mimeContentTypeName; string mimeContentTypeBoundary; string mimeContentTransferEncoding; string mimeContentDispositionFilename; MIMEdecoder*mdp; identityMIMEdecoder imd; base64MIMEdecoder bmd; sinkMIMEdecoder smd; quotedPrintableMIMEdecoder qmd; MBCSinterpreter*mbi; EUC_MBCSdecoder mbd_euc; GB2312_MBCSinterpreter mbi_gb2312; Big5_MBCSdecoder mbd_big5; Big5_MBCSinterpreter mbi_big5; KR_MBCSinterpreter mbi_kr; UTF_8_Unicode_MBCSdecoder mbd_utf_8; Unicode_MBCSinterpreter mbi_unicode; applicationStringParser*asp; flashTextExtractor aspFlash; #ifdef HAVE_PDF_DECODER pdfTextExtractor aspPdf; #endif bool byteStream; list *tlist; list *dlist; mailFolder(istream&i,dictionaryWord::mailCategory cat= dictionaryWord::Unknown){ #if defined(COMPRESSED_FILES) || defined(HAVE_DIRECTORY_TRAVERSAL) ip= NULL; #endif #ifdef HAVE_DIRECTORY_TRAVERSAL dirFolder= false; #endif set(&i,cat); } mailFolder(string fname,dictionaryWord::mailCategory cat= dictionaryWord::Unknown){ #if defined(COMPRESSED_FILES) || defined(HAVE_DIRECTORY_TRAVERSAL) ip= NULL; #endif /*135:*/ #line 5913 "annoyance-filter.w" #ifdef HAVE_DIRECTORY_TRAVERSAL dirFolder= false; struct stat fs; if((stat(fname.c_str(),&fs)==0)&&S_ISDIR(fs.st_mode)){ dh= opendir(fname.c_str()); if(dh!=NULL){ dirFolder= true; dirName= fname; pathSeparator= '/'; if(!findNextFileInDirectory(fname)){ nullstream.str(""); is= &nullstream; }else{ if(verbose){ cerr<<"Processing files from directory \""<< dirName<<"\"."< 0){ int sll= readlink(jname.c_str(),slbuf,(sizeof slbuf)-1); if(sll>=0){ assert(sll (sizeof slbuf)); slbuf[sll]= 0; jname= slbuf; }else{ break; } } if(maxSlinks<=0){ cerr<<"Warning: probable symbolic link loop for \""<< fname<<"\""< *lp){ dlist= lp; } void setTranscriptList(list *lp){ tlist= lp; } unsigned int sizeMessageTranscript(const unsigned int lineOverhead= 1)const; void writeMessageTranscript(ostream&os= cout)const; void writeMessageTranscript(const string fname= "-")const; void clearMessageTranscript(void){ assert(tlist!=NULL); tlist->clear(); } void reportParserDiagnostic(const string s); void reportParserDiagnostic(const ostringstream&os); }; /*:129*//*170:*/ #line 7105 "annoyance-filter.w" class tokenDefinition{ protected: static const int numTokenChars= 256; bool isToken[numTokenChars], notExclusively[numTokenChars], notAtEnd[numTokenChars]; unsigned int minTokenLength,maxTokenLength; public: tokenDefinition(){ clear(); } void clear(void){ for(int i= 0;i 0){ minTokenLength= lmin; } if(lmax> 0){ maxTokenLength= lmax; } } unsigned int getLengthMin(void)const{ return minTokenLength; } unsigned int getLengthMax(void)const{ return maxTokenLength; } bool isTokenMember(const int c)const{ assert(c>=0&&c=0&&c=0&&c=minTokenLength)&&(l<=maxTokenLength); } bool isTokenLengthAcceptable(const string t)const{ return isTokenLengthAcceptable(t.length()); } void setTokenMember(bool v,const int cstart,const int cend= -1){ assert(cstart>=0&&cstart<=numTokenChars); assert((cend==-1)||(cend>=cstart&&cend<=numTokenChars)); for(int i= cstart;i<=cend;i++){ isToken[i]= v; } } void setTokenNotExclusively(bool v,const int cstart,const int cend= -1){ assert(cstart>=0&&cstart<=numTokenChars); assert((cend==-1)||(cend>=cstart&&cend<=numTokenChars)); for(int i= cstart;i<=cend;i++){ notExclusively[i]= v; } } void setTokenNotAtEnd(bool v,const int cstart,const int cend= -1){ assert(cstart>=0&&cstart<=numTokenChars); assert((cend==-1)||(cend>=cstart&&cend<=numTokenChars)); for(int i= cstart;i<=cend;i++){ notAtEnd[i]= v; } } void setISO_8859defaults(unsigned int lmin= 0,unsigned int lmax= 0); void setUS_ASCIIdefaults(unsigned int lmin= 0,unsigned int lmax= 0); }; /*:170*//*173:*/ #line 7245 "annoyance-filter.w" class tokenParser{ protected: mailFolder*source; string cl; string::size_type clp; bool atEnd,inHTML,inHTMLcomment; tokenDefinition*td; tokenDefinition*btd; bool saveMessage; bool assemblePhrases; deque phraseQueue; deque pendingPhrases; public: list messageQueue; tokenParser(){ td= NULL; } void setSource(mailFolder&mf){ source= &mf; cl= ""; clp= 0; atEnd= inHTML= inHTMLcomment= false; saveMessage= false; messageQueue.clear(); phraseQueue.clear(); pendingPhrases.clear(); /*179:*/ #line 7653 "annoyance-filter.w" assemblePhrases= false; if((phraseMin!=1)||(phraseMax!=1)){ if((phraseMin>=1)&&(phraseMax>=phraseMin)){ if((phraseLimit> 0)&&(phraseLimit<((phraseMax*2)-1))){ cerr<<"Invalid --phraselimit setting. Too small for specified --phrasemax."<setLengthLimits(lMin,lMax); assert(btd!=NULL); btd->setLengthLimits(blMin,blMax); } unsigned int getTokenLengthMin(void)const{ return td->getLengthMin(); } unsigned int getTokenLengthMax(void)const{ return td->getLengthMax(); } void reportParserDiagnostic(const string s)const{ assert(source!=NULL); source->reportParserDiagnostic(s); } void reset(void){ if(inHTML){ reportParserDiagnostic(" tag unterminated at end of message."); } if(inHTMLcomment){ reportParserDiagnostic("HTML comment unterminated at end of message."); } inHTML= inHTMLcomment= false; clearMessageQueue(); phraseQueue.clear(); pendingPhrases.clear(); } bool nextToken(dictionaryWord&d); void assembleAllPhrases(dictionaryWord&d); /*182:*/ #line 7730 "annoyance-filter.w" void setSaveMessage(bool v){ saveMessage= v; source->setDiagnosticList(saveMessage?(&messageQueue):NULL); } bool getSaveMessage(void)const{ return saveMessage; } void clearMessageQueue(void){ if(saveMessage){ string s; if(isNewMessage()){ s= messageQueue.back(); } messageQueue.clear(); if(isNewMessage()){ messageQueue.push_back(s); } } } void writeMessageQueue(ostream&os){ list ::size_type l= messageQueue.size(),n= 0; for(list ::iterator p= messageQueue.begin(); p!=messageQueue.end();p++,n++){ if(!((n==(l-1))&& (p->substr(0,(sizeof messageSentinel)-1)==messageSentinel))){ os<<*p<isNewMessage()); } private: void nextLine(void){ while(true){ if(!(source->nextLine(cl))){ atEnd= true; cl= ""; break; } if(saveMessage){ messageQueue.push_back(cl); } if(source->isNewMessage()){ reset(); } break; } clp= 0; } }; /*:173*//*183:*/ #line 7773 "annoyance-filter.w" class classifyMessage{ public: mailFolder*mf; tokenParser tp; unsigned int nExtremal; dictionary*d; fastDictionary*fd; double unknownWordProbability; classifyMessage(mailFolder&m, dictionary&dt, fastDictionary*fdt= NULL, unsigned int nExt= 15,double uwp= 0.2); double classifyThis(bool createTranscript= false); protected: void addSignificantWordDiagnostics(list &l, list ::iterator where, multimap &rtokens,string endLine= ""); }; /*:183*//*186:*/ #line 7862 "annoyance-filter.w" #ifdef OLDWAY double abs(double x){ return(x<0)?(-(x)):x; } #endif /*:186*//*194:*/ #line 8119 "annoyance-filter.w" #ifdef POP3_PROXY_SERVER /*216:*/ #line 8724 "annoyance-filter.w" static RETSIGTYPE absentPlumber(int) { if(popProxyTrace){ cerr<<"POP3: Caught SIGPIPE--continuing."< multiLine,cMultiLine; int listenSocket; POP3ProxyFilterFunction filterFunction; public: POP3Proxy(unsigned short proxyPort= 9110, string serverN= "", unsigned short serverP= 110, POP3ProxyFilterFunction filterF= NULL ): popProxyPort(proxyPort), serverName(serverN), serverPort(serverP), opened(false), listenSocket(-1), filterFunction(filterF){ /*195:*/ #line 8198 "annoyance-filter.w" multiLine.insert("capa"); multiLine.insert("retr"); multiLine.insert("top"); cMultiLine.insert("list"); cMultiLine.insert("uidl"); /*:195*/ #line 8151 "annoyance-filter.w" ; } ~POP3Proxy(){ if(listenSocket!=-1){ close(listenSocket); signal(SIGPIPE,SIG_DFL); } } void setPopProxyPort(unsigned short p){ /*196:*/ #line 8211 "annoyance-filter.w" #ifndef NDEBUG if(opened){ cerr<<"Attempt to modify POP3 connection settings after connection opened."< annotations; #ifdef POP3_PROXY_SERVER static int popProxyPort= 9110; static string popProxyServer= ""; static int popProxyServerPort= 110; #endif static bool bsdFolder= false; /*:239*//*240:*/ #line 9236 "annoyance-filter.w" static unsigned int nTested= 0; /*:240*/ #line 9964 "annoyance-filter.w" /*11:*/ #line 1359 "annoyance-filter.w" bool operator<(dictionaryWord a,dictionaryWord b){ return a.get()=minOccurrences){ assert(nMailMessages> 0); assert(nJunkMessages> 0); junkProbability= min(0.99,max(0.01,min(nJunk/nJunkMessages,1.0)/ (min(nMail/nMailMessages,1.0)+min(nJunk/nJunkMessages,1.0)))); }else{ junkProbability= -1; } } /*:12*//*13:*/ #line 1419 "annoyance-filter.w" void dictionaryWord::describe(ostream&os){ os<=s.length())||(s[p]=='#')||(s[p]==';')){ continue; } if((s[p]=='-')||isdigit(s[p])){ p= s.find(','); if(p!=string::npos){ p1= s.find(',',p+1); if(p1!=string::npos){ p2= s.find(',',p1+1); if(p2!=string::npos){ junkProbability= atof(s.substr(0,p).c_str()); occurrences[Mail]= atoi(s.substr(p+1,p1-p).c_str()); occurrences[Junk]= atoi(s.substr(p1+1,p2-p).c_str()); p= s.find('"',p2+1); if(p!=string::npos){ p1= s.find_last_of('"'); if((p1!=string::npos)&&(p1> p)){ text= s.substr(p+1,(p1-p)-1); return true; } } } } } } junkProbability= -2; text= s; return true; } junkProbability= -3; return false; } } /*:15*//*16:*/ #line 1516 "annoyance-filter.w" void dictionaryWord::exportToBinaryFile(ostream&os){ unsigned char c; const unsigned char*fp; const double k1= -1.0; #define outCount(x) c = (x); os.put(c) #define outNumber(x) os.put((x >> 24) & 0xFF); os.put((x >> 16) & 0xFF); \ os.put((x >> 8) & 0xFF); os.put(x & 0xFF) outCount(text.length()); os.write(text.data(),text.length()); outNumber(n_mail()); outNumber(n_junk()); fp= reinterpret_cast (&k1); if(fp[0]==0){ fp= reinterpret_cast (&junkProbability); for(unsigned int i= 0;i<(sizeof junkProbability);i++){ outCount(fp[((sizeof junkProbability)-1)-i]); } }else{ os.write(reinterpret_cast (&junkProbability), sizeof junkProbability); } #undef outCount #undef outNumber } /*:16*//*17:*/ #line 1552 "annoyance-filter.w" bool dictionaryWord::importFromBinaryFile(istream&is){ unsigned char c; char sval[256]; unsigned char ibyte[4]; unsigned char fb[8]; unsigned char*fp; const double k1= -1.0; const unsigned char*kp; #define iNumber ((ibyte[0] << 24) | (ibyte[1] << 16) | (ibyte[2] << 8) | ibyte[3]) if(is.read(reinterpret_cast (&c),1)){ if(is.read(sval,c)){ text= string(sval,c); is.read(reinterpret_cast (ibyte),4); occurrences[Mail]= iNumber; is.read(reinterpret_cast (ibyte),4); occurrences[Junk]= iNumber; kp= reinterpret_cast (&k1); if(kp[0]==0){ is.read(reinterpret_cast (fb),8); fp= reinterpret_cast (&junkProbability); for(unsigned int i= 0;i<(sizeof junkProbability);i++){ fp[((sizeof junkProbability)-1)-i]= fb[i]; } }else{ is.read(reinterpret_cast (&junkProbability), sizeof junkProbability); } return true; } } return false; #undef iNumber } /*:17*//*20:*/ #line 1657 "annoyance-filter.w" void dictionary::add(dictionaryWord w,dictionaryWord::mailCategory category){ dictionary::iterator p; if((p= find(w.get()))!=end()){ p->second.add(category); }else{ insert(make_pair(w.get(),w)).first->second.add(category); memoryRequired+= w.estimateMemoryRequirement(); } } /*:20*//*21:*/ #line 1678 "annoyance-filter.w" void dictionary::include(dictionaryWord&w){ dictionary::iterator p; if((p= find(w.get()))!=end()){ p->second.occurrences[dictionaryWord::Mail]+= w.occurrences[dictionaryWord::Mail]; p->second.occurrences[dictionaryWord::Junk]+= w.occurrences[dictionaryWord::Junk]; }else{ insert(make_pair(w.get(),w)); } } /*:21*//*22:*/ #line 1699 "annoyance-filter.w" bool byProbability(const dictionaryWord*w1, const dictionaryWord*w2){ double dp= w1->getJunkProbability()-w2->getJunkProbability(); if(dp==0){ return w1->get()get(); } return dp<0; } void dictionary::exportCSV(ostream&os){ if(verbose){ cerr<<"Exporting dictionary to CSV file."< dv; for(iterator p= begin();p!=end();p++){ dv.push_back(&(p->second)); } sort(dv.begin(),dv.end(),byProbability); os<<"; Probability,Mail,Junk,Word"< ::iterator q= dv.begin();q!=dv.end();q++){ (*q)->exportCSV(os); } } /*:22*//*23:*/ #line 1740 "annoyance-filter.w" void dictionary::importCSV(istream&is){ if(verbose){ cerr<<"Importing dictionary from CSV file."<=-1){ include(dw); }else{ if(verbose){ cerr<<"Ill-formed record in CSV import: \""< ,int> { int p; public: explicit dictionaryWordProb_less(const int pt):p(pt){} bool operator()(const pair &dw)const{ return dw.second.getJunkProbability() pq; while(!empty()){ if(((occurrences> 0)&&(begin()->second.n_occurrences()> occurrences))|| (begin()->second.getJunkProbability()>=0)){ pq.push(begin()->second); } erase(begin()); } while(!pq.empty()){ insert(make_pair(pq.front().get(),pq.front())); memoryRequired+= pq.front().estimateMemoryRequirement(); pq.pop(); } #endif if(verbose){ cerr<second.resetCat(category); } } /*:25*//*26:*/ #line 1868 "annoyance-filter.w" void dictionary::printStatistics(ostream&os)const{ if(verbose){ cerr<<"Computing dictionary statistics."< dt; for(const_iterator mp= begin();mp!=end();mp++){ if(mp->second.getJunkProbability()>=0){ dt.push_back(mp->second.getJunkProbability()); } } os<<"Mean = "< hist(nBins); for(const_iterator mp= begin();mp!=end();mp++){ if(mp->second.getJunkProbability()>=0){ unsigned int bin= static_cast (mp->second.getJunkProbability()*nBins); hist[bin]++; } } /*:28*/ #line 1913 "annoyance-filter.w" ; /*29:*/ #line 1963 "annoyance-filter.w" for(unsigned int j= 0;j (j)/nBins)<<" "<"+fileName+".png"; #ifdef PLOT_DEBUG cout<second.computeJunkProbability(nMailMessages,nJunkMessages, mailBias,minOccurrences); } } /*:30*//*31:*/ #line 2004 "annoyance-filter.w" void dictionary::exportToBinaryFile(ostream&os){ if(verbose){ cerr<<"Exporting dictionary to binary file."<second.exportToBinaryFile(os); } } void dictionary::importFromBinaryFile(istream&is){ if(verbose){ cerr<<"Importing dictionary from binary file."< (mmap((caddr_t)0,fileLength, PROT_READ,MAP_SHARED|MAP_NORESERVE, fileHandle,0)); istrstream is(dp,fileLength); #else ifstream is(fname.c_str(),ios::in|ios::binary); if(!is){ cerr<<"Cannot open fast dictionary file "< (&s),sizeof s); if(s!=byteOrderMark){ cerr<<"Fast dictionary file "< (&s),sizeof s); if(s!=versionNumber){ cerr<<"Fast dictionary file "< (&s),sizeof s); u_int16_t filler; is.read(reinterpret_cast (&filler),sizeof filler); if(s==doubleSize){ is.read(reinterpret_cast (&d),sizeof d); } if((s!=doubleSize)||(d!=fastDictionaryFloatingTest)){ cerr<<"Fast dictionary file "< (&totalSize),sizeof totalSize); is.read(reinterpret_cast (&hashTableOffset),sizeof hashTableOffset); is.read(reinterpret_cast (&hashTableBuckets),sizeof hashTableBuckets); is.read(reinterpret_cast (&wordTableSize),sizeof wordTableSize); #ifdef HAVE_MMAP dblock= reinterpret_cast (dp)+is.tellg(); #else u_int32_t fdsize= (hashTableBuckets*sizeof(u_int32_t))+wordTableSize; try{ dblock= new unsigned char[fdsize]; }catch(bad_alloc){ cerr<<"Unable to allocate memory for fast dictionary."; return false; } is.read(reinterpret_cast (dblock),fdsize); is.close(); #endif hashTable= reinterpret_cast (dblock); wordTable= dblock+(hashTableBuckets*sizeof(u_int32_t)); if(verbose){ cerr<<"Loaded fast dictionary from "< hashTable(hashSize,fastDictionaryVoidLink); vector words; for(dictionary::const_iterator w= d.begin();w!=d.end();w++){ u_int32_t h= computeHashValue(w->first); unsigned int slot= h%hashSize; /*36:*/ #line 2376 "annoyance-filter.w" if(hashTable[slot]==fastDictionaryVoidLink){ hashTable[slot]= words.size(); }else{ u_int32_t p= hashTable[slot]; u_int32_t l; while(true){ memcpy(&l,&(words[p]),sizeof l); if(l==fastDictionaryVoidLink){ break; } p= l; } l= words.size(); memcpy(&(words[p]),&l,sizeof l); } /*:36*/ #line 2300 "annoyance-filter.w" ; /*37:*/ #line 2398 "annoyance-filter.w" vector ::size_type wl= words.size(); words.resize(words.size()+sizeof(u_int32_t)+ sizeof(double)+sizeof(u_int16_t)+w->second.get().length()); u_int32_t vl= fastDictionaryVoidLink; #ifdef OLDWAY memcpy(words.begin()+wl,&vl,sizeof vl); #else Vmemcpy(words,wl,&vl,sizeof vl); #endif wl+= sizeof vl; double jp= w->second.getJunkProbability(); #ifdef OLDWAY memcpy(words.begin()+wl,&jp,sizeof jp); #else Vmemcpy(words,wl,&jp,sizeof jp); #endif wl+= sizeof jp; u_int16_t wlen= w->second.get().length(); #ifdef OLDWAY memcpy(words.begin()+wl,&wlen,sizeof wlen); #else Vmemcpy(words,wl,&wlen,sizeof wlen); #endif wl+= sizeof wlen; #ifdef OLDWAY memcpy(words.begin()+wl,w->second.get().data(),wlen); #else Vmemcpy(words,wl,w->second.get().data(),wlen); #endif /*:37*/ #line 2301 "annoyance-filter.w" ; } o< (&b),sizeof b); b= versionNumber; o.write(reinterpret_cast (&b),sizeof b); b= doubleSize; o.write(reinterpret_cast (&b),sizeof b); b= 0; o.write(reinterpret_cast (&b),sizeof b); double td= fastDictionaryFloatingTest; o.write(reinterpret_cast (&td),sizeof td); u_int32_t headerSize= 4+(4*sizeof(u_int16_t))+sizeof(double)+ (4*sizeof(u_int32_t)); u_int32_t wordTableSize= words.size(); u_int32_t totalSize= headerSize+ (hashTable.size()*sizeof(u_int32_t))+ wordTableSize; o.write(reinterpret_cast (&totalSize),sizeof totalSize); o.write(reinterpret_cast (&headerSize),sizeof headerSize); o.write(reinterpret_cast (&hashSize),sizeof hashSize); o.write(reinterpret_cast (&wordTableSize),sizeof wordTableSize); #ifdef OLDWAY o.write(hashTable.begin(),hashTable.size()*sizeof(u_int32_t)); o.write(words.begin(),words.size()); #else for(vector ::const_iterator htp= hashTable.begin(); htp!=hashTable.end();htp++){ u_int32_t hte= *htp; o.write(reinterpret_cast (&hte),sizeof hte); } for(vector ::const_iterator wtp= words.begin(); wtp!=words.end();wtp++){ o.put(*wtp); } #endif if(verbose){ cerr<<"Exported "< (sqrt(static_cast (a))+1); if((a&1)==0){ a++; } while(true){ unsigned int remainder= 0; a+= 2; for(unsigned int n= 3;n<=sqlim;n+= 2){ if((remainder= (a%n))==0){ break; } } if(remainder!=0){ break; } } return a; } /*:38*//*39:*/ #line 2471 "annoyance-filter.w" u_int32_t fastDictionary::computeHashValue(const string&s){ u_int32_t hash= 1; for(unsigned int i= 0;ireportParserDiagnostic("Unterminated MIME sentinel at end of message."); mf->setNewMessageEligiblity(); } atEnd= true; } if((partBoundary!="")&&(inputLine.substr(0,2)=="--")&& (inputLine.substr(2,partBoundary.length())==partBoundary)){ if(Annotate('d')){ ostringstream os; os<<"Part boundary encountered: "<reportParserDiagnostic(os); } endBoundary= inputLine; atEnd= true; }else{ if(tlist!=NULL){ tlist->push_back(inputLine); } ip= 0; encodedLineCount++; } }else{ atEnd= true; eofHit= true; } } if(atEnd){ inputLine= ""; ip= 0; } return!atEnd; } /*:41*//*42:*/ #line 2631 "annoyance-filter.w" bool MIMEdecoder::getDecodedLine(string&s){ int ch; s= ""; while(true){ if(lookAhead){ ch= lookChar; lookAhead= false; }else{ ch= getDecodedChar(); } if(ch<0){ break; } /*43:*/ #line 2661 "annoyance-filter.w" if(ch=='\r'||ch=='\n'){ int cht= getDecodedChar(); if(!(((ch=='\r')&&(cht=='\n'))|| ((ch=='\n')&&(cht=='\r')))){ lookAhead= true; lookChar= cht; } return true; } /*:43*/ #line 2646 "annoyance-filter.w" ; s+= ch; } return s.length()> 0; } /*:42*//*44:*/ #line 2678 "annoyance-filter.w" void MIMEdecoder::saveDecodedStream(ostream&os){ int ch; while((ch= getDecodedChar())>=0){ os.put(ch); } } /*:44*//*45:*/ #line 2691 "annoyance-filter.w" void MIMEdecoder::saveDecodedStream(const string fname){ ofstream of(fname.c_str()); if(!of){ if(verbose){ cerr<<"Cannot create MIMEdecoder dump file: "< ' '){ c= inputLine[ip++]; break; } ip++; } if(c>=0){ break; } if(!getNextEncodedLine()){ break; } } /*:52*/ #line 2850 "annoyance-filter.w" ; /*53:*/ #line 2897 "annoyance-filter.w" if(c==EOF){ if(i> 0){ nDecodeErrors++; mf->reportParserDiagnostic("Unexpected end of file in Base64 decoding."); } return-1; } /*:53*/ #line 2851 "annoyance-filter.w" ; if(dtable[c]&0x80){ nDecodeErrors++; ostringstream os; os<<"Illegal character '"<reportParserDiagnostic(os.str()); i--; continue; } a[i]= (unsigned char)c; b[i]= dtable[c]; } /*:51*/ #line 2835 "annoyance-filter.w" ; /*54:*/ #line 2917 "annoyance-filter.w" o[0]= (b[0]<<2)|(b[1]>>4); o[1]= (b[1]<<4)|(b[2]>>2); o[2]= (b[2]<<6)|b[3]; j= a[2]=='='?1:(a[3]=='='?2:3); for(k= 0;k 0){ unsigned char v= decodedBytes[0]; decodedBytes.pop_front(); return v; } return-1; } /*:49*//*56:*/ #line 2955 "annoyance-filter.w" void base64MIMEdecoder::initialiseDecodingTable(void) { int i; for(i= 0;i<255;i++){ dtable[i]= 0x80; } for(i= 'A';i<='I';i++){ dtable[i]= 0+(i-'A'); } for(i= 'J';i<='R';i++){ dtable[i]= 9+(i-'J'); } for(i= 'S';i<='Z';i++){ dtable[i]= 18+(i-'S'); } for(i= 'a';i<='i';i++){ dtable[i]= 26+(i-'a'); } for(i= 'j';i<='r';i++){ dtable[i]= 35+(i-'j'); } for(i= 's';i<='z';i++){ dtable[i]= 44+(i-'s'); } for(i= '0';i<='9';i++){ dtable[i]= 52+(i-'0'); } #define CI(x) static_cast (x) dtable[CI('+')]= 62; dtable[CI('/')]= 63; dtable[CI('=')]= 0; #undef CI } /*:56*//*57:*/ #line 3000 "annoyance-filter.w" string base64MIMEdecoder::decodeEscapedText(const string s,mailFolder*m){ string r= ""; base64MIMEdecoder dc; istringstream iss(s); int dchar; dc.set(&iss,m,""); while((dchar= dc.getDecodedChar())>=0){ r+= static_cast (dchar); } return r; } /*:57*//*59:*/ #line 3048 "annoyance-filter.w" int quotedPrintableMIMEdecoder::getDecodedChar(void){ int ch; /*55:*/ #line 2935 "annoyance-filter.w" if(lookAhead){ lookAhead= false; return lookChar; } /*:55*/ #line 3052 "annoyance-filter.w" ; while(true){ ch= getNextChar(); if(ch=='='){ /*60:*/ #line 3072 "annoyance-filter.w" int ch1= getNextChar(); /*63:*/ #line 3149 "annoyance-filter.w" while(/*62:*/ #line 3135 "annoyance-filter.w" ((ch1==' ')||(ch1=='\t')||(ch1=='\r')) /*:62*/ #line 3150 "annoyance-filter.w" ){ ch1= getNextChar(); if(ch1=='\n'){ continue; } if(!/*62:*/ #line 3135 "annoyance-filter.w" ((ch1==' ')||(ch1=='\t')||(ch1=='\r')) /*:62*/ #line 3155 "annoyance-filter.w" ){ nDecodeErrors++; ostringstream os; os<<"Invalid character '"< (ch1)<< "' after soft line break in Quoted-Printable MIME part."; mf->reportParserDiagnostic(os.str()); ch1= ' '; } } /*:63*/ #line 3074 "annoyance-filter.w" ; if(ch1=='\n'){ continue; }else{ int n1= hex_to_nybble(ch1); int ch2= getNextChar(); int n2= hex_to_nybble(ch2); if(n1==-1||n2==-1){ ostringstream os; os<<"Invalid escape sequence '="<< static_cast (ch1)< (ch2)<< "' in Quoted-Printable MIME part."; mf->reportParserDiagnostic(os.str()); nDecodeErrors++; } ch= (n1<<4)|n2; } return ch; /*:60*/ #line 3057 "annoyance-filter.w" ; }else{ return ch; } } } /*:59*//*61:*/ #line 3101 "annoyance-filter.w" int quotedPrintableMIMEdecoder::getNextChar(void){ while(true){ if(atEndOfLine){ atEndOfLine= false; return'\n'; } if(ip='0')&&(ch<=('0'+9))){ return ch-'0'; }else if((ch>='A')&&(ch<=('A'+5))){ return 10+(ch-'A'); }else if((ch>='a')&&(ch<=('a'+5))){ return 10+(ch-'a'); } return-1; } /*:64*//*65:*/ #line 3189 "annoyance-filter.w" string quotedPrintableMIMEdecoder::decodeEscapedText(const string s,mailFolder*m){ string r= ""; string::size_type p; for(p= 0;p (s.length()-3)){ if(verbose){ cerr<<"decodeEscapedText: escape too near end of string: "< ((n1<<4)|n2); decoded= true; p+= 2; } } } if(!decoded){ r+= s[p]; } } return r; } /*:65*//*69:*/ #line 3354 "annoyance-filter.w" int MBCSdecoder::getNextNBytes(const unsigned int n){ assert((n>=1)&&(n<=4)); int v= 0; for(unsigned int i= 0;ireportParserDiagnostic(s); }else{ if(verbose){ cerr<=0xA1)&&(c1<=0xFE)){ int c2= getNextEncodedByte(); if((c2>=0x80)&&(c2<=0xFF)){ return(c1<<8)|c2; } if(c2==-1){ ostringstream os; os<=0)&&((c1&0x80)!=0)){ int c2= getNextEncodedByte(); if(c2==-1){ ostringstream os; os<=0){ /*77:*/ #line 3554 "annoyance-filter.w" if(((c1>=0x81)&&(c1<=0x9F))|| ((c1>=0xE0)&&(c1<=0xEF))|| ((c1>=0xF0)&&(c1<=0xFC))){ int c2= getNextEncodedByte(); if(c2==-1){ ostringstream os; os<=0x40)&&(c2<=0x7E))|| ((c2>=0x80)&&(c2<=0xFC)))){ ostringstream os; os< 6){ ostringstream os; os<>(nbytes+1)); for(string::size_type i= 1;i 0xDFFF)){ result= w1; nwydes= 1; }else if((w1>=0xD800)&&(w1<=0xDBFF)){ w2= getNextUTF_16Word(); if(w2<0){ ostringstream os; os< 0xDFFF)){ ostringstream os; os<getNextDecodedChar(); if(dc<0){ return""; } if(dc<256){ string r(1,static_cast (dc)); return r; } ostringstream os; os.setf(ios::uppercase); os<getNextDecodedChar(); if(dc<0){ return""; } if(dc<0xA1){ string r(1,static_cast (dc)); return r; } ostringstream os; os.setf(ios::uppercase); if((dc>=0xA1)&&(dc<=0xDF)){ os<<"SJIS-K"<getNextDecodedChar(); if(dc<0){ return""; } if(dc<=0xFF){ string r(1,static_cast (dc)); return r; } ostringstream os; os.setf(ios::uppercase); if(((dc>=0x3200)&&(dc<0xD800))|| ((dc>=0xF900)&&(dc<0xFAFF))){ os<nextByte(); if(ch==EOF){ eof= true; } return ch&0xFF; } /*:99*//*101:*/ #line 4288 "annoyance-filter.w" void flashStream::readHeader(void){ sig[0]= get8(); sig[1]= get8(); sig[2]= get8(); if(isEOF()||(memcmp(sig,"FWS",3)!=0)){ error= true; if(verbose){ cerr<<"Invalid signature in Flash animation file."< (version)< (s>>6); l= s&0x3F; if(l==0x3F){ l= get32(); } if(isOK()){ tDataLen= l; return tType!=stagEnd; } } tType= stagEnd; tDataLen= 0; return false; } /*:103*//*104:*/ #line 4365 "annoyance-filter.w" void flashStream::ignoreTag(unsigned int lookedAhead){ if(isOK()){ for(unsigned int i= lookedAhead;isOK()&&(i 0){ ch= get8(); s+= ch; n--; } } } /*:105*//*106:*/ #line 4409 "annoyance-filter.w" void flashStream::getRect(rect*r){ initBits(); int nBits= static_cast (getBits(5)); r->xMin= getSignedBits(nBits); r->xMax= getSignedBits(nBits); r->yMin= getSignedBits(nBits); r->yMax= getSignedBits(nBits); } /*:106*//*107:*/ #line 4427 "annoyance-filter.w" void flashStream::getMatrix(matrix*mat){ initBits(); if(getBits(1)){ int nBits= static_cast (getBits(5)); mat->a= getSignedBits(nBits); mat->d= getSignedBits(nBits); }else{ mat->a= mat->d= 0x00010000L; } if(getBits(1)){ int nBits= static_cast (getBits(5)); mat->b= getSignedBits(nBits); mat->c= getSignedBits(nBits); }else{ mat->b= mat->c= 0; } int nBits= static_cast (getBits(5)); mat->tx= getSignedBits(nBits); mat->ty= getSignedBits(nBits); } /*:107*//*109:*/ #line 4503 "annoyance-filter.w" void flashStream::initBits(void){ bitPos= 0; bitBuf= 0; } unsigned int flashStream::getBits(int n){ unsigned int v= 0; while(true){ int s= n-bitPos; if(s> 0){ v|= bitBuf<>-s; bitPos-= n; bitBuf&= 0xFF>>(8-bitPos); return v; } } } int flashStream::getSignedBits(const int n){ signed int v= static_cast (getBits(n)); if(v&(1<<(n-1))){ v|= -1< (get8()); map ::iterator fp= fontGlyphCount.find(fontID); if(fp==fontGlyphCount.end()){ if(verbose){ cerr<<"DefineFontInfo for font ID "<second; vector *v= new vector (nGlyphs); fontMap.insert(make_pair(fontID,v)); fontInfoBits.insert(make_pair(fontID,fFlags)); for(unsigned int g= 0;gsecond; fontChars= fontMap.find(fontId)->second; fFlags= fontInfoBits.find(fontId)->second; } } if(textRecordType&textHasColor){ #ifdef FLASH_PARSE_DEBUG int r= get8(); int g= get8(); int b= get8(); if(variant==2){ int a= get8(); cout<<" tfontColour: ("<=fGlyphs){ if(verbose){ cerr<<"Flash DefineText glyph index "<< iIndex<<" exceeds font size of "< ((wc>>8)&0xFF); s+= static_cast (wc&0xFF); }else{ s+= static_cast ((*fontChars)[iIndex]); } } } } #ifdef FLASH_PARSE_DEBUG cout< (get8()))!=sactionNone){ unsigned int dlen= 0; if((ac&0x80)!=0){ dlen= get16(); } switch(ac){ case sactionGetURL: { string url,target; getString(url); getString(target); if(!textOnly){ strings.push(url); } strings.push(target); } break; default: if(dlen> 0){ skip8n(dlen); } #ifdef FLASH_PARSE_DEBUG cout<<" Skipping action code 0x"<nextString(s)):(!(mdp->getDecodedLine(s)))){ if(asp!=NULL){ if(Annotate('d')){ ostringstream os; os<<"Closing "<name()<<" application file decoder."; reportParserDiagnostic(os); } asp->close(); asp= NULL; } s= mdp->getTerminatorSentinel(); decoderEOF= mdp->isEndOfFile(); if(decoderEOF){ s= ""; } if(Annotate('d')){ ostringstream os; os<<"Closing out "<name()<<" decoder. "<< mdp->getEncodedLineCount()<<" lines decoded."; reportParserDiagnostic(os); os.str(""); os<<"End sentinel: "<push_back(s); } /*139:*/ #line 6106 "annoyance-filter.w" #ifdef BSD_DIAG if(s.substr(0,(sizeof messageSentinel)-1)==messageSentinel){ if(!BSDfolder&&!lastLineBlank){ cerr<<"*** NonBSD From line ditched: "< 0)&&(!isISOspace(s[0])))){ nMessages++; newMessage= true; expectingNewMessage= false; inHeader= true; multiPart= false; inPartHeader= false; partHeaderLines= 0; bodyContentType= bodyContentTypeCharset= bodyContentTypeName= bodyContentTransferEncoding= ""; fromLine= s; lastFromLine= lastMessageID= messageID= ""; while(!partBoundaryStack.empty()){ ostringstream os; os<<"Orphaned part boundary on stack: \""< 0)&&(isISOspace(s[s.length()-1]))){ s.erase(s.length()-1); } setNewMessageEligiblity(s.empty()); /*:140*/ #line 5779 "annoyance-filter.w" ; /*141:*/ #line 6165 "annoyance-filter.w" if(inHeader||inPartHeader){ if(s==""){ if(inHeader){ if((!multiPart)&&(bodyContentTransferEncoding!="")){ mimeContentType= bodyContentType; mimeContentTypeCharset= bodyContentTypeCharset; mimeContentTypeName= bodyContentTypeName; mimeContentTransferEncoding= bodyContentTransferEncoding; multiPart= true; partBoundary= ""; } } inHeader= inPartHeader= false; /*153:*/ #line 6531 "annoyance-filter.w" if(multiPart){ assert(mdp==NULL); #ifdef TYPE_LOG typeLog<setMailFolder(this); if(Annotate('d')){ ostringstream os; os<<"Activating "<name()<<" application file decoder."; reportParserDiagnostic(os); } } /*:155*/ #line 6548 "annoyance-filter.w" ; /*156:*/ #line 6622 "annoyance-filter.w" if((asp==NULL)&&(streamMinTokenLength> 0)&& ((mimeContentType.substr(0,12)=="application/")|| (((mimeContentType.substr(0,6)=="audio/")|| (mimeContentType.substr(0,6)=="image/"))&& (isSpoofedExecutableFileExtension(mimeContentTypeName)|| isSpoofedExecutableFileExtension(mimeContentDispositionFilename)) ) ) ){ if(Annotate('d')){ ostringstream os; os<<"Activating byte stream parser for \""<push_back(Xfile+"-Decoder: "+mdp->name()); } if(Annotate('d')){ ostringstream os; os<<"Activating MIME "<name()<<" decoder with sentinel: "<pop_back(); } isSpoofedHeader= true; } } /*:144*/ #line 6217 "annoyance-filter.w" ; while((inHeader||inPartHeader)&&getline(*is,lookAheadLine)!=NULL){ string lal= lookAheadLine; while((lookAheadLine.length()> 0)&&(isISOspace(lookAheadLine[lookAheadLine.length()-1]))){ lookAheadLine.erase(lookAheadLine.length()-1); } if((lookAheadLine.length()> 0)&&isISOspace(lookAheadLine[0])){ string::size_type p= 1; while(isISOspace(lookAheadLine[p])){ p++; } s+= lookAheadLine.substr(p); if((tlist!=NULL)&&(!isSpoofedHeader)){ tlist->push_back(lal); } continue; } lookedAhead= true; lookAheadLine= lal; break; } if(isSpoofedHeader){ ostringstream os; os<<"Spoofed header rejected: "< 0){ s= sc; } } /*:147*/ #line 6186 "annoyance-filter.w" ; } /*:141*/ #line 5780 "annoyance-filter.w" ; /*149:*/ #line 6432 "annoyance-filter.w" if(multiPart&&inPartHeader){ string arg,par; partHeaderLines++; if(compareHeaderField(s,"content-type",arg)){ if(parseHeaderArgument(s,"charset",par)){ stringCanonicalise(par); mimeContentTypeCharset= par; } if(parseHeaderArgument(s,"boundary",par)){ mimeContentTypeBoundary= par; } if(parseHeaderArgument(s,"name",par)){ mimeContentTypeName= par; } mimeContentType= arg; } if(compareHeaderField(s,"content-transfer-encoding",arg)){ mimeContentTransferEncoding= arg; } if(compareHeaderField(s,"content-disposition",arg)){ if(parseHeaderArgument(s,"filename",par)){ mimeContentDispositionFilename= par; } } } /*:149*/ #line 5781 "annoyance-filter.w" ; /*151:*/ #line 6504 "annoyance-filter.w" if(multiPart&&(!inHeader)&& (partBoundary!="")&& (s.substr(0,2)=="--")&& (s.substr(2,partBoundary.length())==partBoundary)&& (s.substr(partBoundary.length()+2)!="--")){ inPartHeader= true; mimeContentType= mimeContentTypeCharset= mimeContentTypeBoundary= mimeContentTransferEncoding= ""; } /*:151*/ #line 5783 "annoyance-filter.w" ; /*152:*/ #line 6520 "annoyance-filter.w" if((mbi!=NULL)&&(!inHeader)&&(!inPartHeader)){ s= mbi->decodeLine(s); } /*:152*/ #line 5784 "annoyance-filter.w" ; return true; } } /*:130*//*131:*/ #line 5799 "annoyance-filter.w" int mailFolder::nextByte(void){ assert(mdp!=NULL); int c= mdp->getDecodedChar(); if(c<0){ byteStream= false; if(Annotate('d')){ ostringstream os; os<<"End of byte stream. Deactivating byte stream parser."; reportParserDiagnostic(os); } } return c; } /*:131*//*136:*/ #line 5951 "annoyance-filter.w" #ifdef HAVE_DIRECTORY_TRAVERSAL bool mailFolder::findNextFileInDirectory(string&fname){ assert(dirFolder); if(dh==NULL){ return false; } while(true){ struct dirent*de; struct stat fs; de= readdir(dh); if(de==NULL){ closedir(dh); dh= NULL; return false; } cfName= dirName+pathSeparator+de->d_name; if(stat(cfName.c_str(),&fs)==0){ if(S_ISREG(fs.st_mode)){ fname= cfName; return openNextFileInDirectory(); } }else{ if(verbose){ cerr<<"Cannot get status of "< 0){ int sll= readlink(jname.c_str(),slbuf,(sizeof slbuf)-1); if(sll>=0){ assert(sll (sizeof slbuf)); slbuf[sll]= 0; jname= slbuf; }else{ break; } } if(maxSlinks<=0){ cerr<<"Warning: probable symbolic link loop for \""<< fname<<"\""< target.length()){ string sc= s; stringCanonicalise(sc); if((sc.substr(0,target.length())==target)&& (sc[target.length()]==':')){ unsigned int i; for(i= target.length()+1;i target.length()){ string sc= s; string::size_type p,p1; stringCanonicalise(sc); if(((p= sc.find(target))!=string::npos)&& (sc.length()> (p+target.length()))&& (sc[p+target.length()]=='=')){ p+= target.length()+1; if(p 4)&&(sc[sc.length()-4]=='.')){ string ext= sc.substr(sc.length()-3); stringCanonicalise(ext); return((ext=="exe")|| (ext=="bat")|| (ext=="scr")|| (ext=="lnk")|| (ext=="pif")|| (ext=="com")); } return false; } /*:166*//*167:*/ #line 7012 "annoyance-filter.w" unsigned int mailFolder::sizeMessageTranscript(const unsigned int lineOverhead)const{ assert(tlist!=NULL); unsigned int n= tlist->size(),totsize= 0; if((n> 1)&& (tlist->back().substr(0,(sizeof messageSentinel)-1)==messageSentinel)){ n--; } list ::iterator p= tlist->begin(); for(unsigned int i= 0;ilength()+lineOverhead; p++; } return totsize; } /*:167*//*168:*/ #line 7037 "annoyance-filter.w" void mailFolder::writeMessageTranscript(ostream&os)const{ assert(tlist!=NULL); unsigned int n= tlist->size(); if((n> 1)&& (tlist->back().substr(0,(sizeof messageSentinel)-1)==messageSentinel)){ n--; } list ::iterator p= tlist->begin(); for(unsigned int i= 0;i (x) notAtEnd[CI('-')]= notAtEnd[CI('\'')]= true; #undef CI } /*:171*//*172:*/ #line 7219 "annoyance-filter.w" void tokenDefinition::setUS_ASCIIdefaults(unsigned int lmin,unsigned int lmax){ clear(); setLengthLimits(lmin,lmax); for(unsigned int c= 0;c<128;c++){ isToken[c]= isalpha(c)||isdigit(c); notExclusively[c]= (isdigit(c)||(c=='-'))?1:0; } #define CI(x) static_cast (x) isToken[CI('_')]= notExclusively[CI('_')]= true; notAtEnd[CI('-')]= notAtEnd[CI('\'')]= true; #undef CI } /*:172*//*174:*/ #line 7357 "annoyance-filter.w" bool tokenParser::nextToken(dictionaryWord&d){ string token; while(!atEnd){ /*175:*/ #line 7457 "annoyance-filter.w" if(!pendingPhrases.empty()){ token= pendingPhrases.front(); pendingPhrases.pop_front(); d.set(token); d.toLower(); if(pTokenTrace&&saveMessage){ messageQueue.push_back(string(" \"")+d.text+"\""); } return true; } /*:175*/ #line 7363 "annoyance-filter.w" ; token= ""; string::size_type necount= 0; if(source->isByteStream()){ /*178:*/ #line 7577 "annoyance-filter.w" int b; while((b= source->nextByte())>=0){ if(!(btd->isTokenMember(b))){ continue; } if(btd->isTokenNotAtEnd(b)){ continue; } if(btd->isTokenNotExclusively(b)){ necount++; } token+= static_cast (b); while(((b= source->nextByte())>=0)&& btd->isTokenMember(b) ){ if(btd->isTokenNotExclusively(b)){ necount++; } token+= static_cast (b); } while((token.length()> 0)&& btd->isTokenNotAtEnd(ChIx(token[token.length()-1])) ){ token.erase(token.length()-1); } if(!(btd->isTokenLengthAcceptable(token))){ token= ""; continue; } if(necount==token.length()){ token= ""; continue; } d.set(token); d.toLower(); /*180:*/ #line 7674 "annoyance-filter.w" if(assemblePhrases){ assembleAllPhrases(d); continue; } /*:180*/ #line 7631 "annoyance-filter.w" ; if(pTokenTrace&&saveMessage){ messageQueue.push_back(string(" \"")+d.text+"\""); } return true; } continue; /*:178*/ #line 7369 "annoyance-filter.w" ; } while((clpisTokenMember(ChIx(cl[clp])))) )){ /*176:*/ #line 7482 "annoyance-filter.w" if(inHTML&&!inHTMLcomment&&(cl.substr(clp,4)==HTMLCommentBegin)){ inHTMLcomment= true; clp+= 4; #ifdef HTML_COMMENT_DEBUG cout<<"------------------------------ HTML Comment begin: "<'||cl[clp+5]==' ')){ string tag; for(int i= 1;i<5;i++){ tag+= (islower(cl[clp+i]))?toupper(cl[clp+i]):cl[clp+i]; } if(tag=="HTML"){ inHTML= true; #ifdef HTML_COMMENT_DEBUG cout<<"------------------------------ In HTML: "<')){ string tag; for(int i= 2;i<6;i++){ tag+= (islower(cl[clp+i]))?toupper(cl[clp+i]):cl[clp+i]; } if(tag=="HTML"){ inHTML= false; #ifdef HTML_COMMENT_DEBUG cout<<"------------------------------ Out of HTML: "<=cl.length()){ nextLine(); continue; } if(td->isTokenNotAtEnd(ChIx(cl[clp]))){ clp++; continue; } if(td->isTokenNotExclusively(ChIx(cl[clp]))){ necount++; } token+= cl[clp++]; while((clpisTokenMember(ChIx(cl[clp])))){ if(td->isTokenNotExclusively(ChIx(cl[clp]))){ necount++; } token+= cl[clp++]; }else{ /*176:*/ #line 7482 "annoyance-filter.w" if(inHTML&&!inHTMLcomment&&(cl.substr(clp,4)==HTMLCommentBegin)){ inHTMLcomment= true; clp+= 4; #ifdef HTML_COMMENT_DEBUG cout<<"------------------------------ HTML Comment begin: "< 0)&& td->isTokenNotAtEnd(ChIx(token[token.length()-1])) ){ token.erase(token.length()-1); } if(!(td->isTokenLengthAcceptable(token))){ continue; } if(necount==token.length()){ continue; } d.set(token); d.toLower(); /*180:*/ #line 7674 "annoyance-filter.w" if(assemblePhrases){ assembleAllPhrases(d); continue; } /*:180*/ #line 7443 "annoyance-filter.w" ; if(pTokenTrace&&saveMessage){ messageQueue.push_back(string(" \"")+d.text+"\""); } return true; } return false; } /*:174*//*181:*/ #line 7690 "annoyance-filter.w" void tokenParser::assembleAllPhrases(dictionaryWord&d){ phraseQueue.push_back(d.text); if(phraseQueue.size()> phraseMax){ phraseQueue.pop_front(); assert(phraseQueue.size()==phraseMax); } for(unsigned int p= phraseMin;p<=phraseMax;p++){ if(p<=phraseQueue.size()){ deque ::const_reverse_iterator wp= phraseQueue.rbegin(); string phrase= ""; for(unsigned int i= 0;isetTranscriptList(&messageTranscript); if(Annotate('p')||Annotate('d')){ saveParserDiagnostics= true; } } /*187:*/ #line 7875 "annoyance-filter.w" set utokens; while(tp.nextToken(dw)){ utokens.insert(dw.get()); } /*:187*/ #line 7835 "annoyance-filter.w" ; /*188:*/ #line 7903 "annoyance-filter.w" multimap rtokens; for(set ::iterator t= utokens.begin();t!=utokens.end();t++){ double pdiff; dictionary::iterator dp; if(fd->isDictionaryLoaded()){ pdiff= fd->find(*t); if(pdiff<0){ pdiff= unknownWordProbability; } pdiff= abs(pdiff-0.5); }else{ if(((dp= d->find(*t))!=d->end())&& (dp->second.getJunkProbability()>=0)){ pdiff= abs(dp->second.getJunkProbability()-0.5); }else{ pdiff= abs(unknownWordProbability-0.5); } } rtokens.insert(make_pair(pdiff,*t)); } /*:188*/ #line 7837 "annoyance-filter.w" ; /*189:*/ #line 7937 "annoyance-filter.w" unsigned int n= min(static_cast ::size_type> (nExtremal),rtokens.size()); multimap ::const_reverse_iterator rp= rtokens.rbegin(); double probP= 1,probQ= 1; if(verbose){ cerr<<"Rank Probability Token"<isDictionaryLoaded()){ p= fd->find(rp->second); if(p<0){ p= unknownWordProbability; } }else{ dictionary::iterator dp= d->find(rp->second); p= ((dp==d->end())||(dp->second.getJunkProbability()<0))? unknownWordProbability:dp->second.getJunkProbability(); } if(verbose){ cerr<second< ::iterator p; for(p= tp.messageQueue.begin();p!=tp.messageQueue.end();p++){ if(p->length()==0){ break; } } os< ::iterator p; string transEndl= ""; for(p= messageTranscript.begin();p!=messageTranscript.end();p++){ if(p->length()==0){ break; } if(*p=="\r"){ transEndl= "\r"; break; } } double jp= junkProb; if(jp<0.001){ jp= 0; } os<=junkThreshold){ os<<"Junk"; }else if(junkProb<=mailThreshold){ os<<"Mail"; }else{ os<<"Indeterminate"; } os<writeMessageTranscript(transcriptFilename); } } return junkProb; } /*:185*//*192:*/ #line 8079 "annoyance-filter.w" void classifyMessage::addSignificantWordDiagnostics(list &l, list ::iterator where, multimap &rtokens,string endLine){ unsigned int n= min(static_cast ::size_type> (nExtremal),rtokens.size()); multimap ::const_reverse_iterator rp= rtokens.rbegin(); for(unsigned int i= 0;ifind(rp->second); double wp= ((dp==d->end())||((dp->second.getJunkProbability()<0)))? unknownWordProbability:dp->second.getJunkProbability(); ostringstream os; os<second<<"\""<=0){ break; } }while(errno==EINTR); if(clientSocket<0){ perror("POP3Proxy accepting connection from client"); return false; } if(verbose){ cout<<"Accepting POP3 connection from "< (-1)){ cstat= 0; }else{ h= gethostbyname(serverName.c_str()); if(h!=NULL){ memcpy(&serverIP,h->h_addr,sizeof serverIP); cstat= 0; }else{ cerr<<"POP3Proxy: POP3 server "< 0)&&isspace(clientBuffer[clientLength-1])){ clientLength--; } command= argument= ""; int i; for(i= 0;i=POP_MAX_MESSAGE){ cerr<<"POP3Proxy reply from server too long."< (reply.length())){ perror("POP3Proxy relaying multi-line reply to request to client"); break; } #ifdef POP3_TRACE_TRANSFER_DETAIL if(popProxyTrace){ cerr<<"POP3: <<<<<< Relaying "<>>>>>"<>>>>>"< 0){ tp.setSaveMessage(true); } d= &dt; fd= fdt; nExtremal= nExt; unknownWordProbability= uwp; } /*:184*//*229:*/ #line 9012 "annoyance-filter.w" static void updateProbability(void) { dict.computeJunkProbability(messageCount[dictionaryWord::Mail],messageCount[dictionaryWord::Junk], mailBias,minOccurrences); singleDictionaryRead= false; } /*:229*//*230:*/ #line 9024 "annoyance-filter.w" static void printDictionary(ostream&os= cout) { updateProbability(); os<<"Dictionary contains "<second.describe(os); } } /*:230*//*231:*/ #line 9038 "annoyance-filter.w" static double classifyMessages(const char*fname) { double jp; if(dict.empty()&&!fDict.isDictionaryLoaded()){ cerr<<"You cannot --classify or --test a message " "unless you have first loaded a dictionary."<= n characters (0 = none)"< 0)&&(replyBuffer[0]=='+')){ /*220:*/ #line 8836 "annoyance-filter.w" istrstream is(reply.data(),reply.length()); mailFolder mf(is,dictionaryWord::Mail); mf.forceInHeader(); /*:220*/ #line 8775 "annoyance-filter.w" ; /*221:*/ #line 8850 "annoyance-filter.w" classifyMessage cm(mf,dict,&fDict,significantWords,novelWordProbability); double jp= cm.classifyThis(true); if(verbose){ cerr<<"Message junk probability: "< 0){ tp.setSaveMessage(true); } dictionaryWord dw; unsigned int ntokens= 0; while(tp.nextToken(dw)){ dict.add(dw,mf.getCategory()); ntokens++; /*228:*/ #line 8988 "annoyance-filter.w" if((autoPrune!=0)&&(dict.estimateMemoryRequirement()> autoPrune)){ if(verbose){ cerr<<"Dictionary size "< ((autoPrune*9)/10)){ cerr<<"Dictionary size after --autoprune is larger than 90%"< (autoPrune*1.25); cerr<<"Increasing --autoprune threshold 25% to "< 0){ cerr<<"Binary streams will be parsed for words of "<< streamMinTokenLength<<" characters or more."<=junkThreshold){ cout<<"JUNK"< 0)&& isdigit(sarg[pind+1])){ popProxyServerPort= atoi(sarg.substr(pind+1).c_str()); }else{ cerr<<"Invalid port number specification in --pop3server argument."< (mmap((caddr_t)0,fileLength, PROT_READ,MAP_SHARED|MAP_NORESERVE, fileHandle,0)); istrstream is(dp,fileLength); #else ifstream is(optarg,ios::binary); if(!is){ cerr<<"Cannot open dictionary file "< 0&&(transcriptFilename=="") #ifdef POP3_PROXY_SERVER &&(popProxyServer=="") #endif ){ cerr<<"Warning: --annotate requested but no --transcript or --pop3proxy requested."<