/*254:*/
#line 9958 "annoyance-filter.w"
/*
 * Program-wide manifest constants.
 *
 * Each definition stands on a line of its own with no trailing backslash:
 * a backslash at the end of a #define splices the following line into the
 * macro body, which would swallow the next definition.
 */
/* Release date string. */
#define REVDATE "2004-08-04"
/* std::string holding the text "X-Annoyance-Filter". */
#define Xfile string("X-Annoyance-Filter")
/* The literal " COUNTS "; the surrounding blanks are part of the value. */
#define pseudoCountsWord " COUNTS "
/* Constants describing the fast dictionary: version number, the all-ones
   "void link" value, the four character signature, and a floating point
   test value. */
#define fastDictionaryVersionNumber 1
#define fastDictionaryVoidLink static_cast<u_int32_t> (-1)
#define fastDictionarySignature "AFfd"
#define fastDictionaryFloatingTest (1.0/111)
/* The literal "From " (with trailing blank). */
#define messageSentinel "From "
/* Reduce a (possibly signed) character to an unsigned index in 0..255. */
#define ChIx(c) (static_cast<unsigned int> ((c) ) &0xFF)
/* HTML comment delimiters. */
#define HTMLCommentBegin "<!--"
#define HTMLCommentEnd "-->"
/* POP_BUFFER is POP_MAX_MESSAGE plus two. */
#define POP_MAX_MESSAGE 512
#define POP_BUFFER ((POP_MAX_MESSAGE) +2)
/* Test flag c of the object named `annotations' visible at the point of use. */
#define Annotate(c) (annotations.test(c) )
#line 9959 "annoyance-filter.w"
/*232:*/
#line 9066 "annoyance-filter.w"
#include "config.h"
/*238:*/
#line 9205 "annoyance-filter.w"
#ifdef WIN32
#undef HAVE_MMAP
#endif
/*:238*/
#line 9069 "annoyance-filter.w"
/*233:*/
#line 9095 "annoyance-filter.w"
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cstdlib>
#include <string>
#include <sstream>
#ifdef HAVE_FDSTREAM_COMPATIBILITY
#include "fdstream.hpp"
#endif
#ifdef HAVE_NEW_STRSTREAM
#include "mystrstream_new.h"
#else
#include "mystrstream.h"
#endif
#include <vector>
#include <algorithm>
#include <map>
#include <stack>
#include <deque>
#include <queue>
#include <list>
#include <set>
#include <bitset>
#include <functional>
#include <cmath>
using namespace std;
/*:233*/
#line 9071 "annoyance-filter.w"
/*234:*/
#line 9127 "annoyance-filter.w"
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <ctype.h>
#include <string.h>
#include <assert.h>
/*:234*/
#line 9072 "annoyance-filter.w"
/*235:*/
#line 9141 "annoyance-filter.w"
#ifdef HAVE_STAT
#include <sys/stat.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_DIRENT_H
#include <dirent.h>
#endif
#ifdef HAVE_MMAP
#include <sys/mman.h>
#endif
/*:235*/
#line 9073 "annoyance-filter.w"
#ifdef WIN32
#define __GNU_LIBRARY__
#undef __GETOPT_H__
#endif
#include <getopt.h>
#include "statlib.h"
/*237:*/
#line 9183 "annoyance-filter.w"
#if defined(HAVE_GNUPLOT) && defined(HAVE_NETPBM) && defined(HAVE_SYSTEM)
#define HAVE_PLOT_UTILITIES
#endif
#if defined(HAVE_DIRENT_H) && defined(HAVE_STAT)
#define HAVE_DIRECTORY_TRAVERSAL
#endif
#if defined(HAVE_PDFTOTEXT) && defined(HAVE_POPEN) && (defined(HAVE_MKSTEMP) || defined(HAVE_TMPNAM))
#define HAVE_PDF_DECODER
#endif
/*:237*/
#line 9082 "annoyance-filter.w"
/*236:*/
#line 9161 "annoyance-filter.w"
#if defined(HAVE_SOCKET) && defined(HAVE_SIGNAL)
#define POP3_PROXY_SERVER
#endif
#ifdef POP3_PROXY_SERVER
#include <signal.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <errno.h>
#endif
/*:236*/
#line 9084 "annoyance-filter.w"
/*:232*/
#line 9961 "annoyance-filter.w"
/*226:*/
#line 8926 "annoyance-filter.w"
/* File-scope program state and option settings.  All of these have
   internal linkage (static). */
/* Two message counters.  NOTE(review): presumably indexed by
   dictionaryWord::mailCategory (Mail = 0, Junk = 1) -- confirm. */
static unsigned int messageCount[2];
/* List of transcript lines. */
static list<string> messageTranscript;
/* Queue of parser diagnostic strings. */
static queue<string> parserDiagnostics;
/* Initially false.  NOTE(review): presumably enables collection into
   parserDiagnostics -- confirm against the code that tests it. */
static bool saveParserDiagnostics= false;
/*:226*//*241:*/
#line 9244 "annoyance-filter.w"
/* When set, progress messages are written to cerr (for example by the
   mailFolder directory constructor). */
static bool verbose= false;
#ifdef TYPE_LOG
/* Log stream opened on a fixed path; only compiled when TYPE_LOG is defined. */
static ofstream typeLog("/tmp/typelog.txt");
#endif
/* File names, initially empty.  NOTE(review): presumably destinations for
   parser diagnostics and the message transcript -- confirm. */
static string pDiagFilename= "";
static string transcriptFilename= "";
static bool pTokenTrace= false;
/* Token length limits, and a second pair with "stream" in the name that
   has a larger minimum. */
static unsigned int maxTokenLength= 64,minTokenLength= 1;
static unsigned int streamMaxTokenLength= 64,streamMinTokenLength= 5;
static bool singleDictionaryRead= true;
/* Phrase assembly parameters.  tokenParser::setSource turns phrase
   assembly on only when these are not both 1, 1 <= phraseMin <= phraseMax,
   and phraseLimit is either 0 or at least (phraseMax * 2) - 1. */
static unsigned int phraseMin= 1,phraseMax= 1;
static unsigned int phraseLimit= 48;
static unsigned int autoPrune= 0;
static bool popProxyTrace= false;
static bool sloppyheaders= false;
/*:241*//*250:*/
#line 9892 "annoyance-filter.w"
/* Character classification and case conversion macros driven by the
   isoalpha / isoupper / isolower bitmaps.  All of them evaluate their
   argument more than once (ISOch excepted), so the argument must not
   have side effects. */
/* Mask x to its low eight bits and yield it as an unsigned char. */
#define ISOch(x) (static_cast<unsigned char> ((x) & 0xFF))
/* White space test: true only for 7-bit characters that isspace() accepts. */
#define isISOspace(x) (isascii(ISOch(x)) && isspace(ISOch(x)))
/* Bitmap lookups: byte (c / 8) of the table, bit (0x80 >> (c % 8)). */
#define isISOalpha(x) ((isoalpha[ISOch(x) / 8] & (0x80 >> (ISOch(x) % 8))) != 0)
#define isISOupper(x) ((isoupper[ISOch(x) / 8] & (0x80 >> (ISOch(x) % 8))) != 0)
#define isISOlower(x) ((isolower[ISOch(x) / 8] & (0x80 >> (ISOch(x) % 8))) != 0)
/* Upper case conversion.  Characters that are not lower case are returned
   unchanged; 7-bit characters go through toupper(); other lower case
   characters have 0x20 subtracted, except 0xDF and 0xFF which are
   returned unchanged. */
#define toISOupper(x) (isISOlower(x) ? (isascii(((unsigned char) (x))) ? \
toupper(x) : (((ISOch(x) != 0xDF) && \
(ISOch(x) != 0xFF)) ? \
(ISOch(x) - 0x20) : (x))) : (x))
/* Lower case conversion.  Characters that are not upper case are returned
   unchanged; 7-bit characters go through tolower(); other upper case
   characters have 0x20 added. */
#define toISOlower(x) (isISOupper(x) ? (isascii(ISOch(x)) ? \
tolower(x) : (ISOch(x) + 0x20)) \
: (x))
/*:250*//*251:*/
#line 9911 "annoyance-filter.w"
/* 256-bit character class bitmaps, one bit per character code.  Character
   c is in the class when bit (0x80 >> (c % 8)) of byte [c / 8] is set;
   the isISOalpha / isISOupper / isISOlower macros perform that lookup. */
/* Alphabetic: the set bits cover 'A'-'Z', 'a'-'z', and 0xC0-0xFF except
   0xD7 and 0xF7. */
const unsigned char isoalpha[32]= {
0,0,0,0,0,0,0,0,127,255,255,224,127,255,255,224,0,0,0,0,0,0,0,0,255,255,
254,255,255,255,254,255
};
/* Upper case: the set bits cover 'A'-'Z' and 0xC0-0xDE except 0xD7. */
const unsigned char isoupper[32]= {
0,0,0,0,0,0,0,0,127,255,255,224,0,0,0,0,0,0,0,0,0,0,0,0,255,255,254,254,
0,0,0,0
};
/* Lower case: the set bits cover 'a'-'z', 0xDF, and 0xE0-0xFF except 0xF7. */
const unsigned char isolower[32]= {
0,0,0,0,0,0,0,0,0,0,0,0,127,255,255,224,0,0,0,0,0,0,0,0,0,0,0,1,255,255,
254,255
};
/*:251*/
#line 9962 "annoyance-filter.w"
/*10:*/
#line 1257 "annoyance-filter.w"
/*
 * dictionaryWord -- one dictionary entry: the token text, the number of
 * times it was seen in each message category, and its junk probability.
 */
class dictionaryWord{
public:
    /* Number of categories for which occurrence counts are kept. */
    static const unsigned int nCategories= 2;
    /* Mail and Junk index the occurrences[] array; Unknown does not. */
    enum mailCategory{Mail= 0,Junk= 1,Unknown};

    string text;
    unsigned int occurrences[nCategories];
    double junkProbability;

    /* Build an entry for text s with zero counts and probability -1. */
    dictionaryWord(string s= ""){
        set(s);
    }

    /* Overwrite every field of the entry. */
    void set(string s= "",unsigned int s_Mail= 0,unsigned int s_Junk= 0,
             double jProb= -1){
        junkProbability= jProb;
        occurrences[Junk]= s_Junk;
        occurrences[Mail]= s_Mail;
        text= s;
    }

    /* Return a copy of the token text. */
    string get()const{
        return text;
    }

    /* Occurrence count in the Mail category. */
    unsigned int n_mail()const{
        return occurrences[Mail];
    }

    /* Occurrence count in the Junk category. */
    unsigned int n_junk()const{
        return occurrences[Junk];
    }

    /* Sum of the occurrence counts over all categories. */
    unsigned int n_occurrences()const{
        unsigned int total= 0;
        unsigned int remaining= nCategories;
        while(remaining> 0){
            total+= occurrences[--remaining];
        }
        return total;
    }

    /* Add howMany occurrences to category cat (Mail or Junk only). */
    void add(mailCategory cat,unsigned int howMany= 1){
        assert(cat==Mail||cat==Junk);
        occurrences[cat]+= howMany;
    }

    /* Zero the count for cat (Mail or Junk only) and return the sum of
       the counts that remain. */
    unsigned int resetCat(mailCategory cat){
        assert(cat==Mail||cat==Junk);
        occurrences[cat]= 0;
        const unsigned int remaining= occurrences[Mail]+occurrences[Junk];
        return remaining;
    }

    void computeJunkProbability(unsigned int nMailMessages,unsigned int nJunkMessages,
                                double mailBias= 2,unsigned int minOccurrences= 5);

    /* Return the stored junk probability (-1 after a default set()). */
    double getJunkProbability()const{
        return junkProbability;
    }

    /* Length of the token text in characters. */
    unsigned int length()const{
        return static_cast<unsigned int>(text.length());
    }

    /* Memory estimate: the text length rounded up to a multiple of four,
       plus one string::size_type, the category counters, one double, and
       eight ints. */
    unsigned int estimateMemoryRequirement()const{
        const unsigned int paddedText= ((length()+3)/4)*4;
        const string::size_type counters= sizeof(unsigned int)*nCategories;
        const string::size_type extra= sizeof(int)*8;
        return paddedText+sizeof(string::size_type)+counters+sizeof(double)+extra;
    }

    /* Convert the token text to lower case in place using toISOlower. */
    void toLower(){
        transform(text.begin(),text.end(),text.begin(),&dictionaryWord::to_iso_lower);
    }

    void describe(ostream&os= cout);
    void exportCSV(ostream&os= cout);
    bool importCSV(istream&is= cin);

    /* Category name: "mail", "junk", or "unknown" for anything else. */
    static string categoryName(mailCategory c){
        if(c==Mail){
            return"mail";
        }
        if(c==Junk){
            return"junk";
        }
        return"unknown";
    }

    void exportToBinaryFile(ostream&os);
    bool importFromBinaryFile(istream&is);
protected:
/*18:*/
#line 1593 "annoyance-filter.w"
    /* Function wrappers around the case conversion macros, so that they
       can be handed to algorithms such as transform(). */
    static char to_iso_lower(char c){
        const char lowered= toISOlower(c);
        return lowered;
    }
    static char to_iso_upper(char c){
        const char raised= toISOupper(c);
        return raised;
    }
/*:18*/
#line 1351 "annoyance-filter.w"
    ;
};
/*:10*//*19:*/
#line 1613 "annoyance-filter.w"
/*
 * dictionary -- a map from token text to dictionaryWord, with a
 * memoryRequired counter carried alongside the map contents.
 */
class dictionary:public map<string,dictionaryWord> {
public:
    /* Value reported by estimateMemoryRequirement(); starts at zero. */
    unsigned int memoryRequired;

    void add(dictionaryWord w,dictionaryWord::mailCategory category);
    void include(dictionaryWord&w);
    void exportCSV(ostream&os= cout);
    void importCSV(istream&is= cin);
    void computeJunkProbability(unsigned int nMailMessages,unsigned int nJunkMessages,
                                double mailBias= 2,unsigned int minOccurrences= 5);
    void purge(unsigned int occurrences= 0);
    void resetCat(dictionaryWord::mailCategory category);
    void printStatistics(ostream&os= cout)const;
#ifdef HAVE_PLOT_UTILITIES
    void plotProbabilityHistogram(string fileName,unsigned int nBins= 20)const;
#endif
    void exportToBinaryFile(ostream&os);
    void importFromBinaryFile(istream&is);

    /* Return the current value of memoryRequired. */
    unsigned int estimateMemoryRequirement()const{
        return memoryRequired;
    }

    /* An empty dictionary with a zero memory requirement. */
    dictionary():memoryRequired(0){
    }
};
/*:19*//*32:*/
#line 2055 "annoyance-filter.w"
/*
 * fastDictionary -- read-only dictionary held as one block of bytes
 * (dblock) containing a hash table and a word table.  With HAVE_MMAP the
 * destructor unmaps dp and closes fileHandle; otherwise it frees dblock.
 *
 * NOTE(review): the class owns raw resources but has no copy control;
 * copying an instance would release them twice.  Confirm it is never
 * copied.
 */
class fastDictionary{
private:
    static const u_int16_t byteOrderMark= 0xFEFF;
    static const u_int16_t doubleSize= sizeof(double);
    static const u_int16_t versionNumber= fastDictionaryVersionNumber;
    unsigned char*dblock;          /* dictionary block; NULL until loaded */
    u_int32_t totalSize;
    u_int32_t hashTableOffset;
    u_int32_t hashTableBuckets;
    u_int32_t wordTableSize;
    u_int32_t*hashTable;
    unsigned char*wordTable;
#ifdef HAVE_MMAP
    char*dp;                       /* mapping address; NULL when unmapped */
    int fileHandle;
    long fileLength;
#endif
    /* Advise the user to rebuild the fast dictionary locally. */
    void regen(void)const{
        cerr<<"You should re-generate the fast dictionary on this machine."<<endl;
    }
    static unsigned int nextGreaterPrime(unsigned int a);
    static u_int32_t computeHashValue(const string&s);
    /* Copy bufl bytes from buf into v starting at index off.  The vector
       must already be large enough; no bounds checking is done. */
    static void Vmemcpy(vector<unsigned char> &v,
                        vector<unsigned char> ::size_type off,
                        const void*buf,const unsigned int bufl){
        const unsigned char*bp= static_cast<const unsigned char*> (buf);
        for(unsigned int i= 0;i<bufl;i++){
            v[off++]= *bp++;
        }
    }
public:
    /* Start with no dictionary loaded and every size and pointer zeroed,
       so that no member is ever read while indeterminate. */
    fastDictionary():dblock(NULL),totalSize(0),hashTableOffset(0),
                     hashTableBuckets(0),wordTableSize(0),
                     hashTable(NULL),wordTable(NULL){
#ifdef HAVE_MMAP
        dp= NULL;
        fileHandle= -1;
        fileLength= 0;
#endif
    }
    ~fastDictionary(){
#ifdef HAVE_MMAP
        if(dp!=NULL){
            munmap(dp,fileLength);
            close(fileHandle);
        }
#else
        /* dblock is a block of bytes, so it must be released with the
           array form of delete.  NOTE(review): this assumes load()
           allocates it with new unsigned char[n] -- confirm. */
        if(dblock!=NULL){
            delete[] dblock;
        }
#endif
    }
    bool load(const string fname);
    /* True once a dictionary block is present. */
    bool isDictionaryLoaded(void)const{
        return dblock!=NULL;
    }
    double find(const string&target)const;
    /* Print the size fields, or a notice when nothing is loaded. */
    void describe(ostream&os= cout)const{
        if(dblock!=NULL){
            os<<"Total size of fast dictionary is "<<totalSize<<endl;
            os<<"Hash table offset: "<<hashTableOffset<<endl;
            os<<"Hash table buckets: "<<hashTableBuckets<<endl;
            os<<"Word table size: "<<wordTableSize<<endl;
        }else{
            os<<"No fast dictionary is loaded."<<endl;
        }
    }
    static void exportDictionary(const dictionary&d,ostream&o);
    static void exportDictionary(const dictionary&d,const string fname);
};
/*:32*//*40:*/
#line 2491 "annoyance-filter.w"
class mailFolder;
/*
 * MIMEdecoder -- abstract base class for the MIME content decoders.
 * Concrete decoders supply name() and getDecodedChar(); this class holds
 * the input stream, the part boundary, the current encoded line and the
 * end-of-part / end-of-file flags.
 */
class MIMEdecoder{
public:
    istream*is;                    /* input stream */
    string partBoundary;           /* boundary string supplied to set() */
    bool atEnd;
    bool eofHit;
    unsigned int nDecodeErrors;
protected:
    string inputLine;              /* current encoded line */
    string::size_type ip;          /* read index into inputLine */
    unsigned encodedLineCount;
    bool lookAhead;
    int lookChar;
    string endBoundary;
    list<string> *tlist;           /* list supplied to set(), may be NULL */
    mailFolder*mf;
public:
    /* Construct a decoder.  All four arguments are passed on to set();
       in particular the list tl is kept rather than being discarded. */
    MIMEdecoder(istream*i= NULL,mailFolder*m= NULL,string pb= "",list<string> *tl= NULL){
        set(i,m,pb,tl);
        resetDecodeErrors();
    }
    virtual~MIMEdecoder(){
    };
    /* (Re)attach the decoder to stream i, folder m, part boundary pb and
       list tl, and reset the per-part reading state. */
    void set(istream*i= NULL,mailFolder*m= NULL,
             string pb= "",list<string> *tl= NULL){
        is= i;
        mf= m;
        partBoundary= pb;
        inputLine= "";
        ip= 0;
        encodedLineCount= 0;
        lookAhead= false;
        atEnd= false;
        eofHit= false;
        tlist= tl;
    }
    virtual string name(void)const= 0;
    virtual void resetDecodeErrors(void){
        nDecodeErrors= 0;
    }
    virtual unsigned int getDecodeErrors(void)const{
        return nDecodeErrors;
    }
    virtual string getTerminatorSentinel(void)const{
        return endBoundary;
    }
    virtual bool isEndOfFile(void)const{
        return eofHit;
    }
    virtual unsigned int getEncodedLineCount(void)const{
        return encodedLineCount;
    }
    /* Return the next decoded byte.  The decoders in this file return -1
       when no more data is available. */
    virtual int getDecodedChar(void)= 0;
    virtual bool getDecodedLine(string&s);
    virtual void saveDecodedStream(ostream&os);
    virtual void saveDecodedStream(const string fname);
protected:
    virtual bool getNextEncodedLine(void);
};
/*:40*//*46:*/
#line 2714 "annoyance-filter.w"
/*
 * identityMIMEdecoder -- decoder which passes the encoded lines through
 * unchanged.
 */
class identityMIMEdecoder:public MIMEdecoder{
public:
    string name()const{
        return string("Identity");
    }

    /* Return the next character of the current line as 0..255, fetching
       further lines as needed; -1 once atEnd is set. */
    int getDecodedChar(){
        for(;;){
            if(atEnd){
                return-1;
            }
            if(ip<inputLine.length()){
                return inputLine[ip++]&0xFF;
            }
            /* Whatever the outcome, go round again: the atEnd test above
               decides whether we are finished. */
            getNextEncodedLine();
        }
    }

    /* Store in s the unread remainder of the current line or, when that
       is exhausted, the whole of the next line.  Returns false when no
       further line could be obtained. */
    bool getDecodedLine(string&s){
        const bool haveRemainder= ip<inputLine.length();
        if(!haveRemainder&&!getNextEncodedLine()){
            return false;
        }
        s= haveRemainder?inputLine.substr(ip):inputLine;
        ip= inputLine.length();
        return true;
    }
};
/*:46*//*47:*/
#line 2755 "annoyance-filter.w"
/*
 * sinkMIMEdecoder -- decoder which discards its input.
 */
class sinkMIMEdecoder:public MIMEdecoder{
public:
    string name()const{
        return string("Sink");
    }

    /* Read and drop encoded lines until none remain, then return -1. */
    int getDecodedChar(){
        if(atEnd){
            return-1;
        }
        while(getNextEncodedLine()){
        }
        assert(atEnd);
        return-1;
    }
};
/*:47*//*48:*/
#line 2778 "annoyance-filter.w"
/*
 * base64MIMEdecoder -- Base64 decoder.  The decoding table is filled in
 * once, when the object is constructed.
 */
class base64MIMEdecoder:public MIMEdecoder{
private:
    unsigned char dtable[256];           /* one entry per input byte value */
    void initialiseDecodingTable();
    deque<unsigned char> decodedBytes;   /* queue of decoded bytes */
public:
    base64MIMEdecoder(){
        initialiseDecodingTable();
    }

    string name()const{
        return string("Base64");
    }

    int getDecodedChar();
    static string decodeEscapedText(const string s,mailFolder*m= NULL);
};
/*:48*//*58:*/
#line 3023 "annoyance-filter.w"
/*
 * quotedPrintableMIMEdecoder -- Quoted-Printable decoder.
 */
class quotedPrintableMIMEdecoder:public MIMEdecoder{
public:
    /* The end-of-line flag starts out clear. */
    quotedPrintableMIMEdecoder():atEndOfLine(false){
    }

    string name()const{
        return string("Quoted-Printable");
    }

    int getDecodedChar();
    static string decodeEscapedText(const string s,mailFolder*m= NULL);
protected:
    bool atEndOfLine;
    int getNextChar();
    static int hex_to_nybble(const int ch);
};
/*:58*//*68:*/
#line 3287 "annoyance-filter.w"
/*
 * MBCSdecoder -- abstract base class for multi-byte character set
 * decoders.  A decoder reads encoded bytes from a source string set with
 * setSource(); derived classes implement getNextDecodedChar().
 *
 * The decoder stores only a pointer to the source string, so the string
 * passed to setSource() must outlive its use by the decoder.
 */
class MBCSdecoder{
protected:
    const string*src;              /* source line, NULL until setSource() */
    string::size_type p;           /* index of the next byte in *src */
    mailFolder*mf;
public:
    /* Construct a decoder with no source, attached to mail folder m
       (the argument is stored, not ignored). */
    MBCSdecoder(mailFolder*m= NULL):src(NULL),p(0),mf(m){
    }
    virtual~MBCSdecoder(){
    }
    virtual string name(void)= 0;
    /* Read subsequent bytes from s, starting at its first byte. */
    virtual void setSource(const string&s){
        src= &s;
        p= 0;
    }
    virtual void setMailFolder(mailFolder*m= NULL){
        mf= m;
    }
    virtual void reset(void){
    }
    virtual int getNextDecodedChar(void)= 0;
    /* Return the next source byte as 0..255, or -1 when the source is
       exhausted or no source has been set. */
    virtual int getNextEncodedByte(void){
        if((src==NULL)||(p>=src->length())){
            return-1;
        }
        return((*src)[p++])&0xFF;
    }
protected:
    virtual int getNextNBytes(const unsigned int n);
    virtual int getNext2Bytes(void){
        return getNextNBytes(2);
    }
    virtual int getNext3Bytes(void){
        return getNextNBytes(3);
    }
    virtual int getNext4Bytes(void){
        return getNextNBytes(4);
    }
    /* Skip whatever remains of the source line (no-op without a source). */
    virtual void discardLine(void){
        if(src!=NULL){
            p= src->length();
        }
    }
    virtual void reportDecoderDiagnostic(const string s)const;
    virtual void reportDecoderDiagnostic(const ostringstream&os)const;
};
/*:68*//*71:*/
#line 3402 "annoyance-filter.w"
/*
 * EUC_MBCSdecoder -- MBCSdecoder whose name() is "EUC".
 */
class EUC_MBCSdecoder:public MBCSdecoder{
public:
    virtual string name(){
        return string("EUC");
    }

    virtual int getNextDecodedChar();
};
/*:71*//*73:*/
#line 3463 "annoyance-filter.w"
/*
 * Big5_MBCSdecoder -- MBCSdecoder whose name() is "Big5".
 */
class Big5_MBCSdecoder:public MBCSdecoder{
public:
    virtual string name(){
        return string("Big5");
    }

    virtual int getNextDecodedChar();
};
/*:73*//*75:*/
#line 3509 "annoyance-filter.w"
/*
 * Shift_JIS_MBCSdecoder -- MBCSdecoder whose name() is "Shift_JIS".  It
 * carries a string member, pending, which starts out empty.
 */
class Shift_JIS_MBCSdecoder:public MBCSdecoder{
protected:
    string pending;
public:
    Shift_JIS_MBCSdecoder():pending(){
    }

    virtual~Shift_JIS_MBCSdecoder(){
    }

    virtual string name(){
        return string("Shift_JIS");
    }

    virtual int getNextDecodedChar();
};
/*:75*//*80:*/
#line 3633 "annoyance-filter.w"
/*
 * Unicode_MBCSdecoder -- common parent of the Unicode decoders.  It is
 * still abstract: getNextDecodedChar() remains pure virtual.
 */
class Unicode_MBCSdecoder:public MBCSdecoder{
public:
    virtual string name(){
        return string("Unicode");
    }

    virtual int getNextDecodedChar()= 0;
};
/*:80*//*81:*/
#line 3651 "annoyance-filter.w"
/*
 * UCS_2_Unicode_MBCSdecoder -- Unicode decoder with a selectable byte
 * order (big-endian unless told otherwise).
 */
class UCS_2_Unicode_MBCSdecoder:public Unicode_MBCSdecoder{
protected:
    bool bigEndian;
public:
    UCS_2_Unicode_MBCSdecoder(bool isBigEndian= true):bigEndian(isBigEndian){
    }

    /* Select big-endian (true) or little-endian (false) byte order. */
    void setBigEndian(bool isBigEndian= true){
        bigEndian= isBigEndian;
    }

    virtual string name(){
        return string("UCS_2_Unicode");
    }

    virtual int getNextDecodedChar();
};
/*:81*//*83:*/
#line 3702 "annoyance-filter.w"
/*
 * UTF_8_Unicode_MBCSdecoder -- Unicode decoder whose name() is
 * "UTF_8_Unicode".
 */
class UTF_8_Unicode_MBCSdecoder:public Unicode_MBCSdecoder{
public:
    virtual string name(){
        return string("UTF_8_Unicode");
    }

    virtual int getNextDecodedChar();
};
/*:83*//*85:*/
#line 3786 "annoyance-filter.w"
/*
 * UTF_16_Unicode_MBCSdecoder -- Unicode decoder reading 16-bit words in a
 * selectable byte order (big-endian unless told otherwise).
 */
class UTF_16_Unicode_MBCSdecoder:public Unicode_MBCSdecoder{
protected:
    bool bigEndian;

    /* Assemble the next 16-bit word from two encoded bytes.  Returns the
       negative value from getNextEncodedByte() when no byte is left, and
       -1 (after reporting a diagnostic) when only one byte is left. */
    int getNextUTF_16Word(){
        const int first= getNextEncodedByte();
        if(first<0){
            return first;
        }
        const int second= getNextEncodedByte();
        if(second<0){
            ostringstream os;
            os<<name()<<"_MBCSdecoder: Premature end of line in UTF-16 character.";
            reportDecoderDiagnostic(os);
            return-1;
        }
        return bigEndian?((first<<8)|second):(first|(second<<8));
    }
public:
    UTF_16_Unicode_MBCSdecoder(bool isBigEndian= true):bigEndian(isBigEndian){
    }

    /* Select big-endian (true) or little-endian (false) byte order. */
    void setBigEndian(bool isBigEndian= true){
        bigEndian= isBigEndian;
    }

    virtual string name(){
        return string("UTF_16_Unicode");
    }

    virtual int getNextDecodedChar();
};
/*:85*//*88:*/
#line 3883 "annoyance-filter.w"
class MBCSinterpreter{
protected:
const string*src;
MBCSdecoder*dp;
string prefix,suffix;
public:
virtual~MBCSinterpreter(){
}
virtual string name(void)= 0;
virtual void setDecoder(MBCSdecoder&d){
dp= &d;
}
virtual void setSource(const string&s){
assert(dp!=NULL);
dp->setSource(s);
}
virtual void setPrefixSuffix(string pre= "",string suf= ""){
prefix= pre;
suffix= suf;
}
virtual string getNextDecodedChar(void);
virtual string decodeLine(const string&s);
};
/*:88*//*91:*/
#line 3969 "annoyance-filter.w"
/*
 * GB2312_MBCSinterpreter -- interpreter named "GB2312"; prefix and suffix
 * are each a single blank.
 */
class GB2312_MBCSinterpreter:public MBCSinterpreter{
public:
    GB2312_MBCSinterpreter(){
        setPrefixSuffix(string(" "),string(" "));
    }

    virtual string name(){
        return string("GB2312");
    }
};
/*:91*//*92:*/
#line 3986 "annoyance-filter.w"
/*
 * Big5_MBCSinterpreter -- interpreter named "Big5"; prefix and suffix are
 * each a single blank.
 */
class Big5_MBCSinterpreter:public MBCSinterpreter{
public:
    Big5_MBCSinterpreter(){
        setPrefixSuffix(string(" "),string(" "));
    }

    virtual string name(){
        return string("Big5");
    }
};
/*:92*//*93:*/
#line 4003 "annoyance-filter.w"
/*
 * Shift_JIS_MBCSinterpreter -- interpreter named "Shift_JIS"; prefix and
 * suffix are each a single blank.  It supplies its own
 * getNextDecodedChar().
 */
class Shift_JIS_MBCSinterpreter:public MBCSinterpreter{
public:
    Shift_JIS_MBCSinterpreter(){
        setPrefixSuffix(string(" "),string(" "));
    }

    virtual string name(){
        return string("Shift_JIS");
    }

    string getNextDecodedChar();
};
/*:93*//*95:*/
#line 4053 "annoyance-filter.w"
/*
 * KR_MBCSinterpreter -- interpreter named "KR".  It sets no prefix or
 * suffix of its own.
 */
class KR_MBCSinterpreter:public MBCSinterpreter{
public:
    virtual string name(){
        return string("KR");
    }
};
/*:95*//*96:*/
#line 4066 "annoyance-filter.w"
/*
 * Unicode_MBCSinterpreter -- interpreter named "Unicode"; prefix and
 * suffix are each a single blank.  It supplies its own
 * getNextDecodedChar().
 */
class Unicode_MBCSinterpreter:public MBCSinterpreter{
public:
    Unicode_MBCSinterpreter(){
        setPrefixSuffix(string(" "),string(" "));
    }

    virtual string name(){
        return string("Unicode");
    }

    string getNextDecodedChar();
};
/*:96*//*98:*/
#line 4136 "annoyance-filter.w"
/*
 * applicationStringParser -- abstract base class for parsers which
 * extract strings from application data.  Derived classes supply name()
 * and nextString(); this class keeps the error and end-of-file flags and
 * the associated mail folder.
 */
class applicationStringParser{
protected:
    bool error,eof;
    mailFolder*mf;

    virtual unsigned char get8();

    /* Read up to n bytes into buf with get8(), stopping early once the
       eof flag is set. */
    virtual void get8n(unsigned char*buf,const int n){
        int filled= 0;
        while((!eof)&&(filled<n)){
            buf[filled]= get8();
            filled++;
        }
    }
public:
    /* Construct a parser with both flags clear, attached to folder f. */
    applicationStringParser(mailFolder*f= NULL):
        error(false),eof(false),mf(f){
    }

    virtual~applicationStringParser(){
    }

    virtual string name()const= 0;

    void setMailFolder(mailFolder*f){
        mf= f;
    }

    virtual bool nextString(string&s)= 0;

    /* Clear the error and end-of-file flags. */
    virtual void close(){
        eof= false;
        error= false;
    }

    bool isError()const{
        return error;
    }

    bool isEOF()const{
        return eof;
    }

    /* True when neither end of file nor an error has been flagged. */
    bool isOK()const{
        return!(isEOF()||isError());
    }
};
/*:98*//*100:*/
#line 4209 "annoyance-filter.w"
/*
 * flashStream -- low level reader for Flash streams.  It declares the
 * tag, action and flag codes, keeps the header fields and the type and
 * data length of the current tag, and provides little-endian integer,
 * string and bit field input primitives built on get8().
 */
class flashStream:public applicationStringParser{
protected:
/*110:*/
#line 4553 "annoyance-filter.w"
    /* Tag type codes. */
    typedef enum{
        stagEnd= 0,
        stagShowFrame= 1,
        stagDefineShape= 2,
        stagFreeCharacter= 3,
        stagPlaceObject= 4,
        stagRemoveObject= 5,
        stagDefineBits= 6,
        stagDefineButton= 7,
        stagJPEGTables= 8,
        stagSetBackgroundColor= 9,
        stagDefineFont= 10,
        stagDefineText= 11,
        stagDoAction= 12,
        stagDefineFontInfo= 13,
        stagDefineSound= 14,
        stagStartSound= 15,
        stagDefineButtonSound= 17,
        stagSoundStreamHead= 18,
        stagSoundStreamBlock= 19,
        stagDefineBitsLossless= 20,
        stagDefineBitsJPEG2= 21,
        stagDefineShape2= 22,
        stagDefineButtonCxform= 23,
        stagProtect= 24,
        stagPlaceObject2= 26,
        stagRemoveObject2= 28,
        stagDefineShape3= 32,
        stagDefineText2= 33,
        stagDefineButton2= 34,
        stagDefineBitsJPEG3= 35,
        stagDefineBitsLossless2= 36,
        stagDefineEditText= 37,
        stagDefineSprite= 39,
        stagNameCharacter= 40,
        stagFrameLabel= 43,
        stagSoundStreamHead2= 45,
        stagDefineMorphShape= 46,
        stagDefineFont2= 48,
    }tagType;
/*:110*/
#line 4213 "annoyance-filter.w"
    ;
/*111:*/
#line 4607 "annoyance-filter.w"
    /* Action codes. */
    typedef enum{
        sactionNone= 0x00,
        sactionNextFrame= 0x04,
        sactionPrevFrame= 0x05,
        sactionPlay= 0x06,
        sactionStop= 0x07,
        sactionToggleQuality= 0x08,
        sactionStopSounds= 0x09,
        sactionAdd= 0x0A,
        sactionSubtract= 0x0B,
        sactionMultiply= 0x0C,
        sactionDivide= 0x0D,
        sactionEqual= 0x0E,
        sactionLessThan= 0x0F,
        sactionLogicalAnd= 0x10,
        sactionLogicalOr= 0x11,
        sactionLogicalNot= 0x12,
        sactionStringEqual= 0x13,
        sactionStringLength= 0x14,
        sactionSubString= 0x15,
        sactionInt= 0x18,
        sactionEval= 0x1C,
        sactionSetVariable= 0x1D,
        sactionSetTargetExpression= 0x20,
        sactionStringConcat= 0x21,
        sactionGetProperty= 0x22,
        sactionSetProperty= 0x23,
        sactionDuplicateClip= 0x24,
        sactionRemoveClip= 0x25,
        sactionTrace= 0x26,
        sactionStartDragMovie= 0x27,
        sactionStopDragMovie= 0x28,
        sactionStringLessThan= 0x29,
        sactionRandom= 0x30,
        sactionMBLength= 0x31,
        sactionOrd= 0x32,
        sactionChr= 0x33,
        sactionGetTimer= 0x34,
        sactionMBSubString= 0x35,
        sactionMBOrd= 0x36,
        sactionMBChr= 0x37,
        sactionHasLength= 0x80,
        sactionGotoFrame= 0x81,
        sactionGetURL= 0x83,
        sactionWaitForFrame= 0x8A,
        sactionSetTarget= 0x8B,
        sactionGotoLabel= 0x8C,
        sactionWaitForFrameExpression= 0x8D,
        sactionPushData= 0x96,
        sactionBranchAlways= 0x99,
        sactionGetURL2= 0x9A,
        sactionBranchIfTrue= 0x9D,
        sactionCallFrame= 0x9E,
        sactionGotoExpression= 0x9F
    }actionCode;
/*:111*/
#line 4214 "annoyance-filter.w"
    ;
/*112:*/
#line 4671 "annoyance-filter.w"
    /* Bit flags for fonts, text records and edit text fields. */
    typedef enum{
        fontUnicode= 0x20,
        fontShiftJIS= 0x10,
        fontANSI= 0x08,
        fontItalic= 0x04,
        fontBold= 0x02,
        fontWideCodes= 0x01
    }fontFlags;
    typedef enum{
        isTextControl= 0x80,
        textHasFont= 0x08,
        textHasColor= 0x04,
        textHasYOffset= 0x02,
        textHasXOffset= 0x01
    }textFlags;
    typedef enum{
        seditTextFlagsHasFont= 0x0001,
        seditTextFlagsHasMaxLength= 0x0002,
        seditTextFlagsHasTextColor= 0x0004,
        seditTextFlagsReadOnly= 0x0008,
        seditTextFlagsPassword= 0x0010,
        seditTextFlagsMultiline= 0x0020,
        seditTextFlagsWordWrap= 0x0040,
        seditTextFlagsHasText= 0x0080,
        seditTextFlagsUseOutlines= 0x0100,
        seditTextFlagsBorder= 0x0800,
        seditTextFlagsNoSelect= 0x1000,
        seditTextFlagsHasLayout= 0x2000
    }editTextFlags;
/*:112*/
#line 4215 "annoyance-filter.w"
    ;
/*113:*/
#line 4712 "annoyance-filter.w"
    /* Rectangle and matrix records. */
    typedef struct{
        int xMin,xMax,yMin,yMax;
    }rect;
    typedef struct{
        int a;
        int b;
        int c;
        int d;
        int tx;
        int ty;
    }matrix;
/*:113*/
#line 4216 "annoyance-filter.w"
    ;
    /* Header fields. */
    unsigned char sig[3];
    unsigned char version;
    unsigned int fileLength;
    rect frameSize;
    unsigned short frameRate;
    unsigned short frameCount;
    /* Type and data length of the current tag. */
    tagType tType;
    unsigned int tDataLen;
    /* Bit field extraction state. */
    unsigned int bitBuf,bitPos;
public:
    /* Construct a reader attached to folder f.  Every header and tag
       field is zeroed so that the accessors never return an
       indeterminate value before a header or tag has been read. */
    flashStream(mailFolder*f= NULL):
        applicationStringParser(f),
        version(0),fileLength(0),frameRate(0),frameCount(0),
        tType(stagEnd),tDataLen(0),bitBuf(0),bitPos(0){
        memset(sig,0,sizeof sig);
        memset(&frameSize,0,sizeof frameSize);
    }
    void readHeader(void);
    void describe(ostream&os= cout);
    bool nextTag(void);
    tagType getTagType(void)const{
        return tType;
    }
    unsigned int getTagDataLength(void)const{
        return tDataLen;
    }
    void ignoreTag(unsigned int lookedAhead= 0);
    virtual void close(void){
        applicationStringParser::close();
    }
protected:
/*108:*/
#line 4463 "annoyance-filter.w"
    /* Read a 16-bit little-endian quantity. */
    unsigned short get16(void){
        unsigned short u16;
        u16= get8();
        u16|= get8()<<8;
        return u16;
    }
    /* Read a 32-bit little-endian quantity.  Each byte is converted to
       unsigned int before shifting: get8() yields an unsigned char, which
       promotes to (signed) int, and shifting such a value left by 24 can
       overflow into the sign bit. */
    unsigned int get32(void){
        unsigned int u32;
        u32= get8();
        u32|= static_cast<unsigned int> (get8())<<8;
        u32|= static_cast<unsigned int> (get8())<<16;
        u32|= static_cast<unsigned int> (get8())<<24;
        return u32;
    }
    /* Read up to n 16-bit quantities, stopping early at end of file. */
    void get16n(unsigned short*buf,const int n){
        for(int i= 0;(!eof)&&(i<n);i++){
            buf[i]= get16();
        }
    }
    /* Read up to n 32-bit quantities, stopping early at end of file. */
    void get32n(unsigned int*buf,const int n){
        for(int i= 0;(!eof)&&(i<n);i++){
            buf[i]= get32();
        }
    }
/*:108*/
#line 4264 "annoyance-filter.w"
    ;
    /* Read and discard up to n bytes, stopping early at end of file. */
    void skip8n(const int n){
        for(int i= 0;(!eof)&&(i<n);i++){
            get8();
        }
    }
    void getString(string&s,int n= -1);
    void initBits(void);
    unsigned int getBits(int n);
    int getSignedBits(const int n);
    void getRect(rect*r);
    void getMatrix(matrix*mat);
};
/*:100*//*114:*/
#line 4734 "annoyance-filter.w"
/*
 * flashTextExtractor -- string parser named "Flash", built on
 * flashStream.  It owns the glyph vectors stored in fontMap and frees
 * them in close().
 */
class flashTextExtractor:public flashStream{
protected:
    map<unsigned short,vector<unsigned short> *> fontMap;   /* owns the vectors */
    map<unsigned short,unsigned short> fontGlyphCount;
    map<unsigned short,fontFlags> fontInfoBits;
    queue<string> strings;
    bool initialised;
    bool textOnly;
public:
    flashTextExtractor(mailFolder*f= NULL):
        flashStream(f),initialised(false),textOnly(false){
    }

    /* Free everything the extractor still holds. */
    ~flashTextExtractor(){
        close();
    }

    virtual string name()const{
        return string("Flash");
    }

    void setTextOnly(const bool tf){
        textOnly= tf;
    }

    bool getTextOnly()const{
        return textOnly;
    }

    bool nextString(string&s);

    /* Delete the vectors held in fontMap, empty all the tables and the
       string queue, clear both flags, then close the underlying stream. */
    virtual void close(){
        typedef map<unsigned short,vector<unsigned short> *> ::iterator fontIterator;
        for(fontIterator f= fontMap.begin();f!=fontMap.end();++f){
            delete f->second;
        }
        fontMap.clear();
        fontGlyphCount.clear();
        fontInfoBits.clear();
        strings= queue<string> ();
        initialised= false;
        textOnly= false;
        flashStream::close();
    }
};
/*:114*//*125:*/
#line 5296 "annoyance-filter.w"
#ifdef HAVE_PDF_DECODER
/*
 * pdfTextExtractor -- string parser named "PDF".  It holds a pipe (ip),
 * an input stream (is) and the name of a temporary file (tempfn), all of
 * which are released by close().
 */
class pdfTextExtractor:public applicationStringParser{
protected:
    bool initialised;
#ifdef HAVE_FDSTREAM_COMPATIBILITY
    fdistream is;
#else
    ifstream is;
#endif
    FILE*ip;                       /* pipe; NULL when none is open */
#ifdef HAVE_MKSTEMP
    char tempfn[256];
#else
    char tempfn[L_tmpnam+2];
#endif
public:
    pdfTextExtractor(mailFolder*f= NULL):
        applicationStringParser(f),
        initialised(false),
        ip(NULL){
    }

    /* Release the pipe and temporary file if still open. */
    ~pdfTextExtractor(){
        close();
    }

    virtual string name()const{
        return string("PDF");
    }

    bool nextString(string&s);

    /* If a pipe is open: close the stream (when it is an ifstream), close
       the pipe and remove the temporary file.  In all cases clear the
       base class flags and mark the extractor uninitialised. */
    virtual void close(){
        FILE*const pipe= ip;
        ip= NULL;
        if(pipe!=NULL){
#ifndef HAVE_FDSTREAM_COMPATIBILITY
            is.close();
#endif
            pclose(pipe);
            remove(tempfn);
        }
        initialised= false;
        applicationStringParser::close();
    }
};
#endif
/*:125*//*129:*/
#line 5436 "annoyance-filter.w"
/*132:*/
#line 5824 "annoyance-filter.w"
#ifdef HAVE_POPEN
#if (defined HAVE_GUNZIP) || (defined HAVE_GZCAT) || (defined HAVE_GZIP)
#define COMPRESSED_FILES
static const char Compressed_file_type[]= ".gz";
static const char Uncompress_command[]=
#if(defined HAVE_GUNZIP)
"gunzip -c"
#elif(defined HAVE_GZCAT)
"gzcat"
#elif(defined HAVE_GZIP)
"gzip -cd"
#endif
;
#elif (defined HAVE_ZCAT) || (defined HAVE_UNCOMPRESS) || (defined HAVE_COMPRESS)
#define COMPRESSED_FILES
static const char Compressed_file_type[]= ".Z";
static const char Uncompress_command[]=
#if(defined HAVE_ZCAT)
"zcat"
#elif(defined HAVE_UNCOMPRESS)
"uncompress -c"
#elif(defined HAVE_COMPRESS)
"compress -cd"
#endif
;
#endif
#endif
/*:132*/
#line 5437 "annoyance-filter.w"
/*
 * mailFolder -- a source of mail messages.  The folder reads from an
 * istream supplied by the caller or from a named file; depending on the
 * configuration the named file may be "-" (standard input), a compressed
 * file read through a pipe, or a directory of files.  The class also
 * carries the header and MIME state for the message being read and the
 * decoder objects used on its parts.
 */
class mailFolder{
public:
    istream*is;                            /* stream currently being read */
    dictionaryWord::mailCategory category; /* category assigned to the folder */
    unsigned int nLines;
    unsigned int nMessages;
    bool newMessage;
    bool expectingNewMessage;
    bool lastLineBlank;
    bool BSDfolder;
    bool inHeader;
    string lookAheadLine;
    bool lookedAhead;
    ifstream isc;
#if defined(COMPRESSED_FILES) && defined(HAVE_FDSTREAM_COMPATIBILITY)
    fdistream iscc;
#endif
    string fromLine;
    string messageID;
    string lastFromLine;
    string lastMessageID;
#if defined(COMPRESSED_FILES) || defined(HAVE_DIRECTORY_TRAVERSAL)
    FILE*ip;                               /* pipe from popen(), or NULL */
#endif
#ifdef HAVE_DIRECTORY_TRAVERSAL
    bool dirFolder;                        /* true when reading a directory */
    DIR*dh;
    string dirName,cfName;
    string pathSeparator;
#ifdef HAVE_FDSTREAM_COMPATIBILITY
    fdistream ifcdir;
#endif
    ifstream ifdir;
    istringstream nullstream;              /* empty input for an empty directory */
#endif
    /* Content fields for the message body. */
    string bodyContentType;
    string bodyContentTypeCharset;
    string bodyContentTypeName;
    string bodyContentTransferEncoding;
    /* Multi-part state. */
    string partBoundary;
    bool multiPart;
    bool inPartHeader;
    unsigned int partHeaderLines;
    stack<string> partBoundaryStack;
    /* Content fields for the current MIME part. */
    string mimeContentType;
    string mimeContentTypeCharset;
    string mimeContentTypeName;
    string mimeContentTypeBoundary;
    string mimeContentTransferEncoding;
    string mimeContentDispositionFilename;
    /* Active MIME decoder (NULL when none) and the available decoders. */
    MIMEdecoder*mdp;
    identityMIMEdecoder imd;
    base64MIMEdecoder bmd;
    sinkMIMEdecoder smd;
    quotedPrintableMIMEdecoder qmd;
    /* Active character set interpreter (NULL when none) and the available
       decoders and interpreters. */
    MBCSinterpreter*mbi;
    EUC_MBCSdecoder mbd_euc;
    GB2312_MBCSinterpreter mbi_gb2312;
    Big5_MBCSdecoder mbd_big5;
    Big5_MBCSinterpreter mbi_big5;
    KR_MBCSinterpreter mbi_kr;
    UTF_8_Unicode_MBCSdecoder mbd_utf_8;
    Unicode_MBCSinterpreter mbi_unicode;
    /* Active application string parser (NULL when none) and the available
       parsers. */
    applicationStringParser*asp;
    flashTextExtractor aspFlash;
#ifdef HAVE_PDF_DECODER
    pdfTextExtractor aspPdf;
#endif
    bool byteStream;
    list<string> *tlist;                   /* transcript list, may be NULL */
    list<string> *dlist;                   /* diagnostic list, may be NULL */

    /* Read the folder from a stream owned by the caller. */
    mailFolder(istream&i,dictionaryWord::mailCategory cat= dictionaryWord::Unknown){
#if defined(COMPRESSED_FILES) || defined(HAVE_DIRECTORY_TRAVERSAL)
        ip= NULL;
#endif
#ifdef HAVE_DIRECTORY_TRAVERSAL
        dirFolder= false;
#endif
        set(&i,cat);
    }

    /* Read the folder from the file or directory fname.  The name "-"
       selects standard input.  The program exits with status 1 if the
       folder cannot be opened. */
    mailFolder(string fname,dictionaryWord::mailCategory cat= dictionaryWord::Unknown){
#if defined(COMPRESSED_FILES) || defined(HAVE_DIRECTORY_TRAVERSAL)
        ip= NULL;
#endif
/*135:*/
#line 5913 "annoyance-filter.w"
#ifdef HAVE_DIRECTORY_TRAVERSAL
        dirFolder= false;
        struct stat fs;
        if((stat(fname.c_str(),&fs)==0)&&S_ISDIR(fs.st_mode)){
            dh= opendir(fname.c_str());
            if(dh!=NULL){
                dirFolder= true;
                dirName= fname;
                pathSeparator= '/';
                if(!findNextFileInDirectory(fname)){
                    /* No file found: read from an empty stream. */
                    nullstream.str("");
                    is= &nullstream;
                }else{
                    if(verbose){
                        cerr<<"Processing files from directory \""<<
                            dirName<<"\"."<<endl;
                    }
                }
            }else{
                cerr<<"Cannot open mail folder directory \""<<fname<<"\""<<endl;
                exit(1);
            }
        }
#endif
/*:135*/
#line 5547 "annoyance-filter.w"
        ;
#ifdef HAVE_DIRECTORY_TRAVERSAL
        if(!dirFolder){
#endif
#ifdef COMPRESSED_FILES
            /* Name whose suffix decides whether the file is compressed.
               It is declared here, not inside the HAVE_READLINK section,
               because the suffix test below needs it whether or not
               symbolic links can be followed. */
            string jname= fname;
/*133:*/
#line 5866 "annoyance-filter.w"
#ifdef HAVE_READLINK
            /* Follow symbolic links so the suffix test sees the name of
               the file ultimately referred to. */
            int maxSlinks= 50;
            char slbuf[1024];
            while(maxSlinks--> 0){
                int sll= readlink(jname.c_str(),slbuf,(sizeof slbuf)-1);
                if(sll>=0){
                    assert(sll<static_cast<int> (sizeof slbuf));
                    slbuf[sll]= 0;
                    jname= slbuf;
                }else{
                    break;
                }
            }
            if(maxSlinks<=0){
                cerr<<"Warning: probable symbolic link loop for \""<<
                    fname<<"\""<<endl;
            }
#endif
/*:133*/
#line 5553 "annoyance-filter.w"
            ;
            /* The file is compressed when its name ends with the
               compressed file suffix.  The length is checked first so
               that a name shorter than the suffix can never match. */
            const string compressedType(Compressed_file_type);
            if((jname.length()>=compressedType.length())&&
               (jname.compare(jname.length()-compressedType.length(),
                              compressedType.length(),compressedType)==0)){
/*134:*/
#line 5895 "annoyance-filter.w"
                /* NOTE(review): fname is placed in a shell command line
                   without quoting or escaping; a name containing shell
                   metacharacters will be interpreted by the shell. */
                string cmd(Uncompress_command);
                cmd+= ' '+fname;
                ip= popen(cmd.c_str(),"r");
                if(ip==NULL){
                    /* popen() failed; fileno() must not be given NULL. */
                    cerr<<"Cannot open mail folder file "<<fname<<endl;
                    exit(1);
                }
#ifdef HAVE_FDSTREAM_COMPATIBILITY
                iscc.attach(fileno(ip));
                is= &iscc;
#else
                isc.attach(fileno(ip));
                is= &isc;
#endif
/*:134*/
#line 5557 "annoyance-filter.w"
                ;
            }else{
#endif
                if(fname=="-"){
                    is= &cin;
                }else{
                    isc.open(fname.c_str());
                    is= &isc;
                }
#ifdef COMPRESSED_FILES
            }
#endif
#ifdef HAVE_DIRECTORY_TRAVERSAL
        }
#endif
        if(!(*is)){
            cerr<<"Cannot open mail folder file "<<fname<<endl;
            exit(1);
        }
        set(is,cat);
    }

    /* Close the decompression pipe, if one is open. */
    ~mailFolder(){
#ifdef COMPRESSED_FILES
        if(ip!=NULL){
            pclose(ip);
        }
#endif
    }

    /* Attach the folder to stream i with category cat and reset the
       line and message counters, the look-ahead line, the content
       fields and the active decoder pointers. */
    void set(istream*i,dictionaryWord::mailCategory cat= dictionaryWord::Unknown){
        is= i;
        nLines= nMessages= 0;
        lookedAhead= false;
        lookAheadLine= "";
        category= cat;
        dlist= NULL;
        tlist= NULL;
/*142:*/
#line 6193 "annoyance-filter.w"
        mimeContentType= mimeContentTypeCharset=
            mimeContentTypeName= mimeContentDispositionFilename=
            mimeContentTypeBoundary= mimeContentTransferEncoding= "";
        mdp= NULL;
        mbi= NULL;
        asp= NULL;
        byteStream= false;
/*:142*/
#line 5595 "annoyance-filter.w"
        ;
        bodyContentType= bodyContentTypeCharset=
            bodyContentTypeName= bodyContentTransferEncoding= "";
        expectingNewMessage= true;
        setNewMessageEligiblity();
        BSDfolder= false;
    }
    void setCategory(dictionaryWord::mailCategory c){
        category= c;
    }
    dictionaryWord::mailCategory getCategory(void)const{
        return category;
    }
    void setBSDmode(bool mode){
        BSDfolder= mode;
    }
    bool getBSDmode(void)const{
        return BSDfolder;
    }
    /* Set the lastLineBlank flag. */
    void setNewMessageEligiblity(bool stat= true){
        lastLineBlank= stat;
    }
    void forceInHeader(bool state= true){
        inHeader= state;
    }
    bool nextLine(string&s);
    int nextByte(void);
#ifdef HAVE_DIRECTORY_TRAVERSAL
    bool findNextFileInDirectory(string&fname);
    bool openNextFileInDirectory(void);
#endif
    static void stringCanonicalise(string&s);
    static bool compareHeaderField(string&s,const string target,string&arg);
    static bool parseHeaderArgument(string&s,const string target,string&arg);
    static bool isSpoofedExecutableFileExtension(const string&s);
    bool isNewMessage(void)const{
        return newMessage;
    }
    unsigned int getMessageCount(void)const{
        return nMessages;
    }
    unsigned int getLineCount(void)const{
        return nLines;
    }
    bool isByteStream(void)const{
        return byteStream;
    }
    /* Print the category and the line and message counts. */
    void describe(ostream&os= cout)const{
        os<<"Mail folder. Category: "<<dictionaryWord::categoryName(category)<<endl;
        os<<" Lines: "<<getLineCount()<<" Messages: "<<getMessageCount()<<endl;
    }
    void setDiagnosticList(list<string> *lp){
        dlist= lp;
    }
    void setTranscriptList(list<string> *lp){
        tlist= lp;
    }
    unsigned int sizeMessageTranscript(const unsigned int lineOverhead= 1)const;
    void writeMessageTranscript(ostream&os= cout)const;
    void writeMessageTranscript(const string fname= "-")const;
    /* Empty the transcript list; one must have been set. */
    void clearMessageTranscript(void){
        assert(tlist!=NULL);
        tlist->clear();
    }
    void reportParserDiagnostic(const string s);
    void reportParserDiagnostic(const ostringstream&os);
};
/*:129*//*170:*/
#line 7105 "annoyance-filter.w"
/*
 * tokenDefinition -- describes what constitutes a token.  For each of the
 * 256 character codes there are three flags (may appear in a token; may
 * not make up a token exclusively; may not end a token), plus minimum
 * and maximum acceptable token lengths.
 */
class tokenDefinition{
protected:
    static const int numTokenChars= 256;
    bool isToken[numTokenChars],
         notExclusively[numTokenChars],
         notAtEnd[numTokenChars];
    unsigned int minTokenLength,maxTokenLength;

    /* Set table[cstart..cend] to v.  A cend of -1 means "just cstart".
       The range is clipped to the table, so an out of range argument can
       never write outside it even when assertions are compiled out. */
    static void setFlagRange(bool*table,bool v,const int cstart,const int cend){
        assert(cstart>=0&&cstart<=numTokenChars);
        assert((cend==-1)||(cend>=cstart&&cend<=numTokenChars));
        const int first= (cstart<0)?0:cstart;
        int last= (cend==-1)?cstart:cend;
        if(last>=numTokenChars){
            last= numTokenChars-1;
        }
        for(int i= first;i<=last;i++){
            table[i]= v;
        }
    }
public:
    tokenDefinition(){
        clear();
    }
    /* Clear every character flag and set the length limits to 1..65535. */
    void clear(void){
        for(int i= 0;i<numTokenChars;i++){
            isToken[i]= notExclusively[i]= notAtEnd[i]= false;
        }
        setLengthLimits(1,65535);
    }
    /* Set the length limits; an argument of zero leaves that limit as
       it was. */
    void setLengthLimits(unsigned int lmin= 0,unsigned int lmax= 0){
        if(lmin> 0){
            minTokenLength= lmin;
        }
        if(lmax> 0){
            maxTokenLength= lmax;
        }
    }
    unsigned int getLengthMin(void)const{
        return minTokenLength;
    }
    unsigned int getLengthMax(void)const{
        return maxTokenLength;
    }
    /* Flag queries; c must be in 0..255. */
    bool isTokenMember(const int c)const{
        assert(c>=0&&c<numTokenChars);
        return isToken[c];
    }
    bool isTokenNotExclusively(const int c)const{
        assert(c>=0&&c<numTokenChars);
        return notExclusively[c];
    }
    bool isTokenNotAtEnd(const int c)const{
        assert(c>=0&&c<numTokenChars);
        return notAtEnd[c];
    }
    /* True when length l lies within the minimum and maximum limits. */
    bool isTokenLengthAcceptable(string::size_type l)const{
        return(l>=minTokenLength)&&(l<=maxTokenLength);
    }
    bool isTokenLengthAcceptable(const string t)const{
        return isTokenLengthAcceptable(t.length());
    }
    /* Flag setters.  Each sets the flag to v for the characters cstart to
       cend inclusive, or for the single character cstart when cend is
       omitted. */
    void setTokenMember(bool v,const int cstart,const int cend= -1){
        setFlagRange(isToken,v,cstart,cend);
    }
    void setTokenNotExclusively(bool v,const int cstart,const int cend= -1){
        setFlagRange(notExclusively,v,cstart,cend);
    }
    void setTokenNotAtEnd(bool v,const int cstart,const int cend= -1){
        setFlagRange(notAtEnd,v,cstart,cend);
    }
    void setISO_8859defaults(unsigned int lmin= 0,unsigned int lmax= 0);
    void setUS_ASCIIdefaults(unsigned int lmin= 0,unsigned int lmax= 0);
};
/*:170*//*173:*/
#line 7245 "annoyance-filter.w"
class tokenParser{
protected:
mailFolder*source;
string cl;
string::size_type clp;
bool atEnd,inHTML,inHTMLcomment;
tokenDefinition*td;
tokenDefinition*btd;
bool saveMessage;
bool assemblePhrases;
deque<string> phraseQueue;
deque<string> pendingPhrases;
public:
list<string> messageQueue;
tokenParser(){
td= NULL;
}
void setSource(mailFolder&mf){
source= &mf;
cl= "";
clp= 0;
atEnd= inHTML= inHTMLcomment= false;
saveMessage= false;
messageQueue.clear();
phraseQueue.clear();
pendingPhrases.clear();
/*179:*/
#line 7653 "annoyance-filter.w"
assemblePhrases= false;
if((phraseMin!=1)||(phraseMax!=1)){
if((phraseMin>=1)&&(phraseMax>=phraseMin)){
if((phraseLimit> 0)&&(phraseLimit<((phraseMax*2)-1))){
cerr<<"Invalid --phraselimit setting. Too small for specified --phrasemax."<<endl;
}else{
assemblePhrases= true;
}
}else{
cerr<<"Invalid --phrasemin/max parameters. Must be 1 <= min <= max."<<endl;
}
}
/*:179*/
#line 7277 "annoyance-filter.w"
;
}
void setTokenDefinition(tokenDefinition&t,tokenDefinition&bt){
td= &t;
btd= &bt;
}
void setTokenLengthLimits(unsigned int lMax,unsigned int lMin= 1,
unsigned int blMax= 1,unsigned int blMin= 1){
assert(td!=NULL);
td->setLengthLimits(lMin,lMax);
assert(btd!=NULL);
btd->setLengthLimits(blMin,blMax);
}
unsigned int getTokenLengthMin(void)const{
return td->getLengthMin();
}
unsigned int getTokenLengthMax(void)const{
return td->getLengthMax();
}
void reportParserDiagnostic(const string s)const{
assert(source!=NULL);
source->reportParserDiagnostic(s);
}
void reset(void){
if(inHTML){
reportParserDiagnostic("<HTML> tag unterminated at end of message.");
}
if(inHTMLcomment){
reportParserDiagnostic("HTML comment unterminated at end of message.");
}
inHTML= inHTMLcomment= false;
clearMessageQueue();
phraseQueue.clear();
pendingPhrases.clear();
}
bool nextToken(dictionaryWord&d);
void assembleAllPhrases(dictionaryWord&d);
/*182:*/
#line 7730 "annoyance-filter.w"
void setSaveMessage(bool v){
saveMessage= v;
source->setDiagnosticList(saveMessage?(&messageQueue):NULL);
}
bool getSaveMessage(void)const{
return saveMessage;
}
void clearMessageQueue(void){
if(saveMessage){
string s;
if(isNewMessage()){
s= messageQueue.back();
}
messageQueue.clear();
if(isNewMessage()){
messageQueue.push_back(s);
}
}
}
void writeMessageQueue(ostream&os){
list<string> ::size_type l= messageQueue.size(),n= 0;
for(list<string> ::iterator p= messageQueue.begin();
p!=messageQueue.end();p++,n++){
if(!((n==(l-1))&&
(p->substr(0,(sizeof messageSentinel)-1)==messageSentinel))){
os<<*p<<endl;
}
}
}
/*:182*/
#line 7323 "annoyance-filter.w"
;
bool isNewMessage(void)const{
return atEnd||(source->isNewMessage());
}
private:
void nextLine(void){
while(true){
if(!(source->nextLine(cl))){
atEnd= true;
cl= "";
break;
}
if(saveMessage){
messageQueue.push_back(cl);
}
if(source->isNewMessage()){
reset();
}
break;
}
clp= 0;
}
};
/*:173*//*183:*/
#line 7773 "annoyance-filter.w"
// Bundles a mail folder, a token parser and a dictionary (optionally a
// fastDictionary) for classifying messages.  Only declarations appear here;
// the constructor and member functions are defined elsewhere in the file.
class classifyMessage{
public:
mailFolder*mf; // folder being classified (pointer, not owned as far as this declaration shows)
tokenParser tp; // parser owned by value
unsigned int nExtremal; // presumably set from the constructor's nExt argument (default 15) -- confirm in the definition
dictionary*d;
fastDictionary*fd; // may be NULL: the constructor's fdt argument defaults to NULL
double unknownWordProbability; // presumably set from the constructor's uwp argument (default 0.2) -- confirm
classifyMessage(mailFolder&m,
dictionary&dt,
fastDictionary*fdt= NULL,
unsigned int nExt= 15,double uwp= 0.2);
// Returns a double for the current message; createTranscript defaults to false.
double classifyThis(bool createTranscript= false);
protected:
// Helper taking a list, an insertion position in it, and a multimap keyed
// by double; see the definition for what it inserts.
void addSignificantWordDiagnostics(list<string> &l,
list<string> ::iterator where,
multimap<double,string> &rtokens,string endLine= "");
};
/*:183*//*186:*/
#line 7862 "annoyance-filter.w"
#ifdef OLDWAY
// Absolute value for doubles; only compiled when OLDWAY is defined.
double abs(double x){
    if(x<0){
        return -x;
    }
    return x;
}
#endif
/*:186*//*194:*/
#line 8119 "annoyance-filter.w"
#ifdef POP3_PROXY_SERVER
/*216:*/
#line 8724 "annoyance-filter.w"
// SIGPIPE handler: optionally logs that the signal was caught, then
// re-installs itself so later SIGPIPEs are handled the same way.
// NOTE(review): writing to cerr from a signal handler is not
// async-signal-safe; acceptable only as a debugging trace.
static RETSIGTYPE absentPlumber(int)
{
if(popProxyTrace){
cerr<<"POP3: Caught SIGPIPE--continuing."<<endl;
}
signal(SIGPIPE,absentPlumber); // re-arm: some platforms reset the disposition after delivery
}
/*:216*/
#line 8122 "annoyance-filter.w"
// Callback type stored by POP3Proxy.  The proxy code that invokes it is
// defined elsewhere; see there for the meaning of each argument.
typedef void(*POP3ProxyFilterFunction)(const string command,const string argument,char*replyBuffer,int*replyLength,string&reply);
// Configuration and entry points for a POP3 proxy: the local port to listen
// on, the upstream server name and port, and an optional filter callback.
// Connection settings may only be changed before the connection is opened
// (checked in debug builds).
// NOTE(review): the class owns listenSocket and closes it in the
// destructor, so instances must not be copied.
class POP3Proxy{
protected:
    unsigned short popProxyPort;        // Local port (constructor default 9110)
    string serverName;                  // Upstream server name
    unsigned short serverPort;          // Upstream server port (constructor default 110)
    bool opened;                        // Once true, the setters below abort in debug builds
private:
    set<string> multiLine,cMultiLine;   // Command name sets filled by the constructor
    int listenSocket;                   // -1 while no socket is open
    POP3ProxyFilterFunction filterFunction;
public:
    POP3Proxy(unsigned short proxyPort= 9110,
        string serverN= "",
        unsigned short serverP= 110,
        POP3ProxyFilterFunction filterF= NULL
        ):
        popProxyPort(proxyPort),
        serverName(serverN),
        serverPort(serverP),
        opened(false),
        listenSocket(-1),
        filterFunction(filterF){
/*195:*/
#line 8198 "annoyance-filter.w"
        multiLine.insert("capa");
        multiLine.insert("retr");
        multiLine.insert("top");
        cMultiLine.insert("list");
        cMultiLine.insert("uidl");
/*:195*/
#line 8151 "annoyance-filter.w"
        ;
    }
    // Close the listening socket if one was opened and restore the default
    // SIGPIPE disposition.
    ~POP3Proxy(){
        if(listenSocket!=-1){
            close(listenSocket);
            signal(SIGPIPE,SIG_DFL);
        }
    }
    void setPopProxyPort(unsigned short p){
/*196:*/
#line 8211 "annoyance-filter.w"
#ifndef NDEBUG
        if(opened){
            cerr<<"Attempt to modify POP3 connection settings after connection opened."<<endl;
            abort();
        }
#endif
/*:196*/
#line 8162 "annoyance-filter.w"
        ;
        popProxyPort= p;
    }
    // Takes the name by const reference: the argument is only copied, so
    // callers may now pass temporaries, literals and const strings as well
    // as the modifiable lvalues the old string& signature required.
    void setServerName(const string&s){
/*196:*/
#line 8211 "annoyance-filter.w"
#ifndef NDEBUG
        if(opened){
            cerr<<"Attempt to modify POP3 connection settings after connection opened."<<endl;
            abort();
        }
#endif
/*:196*/
#line 8167 "annoyance-filter.w"
        ;
        serverName= s;
    }
    void setServerPort(unsigned short p){
/*196:*/
#line 8211 "annoyance-filter.w"
#ifndef NDEBUG
        if(opened){
            cerr<<"Attempt to modify POP3 connection settings after connection opened."<<endl;
            abort();
        }
#endif
/*:196*/
#line 8172 "annoyance-filter.w"
        ;
        serverPort= p;
    }
    // Unlike the connection settings, the filter may be replaced at any time.
    void setFilterFunction(POP3ProxyFilterFunction ff){
        filterFunction= ff;
    }
    bool acceptConnections(int maxBacklog= 25);
    bool serviceConnection(void);
    bool operateProxyServer(int maxBacklog= 25);
};
#endif
/*:194*/
#line 9963 "annoyance-filter.w"
/*239:*/
#line 9217 "annoyance-filter.w"
static double mailBias= 2.0; // same name as the mailBias multiplier in computeJunkProbability()
static unsigned int minOccurrences= 5; // same name as the occurrence floor in computeJunkProbability()
static double junkThreshold= 0.9; // presumably the score at/above which a message is called junk -- confirm at use site
static double mailThreshold= 0.9; // presumably the score below which a message is called mail -- confirm at use site
static int significantWords= 15; // same value as classifyMessage's nExt default
static double novelWordProbability= 0.2; // same value as classifyMessage's uwp default
static bitset<1<<(sizeof(char)*8)> annotations; // one bit per char value; tested by the Annotate(c) macro
#ifdef POP3_PROXY_SERVER
static int popProxyPort= 9110; // same value as POP3Proxy's proxyPort default
static string popProxyServer= "";
static int popProxyServerPort= 110; // same value as POP3Proxy's serverP default
#endif
static bool bsdFolder= false;
/*:239*//*240:*/
#line 9236 "annoyance-filter.w"
static unsigned int nTested= 0; // counter, starts at zero; incremented elsewhere in the file
/*:240*/
#line 9964 "annoyance-filter.w"
/*11:*/
#line 1359 "annoyance-filter.w"
// Orders dictionary words by comparing the values returned from get().
bool operator<(dictionaryWord a,dictionaryWord b){
    const bool aPrecedesB= (a.get()<b.get());
    return aPrecedesB;
}
/*:11*//*12:*/
#line 1394 "annoyance-filter.w"
void dictionaryWord::computeJunkProbability(unsigned int nMailMessages,unsigned int nJunkMessages,
double mailBias,unsigned int minOccurrences)
{
double nMail= occurrences[Mail]*mailBias,
nJunk= occurrences[Junk];
nMailMessages= max(nMailMessages,1u);
nJunkMessages= max(nJunkMessages,1u);
if((nMail+nJunk)>=minOccurrences){
assert(nMailMessages> 0);
assert(nJunkMessages> 0);
junkProbability= min(0.99,max(0.01,min(nJunk/nJunkMessages,1.0)/
(min(nMail/nMailMessages,1.0)+min(nJunk/nJunkMessages,1.0))));
}else{
junkProbability= -1;
}
}
/*:12*//*13:*/
#line 1419 "annoyance-filter.w"
// Print the word, its mail and junk counts and its junk probability
// (5 significant digits) on one line of os.
void dictionaryWord::describe(ostream&os){
    os<<text;
    os<<" Mail: "<<n_mail();
    os<<", Junk: "<<n_junk();
    os<<", Probability: "<<setprecision(5)<<junkProbability<<endl;
}
/*:13*//*14:*/
#line 1432 "annoyance-filter.w"
// Write this word as one CSV record: probability,mail,junk,"text".
void dictionaryWord::exportCSV(ostream&os){
    os<<setprecision(5)<<junkProbability<<",";
    os<<occurrences[Mail]<<","<<occurrences[Junk];
    os<<",\""<<text<<"\""<<endl;
}
/*:14*//*15:*/
#line 1457 "annoyance-filter.w"
// Read the next record from a CSV stream in the format written by
// exportCSV(): probability,mail,junk,"text".
// Blank lines and lines whose first non-blank character is '#' or ';' are
// skipped.
// Returns true when a line was consumed.  On a well-formed record all four
// fields are stored; on an ill-formed one junkProbability is set to -2 and
// text to the raw line.  Returns false at end of input, with
// junkProbability set to -3.
bool dictionaryWord::importCSV(istream&is){
    string s;
    while(getline(is,s)){
        string::size_type p= 0;
        while((p<s.length())&&isISOspace(s[p])){
            p++;
        }
        if((p>=s.length())||(s[p]=='#')||(s[p]==';')){
            continue;
        }
        // isdigit() is undefined for negative arguments, so a plain char
        // (signed on many platforms) must go through unsigned char first.
        if((s[p]=='-')||isdigit(static_cast<unsigned char> (s[p]))){
            const string::size_type c1= s.find(',');
            const string::size_type c2= (c1==string::npos)?string::npos:s.find(',',c1+1);
            const string::size_type c3= (c2==string::npos)?string::npos:s.find(',',c2+1);
            if(c3!=string::npos){
                junkProbability= atof(s.substr(0,c1).c_str());
                // Each count is exactly the text between its two commas.
                occurrences[Mail]= atoi(s.substr(c1+1,(c2-c1)-1).c_str());
                occurrences[Junk]= atoi(s.substr(c2+1,(c3-c2)-1).c_str());
                const string::size_type q1= s.find('"',c3+1);
                if(q1!=string::npos){
                    // The closing quote is the last one on the line, so the
                    // word itself may contain quotes.
                    const string::size_type q2= s.find_last_of('"');
                    if((q2!=string::npos)&&(q2> q1)){
                        text= s.substr(q1+1,(q2-q1)-1);
                        return true;
                    }
                }
            }
        }
        junkProbability= -2;
        text= s;
        return true;
    }
    junkProbability= -3;
    return false;
}
/*:15*//*16:*/
#line 1516 "annoyance-filter.w"
// Write this word as one binary record:
//   1 byte   text length
//   n bytes  text
//   4 bytes  mail count, most significant byte first
//   4 bytes  junk count, most significant byte first
//   sizeof junkProbability bytes of the probability, most significant byte
//            first (bytes are reversed on little-endian hosts)
// The length prefix is a single byte, so at most 255 bytes of text can be
// represented.  Longer text is truncated to 255 bytes.  Previously the
// prefix wrapped modulo 256 while the whole text was still written, which
// made the rest of the stream unreadable.
void dictionaryWord::exportToBinaryFile(ostream&os){
    const string::size_type maxTextLength= 255;
    const string::size_type tlen=
        (text.length()> maxTextLength)?maxTextLength:text.length();
    os.put(static_cast<char> (static_cast<unsigned char> (tlen)));
    os.write(text.data(),tlen);

    const unsigned long counts[2]= {
        static_cast<unsigned long> (n_mail()),
        static_cast<unsigned long> (n_junk())
    };
    for(int k= 0;k<2;k++){
        for(int shift= 24;shift>=0;shift-= 8){
            os.put(static_cast<char> ((counts[k]>>shift)&0xFF));
        }
    }

    // Byte-order probe: the first byte in memory of the double -1.0 is zero
    // only on a little-endian host.
    const double k1= -1.0;
    const unsigned char*probe= reinterpret_cast<const unsigned char*> (&k1);
    if(probe[0]==0){
        const unsigned char*fp= reinterpret_cast<const unsigned char*> (&junkProbability);
        for(unsigned int i= sizeof junkProbability;i> 0;i--){
            os.put(static_cast<char> (fp[i-1]));
        }
    }else{
        os.write(reinterpret_cast<const char*> (&junkProbability),
            sizeof junkProbability);
    }
}
/*:16*//*17:*/
#line 1552 "annoyance-filter.w"
// Read one binary record in the layout written by exportToBinaryFile().
// Returns true and stores the fields only when the complete record was
// read.  On end of input or a truncated record it returns false and leaves
// this object untouched.  Previously only the length and text reads were
// checked, so a truncated record was reported as success with
// indeterminate counts.
bool dictionaryWord::importFromBinaryFile(istream&is){
    unsigned char tlen;
    char sval[256];
    unsigned char mailBytes[4];
    unsigned char junkBytes[4];
    unsigned char fb[sizeof junkProbability];

    if(!is.read(reinterpret_cast<char*> (&tlen),1)){
        return false;
    }
    if(!is.read(sval,tlen)||
       !is.read(reinterpret_cast<char*> (mailBytes),sizeof mailBytes)||
       !is.read(reinterpret_cast<char*> (junkBytes),sizeof junkBytes)||
       !is.read(reinterpret_cast<char*> (fb),sizeof fb)){
        return false;
    }

    text= string(sval,tlen);
    // Assemble the big-endian counts in unsigned arithmetic so the top byte
    // is never shifted into the sign bit of an int.
    occurrences[Mail]= (static_cast<unsigned int> (mailBytes[0])<<24)|
        (static_cast<unsigned int> (mailBytes[1])<<16)|
        (static_cast<unsigned int> (mailBytes[2])<<8)|
        static_cast<unsigned int> (mailBytes[3]);
    occurrences[Junk]= (static_cast<unsigned int> (junkBytes[0])<<24)|
        (static_cast<unsigned int> (junkBytes[1])<<16)|
        (static_cast<unsigned int> (junkBytes[2])<<8)|
        static_cast<unsigned int> (junkBytes[3]);

    // Byte-order probe: the first byte in memory of the double -1.0 is zero
    // only on a little-endian host, where the stored bytes must be reversed.
    const double k1= -1.0;
    const unsigned char*kp= reinterpret_cast<const unsigned char*> (&k1);
    unsigned char*fp= reinterpret_cast<unsigned char*> (&junkProbability);
    for(unsigned int i= 0;i<(sizeof junkProbability);i++){
        if(kp[0]==0){
            fp[((sizeof junkProbability)-1)-i]= fb[i];
        }else{
            fp[i]= fb[i];
        }
    }
    return true;
}
/*:17*//*20:*/
#line 1657 "annoyance-filter.w"
// Record one occurrence of w in the given category.  A word not yet in the
// dictionary is inserted first and its size added to memoryRequired.
void dictionary::add(dictionaryWord w,dictionaryWord::mailCategory category){
    const dictionary::iterator existing= find(w.get());
    if(existing==end()){
        insert(make_pair(w.get(),w)).first->second.add(category);
        memoryRequired+= w.estimateMemoryRequirement();
        return;
    }
    existing->second.add(category);
}
/*:20*//*21:*/
#line 1678 "annoyance-filter.w"
// Merge w into the dictionary.  If the word is already present its Mail and
// Junk counts are added to the existing entry.  Otherwise w is inserted as
// a new entry.
// A new entry is now added to memoryRequired, as add() and purge() already
// do.  Previously words that arrived through an import were left out of the
// running total.
void dictionary::include(dictionaryWord&w){
    const dictionary::iterator p= find(w.get());
    if(p!=end()){
        p->second.occurrences[dictionaryWord::Mail]+= w.occurrences[dictionaryWord::Mail];
        p->second.occurrences[dictionaryWord::Junk]+= w.occurrences[dictionaryWord::Junk];
    }else{
        insert(make_pair(w.get(),w));
        memoryRequired+= w.estimateMemoryRequirement();
    }
}
/*:21*//*22:*/
#line 1699 "annoyance-filter.w"
// Sort predicate: ascending junk probability, with ties broken by the value
// returned from get().
bool byProbability(const dictionaryWord*w1,
    const dictionaryWord*w2){
    const double delta= w1->getJunkProbability()-w2->getJunkProbability();
    if(delta!=0){
        return delta<0;
    }
    return w1->get()<w2->get();
}
// Write the whole dictionary to os as CSV: a header comment line, a
// pseudo-word record carrying the per-category message counts, then every
// word in byProbability order.
void dictionary::exportCSV(ostream&os){
    if(verbose){
        cerr<<"Exporting dictionary to CSV file."<<endl;
    }
    vector<dictionaryWord*> ordered;
    for(iterator entry= begin();entry!=end();++entry){
        ordered.push_back(&(entry->second));
    }
    sort(ordered.begin(),ordered.end(),byProbability);

    os<<"; Probability,Mail,Junk,Word"<<endl;

    dictionaryWord countsRecord;
    countsRecord.set(pseudoCountsWord,
        messageCount[dictionaryWord::Mail],
        messageCount[dictionaryWord::Junk],-1);
    countsRecord.exportCSV(os);

    for(vector<dictionaryWord*> ::size_type i= 0;i<ordered.size();i++){
        ordered[i]->exportCSV(os);
    }
}
/*:22*//*23:*/
#line 1740 "annoyance-filter.w"
// Read CSV records from is until end of input.  The pseudo-word record
// (probability -1, text equal to pseudoCountsWord) adds to the message
// counts; any other record with probability >= -1 is merged via include();
// anything else is reported as ill-formed when verbose is set.
void dictionary::importCSV(istream&is){
    if(verbose){
        cerr<<"Importing dictionary from CSV file."<<endl;
    }
    dictionaryWord record;
    while(record.importCSV(is)){
        const double jp= record.getJunkProbability();
        if((jp==-1)&&(record.get()==pseudoCountsWord)){
            messageCount[dictionaryWord::Mail]+= record.n_mail();
            messageCount[dictionaryWord::Junk]+= record.n_junk();
        }else if(jp>=-1){
            include(record);
        }else if(verbose){
            cerr<<"Ill-formed record in CSV import: \""<<record.get()<<"\""<<endl;
        }
    }
}
/*:23*//*24:*/
#line 1805 "annoyance-filter.w"
#ifdef PURGE_USES_REMOVE_IF
// Predicate object: true for a dictionary entry whose junk probability is
// below the threshold given at construction.  Only compiled when
// PURGE_USES_REMOVE_IF is defined.
class dictionaryWordProb_less:public unary_function<pair<string,dictionaryWord> ,int> {
    int threshold;
public:
    explicit dictionaryWordProb_less(const int pt):threshold(pt){}
    bool operator()(const pair<string,dictionaryWord> &entry)const{
        const bool below= entry.second.getJunkProbability()<threshold;
        return below;
    }
};
#endif
void dictionary::purge(unsigned int occurrences){
if(verbose){
cerr<<"Pruning rare words from database: "<<flush;
}
memoryRequired= 0;
#ifdef PURGE_USES_REMOVE_IF
remove_if(begin(),end(),dictionaryWordProb_less(0));
#else
queue<dictionaryWord> pq;
while(!empty()){
if(((occurrences> 0)&&(begin()->second.n_occurrences()> occurrences))||
(begin()->second.getJunkProbability()>=0)){
pq.push(begin()->second);
}
erase(begin());
}
while(!pq.empty()){
insert(make_pair(pq.front().get(),pq.front()));
memoryRequired+= pq.front().estimateMemoryRequirement();
pq.pop();
}
#endif
if(verbose){
cerr<<size()<<" words remaining."<<endl;
cerr<<" Dictionary size "<<estimateMemoryRequirement()<<" bytes."<<endl;
}
}
/*:24*//*25:*/
#line 1851 "annoyance-filter.w"
// Call resetCat(category) on every word in the dictionary.
void dictionary::resetCat(dictionaryWord::mailCategory category){
    if(verbose){
        cerr<<"Resetting counts for category "<<
            dictionaryWord::categoryName(category)<<endl;
    }
    iterator entry= begin();
    while(entry!=end()){
        entry->second.resetCat(category);
        ++entry;
    }
}
/*:25*//*26:*/
#line 1868 "annoyance-filter.w"
// Print descriptive statistics of the junk probabilities of all words whose
// probability is >= 0.
void dictionary::printStatistics(ostream&os)const{
    if(verbose){
        cerr<<"Computing dictionary statistics."<<endl;
    }
    os<<"Dictionary statistics:"<<endl;

    dataTable<double> probabilities;
    for(const_iterator entry= begin();entry!=end();++entry){
        const double jp= entry->second.getJunkProbability();
        if(jp>=0){
            probabilities.push_back(entry->second.getJunkProbability());
        }
    }

    os<<"Mean = "<<probabilities.mean()<<endl;
    os<<"Geometric mean = "<<probabilities.geometricMean()<<endl;
    os<<"Harmonic mean = "<<probabilities.harmonicMean()<<endl;
    os<<"RMS = "<<probabilities.RMS()<<endl;
    os<<"Median = "<<probabilities.median()<<endl;
    os<<"Mode = "<<probabilities.mode()<<endl;
    os<<"Percentile(0.5) = "<<probabilities.percentile(0.5)<<endl;
    os<<"Quartile(1) = "<<probabilities.quartile(1)<<endl;
    os<<"Quartile(3) = "<<probabilities.quartile(3)<<endl;
    os<<"Variance = "<<probabilities.variance()<<endl;
    os<<"Standard deviation = "<<probabilities.stdev()<<endl;
    os<<"CentralMoment(3) = "<<probabilities.centralMoment(3)<<endl;
    os<<"Skewness = "<<probabilities.skewness()<<endl;
    os<<"Kurtosis = "<<probabilities.kurtosis()<<endl;
}
/*:26*//*27:*/
#line 1903 "annoyance-filter.w"
#ifdef HAVE_PLOT_UTILITIES
#define PLOT_DEBUG
// Plot a histogram of junk probabilities by writing fileName.gp (gnuplot
// commands) and fileName.dat (bin data), then running gnuplot piped through
// pnmtopng to produce fileName.png.
//   fileName  base name of the three files
//   nBins     number of histogram bins
// The bin index is now clamped to the last bin.  A probability of exactly
// 1.0 (possible in imported data) used to index one past the end of the
// histogram, and nBins == 0 indexed an empty vector.
// NOTE(review): fileName is pasted unquoted into a shell command passed to
// system(); it must come from a trusted source.
void dictionary::plotProbabilityHistogram(string fileName,unsigned int nBins)const{
    if(verbose){
        cerr<<"Plotting probability histogram to "<<fileName<<".png"<<endl;
    }
    ofstream gp((fileName+".gp").c_str()),
        dat((fileName+".dat").c_str());
/*28:*/
#line 1947 "annoyance-filter.w"
    vector<unsigned int> hist(nBins);
    for(const_iterator mp= begin();mp!=end();mp++){
        const double jp= mp->second.getJunkProbability();
        if((jp>=0)&&(nBins> 0)){
            unsigned int bin= static_cast<unsigned int> (jp*nBins);
            if(bin>=nBins){
                bin= nBins-1;
            }
            hist[bin]++;
        }
    }
/*:28*/
#line 1913 "annoyance-filter.w"
    ;
/*29:*/
#line 1963 "annoyance-filter.w"
    for(unsigned int j= 0;j<nBins;j++){
        dat<<(static_cast<double> (j)/nBins)<<" "<<hist[j]<<endl;
    }
/*:29*/
#line 1914 "annoyance-filter.w"
    ;
    gp<<"set term pbm small color"<<endl;
    gp<<"set ylabel \"Number of Words\""<<endl;
    gp<<"set xlabel \"Probability\""<<endl;
    gp<<"plot \""<<fileName<<".dat\" using 1:2 title \"\" with boxes"<<endl;
    string command("gnuplot ");
    command+= fileName+".gp | pnmtopng >"+fileName+".png";
#ifdef PLOT_DEBUG
    cout<<command<<endl;
#else
    command+= " 2>/dev/null";
#endif
    // Both files must be closed (flushed) before gnuplot reads them.
    gp.close();
    dat.close();
    system(command.c_str());
#ifndef PLOT_DEBUG
    remove((fileName+".gp").c_str());
    remove((fileName+".dat").c_str());
#endif
}
#endif
/*:27*//*30:*/
#line 1974 "annoyance-filter.w"
void dictionary::computeJunkProbability(unsigned int nMailMessages,unsigned int nJunkMessages,
double mailBias,unsigned int minOccurrences)
{
for(dictionary::iterator p= begin();p!=end();p++){
p->second.computeJunkProbability(nMailMessages,nJunkMessages,
mailBias,minOccurrences);
}
}
/*:30*//*31:*/
#line 2004 "annoyance-filter.w"
// Write the dictionary to os in binary form: first a pseudo-word record
// carrying the per-category message counts, then one record per word.
void dictionary::exportToBinaryFile(ostream&os){
    if(verbose){
        cerr<<"Exporting dictionary to binary file."<<endl;
    }
    dictionaryWord countsRecord;
    countsRecord.set(pseudoCountsWord,
        messageCount[dictionaryWord::Mail],
        messageCount[dictionaryWord::Junk],-1);
    countsRecord.exportToBinaryFile(os);

    dictionary::iterator entry= begin();
    while(entry!=end()){
        entry->second.exportToBinaryFile(os);
        ++entry;
    }
}
// Read a binary dictionary from is.  The first record must be the
// pseudo-word carrying the message counts (asserted); its counts are added
// to messageCount and every following record is merged via include().
void dictionary::importFromBinaryFile(istream&is){
    if(verbose){
        cerr<<"Importing dictionary from binary file."<<endl;
    }
    dictionaryWord record;
    if(!record.importFromBinaryFile(is)){
        return;
    }
    assert(record.get()==pseudoCountsWord);
    messageCount[dictionaryWord::Mail]+= record.n_mail();
    messageCount[dictionaryWord::Junk]+= record.n_junk();
    for(;;){
        if(!record.importFromBinaryFile(is)){
            break;
        }
        include(record);
    }
}
/*:31*//*33:*/
#line 2147 "annoyance-filter.w"
// Load a fast dictionary file written by exportDictionary().
// With HAVE_MMAP the file is memory mapped; otherwise the hash table and
// word table are read into a heap block.  The header is validated for
// signature, byte order, version and floating point format.
// Returns true on success.  On failure a message is written to cerr and
// false is returned.
// Changes from the original: the mmap() result is compared with MAP_FAILED
// before use (it was never checked), bad_alloc is caught by const
// reference, the allocation failure message ends with a newline like every
// other diagnostic here, and the lseek whence arguments use the
// SEEK_END/SEEK_SET names in place of the literals 2 and 0.
bool fastDictionary::load(const string fname){
#ifdef HAVE_MMAP
    fileHandle= open(fname.c_str(),O_RDONLY);
    if(fileHandle==-1){
        cerr<<"Cannot open fast dictionary file "<<fname<<endl;
        return false;
    }
    fileLength= lseek(fileHandle,0,SEEK_END);
    lseek(fileHandle,0,SEEK_SET);
    void*const mapping= mmap((caddr_t)0,fileLength,
        PROT_READ,MAP_SHARED|MAP_NORESERVE,
        fileHandle,0);
    if(mapping==MAP_FAILED){
        cerr<<"Cannot map fast dictionary file "<<fname<<endl;
        close(fileHandle);
        dp= NULL;
        return false;
    }
    dp= static_cast<char*> (mapping);
    istrstream is(dp,fileLength);
#else
    ifstream is(fname.c_str(),ios::in|ios::binary);
    if(!is){
        cerr<<"Cannot open fast dictionary file "<<fname<<"."<<endl;
        return false;
    }
#endif
    char signature[4];
    is.read(signature,4);
    if(memcmp(signature,fastDictionarySignature,4)!=0){
        cerr<<"File "<<fname<<" is not a fast dictionary."<<endl;
        // Common failure exit, also reached by goto from the checks below.
fdlbail:;
#ifdef HAVE_MMAP
        munmap(dp,fileLength);
        close(fileHandle);
        dp= NULL;
#endif
        return false;
    }
    u_int16_t s;
    is.read(reinterpret_cast<char*> (&s),sizeof s);
    if(s!=byteOrderMark){
        cerr<<"Fast dictionary file "<<fname<<
            " was created on a platform with incompatible byte order."<<endl;
        regen();
        goto fdlbail;
    }
    is.read(reinterpret_cast<char*> (&s),sizeof s);
    if(s!=versionNumber){
        cerr<<"Fast dictionary file "<<fname<<
            " is version "<<s<<". Version "<<versionNumber<<" is required."<<endl;
        regen();
        goto fdlbail;
    }
    double d;
    is.read(reinterpret_cast<char*> (&s),sizeof s);
    u_int16_t filler;
    is.read(reinterpret_cast<char*> (&filler),sizeof filler);
    // d is only read from the file, and only examined, when the stored
    // size of a double matches this platform's.
    if(s==doubleSize){
        is.read(reinterpret_cast<char*> (&d),sizeof d);
    }
    if((s!=doubleSize)||(d!=fastDictionaryFloatingTest)){
        cerr<<"Fast dictionary file "<<fname<<
            " was created on a machine with incompatible floating point format."<<endl;
        regen();
        goto fdlbail;
    }
    is.read(reinterpret_cast<char*> (&totalSize),sizeof totalSize);
    is.read(reinterpret_cast<char*> (&hashTableOffset),sizeof hashTableOffset);
    is.read(reinterpret_cast<char*> (&hashTableBuckets),sizeof hashTableBuckets);
    is.read(reinterpret_cast<char*> (&wordTableSize),sizeof wordTableSize);
#ifdef HAVE_MMAP
    // The tables start right after the header, inside the mapping.
    dblock= reinterpret_cast<unsigned char*> (dp)+is.tellg();
#else
    u_int32_t fdsize= (hashTableBuckets*sizeof(u_int32_t))+wordTableSize;
    try{
        dblock= new unsigned char[fdsize];
    }catch(const bad_alloc&){
        cerr<<"Unable to allocate memory for fast dictionary."<<endl;
        return false;
    }
    is.read(reinterpret_cast<char*> (dblock),fdsize);
    is.close();
#endif
    hashTable= reinterpret_cast<u_int32_t*> (dblock);
    wordTable= dblock+(hashTableBuckets*sizeof(u_int32_t));
    if(verbose){
        cerr<<"Loaded fast dictionary from "<<fname<<"."<<endl;
    }
    return true;
}
/*:33*//*34:*/
#line 2251 "annoyance-filter.w"
// Look up target in the loaded fast dictionary.
// Returns the stored junk probability, or -1 when the word is absent.
// Each word table record is laid out as:
//   u_int32_t link to the next record in the bucket (or the void link),
//   double    junk probability,
//   u_int16_t text length,
//   text bytes.
// Changes from the original: a target longer than a 16-bit length can hold
// is reported as absent at once (its length used to be truncated, so it
// could match a shorter stored word), and a zero bucket count no longer
// causes a division by zero.
double fastDictionary::find(const string&target)const{
    assert(dblock!=NULL);
    if((target.length()> 0xFFFF)||(hashTableBuckets==0)){
        return-1;
    }
    const u_int32_t bucket= computeHashValue(target)%hashTableBuckets;
    if(hashTable[bucket]==fastDictionaryVoidLink){
        return-1;
    }
    const u_int16_t wlen= static_cast<u_int16_t> (target.length());
    const unsigned int sOffset= sizeof(u_int32_t)+sizeof(double);
    const unsigned char*cword= wordTable+hashTable[bucket];
    while(true){
        // Fields are copied out with memcpy because records are packed
        // without regard to alignment.
        u_int16_t wl;
        memcpy(&wl,cword+sOffset,sizeof wl);
        if((wl==wlen)&&
           (memcmp(target.data(),cword+sOffset+sizeof(u_int16_t),wlen)==0)){
            double jp;
            memcpy(&jp,cword+sizeof(u_int32_t),sizeof(double));
            return jp;
        }
        u_int32_t lnk;
        memcpy(&lnk,cword,sizeof lnk);
        if(lnk==fastDictionaryVoidLink){
            break;
        }
        cword= wordTable+lnk;
    }
    return-1;
}
/*:34*//*35:*/
#line 2289 "annoyance-filter.w"
// Write dictionary d to stream o in fast dictionary format.
// Output layout, in order:
//   4-byte signature, four u_int16_t (byte order mark, version, size of a
//   double, zero filler), one double (floating point test value), four
//   u_int32_t (total size, header size, hash bucket count, word table
//   size), the hash table, then the word table.
// Each word table record is: u_int32_t chain link, double junk probability,
// u_int16_t text length, text bytes.
void fastDictionary::exportDictionary(const dictionary&d,ostream&o){
u_int32_t hashSize= nextGreaterPrime(d.size());
vector<u_int32_t> hashTable(hashSize,fastDictionaryVoidLink);
vector<unsigned char> words;
for(dictionary::const_iterator w= d.begin();w!=d.end();w++){
u_int32_t h= computeHashValue(w->first);
unsigned int slot= h%hashSize;
/*36:*/
#line 2376 "annoyance-filter.w"
// Link the record about to be appended (at offset words.size()) into its
// bucket: directly if the bucket is empty, otherwise at the end of the chain.
if(hashTable[slot]==fastDictionaryVoidLink){
hashTable[slot]= words.size();
}else{
u_int32_t p= hashTable[slot];
u_int32_t l;
while(true){
memcpy(&l,&(words[p]),sizeof l);
if(l==fastDictionaryVoidLink){
break;
}
p= l;
}
l= words.size();
memcpy(&(words[p]),&l,sizeof l);
}
/*:36*/
#line 2300 "annoyance-filter.w"
;
/*37:*/
#line 2398 "annoyance-filter.w"
// Append the record itself; wl tracks the write offset within words.
vector<unsigned char> ::size_type wl= words.size();
words.resize(words.size()+sizeof(u_int32_t)+
sizeof(double)+sizeof(u_int16_t)+w->second.get().length());
u_int32_t vl= fastDictionaryVoidLink;
#ifdef OLDWAY
memcpy(words.begin()+wl,&vl,sizeof vl);
#else
// Vmemcpy is defined elsewhere; presumably it copies the given bytes into
// the vector starting at index wl -- confirm against its definition.
Vmemcpy(words,wl,&vl,sizeof vl);
#endif
wl+= sizeof vl;
double jp= w->second.getJunkProbability();
#ifdef OLDWAY
memcpy(words.begin()+wl,&jp,sizeof jp);
#else
Vmemcpy(words,wl,&jp,sizeof jp);
#endif
wl+= sizeof jp;
// NOTE(review): the length is stored in 16 bits, while the resize above
// reserved room for the full text length.
u_int16_t wlen= w->second.get().length();
#ifdef OLDWAY
memcpy(words.begin()+wl,&wlen,sizeof wlen);
#else
Vmemcpy(words,wl,&wlen,sizeof wlen);
#endif
wl+= sizeof wlen;
#ifdef OLDWAY
memcpy(words.begin()+wl,w->second.get().data(),wlen);
#else
Vmemcpy(words,wl,w->second.get().data(),wlen);
#endif
/*:37*/
#line 2301 "annoyance-filter.w"
;
}
// Header: signature, then the four 16-bit fields in host byte order.
o<<fastDictionarySignature;
u_int16_t b;
b= byteOrderMark;
o.write(reinterpret_cast<const char*> (&b),sizeof b);
b= versionNumber;
o.write(reinterpret_cast<const char*> (&b),sizeof b);
b= doubleSize;
o.write(reinterpret_cast<const char*> (&b),sizeof b);
b= 0; // filler
o.write(reinterpret_cast<const char*> (&b),sizeof b);
double td= fastDictionaryFloatingTest;
o.write(reinterpret_cast<const char*> (&td),sizeof td);
u_int32_t headerSize= 4+(4*sizeof(u_int16_t))+sizeof(double)+
(4*sizeof(u_int32_t));
u_int32_t wordTableSize= words.size();
u_int32_t totalSize= headerSize+
(hashTable.size()*sizeof(u_int32_t))+
wordTableSize;
o.write(reinterpret_cast<const char*> (&totalSize),sizeof totalSize);
// headerSize is written in the position load() reads back as hashTableOffset.
o.write(reinterpret_cast<const char*> (&headerSize),sizeof headerSize);
o.write(reinterpret_cast<const char*> (&hashSize),sizeof hashSize);
o.write(reinterpret_cast<const char*> (&wordTableSize),sizeof wordTableSize);
#ifdef OLDWAY
o.write(hashTable.begin(),hashTable.size()*sizeof(u_int32_t));
o.write(words.begin(),words.size());
#else
// Write the hash table one entry at a time, then the word table byte by byte.
for(vector<u_int32_t> ::const_iterator htp= hashTable.begin();
htp!=hashTable.end();htp++){
u_int32_t hte= *htp;
o.write(reinterpret_cast<const char*> (&hte),sizeof hte);
}
for(vector<unsigned char> ::const_iterator wtp= words.begin();
wtp!=words.end();wtp++){
o.put(*wtp);
}
#endif
if(verbose){
cerr<<"Exported "<<d.size()<<" words to fast dictionary."<<endl;
}
}
// Create (or overwrite) the binary file fname and write d to it in fast
// dictionary format.  A failure to create the file is reported on cerr.
void fastDictionary::exportDictionary(const dictionary&d,const string fname){
    ofstream of(fname.c_str(),ios::out|ios::binary);
    if(!of){
        cerr<<"Unable to create fast dictionary file "<<fname<<endl;
        return;
    }
    exportDictionary(d,of);
    of.close();
}
/*:35*//*38:*/
#line 2440 "annoyance-filter.w"
// Return an odd prime greater than a.
// As before, a is first made odd and the search then starts two above
// that, so for an even a the value a+1 is never returned.
// Changes from the original:
//  - The trial division limit is derived from each candidate rather than
//    once from the starting value.
//  - A candidate with no divisor to try now counts as prime.  Previously
//    "remainder" kept its initial 0 whenever the divisor loop did not run,
//    so every call with a < 4 (for example an empty dictionary) never
//    returned.
unsigned int fastDictionary::nextGreaterPrime(unsigned int a){
    if((a&1)==0){
        a++;
    }
    while(true){
        a+= 2;
        bool isPrime= true;
        // a is odd, so only odd divisors up to sqrt(a) need testing.
        // "n <= a / n" avoids both sqrt() and overflow of n * n.
        for(unsigned int n= 3;n<=(a/n);n+= 2){
            if((a%n)==0){
                isPrime= false;
                break;
            }
        }
        if(isPrime){
            break;
        }
    }
    return a;
}
/*:38*//*39:*/
#line 2471 "annoyance-filter.w"
u_int32_t fastDictionary::computeHashValue(const string&s){
u_int32_t hash= 1;
for(unsigned int i= 0;i<s.length();i++){
hash= (hash*17)^s[i];
}
return hash;
}
/*:39*//*41:*/
#line 2583 "annoyance-filter.w"
// Read the next line of encoded input into inputLine.
// Input ends (atEnd is set) at end of stream, at a line starting with the
// message sentinel, or at a "--<partBoundary>" line; in the last two cases
// the terminating line is remembered in endBoundary.
// Returns true when a line is available, false once at end (inputLine is
// then empty).
// The stream test is now "if(getline(...))".  The original compared the
// stream with NULL, which relies on the pre-C++11 conversion of a stream
// to a pointer and does not compile under C++11 or later.
bool MIMEdecoder::getNextEncodedLine(void){
    if(!atEnd){
        if(getline(*is,inputLine)){
            if(inputLine.substr(0,(sizeof messageSentinel)-1)==messageSentinel){
                endBoundary= inputLine;
                if(partBoundary!=""){
                    // A new message began while a MIME part was still open.
                    assert(mf!=NULL);
                    mf->reportParserDiagnostic("Unterminated MIME sentinel at end of message.");
                    mf->setNewMessageEligiblity();
                }
                atEnd= true;
            }
            if((partBoundary!="")&&(inputLine.substr(0,2)=="--")&&
               (inputLine.substr(2,partBoundary.length())==partBoundary)){
                if(Annotate('d')){
                    ostringstream os;
                    os<<"Part boundary encountered: "<<inputLine;
                    mf->reportParserDiagnostic(os);
                }
                endBoundary= inputLine;
                atEnd= true;
            }else{
                // NOTE(review): a sentinel line also reaches this branch,
                // so it is appended to the transcript and counted.
                if(tlist!=NULL){
                    tlist->push_back(inputLine);
                }
                ip= 0;
                encodedLineCount++;
            }
        }else{
            atEnd= true;
            eofHit= true;
        }
    }
    if(atEnd){
        inputLine= "";
        ip= 0;
    }
    return!atEnd;
}
/*:41*//*42:*/
#line 2631 "annoyance-filter.w"
// Assemble the next line of decoded output in s.
// A line ends at CR, LF, or a CR LF / LF CR pair.  When the character after
// a line end is not the second half of such a pair it is held in lookChar
// for the next read.
// Returns true when a line end was seen (s may be empty), or when input ran
// out after at least one character was collected; false otherwise.
bool MIMEdecoder::getDecodedLine(string&s){
    s= "";
    for(;;){
        int ch;
        if(!lookAhead){
            ch= getDecodedChar();
        }else{
            ch= lookChar;
            lookAhead= false;
        }
        if(ch<0){
            break;
        }
/*43:*/
#line 2661 "annoyance-filter.w"
        if(ch=='\r'||ch=='\n'){
            const int following= getDecodedChar();
            const bool pairedLineEnd= ((ch=='\r')&&(following=='\n'))||
                ((ch=='\n')&&(following=='\r'));
            if(!pairedLineEnd){
                lookAhead= true;
                lookChar= following;
            }
            return true;
        }
/*:43*/
#line 2646 "annoyance-filter.w"
        ;
        s+= ch;
    }
    return s.length()> 0;
}
/*:42*//*44:*/
#line 2678 "annoyance-filter.w"
// Copy every remaining decoded character to os.
void MIMEdecoder::saveDecodedStream(ostream&os){
    for(int ch= getDecodedChar();ch>=0;ch= getDecodedChar()){
        os.put(ch);
    }
}
/*:44*//*45:*/
#line 2691 "annoyance-filter.w"
// Copy every remaining decoded character to the file fname.  If the file
// cannot be created a message is written to cerr (verbose mode only) and
// nothing is decoded.
void MIMEdecoder::saveDecodedStream(const string fname){
    ofstream of(fname.c_str());
    if(of){
        saveDecodedStream(of);
        of.close();
        return;
    }
    if(verbose){
        cerr<<"Cannot create MIMEdecoder dump file: "<<fname<<endl;
    }
}
/*:45*//*49:*/
#line 2806 "annoyance-filter.w"
// Return the next decoded byte (0..255) of the Base64 stream, or -1 when
// the input is exhausted.  Decoded bytes are produced up to three at a time
// from groups of four input characters and buffered in decodedBytes.
int base64MIMEdecoder::getDecodedChar(void){
/*55:*/
#line 2935 "annoyance-filter.w"
// A character pushed back by getDecodedLine() is returned first.
if(lookAhead){
lookAhead= false;
return lookChar;
}
/*:55*/
#line 2808 "annoyance-filter.w"
;
if(decodedBytes.size()==0){
/*50:*/
#line 2831 "annoyance-filter.w"
// a[] holds the four raw input characters, b[] their 6-bit values,
// o[] the three output bytes.
unsigned char a[4],b[4],o[3];
int j,k;
/*51:*/
#line 2846 "annoyance-filter.w"
for(int i= 0;i<4;i++){
int c;
/*52:*/
#line 2872 "annoyance-filter.w"
// Fetch the next input character greater than ' ', reading further lines
// as needed; c stays -1 if the input runs out.
while(true){
c= -1;
while(ip<inputLine.length()){
if(inputLine[ip]> ' '){
c= inputLine[ip++];
break;
}
ip++;
}
if(c>=0){
break;
}
if(!getNextEncodedLine()){
break;
}
}
/*:52*/
#line 2850 "annoyance-filter.w"
;
/*53:*/
#line 2897 "annoyance-filter.w"
// Running out of input is only an error in the middle of a group.
if(c==EOF){
if(i> 0){
nDecodeErrors++;
mf->reportParserDiagnostic("Unexpected end of file in Base64 decoding.");
}
return-1;
}
/*:53*/
#line 2851 "annoyance-filter.w"
;
// Table entries with the 0x80 bit set mark characters outside the Base64
// alphabet; such a character is reported and skipped (i-- retries the slot).
if(dtable[c]&0x80){
nDecodeErrors++;
ostringstream os;
os<<"Illegal character '"<<c<<"' in Base64 input stream.";
mf->reportParserDiagnostic(os.str());
i--;
continue;
}
a[i]= (unsigned char)c;
b[i]= dtable[c];
}
/*:51*/
#line 2835 "annoyance-filter.w"
;
/*54:*/
#line 2917 "annoyance-filter.w"
// Pack four 6-bit values into three bytes.  '=' padding in the third or
// fourth position reduces the number of bytes kept to 1 or 2.
o[0]= (b[0]<<2)|(b[1]>>4);
o[1]= (b[1]<<4)|(b[2]>>2);
o[2]= (b[2]<<6)|b[3];
j= a[2]=='='?1:(a[3]=='='?2:3);
for(k= 0;k<j;k++){
decodedBytes.push_back(o[k]);
}
/*:54*/
#line 2836 "annoyance-filter.w"
;
/*:50*/
#line 2810 "annoyance-filter.w"
;
}
if(decodedBytes.size()> 0){
unsigned char v= decodedBytes[0];
decodedBytes.pop_front();
return v;
}
return-1;
}
/*:49*//*56:*/
#line 2955 "annoyance-filter.w"
// Fill dtable, which maps a character code to its 6-bit Base64 value.
// Characters outside the Base64 alphabet get 0x80, the "illegal" marker
// tested by getDecodedChar().  '=' (padding) maps to 0.
// The fill loop now covers index 255 as well.  The original stopped at 254,
// leaving the last entry unset even though getDecodedChar() can index the
// table with any character code above ' ' (up to 255 where char is
// unsigned).
void base64MIMEdecoder::initialiseDecodingTable(void)
{
    int i;
    for(i= 0;i<=255;i++){
        dtable[i]= 0x80;
    }
    // The alphabet is filled in sub-ranges of consecutive letters.
    // NOTE(review): presumably split this way so the ranges stay contiguous
    // in character sets such as EBCDIC; left as is.
    for(i= 'A';i<='I';i++){
        dtable[i]= 0+(i-'A');
    }
    for(i= 'J';i<='R';i++){
        dtable[i]= 9+(i-'J');
    }
    for(i= 'S';i<='Z';i++){
        dtable[i]= 18+(i-'S');
    }
    for(i= 'a';i<='i';i++){
        dtable[i]= 26+(i-'a');
    }
    for(i= 'j';i<='r';i++){
        dtable[i]= 35+(i-'j');
    }
    for(i= 's';i<='z';i++){
        dtable[i]= 44+(i-'s');
    }
    for(i= '0';i<='9';i++){
        dtable[i]= 52+(i-'0');
    }
#define CI(x) static_cast<int> (x)
    dtable[CI('+')]= 62;
    dtable[CI('/')]= 63;
    dtable[CI('=')]= 0;
#undef CI
}
/*:56*//*57:*/
#line 3000 "annoyance-filter.w"
// Decode the Base64 text in s and return the resulting bytes as a string.
// m is handed to the temporary decoder's set() call.
string base64MIMEdecoder::decodeEscapedText(const string s,mailFolder*m){
    istringstream encoded(s);
    base64MIMEdecoder decoder;
    decoder.set(&encoded,m,"");

    string result= "";
    for(int dchar= decoder.getDecodedChar();dchar>=0;dchar= decoder.getDecodedChar()){
        result+= static_cast<char> (dchar);
    }
    return result;
}
/*:57*//*59:*/
#line 3048 "annoyance-filter.w"
// Return the next decoded character of the Quoted-Printable stream, or a
// negative value when getNextChar() reports end of input.
// "=XX" escapes are converted to the byte with hex value XX; "=" followed
// by optional blanks and a line end is a soft line break and is dropped.
int quotedPrintableMIMEdecoder::getDecodedChar(void){
int ch;
/*55:*/
#line 2935 "annoyance-filter.w"
// A character pushed back by getDecodedLine() is returned first.
if(lookAhead){
lookAhead= false;
return lookChar;
}
/*:55*/
#line 3052 "annoyance-filter.w"
;
while(true){
ch= getNextChar();
if(ch=='='){
/*60:*/
#line 3072 "annoyance-filter.w"
int ch1= getNextChar();
/*63:*/
#line 3149 "annoyance-filter.w"
// Skip blanks (space, tab, CR) between '=' and the line end.  Any other
// character found here is reported and then treated as a blank.
while(/*62:*/
#line 3135 "annoyance-filter.w"
((ch1==' ')||(ch1=='\t')||(ch1=='\r'))
/*:62*/
#line 3150 "annoyance-filter.w"
){
ch1= getNextChar();
if(ch1=='\n'){
continue;
}
if(!/*62:*/
#line 3135 "annoyance-filter.w"
((ch1==' ')||(ch1=='\t')||(ch1=='\r'))
/*:62*/
#line 3155 "annoyance-filter.w"
){
nDecodeErrors++;
ostringstream os;
os<<"Invalid character '"<<static_cast<char> (ch1)<<
"' after soft line break in Quoted-Printable MIME part.";
mf->reportParserDiagnostic(os.str());
ch1= ' ';
}
}
/*:63*/
#line 3074 "annoyance-filter.w"
;
if(ch1=='\n'){
// Soft line break: discard it and decode the next character.
continue;
}else{
int n1= hex_to_nybble(ch1);
int ch2= getNextChar();
int n2= hex_to_nybble(ch2);
if(n1==-1||n2==-1){
ostringstream os;
os<<"Invalid escape sequence '="<<
static_cast<char> (ch1)<<static_cast<char> (ch2)<<
"' in Quoted-Printable MIME part.";
mf->reportParserDiagnostic(os.str());
nDecodeErrors++;
}
// NOTE(review): after an invalid escape n1 or n2 is -1, so this value
// is negative and callers that test "ch < 0" will treat it as end of input.
ch= (n1<<4)|n2;
}
return ch;
/*:60*/
#line 3057 "annoyance-filter.w"
;
}else{
return ch;
}
}
}
/*:59*//*61:*/
#line 3101 "annoyance-filter.w"
int quotedPrintableMIMEdecoder::getNextChar(void){
while(true){
if(atEndOfLine){
atEndOfLine= false;
return'\n';
}
if(ip<inputLine.length()){
if(ip==(inputLine.length()-1)){
atEndOfLine= true;
}
return inputLine[ip++];
}
if(!getNextEncodedLine()){
break;
}
if(inputLine.length()==0){
atEndOfLine= true;
}
}
return-1;
}
/*:61*//*64:*/
#line 3172 "annoyance-filter.w"
// Convert a hexadecimal digit character (0-9, A-F, a-f) to its value
// 0..15.  Returns -1 for any other input.
int quotedPrintableMIMEdecoder::hex_to_nybble(const int ch){
    if((ch>='0')&&(ch<=('0'+9))){
        return ch-'0';
    }
    const bool upperHex= (ch>='A')&&(ch<=('A'+5));
    const bool lowerHex= (ch>='a')&&(ch<=('a'+5));
    if(upperHex){
        return 10+(ch-'A');
    }
    if(lowerHex){
        return 10+(ch-'a');
    }
    return-1;
}
/*:64*//*65:*/
#line 3189 "annoyance-filter.w"
// Decode "=XX" Quoted-Printable escapes in s and return the result.
// An escape that is malformed or too close to the end of the string is
// copied through unchanged (and reported on cerr in verbose mode).
// The parameter m is not used.
// The end-of-string test is now "p + 3 > length".  The original
// "p > length - 3" underflowed for strings shorter than three characters,
// so an '=' in such a string led to reads beyond the end.
string quotedPrintableMIMEdecoder::decodeEscapedText(const string s,mailFolder*m){
    string r= "";
    const string::size_type len= s.length();
    for(string::size_type p= 0;p<len;p++){
        bool decoded= false;
        if(s[p]=='='){
            if((p+3)> len){
                if(verbose){
                    cerr<<"decodeEscapedText: escape too near end of string: "<<s<<endl;
                }
            }else{
                const int n1= hex_to_nybble(s[p+1]),
                    n2= hex_to_nybble(s[p+2]);
                if((n1<0)||(n2<0)){
                    if(verbose){
                        cerr<<"decodeEscapedText: invalid escape sequence \""<<
                            s.substr(p,3)<<"\""<<endl;
                    }
                }else{
                    r+= static_cast<char> ((n1<<4)|n2);
                    decoded= true;
                    p+= 2;  // skip the two hex digits just consumed
                }
            }
        }
        if(!decoded){
            r+= s[p];
        }
    }
    return r;
}
/*:65*//*69:*/
#line 3354 "annoyance-filter.w"
// Read n (1..4) encoded bytes and combine them, first byte most
// significant, into one integer.  If any byte read is negative that value
// is returned immediately.
int MBCSdecoder::getNextNBytes(const unsigned int n){
    assert((n>=1)&&(n<=4));
    int value= 0;
    unsigned int remaining= n;
    while(remaining> 0){
        const int nextByte= getNextEncodedByte();
        if(nextByte<0){
            return nextByte;
        }
        value= (value<<8)|nextByte;
        remaining--;
    }
    assert(value!=-1);
    return value;
}
/*:69*//*70:*/
#line 3376 "annoyance-filter.w"
// Report a decoder diagnostic: through the mail folder when one is
// attached, otherwise on cerr in verbose mode.
void MBCSdecoder::reportDecoderDiagnostic(const string s)const{
    if(mf==NULL){
        if(verbose){
            cerr<<s<<endl;
        }
        return;
    }
    mf->reportParserDiagnostic(s);
}
void MBCSdecoder::reportDecoderDiagnostic(const ostringstream&os)const{
reportDecoderDiagnostic(os.str());
}
/*:70*//*72:*/
#line 3421 "annoyance-filter.w"
// Decode the next character of an EUC byte stream.
//
// A first byte in 0xA1..0xFE introduces a two byte character, returned as
// (first<<8)|second when the second byte is in 0x80..0xFF.  If the second
// byte is -1 a diagnostic is reported and -1 returned; any other second
// byte discards the line, reports a diagnostic and returns the first byte.
// Every other first byte (including a negative one) is returned as is.
int EUC_MBCSdecoder::getNextDecodedChar(void){
    const int lead= getNextEncodedByte();
    if((lead<0xA1)||(lead> 0xFE)){
        return lead;
    }

    const int trail= getNextEncodedByte();
    if((trail>=0x80)&&(trail<=0xFF)){
        return(lead<<8)|trail;
    }

    ostringstream os;
    if(trail==-1){
        os<<name()<<"_MBCSdecoder: Premature end of line in two byte character.";
        reportDecoderDiagnostic(os);
        return-1;
    }

    discardLine();
    os<<name()<<"_MBCSdecoder: Invalid second byte in two byte character: "
        "0x"<<setiosflags(ios::uppercase)<<hex<<lead<<" "<<"0x"<<trail<<".";
    reportDecoderDiagnostic(os);
    return lead;
}
/*:72*//*74:*/
#line 3477 "annoyance-filter.w"
// Decode the next character of a Big5 byte stream.
//
// A non-negative first byte with its high bit set is combined with the
// following byte as (first<<8)|second; if that second byte is -1 a
// diagnostic is reported and -1 returned.  Any other first byte is
// returned unchanged.
int Big5_MBCSdecoder::getNextDecodedChar(void){
    const int lead= getNextEncodedByte();
    const bool twoByte= (lead>=0)&&((lead&0x80)!=0);
    if(!twoByte){
        return lead;
    }

    const int trail= getNextEncodedByte();
    if(trail!=-1){
        return(lead<<8)|trail;
    }

    ostringstream os;
    os<<name()<<"_MBCSdecoder: Premature end of line in two byte character.";
    reportDecoderDiagnostic(os);
    return-1;
}
/*:74*//*76:*/
#line 3534 "annoyance-filter.w"
// Decode the next character of a Shift-JIS byte stream.
//
// Characters queued in `pending` by an earlier call are delivered first.
// First bytes in 0x81..0x9F or 0xE0..0xFC introduce a two byte character,
// returned as (first<<8)|second after validating the second byte; a
// missing or invalid second byte reports a diagnostic and returns -1.
// The single bytes 0x80, 0xFD, 0xFE and 0xFF are replaced by substitute
// characters, queueing any extra characters in `pending`.
int Shift_JIS_MBCSdecoder::getNextDecodedChar(void){
    // Deliver characters left over from a multi-character substitution.
    if(!pending.empty()){
        const int queued= pending[0];
        pending= pending.substr(1);
        return queued;
    }

    int c1= getNextEncodedByte();
    if(c1<0){
        return c1;
    }

    const bool isLeadByte= ((c1>=0x81)&&(c1<=0x9F))||
                           ((c1>=0xE0)&&(c1<=0xEF))||
                           ((c1>=0xF0)&&(c1<=0xFC));
    if(isLeadByte){
        const int c2= getNextEncodedByte();
        if(c2==-1){
            ostringstream os;
            os<<name()<<"_MBCSdecoder: Premature end of line in two byte character.";
            reportDecoderDiagnostic(os);
            return-1;
        }
        const bool validTrail= ((c2>=0x40)&&(c2<=0x7E))||
                               ((c2>=0x80)&&(c2<=0xFC));
        if(!validTrail){
            ostringstream os;
            os<<name()<<"_MBCSdecoder: Invalid second byte in two byte character: "
                "0x"<<setiosflags(ios::uppercase)<<hex<<c1<<" "<<"0x"<<c2<<".";
            reportDecoderDiagnostic(os);
            return-1;
        }
        return(c1<<8)|c2;
    }

    // Single byte codes that are replaced by substitute characters.
    if(c1==0x80){
        c1= '\\';
    }else if(c1==0xFD){
        c1= 0xA9;
    }else if(c1==0xFE){
        c1= 'T';
        pending= "M";
    }else if(c1==0xFF){
        c1= '.';
        pending= "..";
    }
    return c1;
}
/*:76*//*82:*/
#line 3676 "annoyance-filter.w"
// Decode the next UCS-2 character: two bytes combined according to the
// bigEndian flag.
//
// Returns the 16 bit character value.  A negative first byte (end of
// input) is returned unchanged without a diagnostic; input that ends
// between the two bytes of a character reports a diagnostic and returns -1.
int UCS_2_Unicode_MBCSdecoder::getNextDecodedChar(void){
    int c1= getNextEncodedByte();
    // End of input on a character boundary is the normal way a line ends.
    // Without this check every line produced a spurious "Premature end of
    // line" diagnostic from the second read below.
    if(c1<0){
        return c1;
    }
    const int c2= getNextEncodedByte();
    if(c2<0){
        ostringstream os;
        os<<name()<<"_MBCSdecoder: Premature end of line in two byte character.";
        reportDecoderDiagnostic(os);
        return-1;
    }
    if(bigEndian){
        c1= (c1<<8)|c2;
    }else{
        c1|= (c2<<8);
    }
    return c1;
}
/*:82*//*84:*/
#line 3716 "annoyance-filter.w"
int UTF_8_Unicode_MBCSdecoder::getNextDecodedChar(void){
int c1= getNextEncodedByte();
if(c1<0){
return c1;
}
string::size_type nbytes= 0;
unsigned int result;
if(c1<=0x7F){
result= c1;
nbytes= 1;
}else{
unsigned char chn= c1;
while((chn&0x80)!=0){
nbytes++;
chn<<= 1;
}
if(nbytes> 6){
ostringstream os;
os<<name()<<"_MBCSdecoder: Invalid first byte "<<
"0x"<<setiosflags(ios::uppercase)<<hex<<c1<<" in UTF-8 encoded string";
reportDecoderDiagnostic(os);
return-1;
}
result= c1&(0xFF>>(nbytes+1));
for(string::size_type i= 1;i<nbytes;i++){
c1= getNextEncodedByte();
if(c1<0){
ostringstream os;
os<<name()<<"_MBCSdecoder: Premature end of line in UTF-8 character.";
reportDecoderDiagnostic(os);
return-1;
}
if((c1&0xC0)!=0x80){
ostringstream os;
os<<name()<<"_MBCSdecoder: Bad byte 1--n signature in UTF-8 encoded sequence.";
reportDecoderDiagnostic(os);
}
result= (result<<6)|(c1&0x3F);
}
}
return result;
}
/*:84*//*86:*/
#line 3832 "annoyance-filter.w"
// Decode the next UTF-16 character, combining surrogate pairs.
//
// Words outside 0xD800..0xDFFF are returned directly.  A high surrogate
// (0xD800..0xDBFF) must be followed by a low surrogate (0xDC00..0xDFFF);
// the pair is combined into a value of 0x10000 or above.  A missing or
// invalid second word, or a low surrogate in first position, reports a
// diagnostic and returns -1.  A negative first word is returned as is.
int UTF_16_Unicode_MBCSdecoder::getNextDecodedChar(void){
    int w1,w2,result;
    w1= getNextUTF_16Word();
    if(w1<0){
        return w1;
    }
    // 0xD800 itself is the first high surrogate, so the comparison must be
    // strict; "<=" made pairs starting with 0xD800 decode as two separate
    // characters.
    if((w1<0xD800)||(w1> 0xDFFF)){
        result= w1;
    }else if((w1>=0xD800)&&(w1<=0xDBFF)){
        w2= getNextUTF_16Word();
        if(w2<0){
            ostringstream os;
            os<<name()<<"_MBCSdecoder: Premature end of line in UTF-16 two word character.";
            reportDecoderDiagnostic(os);
            return-1;
        }
        if((w2<0xDC00)||(w2> 0xDFFF)){
            ostringstream os;
            os<<name()<<"_MBCSdecoder: Invalid second word surrogate "<<
                "0x"<<setiosflags(ios::uppercase)<<hex<<w2<<" in UTF-16 encoded string.";
            reportDecoderDiagnostic(os);
            return-1;
        }
        // Ten payload bits from each surrogate, offset by 0x10000.
        result= (((w1&0x3FF)<<10)|(w2&0x3FF))+0x10000;
    }else{
        ostringstream os;
        os<<name()<<"_MBCSdecoder: Invalid first word surrogate "<<
            "0x"<<setiosflags(ios::uppercase)<<hex<<w1<<" in UTF-16 encoded string.";
        reportDecoderDiagnostic(os);
        return-1;
    }
    return result;
}
/*:86*//*89:*/
#line 3931 "annoyance-filter.w"
// Fetch the next character from the attached decoder and render it as text.
//
// Returns "" when the decoder yields a negative value, a one character
// string for values below 256, and otherwise a token of the form
// prefix + name() + "-" + upper case hex value + suffix.
string MBCSinterpreter::getNextDecodedChar(void){
    assert(dp!=NULL);
    const int code= dp->getNextDecodedChar();
    if(code<0){
        return"";
    }
    if(code<256){
        return string(1,static_cast<char> (code));
    }
    ostringstream token;
    token.setf(ios::uppercase);
    token<<prefix<<name()<<"-"<<hex<<code<<dec<<suffix;
    return token.str();
}
/*:89*//*90:*/
#line 3953 "annoyance-filter.w"
// Interpret a whole line: make s the source, then concatenate the result
// of getNextDecodedChar() until it returns an empty string.
string MBCSinterpreter::decodeLine(const string&s){
    string decoded= "";
    setSource(s);
    for(;;){
        const string piece= getNextDecodedChar();
        if(piece.empty()){
            break;
        }
        decoded+= piece;
    }
    return decoded;
}
/*:90*//*94:*/
#line 4024 "annoyance-filter.w"
// Render the next Shift-JIS character as text.
//
// Returns "" when the decoder yields a negative value and a one character
// string for values below 0xA1.  Values 0xA1..0xDF become "SJIS-K" + hex;
// everything else becomes prefix + "SJIS-" + hex + suffix (upper case hex).
string Shift_JIS_MBCSinterpreter::getNextDecodedChar(void){
    assert(dp!=NULL);
    const int code= dp->getNextDecodedChar();
    if(code<0){
        return"";
    }
    if(code<0xA1){
        return string(1,static_cast<char> (code));
    }
    ostringstream token;
    token.setf(ios::uppercase);
    const bool singleByteRange= (code>=0xA1)&&(code<=0xDF);
    if(!singleByteRange){
        token<<prefix<<"SJIS-"<<hex<<code<<dec<<suffix;
    }else{
        token<<"SJIS-K"<<hex<<code<<dec;
    }
    return token.str();
}
/*:94*//*97:*/
#line 4091 "annoyance-filter.w"
// Render the next Unicode character as text.
//
// Returns "" when the decoder yields a negative value and a one character
// string for values up to 0xFF.  Values in [0x3200,0xD800) or
// [0xF900,0xFAFF) become prefix + "UCS-" + hex + suffix; all other values
// become "UCS-" + hex without prefix and suffix (upper case hex).
string Unicode_MBCSinterpreter::getNextDecodedChar(void){
    assert(dp!=NULL);
    const int code= dp->getNextDecodedChar();
    if(code<0){
        return"";
    }
    if(code<=0xFF){
        return string(1,static_cast<char> (code));
    }
    const bool wrapped= ((code>=0x3200)&&(code<0xD800))||
                        ((code>=0xF900)&&(code<0xFAFF));
    ostringstream token;
    token.setf(ios::uppercase);
    if(!wrapped){
        token<<"UCS-"<<hex<<code<<dec;
    }else{
        token<<prefix<<"UCS-"<<hex<<code<<dec<<suffix;
    }
    return token.str();
}
/*:97*//*99:*/
#line 4186 "annoyance-filter.w"
// Read one byte from the mail folder.  When the folder returns EOF the eof
// flag is set; the value returned is always the low 8 bits of what was read.
unsigned char applicationStringParser::get8(void){
    assert(mf!=NULL);
    const int raw= mf->nextByte();
    if(raw==EOF){
        eof= true;
    }
    return raw&0xFF;
}
/*:99*//*101:*/
#line 4288 "annoyance-filter.w"
// Read the file header: the 3 byte signature (must be "FWS"), then the
// version, file length, frame size, frame rate and frame count.  On end of
// input or a wrong signature the error flag is set and nothing further is
// read.
void flashStream::readHeader(void){
    for(int i= 0;i<3;i++){
        sig[i]= get8();
    }
    const bool badSignature= isEOF()||(memcmp(sig,"FWS",3)!=0);
    if(badSignature){
        error= true;
        if(verbose){
            cerr<<"Invalid signature in Flash animation file."<<endl;
        }
        return;
    }
    version= get8();
    fileLength= get32();
    getRect(&frameSize);
    frameRate= get16();
    frameCount= get16();
}
/*:101*//*102:*/
#line 4312 "annoyance-filter.w"
// Write a human readable summary of the header fields to os.  The frame
// rate is printed as frameRate/256 with precision 5.
void flashStream::describe(ostream&os){
    os<<"Flash animation version "<<static_cast<unsigned int> (version)<<endl;

    os<<" File length: "<<fileLength<<" bytes."<<endl;

    os<<" Frame size: X: "<<frameSize.xMin<<" - "<<frameSize.xMax;
    os<<" Y: "<<frameSize.yMin<<" - "<<frameSize.yMax<<endl;

    os<<" Frame rate: "<<setprecision(5)<<(frameRate/256.0);
    os<<" fps."<<endl;

    os<<" Frame count: "<<frameCount<<endl;
}
/*:102*//*103:*/
#line 4336 "annoyance-filter.w"
// Read the next tag header.
//
// The 16 bit header holds the tag type in its upper 10 bits and a length in
// its lower 6; a length field of 0x3F means a 32 bit length follows.  Sets
// tType and tDataLen and returns true unless the tag is stagEnd.  If the
// stream is not OK after reading, the tag is forced to stagEnd with length
// 0 and false is returned.
bool flashStream::nextTag(void){
    const unsigned short header= get16();
    if(isOK()){
        tType= static_cast<tagType> (header>>6);
        unsigned long length= header&0x3F;
        if(length==0x3F){
            length= get32();
        }
        if(isOK()){
            tDataLen= length;
            return tType!=stagEnd;
        }
    }
    tType= stagEnd;
    tDataLen= 0;
    return false;
}
/*:103*//*104:*/
#line 4365 "annoyance-filter.w"
// Skip the rest of the current tag's data.  lookedAhead is the number of
// bytes of the tag that the caller has already consumed.
void flashStream::ignoreTag(unsigned int lookedAhead){
    unsigned int consumed= lookedAhead;
    while(isOK()&&(consumed<tDataLen)){
        get8();
        consumed++;
    }
}
/*:104*//*105:*/
#line 4386 "annoyance-filter.w"
// Read a string from the stream into s.
//
// s  Receives the string (always cleared first).
// n  Number of bytes to read, or -1 to read up to (and consume) a
//    terminating zero byte.
//
// Reading stops early if the end of the input is reached; the bytes read
// before that point are kept.
void flashStream::getString(string&s,int n){
    s= "";
    if(n==-1){
        // At end of input get8() returns 0xFF rather than 0, so a loop
        // waiting only for the terminator would never finish on a
        // truncated file.  Check for end of input after each read.
        while(!isEOF()){
            const char ch= get8();
            if(isEOF()||(ch==0)){
                break;
            }
            s+= ch;
        }
    }else{
        while((n> 0)&&(!isEOF())){
            const char ch= get8();
            if(isEOF()){
                break;
            }
            s+= ch;
            n--;
        }
    }
}
/*:105*//*106:*/
#line 4409 "annoyance-filter.w"
// Read a bit-packed rectangle: a 5 bit field width followed by four signed
// fields of that width, stored in the order xMin, xMax, yMin, yMax.
void flashStream::getRect(rect*r){
    initBits();
    const int fieldWidth= static_cast<int> (getBits(5));
    r->xMin= getSignedBits(fieldWidth);
    r->xMax= getSignedBits(fieldWidth);
    r->yMin= getSignedBits(fieldWidth);
    r->yMax= getSignedBits(fieldWidth);
}
/*:106*//*107:*/
#line 4427 "annoyance-filter.w"
// Read a bit-packed matrix.
//
// Two optional pairs, each introduced by a 1 bit flag and a 5 bit field
// width, fill (a, d) and (b, c); when absent they default to 0x00010000
// and 0 respectively.  The pair (tx, ty) is always present and has its own
// 5 bit field width.
void flashStream::getMatrix(matrix*mat){
    initBits();

    const bool hasAD= getBits(1)!=0;
    if(!hasAD){
        mat->a= mat->d= 0x00010000L;
    }else{
        const int width= static_cast<int> (getBits(5));
        mat->a= getSignedBits(width);
        mat->d= getSignedBits(width);
    }

    const bool hasBC= getBits(1)!=0;
    if(!hasBC){
        mat->b= mat->c= 0;
    }else{
        const int width= static_cast<int> (getBits(5));
        mat->b= getSignedBits(width);
        mat->c= getSignedBits(width);
    }

    const int width= static_cast<int> (getBits(5));
    mat->tx= getSignedBits(width);
    mat->ty= getSignedBits(width);
}
/*:107*//*109:*/
#line 4503 "annoyance-filter.w"
// Reset the bit reader so the next getBits() starts on a fresh byte.
void flashStream::initBits(void){
    bitBuf= 0;
    bitPos= 0;
}
// Read an n bit unsigned field, most significant bit first.  bitBuf holds
// the bits of the current byte not yet consumed and bitPos how many there
// are; further bytes are fetched with get8() as needed.
unsigned int flashStream::getBits(int n){
    unsigned int value= 0;
    for(;;){
        const int shortfall= n-bitPos;
        if(shortfall<=0){
            // The buffered bits suffice: take the top n of them and keep
            // the remainder for the next call.
            value|= bitBuf>>-shortfall;
            bitPos-= n;
            bitBuf&= 0xFF>>(8-bitPos);
            return value;
        }
        // Use up everything buffered, then refill with the next byte.
        value|= bitBuf<<shortfall;
        n= shortfall;
        bitBuf= get8();
        bitPos= 8;
    }
}
// Read an n bit field and sign-extend it from bit n-1.
//
// For n <= 0, or n at least the width of unsigned int, the value from
// getBits(n) is returned without extension.
int flashStream::getSignedBits(const int n){
    unsigned int raw= getBits(n);
    const int wordBits= static_cast<int> (sizeof(unsigned int)*8);
    // The guard keeps both shifts in range: n==0 (a zero field width read
    // from the stream) used to evaluate 1<<-1, which is undefined.  The
    // extension is done on unsigned values so that no negative number is
    // ever left-shifted.
    if((n> 0)&&(n<wordBits)&&(((raw>>(n-1))&1u)!=0)){
        raw|= ~0u<<n;
    }
    return static_cast<int> (raw);
}
/*:109*//*115:*/
#line 4800 "annoyance-filter.w"
// Return the next text string found in the Flash stream.
//
// On the first call the file header is read.  Tags are then parsed one at
// a time; strings discovered while parsing a tag are queued in `strings`
// and handed out one per call.
//
// s  Receives the next string.
//
// Returns true when s was set, false when the stream is exhausted or the
// header is invalid.  Before returning false the remaining input is read
// and discarded.
//
// Fixes relative to the previous version:
//   * an invalid header is handled the same way whether or not verbose is
//     set (the clean-up used to sit inside the verbose test);
//   * the text record loop and the DefineEditText string loop stop at end
//     of input instead of spinning on the 0xFF that get8() then returns;
//   * a DefineText font that has a glyph count but no DefineFontInfo is
//     treated as undefined instead of dereferencing map.end();
//   * DefineText2 colours are skipped as 4 bytes (RGBA), not 3;
//   * DefineFontInfo for an unknown font skips the correct number of bytes;
//   * the glyph table is freed when its font ID is already registered.
bool flashTextExtractor::nextString(string&s){
if(!initialised){
initialised= true;
readHeader();
if(!isOK()){
if(verbose){
cerr<<"Invalid header in Flash application file."<<endl;
}
// Shut down and drain the input regardless of the verbose setting.
close();
while(!isEOF()){
get8();
}
return false;
}
}
while(true){
haveStrings:
/*116:*/
#line 4887 "annoyance-filter.w"
// Hand out a queued string if there is one.
if(!strings.empty()){
s= strings.front();
strings.pop();
return true;
}
/*:116*/
#line 4818 "annoyance-filter.w"
;
while((!isEOF())&&(!isError())&&nextTag()){
unsigned int variant= 0;
switch(tType){
case stagDefineFont:
/*117:*/
#line 4899 "annoyance-filter.w"
// DefineFont: remember the glyph count (first offset / 2) for the font.
{
#ifdef FLASH_PARSE_DEBUG
cout<<"DefineFont"<<endl;
#endif
unsigned short fontID= get16();
unsigned int offsetTable= get16();
#ifdef FLASH_PARSE_DEBUG
cout<<" Font ID: "<<fontID<<endl;
cout<<" Glyph count: "<<(offsetTable/2)<<endl;
#endif
fontGlyphCount.insert(make_pair(fontID,offsetTable/2));
ignoreTag(2*2);
}
/*:117*/
#line 4825 "annoyance-filter.w"
;
break;
case stagDefineFont2:
/*118:*/
#line 4919 "annoyance-filter.w"
// DefineFont2: queue the font name and remember the glyph count.
{
#ifdef FLASH_PARSE_DEBUG
cout<<"DefineFont2"<<endl;
#endif
unsigned short fontID= get16();
get16();
unsigned int fontNameLen= get8();
string fontName;
getString(fontName,fontNameLen);
if(!textOnly){
strings.push(fontName);
}
unsigned int nGlyphs= get16();
fontGlyphCount.insert(make_pair(fontID,nGlyphs));
ignoreTag(2+2+1+fontNameLen+2);
}
/*:118*/
#line 4829 "annoyance-filter.w"
;
break;
case stagDefineFontInfo:
/*119:*/
#line 4952 "annoyance-filter.w"
// DefineFontInfo: queue the font name and read the glyph-to-character
// table for a font previously declared by DefineFont.
{
#ifdef FLASH_PARSE_DEBUG
cout<<"DefineFontInfo"<<endl;
#endif
unsigned short fontID= get16();
unsigned int fontNameLen= get8();
string fontName;
getString(fontName,fontNameLen);
if(!textOnly){
strings.push(fontName);
}
fontFlags fFlags= static_cast<fontFlags> (get8());
map<unsigned short,unsigned short> ::iterator fp= fontGlyphCount.find(fontID);
if(fp==fontGlyphCount.end()){
if(verbose){
cerr<<"DefineFontInfo for font ID "<<fontID<<
" without previous DefineFont."<<endl;
}
// Bytes consumed so far: font ID (2), name length (1), name, flags (1).
ignoreTag(2+1+fontNameLen+1);
}else{
unsigned nGlyphs= fp->second;
vector<unsigned short> *v= new vector<unsigned short> (nGlyphs);
// insert() leaves an existing entry untouched; remember whether the map
// took the new table so that it can be freed otherwise.
const bool tableStored= fontMap.insert(make_pair(fontID,v)).second;
fontInfoBits.insert(make_pair(fontID,fFlags));
for(unsigned int g= 0;g<nGlyphs;g++){
if(fFlags&fontWideCodes){
(*v)[g]= get16();
}else{
(*v)[g]= get8();
}
}
if(!tableStored){
delete v;
}
}
}
/*:119*/
#line 4833 "annoyance-filter.w"
;
break;
case stagDefineText2:
variant= 2;
// Deliberate fall through: DefineText2 differs only in its colour format.
case stagDefineText:
/*120:*/
#line 4995 "annoyance-filter.w"
// DefineText: walk the text records, translating glyph indices to
// characters through the current font's table.
{
#ifdef FLASH_PARSE_DEBUG
unsigned short textID= get16();
cout<<"DefineText. ID = "<<textID<<endl;
#else
get16();
#endif
rect tr;
getRect(&tr);
matrix tm;
getMatrix(&tm);
unsigned short textGlyphBits= get8();
unsigned short textAdvanceBits= get8();
int fontId= -1;
map<unsigned short,vector<unsigned short> *> ::iterator fontp= fontMap.end();
map<unsigned short,unsigned short> ::iterator fgcp= fontGlyphCount.end();
unsigned int fGlyphs= 0;
fontFlags fFlags= static_cast<fontFlags> (0);
vector<unsigned short> *fontChars= NULL;
while(true){
unsigned int textRecordType= get8();
// A zero record ends the list.  Also stop at end of input, where get8()
// keeps returning 0xFF and the zero record would never be seen.
if((textRecordType==0)||isEOF()){
break;
}
if(textRecordType&isTextControl){
#ifdef FLASH_PARSE_DEBUG
cout<<"Text control record."<<endl;
#endif
if(textRecordType&textHasFont){
fontId= get16();
#ifdef FLASH_PARSE_DEBUG
cout<<" fontId: "<<fontId<<endl;
#endif
fgcp= fontGlyphCount.find(fontId);
if(fgcp==fontGlyphCount.end()){
fontp= fontMap.end();
if(verbose){
cerr<<"Flash DefineText item references undefined font ID "<<
fontId<<endl;
}
}else{
fontp= fontMap.find(fontId);
if((fontp==fontMap.end())||(fontInfoBits.count(fontId)==0)){
// The glyph count is known but no character table was supplied:
// glyphs of this font cannot be translated, so treat it as undefined.
fgcp= fontGlyphCount.end();
if(verbose){
cerr<<"Flash DefineText item references font ID "<<
fontId<<" without DefineFontInfo."<<endl;
}
}else{
fGlyphs= fgcp->second;
fontChars= fontp->second;
fFlags= fontInfoBits.find(fontId)->second;
}
}
}
if(textRecordType&textHasColor){
#ifdef FLASH_PARSE_DEBUG
int r= get8();
int g= get8();
int b= get8();
if(variant==2){
int a= get8();
cout<<" tfontColour: ("<<r<<","<<
g<<","<<b<<","<<a<<")"<<endl;
}else{
cout<<" tfontColour: ("<<r<<","<<
g<<","<<b<<")"<<endl;
}
#else
// DefineText2 colours carry an alpha byte, as the debug branch reads.
skip8n((variant==2)?4:3);
#endif
}
if(textRecordType&textHasXOffset){
#ifdef FLASH_PARSE_DEBUG
int iXOffset= get16();
cout<<" X offset "<<iXOffset<<endl;
#else
get16();
#endif
}
if(textRecordType&textHasYOffset){
#ifdef FLASH_PARSE_DEBUG
int iYOffset= get16();
cout<<" Y offset "<<iYOffset<<endl;
#else
get16();
#endif
}
if(textRecordType&textHasFont){
#ifdef FLASH_PARSE_DEBUG
int iFontHeight= get16();
cout<<" Font Height: "<<iFontHeight<<endl;
#else
get16();
#endif
}
}else{
#ifdef FLASH_PARSE_DEBUG
cout<<"Text glyph record."<<endl;
#endif
unsigned int nGlyphs= textRecordType&0x7F;
initBits();
string s= "";
for(unsigned int i= 0;i<nGlyphs;i++){
unsigned int iIndex= getBits(textGlyphBits);
#ifdef FLASH_PARSE_DEBUG
unsigned int iAdvance= getBits(textAdvanceBits);
cout<<"["<<iIndex<<","<<iAdvance<<"] "<<flush;
#else
getBits(textAdvanceBits);
#endif
if(fontId<0){
if(verbose){
cerr<<"Flash DefineText does not specify font."<<endl;
}
}else if(fgcp!=fontGlyphCount.end()){
if(iIndex>=fGlyphs){
if(verbose){
cerr<<"Flash DefineText glyph index "<<
iIndex<<" exceeds font size of "<<fGlyphs<<"."<<
endl;
}
}else{
if(fFlags&fontWideCodes){
unsigned int wc= (*fontChars)[iIndex];
s+= static_cast<char> ((wc>>8)&0xFF);
s+= static_cast<char> (wc&0xFF);
}else{
s+= static_cast<char> ((*fontChars)[iIndex]);
}
}
}
}
#ifdef FLASH_PARSE_DEBUG
cout<<endl;
cout<<"Decoded: ("<<s<<")"<<endl;
#endif
/*121:*/
#line 5142 "annoyance-filter.w"
// Run the collected bytes through the interpreter matching the font flags.
if(fFlags&fontUnicode){
UCS_2_Unicode_MBCSdecoder mbd_ucs;
Unicode_MBCSinterpreter mbi_ucs;
mbi_ucs.setDecoder(mbd_ucs);
s= mbi_ucs.decodeLine(s);
}else if(fFlags&fontShiftJIS){
Shift_JIS_MBCSdecoder mbd_sjis;
Shift_JIS_MBCSinterpreter mbi_sjis;
mbi_sjis.setDecoder(mbd_sjis);
s= mbi_sjis.decodeLine(s);
}
/*:121*/
#line 5131 "annoyance-filter.w"
;
strings.push(s);
}
}
}
/*:120*/
#line 4842 "annoyance-filter.w"
;
break;
case stagDefineEditText:
/*122:*/
#line 5164 "annoyance-filter.w"
// DefineEditText: skip the layout fields, queue the variable name and,
// when present, the initial text.
{
#ifdef FLASH_PARSE_DEBUG
cout<<"Edit text record."<<endl;
#endif
get16();
rect rBounds;
getRect(&rBounds);
unsigned int flags= get16();
#ifdef FLASH_PARSE_DEBUG
cout<<"DefineEditText. Flags = 0x"<<hex<<flags<<dec<<endl;
#endif
if(flags&seditTextFlagsHasFont){
#ifdef FLASH_PARSE_DEBUG
unsigned short uFontId= get16();
unsigned short uFontHeight= get16();
cout<<"FontId: "<<uFontId<<" FontHeight: "<<uFontHeight<<endl;
#else
get16();
get16();
#endif
}
if(flags&seditTextFlagsHasTextColor){
skip8n(4);
}
if(flags&seditTextFlagsHasMaxLength){
#ifdef FLASH_PARSE_DEBUG
int iMaxLength= get16();
printf("length:%d ",iMaxLength);
#else
get16();
#endif
}
if(flags&seditTextFlagsHasLayout){
skip8n(1+(2*4));
}
string varname;
getString(varname);
if(!textOnly){
strings.push(varname);
}
if(flags&seditTextFlagsHasText){
string s;
char c;
// Stop at the terminating zero or at end of input, whichever comes first.
while(!isEOF()){
c= get8();
if(isEOF()||(c==0)){
break;
}
s+= c;
}
strings.push(s);
}
}
/*:122*/
#line 4846 "annoyance-filter.w"
;
break;
case stagFrameLabel:
/*123:*/
#line 5229 "annoyance-filter.w"
{
string s;
getString(s);
if(!textOnly){
strings.push(s);
}
}
/*:123*/
#line 4850 "annoyance-filter.w"
;
break;
case stagDoAction:
/*124:*/
#line 5244 "annoyance-filter.w"
// DoAction: scan the action list, queueing the strings of GetURL actions
// and skipping the data of everything else.
{
#ifdef FLASH_PARSE_DEBUG
cout<<"Do action:"<<endl;
#endif
actionCode ac;
while(isOK()&&(ac= static_cast<actionCode> (get8()))!=sactionNone){
unsigned int dlen= 0;
// Action codes with the high bit set are followed by a 16 bit length.
if((ac&0x80)!=0){
dlen= get16();
}
switch(ac){
case sactionGetURL:
{
string url,target;
getString(url);
getString(target);
if(!textOnly){
strings.push(url);
}
strings.push(target);
}
break;
default:
if(dlen> 0){
skip8n(dlen);
}
#ifdef FLASH_PARSE_DEBUG
cout<<" Skipping action code 0x"<<hex<<ac<<dec<<
" data length "<<dlen<<endl;
#endif
break;
}
}
}
/*:124*/
#line 4854 "annoyance-filter.w"
;
break;
default:
#ifdef FLASH_PARSE_DEBUG
cout<<"nextString ignoring tag type "<<getTagType()<<" data length: "<<
getTagDataLength()<<endl;
#endif
ignoreTag();
break;
}
// As soon as a tag has produced strings, go and hand the first one out.
if(!strings.empty()){
goto haveStrings;
}
}
if(strings.empty()){
break;
}
}
// Nothing more to extract: consume whatever input is left.
while(isOK()){
get8();
}
return false;
}
/*:115*//*126:*/
#line 5353 "annoyance-filter.w"
#ifdef HAVE_PDF_DECODER
// Return the next line of text extracted from a PDF attachment.
//
// On the first call the decoded attachment is copied to a temporary file
// and a pipe is opened to "pdftotext <file> -"; every call then returns
// one line of that program's output.
//
// s  Receives the next line.
//
// Returns true when s was set; false at end of output (after close()) or
// when the temporary file or the pipe could not be created, in which case
// the error and eof flags are set.
bool pdfTextExtractor::nextString(string&s){
if(!initialised){
initialised= true;
/*127:*/
#line 5382 "annoyance-filter.w"
// Create the temporary file that will receive the PDF data.
#ifdef HAVE_MKSTEMP
strcpy(tempfn,"PDF_decode_XXXXXX");
int pdffd= mkstemp(tempfn);
// mkstemp returns -1 on failure; do not build a stream on a bad descriptor.
if(pdffd<0){
cerr<<"Cannot create PDF temporary file "<<tempfn<<endl;
error= eof= true;
return false;
}
#ifdef HAVE_FDSTREAM_COMPATIBILITY
fdostream pdfstr(pdffd);
#else
ofstream pdfstr(pdffd);
#endif
#else
tmpnam(tempfn);
ofstream pdfstr(tempfn,ios::out|ios::binary);
#endif
if(!pdfstr){
cerr<<"Cannot create PDF temporary file "<<tempfn<<endl;
error= eof= true;
return false;
}
// Copy the attachment.  The byte get8() returns when it runs into the
// end of the input is a filler (0xFF), not data, so it is not written.
// NOTE(review): relies on isOK() turning false once get8() has set eof.
while(isOK()){
const unsigned char pdfByte= get8();
if(!isOK()){
break;
}
pdfstr<<pdfByte;
}
#ifdef HAVE_MKSTEMP
#ifdef HAVE_FDSTREAM_COMPATIBILITY
::close(pdffd);
#else
pdfstr.close();
#endif
#else
pdfstr.close();
#endif
/*:127*/
#line 5359 "annoyance-filter.w"
;
/*128:*/
#line 5418 "annoyance-filter.w"
// Start pdftotext on the temporary file, writing the text to its stdout.
string pdfcmd= "pdftotext ";
pdfcmd+= tempfn;
pdfcmd+= " -";
ip= popen(pdfcmd.c_str(),"r");
if(ip==NULL){
cerr<<"Cannot open pipe to pdftotext."<<endl;
error= eof= true;
return false;
}
is.attach(fileno(ip));
is.clear();
/*:128*/
#line 5360 "annoyance-filter.w"
;
}
if(ip==NULL){
return false;
}
if(getline(is,s)!=NULL){
return true;
}
close();
return false;
}
#endif
/*:126*//*130:*/
#line 5695 "annoyance-filter.w"
// Deliver the next logical line of the mail folder.
//
// Lines come from the active MIME decoder or application file parser when
// one is installed, from the look-ahead buffer when a line was read ahead,
// and otherwise directly from the input stream.  Along the way the
// function detects message starts, tracks header / part-header state,
// merges header continuation lines, decodes encoded words in headers,
// records Content-Type and related fields, and installs the decoder for a
// part when its header ends.
//
// s  Receives the line, with trailing white space removed.
//
// Returns true when a line was delivered, false when the input is
// exhausted.
bool mailFolder::nextLine(string&s){
while(true){
bool decoderEOF= false;
if(lookedAhead){
s= lookAheadLine;
lookedAhead= false;
}else{
if(mdp!=NULL){
// A decoder is active.  When it (or the application parser fed by it)
// has nothing more to give, close it out and continue with the sentinel
// line that terminated it.
if((asp!=NULL)?(!asp->nextString(s)):(!(mdp->getDecodedLine(s)))){
if(asp!=NULL){
if(Annotate('d')){
ostringstream os;
os<<"Closing "<<asp->name()<<" application file decoder.";
reportParserDiagnostic(os);
}
asp->close();
asp= NULL;
}
s= mdp->getTerminatorSentinel();
decoderEOF= mdp->isEndOfFile();
if(decoderEOF){
s= "";
}
if(Annotate('d')){
ostringstream os;
os<<"Closing out "<<mdp->name()<<" decoder. "<<
mdp->getEncodedLineCount()<<" lines decoded.";
reportParserDiagnostic(os);
os.str("");
os<<"End sentinel: "<<s;
reportParserDiagnostic(os);
}
/*142:*/
#line 6193 "annoyance-filter.w"
// Reset the per-part MIME state.
mimeContentType= mimeContentTypeCharset=
mimeContentTypeName= mimeContentDispositionFilename=
mimeContentTypeBoundary= mimeContentTransferEncoding= "";
mdp= NULL;
mbi= NULL;
asp= NULL;
byteStream= false;
/*:142*/
#line 5731 "annoyance-filter.w"
;
// A sentinel of the form "--boundary--" closes the multipart; anything
// else starts the header of the next part.
inPartHeader=
!((s.substr(0,2)=="--")&&
(s.substr(2,partBoundary.length())==partBoundary)&&
(s.substr(partBoundary.length()+2,2)=="--"));
if((!inPartHeader)&&(!(partBoundaryStack.empty()))){
partBoundary= partBoundaryStack.top();
partBoundaryStack.pop();
}
}
}else{
if(!getline(*is,s)){
/*138:*/
#line 6044 "annoyance-filter.w"
// End of this input: when reading a directory, move on to its next file.
#ifdef HAVE_DIRECTORY_TRAVERSAL
if(dirFolder){
if(ip!=NULL){
pclose(ip);
ip= NULL;
}else{
ifdir.close();
}
if(findNextFileInDirectory(cfName)){
continue;
}
}
#endif
/*:138*/
#line 5743 "annoyance-filter.w"
;
return false;
}
}
}
nLines++;
if(sloppyheaders){
// A part boundary met while still in the message header: deliver a blank
// line first to end the header, and keep the boundary as look-ahead.
if(inHeader&&multiPart&&
(partBoundary!="")&&
(s.substr(0,2)=="--")&&
(s.substr(2,partBoundary.length())==partBoundary)){
if(Annotate('d')){
ostringstream os;
os<<"Header runs into --"<<partBoundary<<" sentinel. Adding blank line to end header.";
reportParserDiagnostic(os);
}
assert(!lookedAhead);
lookedAhead= true;
lookAheadLine= s;
s= "";
}
}
if((mdp==NULL)&&(tlist!=NULL)&&(!decoderEOF)){
tlist->push_back(s);
}
/*139:*/
#line 6106 "annoyance-filter.w"
// Detect the start of a new message and reset the per-message state.
#ifdef BSD_DIAG
if(s.substr(0,(sizeof messageSentinel)-1)==messageSentinel){
if(!BSDfolder&&!lastLineBlank){
cerr<<"*** NonBSD From line ditched: "<<s<<endl;
}
}
#endif
if(((s.substr(0,(sizeof messageSentinel)-1)==messageSentinel)&&
(BSDfolder||lastLineBlank))
||
(expectingNewMessage&&(s.length()> 0)&&(!isISOspace(s[0])))){
nMessages++;
newMessage= true;
expectingNewMessage= false;
inHeader= true;
multiPart= false;
inPartHeader= false;
partHeaderLines= 0;
bodyContentType= bodyContentTypeCharset=
bodyContentTypeName= bodyContentTransferEncoding= "";
fromLine= s;
lastFromLine= lastMessageID= messageID= "";
while(!partBoundaryStack.empty()){
ostringstream os;
os<<"Orphaned part boundary on stack: \""<<partBoundaryStack.top()<<"\"";
reportParserDiagnostic(os);
partBoundaryStack.pop();
}
/*142:*/
#line 6193 "annoyance-filter.w"
mimeContentType= mimeContentTypeCharset=
mimeContentTypeName= mimeContentDispositionFilename=
mimeContentTypeBoundary= mimeContentTransferEncoding= "";
mdp= NULL;
mbi= NULL;
asp= NULL;
byteStream= false;
/*:142*/
#line 6136 "annoyance-filter.w"
;
}else{
newMessage= false;
}
/*:139*/
#line 5778 "annoyance-filter.w"
;
/*140:*/
#line 6149 "annoyance-filter.w"
// Strip trailing white space.
while((s.length()> 0)&&(isISOspace(s[s.length()-1]))){
s.erase(s.length()-1);
}
setNewMessageEligiblity(s.empty());
/*:140*/
#line 5779 "annoyance-filter.w"
;
/*141:*/
#line 6165 "annoyance-filter.w"
if(inHeader||inPartHeader){
if(s==""){
// Blank line: the header (or part header) ends here.
if(inHeader){
// A non-multipart message with a transfer encoding is handled as a
// single part with an empty boundary.
if((!multiPart)&&(bodyContentTransferEncoding!="")){
mimeContentType= bodyContentType;
mimeContentTypeCharset= bodyContentTypeCharset;
mimeContentTypeName= bodyContentTypeName;
mimeContentTransferEncoding= bodyContentTransferEncoding;
multiPart= true;
partBoundary= "";
}
}
inHeader= inPartHeader= false;
/*153:*/
#line 6531 "annoyance-filter.w"
// Choose and install the decoder for the part that starts here.
if(multiPart){
assert(mdp==NULL);
#ifdef TYPE_LOG
typeLog<<mimeContentType<<"\t"<<
mimeContentTypeCharset<<"\t"<<
mimeContentTransferEncoding<<endl;
#endif
/*154:*/
#line 6566 "annoyance-filter.w"
// Nested multipart/alternative: push the current boundary and switch to
// the nested one.
if(mimeContentType=="multipart/alternative"){
if(mimeContentTypeBoundary!=""){
partBoundaryStack.push(partBoundary);
partBoundary= mimeContentTypeBoundary;
}else{
if(Annotate('d')){
ostringstream os;
os<<"Boundary missing from Content-Type of multipart/alternative.";
reportParserDiagnostic(os);
}
}
}
/*:154*/
#line 6546 "annoyance-filter.w"
;
/*155:*/
#line 6586 "annoyance-filter.w"
// Select an application file parser for content types that have one.
#ifdef HAVE_PDF_DECODER
if(mimeContentType=="application/pdf"){
asp= &aspPdf;
}else
#endif
if((mimeContentType=="application/x-shockwave-flash")||
(mimeContentType=="image/vnd.rn-realflash")){
asp= &aspFlash;
}
if(asp!=NULL){
asp->setMailFolder(this);
if(Annotate('d')){
ostringstream os;
os<<"Activating "<<asp->name()<<" application file decoder.";
reportParserDiagnostic(os);
}
}
/*:155*/
#line 6548 "annoyance-filter.w"
;
/*156:*/
#line 6622 "annoyance-filter.w"
// Otherwise consider scanning the part as a raw byte stream.
if((asp==NULL)&&(streamMinTokenLength> 0)&&
((mimeContentType.substr(0,12)=="application/")||
(((mimeContentType.substr(0,6)=="audio/")||
(mimeContentType.substr(0,6)=="image/"))&&
(isSpoofedExecutableFileExtension(mimeContentTypeName)||
isSpoofedExecutableFileExtension(mimeContentDispositionFilename))
)
)
){
if(Annotate('d')){
ostringstream os;
os<<"Activating byte stream parser for \""<<mimeContentType<<"\"";
reportParserDiagnostic(os);
}
byteStream= true;
}
/*:156*/
#line 6550 "annoyance-filter.w"
;
/*157:*/
#line 6652 "annoyance-filter.w"
if(Annotate('d')){
ostringstream os;
reportParserDiagnostic("");
os<<"mimeContentType: {"<<mimeContentType<<"}";
reportParserDiagnostic(os);
os.str("");
os<<"mimeContentTypeCharset: {"<<mimeContentTypeCharset<<"}";
reportParserDiagnostic(os);
os.str("");
os<<"mimeContentTransferEncoding: {"<<mimeContentTransferEncoding<<"}";
reportParserDiagnostic(os);
}
// Images and video without an application parser go to the sink decoder.
// The grouping is now explicit; asp is only ever set above for the PDF
// and Flash content types, none of which is video/, so the result is the
// same as with the previous unparenthesised "a && b || c".
if((asp==NULL)&&
((mimeContentType.substr(0,6)=="image/")||
(mimeContentType.substr(0,6)=="video/"))
){
smd.set(is,this,partBoundary,tlist);
mdp= &smd;
if(Annotate('d')){
ostringstream os;
os<<"Activating MIME sink decoder with sentinel: \""<<partBoundary<<
"\" due to Content-Type = "<<mimeContentType;
reportParserDiagnostic(os);
}
if(dlist){
dlist->push_back(Xfile+"-Decoder: Sink");
}
}
/*:157*/
#line 6552 "annoyance-filter.w"
/*158:*/
#line 6692 "annoyance-filter.w"
else if(byteStream||(asp!=NULL)||
(mimeContentType=="plain/txt")||
(mimeContentType.substr(0,5)=="text/")||
(mimeContentType=="message/rfc822")){
/*159:*/
#line 6717 "annoyance-filter.w"
// Select a multi-byte character set interpreter from the charset.
bool gibberish= false;
if(mimeContentTypeCharset.substr(0,6)=="gb2312"){
mbd_euc.setMailFolder(this);
mbi_gb2312.setDecoder(mbd_euc);
mbi= &mbi_gb2312;
}
if(mimeContentTypeCharset=="big5"){
mbd_big5.setMailFolder(this);
mbi_big5.setDecoder(mbd_big5);
mbi= &mbi_big5;
}
if(mimeContentTypeCharset=="utf-8"){
mbd_utf_8.setMailFolder(this);
mbi_unicode.setDecoder(mbd_utf_8);
mbi= &mbi_unicode;
}
if(mimeContentTypeCharset=="euc-kr"){
mbd_euc.setMailFolder(this);
mbi_kr.setDecoder(mbd_euc);
mbi= &mbi_kr;
}
#ifdef CHECK_FOR_GIBBERISH_CHARACTER_SETS
if((mimeContentTypeCharset.length()==0)||
(mimeContentTypeCharset=="us-ascii")||
(mimeContentTypeCharset.substr(0,8)=="iso-8859")||
(mimeContentTypeCharset=="windows-1251")){
if(Annotate('d')){
ostringstream os;
os<<"Accepting part in Content-Type-Charset: "<<mimeContentTypeCharset<<" ("<<
mimeContentType<<" "<<mimeContentTransferEncoding<<")";
reportParserDiagnostic(os);
}
}else{
if(Annotate('d')){
ostringstream os;
os<<"Rejecting part in Content-Type-Charset: "<<mimeContentTypeCharset<<" ("<<
mimeContentType<<" "<<mimeContentTransferEncoding<<")";
reportParserDiagnostic(os);
}
gibberish= true;
}
#endif
/*:159*/
#line 6698 "annoyance-filter.w"
;
/*160:*/
#line 6781 "annoyance-filter.w"
// Select the transfer decoder from Content-Transfer-Encoding; an
// unrecognised encoding is routed to the sink decoder.
if(!gibberish){
if((mimeContentTransferEncoding.length()==0)||
(mimeContentTransferEncoding.substr(0,4)=="7bit")||
(mimeContentTransferEncoding.substr(0,4)=="8bit")||
(mimeContentTransferEncoding=="ascii")){
imd.set(is,this,partBoundary,tlist);
mdp= &imd;
}else if(mimeContentTransferEncoding=="base64"){
bmd.set(is,this,partBoundary,tlist);
mdp= &bmd;
}else if(mimeContentTransferEncoding=="quoted-printable"){
qmd.set(is,this,partBoundary,tlist);
mdp= &qmd;
}else{
gibberish= true;
smd.set(is,this,partBoundary,tlist);
mdp= &smd;
}
assert(mdp!=NULL);
if(Annotate('d')){
ostringstream os;
os<<(gibberish?"Rejecting":"Accepting")<<
" part in Content-Transfer-Encoding: "<<mimeContentTransferEncoding<<" ("<<
mimeContentTypeCharset<<" "<<mimeContentType<<")";
reportParserDiagnostic(os);
}
if(dlist){
dlist->push_back(Xfile+"-Decoder: "+mdp->name());
}
if(Annotate('d')){
ostringstream os;
os<<"Activating MIME "<<mdp->name()<<" decoder with sentinel: "<<partBoundary;
reportParserDiagnostic(os);
}
}
/*:160*/
#line 6700 "annoyance-filter.w"
;
/*161:*/
#line 6826 "annoyance-filter.w"
// Byte stream mode needs a decoder; cancel it if none was installed.
if(byteStream&&(mdp==NULL)){
if(Annotate('d')){
ostringstream os;
os<<"Canceling byte stream mode due to Content-Transfer-Encoding: {"<<
mimeContentTransferEncoding<<"} ("<<
mimeContentTypeCharset<<" "<<mimeContentType<<")";
reportParserDiagnostic(os);
}
byteStream= false;
}
/*:161*/
#line 6702 "annoyance-filter.w"
;
/*162:*/
#line 6850 "annoyance-filter.w"
// An embedded message: drop the part state and parse its header next.
if(mimeContentType=="message/rfc822"){
/*142:*/
#line 6193 "annoyance-filter.w"
mimeContentType= mimeContentTypeCharset=
mimeContentTypeName= mimeContentDispositionFilename=
mimeContentTypeBoundary= mimeContentTransferEncoding= "";
mdp= NULL;
mbi= NULL;
asp= NULL;
byteStream= false;
/*:142*/
#line 6852 "annoyance-filter.w"
;
forceInHeader();
}
/*:162*/
#line 6704 "annoyance-filter.w"
;
}
/*:158*/
#line 6554 "annoyance-filter.w"
;
}
/*:153*/
#line 6179 "annoyance-filter.w"
;
}
/*143:*/
#line 6216 "annoyance-filter.w"
/*144:*/
#line 6264 "annoyance-filter.w"
// A header line in the message that already carries this program's own
// header prefix is treated as spoofed and removed from the transcript.
bool isSpoofedHeader= false;
if(inHeader){
string sc= s,scx= Xfile;
stringCanonicalise(sc);
stringCanonicalise(scx);
scx+= '-';
if(sc.substr(0,scx.length())==scx){
if(tlist!=NULL){
tlist->pop_back();
}
isSpoofedHeader= true;
}
}
/*:144*/
#line 6217 "annoyance-filter.w"
;
// Merge continuation lines (those starting with white space) into s.  The
// first line that is not a continuation is kept as look-ahead.
while((inHeader||inPartHeader)&&getline(*is,lookAheadLine)!=NULL){
string lal= lookAheadLine;
while((lookAheadLine.length()> 0)&&(isISOspace(lookAheadLine[lookAheadLine.length()-1]))){
lookAheadLine.erase(lookAheadLine.length()-1);
}
if((lookAheadLine.length()> 0)&&isISOspace(lookAheadLine[0])){
string::size_type p= 1;
while(isISOspace(lookAheadLine[p])){
p++;
}
s+= lookAheadLine.substr(p);
if((tlist!=NULL)&&(!isSpoofedHeader)){
tlist->push_back(lal);
}
continue;
}
lookedAhead= true;
lookAheadLine= lal;
break;
}
if(isSpoofedHeader){
ostringstream os;
os<<"Spoofed header rejected: "<<s;
reportParserDiagnostic(os.str());
continue;
}
/*:143*/
#line 6181 "annoyance-filter.w"
;
/*145:*/
#line 6291 "annoyance-filter.w"
{
string arg;
if(inHeader&&compareHeaderField(s,"message-id",arg)){
messageID= arg;
lastMessageID= "";
}
}
/*:145*/
#line 6183 "annoyance-filter.w"
;
/*150:*/
#line 6469 "annoyance-filter.w"
// Pick up the multipart boundary from the message's Content-Type header.
string::size_type p,p1;
string arg;
if(inHeader&&compareHeaderField(s,"content-type",arg)){
string sc= s;
stringCanonicalise(sc);
if((p= sc.find("multipart/",13))!=string::npos){
if((p= sc.find("boundary=",p+10))!=string::npos){
if(s[p+9]=='\"'){
p1= sc.find("\"",p+10);
p+= 10;
}else{
p+= 9;
// p1 must be the END POSITION of the value because the substr() below
// takes (p1-p) characters.  It used to be set to the remaining length,
// which truncated long boundaries.  An unquoted value ends at the next
// ';' or white space; when there is none p1 is npos and substr() takes
// the rest of the line.
p1= sc.find_first_of("; \t",p);
}
multiPart= true;
partBoundary= s.substr(p,(p1-p));
if(Annotate('d')){
ostringstream os;
os<<"Multi-part boundary: \""<<partBoundary<<"\"";
reportParserDiagnostic(os);
}
}
}
}
/*:150*/
#line 6184 "annoyance-filter.w"
;
/*146:*/
#line 6311 "annoyance-filter.w"
// Record the body's content type, charset, name and transfer encoding.
{
string arg,par;
if(compareHeaderField(s,"content-type",arg)){
if(parseHeaderArgument(s,"charset",par)){
stringCanonicalise(par);
bodyContentTypeCharset= par;
}
if(parseHeaderArgument(s,"name",par)){
bodyContentTypeName= par;
}
bodyContentType= arg;
}
if(inHeader&&compareHeaderField(s,"content-transfer-encoding",arg)){
bodyContentTransferEncoding= arg;
}
}
/*:146*/
#line 6185 "annoyance-filter.w"
;
/*147:*/
#line 6342 "annoyance-filter.w"
// Decode "=?charset?q?...?=" and "=?charset?b?...?=" encoded words found
// in a message header line.
if(inHeader){
string sc= s;
string::size_type p,p1,p2,p3,p4;
char etype;
unsigned int ndecodes= 0;
string charset;
stringCanonicalise(sc);
p4= 0;
while(((p= sc.find("=?",p4))!=string::npos)){
p4= p+2;
if(((p1= sc.find("?q?",p4))!=string::npos)||
((p1= sc.find("?b?",p4))!=string::npos)){
charset= sc.substr(p4,p1-p4);
etype= sc[p1+1];
p4= p1+3;
if((p2= sc.find("?=",p4))!=string::npos){
p1+= 3;
p3= p2-p1;
string drt;
if(etype=='q'){
drt= quotedPrintableMIMEdecoder::decodeEscapedText(sc.substr(p1,p3),this);
}else{
assert(etype=='b');
drt= base64MIMEdecoder::decodeEscapedText(sc.substr(p1,p3),this);
}
/*148:*/
#line 6389 "annoyance-filter.w"
// Interpret the decoded bytes according to the word's character set.
if(charset.substr(0,6)=="gb2312"){
EUC_MBCSdecoder mbd_euc;
GB2312_MBCSinterpreter mbi_gb2312;
mbd_euc.setMailFolder(this);
mbi_gb2312.setDecoder(mbd_euc);
drt= mbi_gb2312.decodeLine(drt);
}else if(charset=="big5"){
Big5_MBCSdecoder mbd_big5;
Big5_MBCSinterpreter mbi_big5;
mbd_big5.setMailFolder(this);
mbi_big5.setDecoder(mbd_big5);
drt= mbi_big5.decodeLine(drt);
}else if(charset=="utf-8"){
UTF_8_Unicode_MBCSdecoder mbd_utf_8;
Unicode_MBCSinterpreter mbi_unicode;
mbd_utf_8.setMailFolder(this);
mbi_unicode.setDecoder(mbd_utf_8);
drt= mbi_unicode.decodeLine(drt);
}else if(charset=="euc-kr"){
EUC_MBCSdecoder mbd_euc;
KR_MBCSinterpreter mbi_kr;
mbd_euc.setMailFolder(this);
mbi_kr.setDecoder(mbd_euc);
drt= mbi_kr.decodeLine(drt);
}else if((charset.substr(0,8)=="iso-8859")||
(charset=="us-ascii")){
}else{
ostringstream os;
os<<"Header line: no interpreter for ("<<charset<<") character set.";
reportParserDiagnostic(os.str());
}
/*:148*/
#line 6369 "annoyance-filter.w"
;
// Replace the encoded word by its decoded text and resume after it.
sc.replace(p,(p2-p)+2,drt);
p4= p+drt.length();
ndecodes++;
}
}
}
if(ndecodes> 0){
s= sc;
}
}
/*:147*/
#line 6186 "annoyance-filter.w"
;
}
/*:141*/
#line 5780 "annoyance-filter.w"
;
/*149:*/
#line 6432 "annoyance-filter.w"
// Record the MIME fields of a part header.
if(multiPart&&inPartHeader){
string arg,par;
partHeaderLines++;
if(compareHeaderField(s,"content-type",arg)){
if(parseHeaderArgument(s,"charset",par)){
stringCanonicalise(par);
mimeContentTypeCharset= par;
}
if(parseHeaderArgument(s,"boundary",par)){
mimeContentTypeBoundary= par;
}
if(parseHeaderArgument(s,"name",par)){
mimeContentTypeName= par;
}
mimeContentType= arg;
}
if(compareHeaderField(s,"content-transfer-encoding",arg)){
mimeContentTransferEncoding= arg;
}
if(compareHeaderField(s,"content-disposition",arg)){
if(parseHeaderArgument(s,"filename",par)){
mimeContentDispositionFilename= par;
}
}
}
/*:149*/
#line 5781 "annoyance-filter.w"
;
/*151:*/
#line 6504 "annoyance-filter.w"
// A boundary line other than exactly "--boundary--" starts a part header.
if(multiPart&&(!inHeader)&&
(partBoundary!="")&&
(s.substr(0,2)=="--")&&
(s.substr(2,partBoundary.length())==partBoundary)&&
(s.substr(partBoundary.length()+2)!="--")){
inPartHeader= true;
mimeContentType= mimeContentTypeCharset= mimeContentTypeBoundary=
mimeContentTransferEncoding= "";
}
/*:151*/
#line 5783 "annoyance-filter.w"
;
/*152:*/
#line 6520 "annoyance-filter.w"
// Body lines pass through the character set interpreter, if one is set.
if((mbi!=NULL)&&(!inHeader)&&(!inPartHeader)){
s= mbi->decodeLine(s);
}
/*:152*/
#line 5784 "annoyance-filter.w"
;
return true;
}
}
/*:130*//*131:*/
#line 5799 "annoyance-filter.w"
int mailFolder::nextByte(void){
assert(mdp!=NULL);
int c= mdp->getDecodedChar();
if(c<0){
byteStream= false;
if(Annotate('d')){
ostringstream os;
os<<"End of byte stream. Deactivating byte stream parser.";
reportParserDiagnostic(os);
}
}
return c;
}
/*:131*//*136:*/
#line 5951 "annoyance-filter.w"
#ifdef HAVE_DIRECTORY_TRAVERSAL
// Advance through the open directory (dh) until a regular file is found.
// On success the path is stored in both cfName and fname and the result
// of openNextFileInDirectory() is returned.  When the directory is
// exhausted it is closed, dh is reset to NULL and false is returned.
// Entries whose status cannot be obtained are skipped (with a message
// when verbose is set); entries which are not regular files are skipped
// silently.
bool mailFolder::findNextFileInDirectory(string &fname)
{
    assert(dirFolder);
    if (dh == NULL) {
        return false;
    }

    for (;;) {
        struct dirent *entry = readdir(dh);
        if (entry == NULL) {
            closedir(dh);
            dh = NULL;
            return false;
        }

        cfName = dirName + pathSeparator + entry->d_name;

        struct stat info;
        if (stat(cfName.c_str(), &info) != 0) {
            if (verbose) {
                cerr << "Cannot get status of " << cfName << ". Skipping." << endl;
            }
            continue;
        }
        if (S_ISREG(info.st_mode)) {
            fname = cfName;
            return openNextFileInDirectory();
        }
    }
}
#endif
/*:136*//*137:*/
#line 5991 "annoyance-filter.w"
#ifdef HAVE_DIRECTORY_TRAVERSAL
// Open the directory entry named by cfName as the current input stream.
//
// When COMPRESSED_FILES is configured, symbolic links are followed (if
// HAVE_READLINK) and a file whose resolved name ends with
// Compressed_file_type is read through a pipe from Uncompress_command.
// Any other file is opened directly with ifdir.
//
// Returns true when the stream "is" has been set up, false if there is
// no open directory or the file (or decompression pipe) cannot be opened.
bool mailFolder::openNextFileInDirectory(void){
    assert(dirFolder);
    if(dh==NULL){
        return false;
    }
#ifdef COMPRESSED_FILES
    string fname= cfName;
    // Declared outside the HAVE_READLINK guard: the suffix test below
    // needs it even when symbolic links cannot be resolved.
    string jname= fname;
/*133:*/
#line 5866 "annoyance-filter.w"
#ifdef HAVE_READLINK
    int maxSlinks= 50;
    char slbuf[1024];
    while(maxSlinks--> 0){
        int sll= readlink(jname.c_str(),slbuf,(sizeof slbuf)-1);
        if(sll>=0){
            assert(sll<static_cast<int> (sizeof slbuf));
            slbuf[sll]= 0;
            jname= slbuf;
        }else{
            break;
        }
    }
    if(maxSlinks<=0){
        cerr<<"Warning: probable symbolic link loop for \""<<
            fname<<"\""<<endl;
    }
#endif
/*:133*/
#line 6002 "annoyance-filter.w"
    ;
    // Suffix test written so that a name shorter than the suffix cannot
    // wrap the unsigned subtraction and compare equal to string::npos.
    const string compressedSuffix(Compressed_file_type);
    if((jname.length()>=compressedSuffix.length())&&
       (jname.compare(jname.length()-compressedSuffix.length(),
                      compressedSuffix.length(),compressedSuffix)==0)){
        // NOTE(review): fname is appended to the shell command unquoted;
        // names containing shell metacharacters will be interpreted by
        // the shell.
        string cmd(Uncompress_command);
        cmd+= ' '+fname;
        ip= popen(cmd.c_str(),"r");
        if(ip==NULL){
            // fileno(NULL) is undefined; report the failure like an
            // ordinary open error.
            if(verbose){
                cerr<<"Unable to open mail folder directory file \""<<
                    cfName<<"\""<<endl;
            }
            return false;
        }
#ifdef HAVE_FDSTREAM_COMPATIBILITY
        ifcdir.attach(fileno(ip));
        ifcdir.clear();
        is= &ifcdir;
#else
        ifdir.attach(fileno(ip));
        ifdir.clear();
        is= &ifdir;
#endif
    }else{
#endif
        ifdir.open(cfName.c_str());
        if(!ifdir.is_open()){
            if(verbose){
                cerr<<"Unable to open mail folder directory file \""<<
                    cfName<<"\""<<endl;
            }
            return false;
        }
        ifdir.clear();
        is= &ifdir;
#ifdef COMPRESSED_FILES
    }
#endif
    expectingNewMessage= true;
    setNewMessageEligiblity();
    return true;
}
#endif
/*:137*//*163:*/
#line 6863 "annoyance-filter.w"
// Convert s to canonical form in place: every character for which
// isISOupper() is true is replaced by toISOlower() of that character.
void mailFolder::stringCanonicalise(string &s)
{
    for (string::iterator ch = s.begin(); ch != s.end(); ++ch) {
        if (isISOupper(*ch)) {
            *ch = toISOlower(*ch);
        }
    }
}
/*:163*//*164:*/
#line 6879 "annoyance-filter.w"
// Test whether header line s begins with the field name target followed
// by a colon.  The comparison is made on a canonicalised (lower case)
// copy of s, so target must be supplied in lower case.
//
// On a match, arg receives the canonicalised first word of the field
// body: leading white space is skipped and the word ends at the next
// white space character or ';'.  arg is empty when the body is blank.
// Returns true on a match; otherwise returns false and leaves arg alone.
bool mailFolder::compareHeaderField(string &s, const string target, string &arg)
{
    const string::size_type nameLen = target.length();
    if (s.length() <= nameLen) {
        return false;
    }

    string lowered = s;
    stringCanonicalise(lowered);
    if ((lowered.compare(0, nameLen, target) != 0) || (lowered[nameLen] != ':')) {
        return false;
    }

    // Skip white space between the colon and the field body.
    string::size_type start = nameLen + 1;
    while ((start < lowered.length()) && isISOspace(lowered[start])) {
        ++start;
    }

    // The argument runs to the next white space character or semicolon.
    string::size_type stop = start;
    while ((stop < lowered.length()) &&
           !isISOspace(lowered[stop]) && (lowered[stop] != ';')) {
        ++stop;
    }

    arg = lowered.substr(start, stop - start);
    return true;
}
/*:164*//*165:*/
#line 6920 "annoyance-filter.w"
// Extract the value of a "target=value" argument from header line s.
//
// target is located case-insensitively (it must be given in lower case,
// as it is matched against a canonicalised copy of s); the value itself
// is taken from the original line so its case is preserved.  A value
// beginning with '"' extends to the closing quote; otherwise it extends
// to the next white space character or ';'.
//
// Every occurrence of target is considered, not just the first, so that
// text such as "multipart/boundary; boundary=xyz" still yields the real
// argument rather than failing on the earlier look-alike.
//
// Returns true and sets arg when a value is found.  Returns false when
// no "target=" is present, when nothing follows the '=', or when a
// quoted value has no closing quote.
bool mailFolder::parseHeaderArgument(string&s,const string target,string&arg)
{
    if(s.length()<=target.length()){
        return false;
    }
    string sc= s;
    stringCanonicalise(sc);

    // Find the first occurrence of target immediately followed by '='.
    string::size_type p= sc.find(target);
    while((p!=string::npos)&&
          !((sc.length()> (p+target.length()))&&
            (sc[p+target.length()]=='='))){
        p= sc.find(target,p+1);
    }
    if(p==string::npos){
        return false;
    }

    p+= target.length()+1;
    if(p>=s.length()){
        return false;
    }

    if(s[p]=='"'){
        const string::size_type p1= s.find('"',p+1);
        if(p1==string::npos){
            return false;
        }
        arg= s.substr(p+1,p1-(p+1));
        return true;
    }

    // Unquoted value: skip white space, then take everything up to the
    // next white space character or semicolon.
    string::size_type i= p;
    while((i<s.length())&&isISOspace(s[i])){
        i++;
    }
    string::size_type e= i;
    while((e<s.length())&&!isISOspace(s[e])&&(s[e]!=';')){
        e++;
    }
    arg= s.substr(i,e-i);
    return true;
}
/*:165*//*166:*/
#line 6988 "annoyance-filter.w"
bool mailFolder::isSpoofedExecutableFileExtension(const string&s)
{
string sc= s;
stringCanonicalise(sc);
if((sc.length()> 4)&&(sc[sc.length()-4]=='.')){
string ext= sc.substr(sc.length()-3);
stringCanonicalise(ext);
return((ext=="exe")||
(ext=="bat")||
(ext=="scr")||
(ext=="lnk")||
(ext=="pif")||
(ext=="com"));
}
return false;
}
/*:166*//*167:*/
#line 7012 "annoyance-filter.w"
unsigned int mailFolder::sizeMessageTranscript(const unsigned int lineOverhead)const{
assert(tlist!=NULL);
unsigned int n= tlist->size(),totsize= 0;
if((n> 1)&&
(tlist->back().substr(0,(sizeof messageSentinel)-1)==messageSentinel)){
n--;
}
list<string> ::iterator p= tlist->begin();
for(unsigned int i= 0;i<n;i++){
totsize+= p->length()+lineOverhead;
p++;
}
return totsize;
}
/*:167*//*168:*/
#line 7037 "annoyance-filter.w"
// Write the message transcript to os, one line per list entry, each
// followed by endl.  When the transcript has more than one line and its
// last line starts with the message sentinel, that last line is omitted.
void mailFolder::writeMessageTranscript(ostream &os) const
{
    assert(tlist != NULL);

    unsigned int remaining = tlist->size();
    if ((remaining > 1) &&
        (tlist->back().substr(0, (sizeof messageSentinel) - 1) == messageSentinel)) {
        remaining--;
    }

    for (list<string>::iterator line = tlist->begin(); remaining > 0; ++line, --remaining) {
        os << *line << endl;
    }
}
// Write the message transcript to the file fname, or to standard output
// when fname is "-".
void mailFolder::writeMessageTranscript(const string fname) const
{
    if (fname == "-") {
        writeMessageTranscript(cout);
        return;
    }

    ofstream of(fname.c_str());
    writeMessageTranscript(of);
    of.close();
}
/*:168*//*169:*/
#line 7071 "annoyance-filter.w"
// Report parser diagnostic s.  In verbose mode the text is written to
// standard error; it is preceded by the message's From line (and its
// Message-ID, if any) whenever either differs from the one shown for
// the previous diagnostic.  When the 'p' or 'd' annotation is enabled
// the text is also queued on parserDiagnostics.
void mailFolder::reportParserDiagnostic(const string s)
{
    if (verbose) {
        const bool differentMessage =
            (lastFromLine != fromLine) || (lastMessageID != messageID);
        if (differentMessage) {
            cerr << fromLine << endl;
            if (messageID != "") {
                cerr << "Message-ID: " << messageID << ":" << endl;
            }
            lastFromLine = fromLine;
            lastMessageID = messageID;
        }
        cerr << " " << s << endl;
    }

    if (Annotate('p') || Annotate('d')) {
        parserDiagnostics.push(s);
    }
}
void mailFolder::reportParserDiagnostic(const ostringstream&os){
reportParserDiagnostic(os.str());
}
/*:169*//*171:*/
#line 7199 "annoyance-filter.w"
// Set up the token definition used for ISO-8859 text with length limits
// lmin..lmax.  Token characters are ASCII digits, ISO alphabetics, '-',
// apostrophe and '$'.  Digits and '-' are flagged "not exclusively";
// '-' and apostrophe are flagged "not at end".
void tokenDefinition::setISO_8859defaults(unsigned int lmin, unsigned int lmax)
{
    clear();
    setLengthLimits(lmin, lmax);

    for (unsigned int code = 0; code < 256; ++code) {
        const bool asciiDigit = isascii(code) && isdigit(code);
        const bool punctuation = (code == '-') || (code == '\'') || (code == '$');
        isToken[code] = asciiDigit || isISOalpha(code) || punctuation;
        notExclusively[code] = (isdigit(code) || (code == '-')) ? 1 : 0;
    }

    notAtEnd[static_cast<int>('\'')] = true;
    notAtEnd[static_cast<int>('-')] = true;
}
/*:171*//*172:*/
#line 7219 "annoyance-filter.w"
// Set up the token definition used for US-ASCII text with length limits
// lmin..lmax.  Token characters are ASCII letters, digits and '_'.
// Digits, '-' and '_' are flagged "not exclusively"; '-' and apostrophe
// are flagged "not at end".
void tokenDefinition::setUS_ASCIIdefaults(unsigned int lmin, unsigned int lmax)
{
    clear();
    setLengthLimits(lmin, lmax);

    for (unsigned int code = 0; code < 128; ++code) {
        isToken[code] = (isalnum(code) != 0);
        notExclusively[code] = (isdigit(code) || (code == '-')) ? 1 : 0;
    }

    notExclusively[static_cast<int>('_')] = true;
    isToken[static_cast<int>('_')] = true;
    notAtEnd[static_cast<int>('\'')] = true;
    notAtEnd[static_cast<int>('-')] = true;
}
/*:172*//*174:*/
#line 7357 "annoyance-filter.w"
// Deliver the next token, or assembled phrase, from the source mail
// folder into d (converted to lower case).  Returns true when d has been
// filled in and false once the source is exhausted (atEnd).
//
// Each pass of the outer loop proceeds as follows:
//   1. If phrases are waiting in pendingPhrases, the first is returned.
//   2. If the source is in byte stream mode, tokens are scanned from the
//      decoded bytes using the byte stream token definition (btd).
//   3. Otherwise tokens are scanned from the current line (cl) at the
//      current position (clp) using the text token definition (td),
//      tracking whether we are within HTML and within an HTML comment;
//      text inside an HTML comment never contributes to a token.
//
// A candidate is discarded when its length is unacceptable to the token
// definition or when it consists exclusively of "not exclusively"
// characters.  When assemblePhrases is set, accepted tokens are handed to
// assembleAllPhrases() and come back out through pendingPhrases.
bool tokenParser::nextToken(dictionaryWord&d){
    string token;
    while(!atEnd){
/*175:*/
#line 7457 "annoyance-filter.w"
        if(!pendingPhrases.empty()){
            token= pendingPhrases.front();
            pendingPhrases.pop_front();
            d.set(token);
            d.toLower();
            if(pTokenTrace&&saveMessage){
                messageQueue.push_back(string(" \"")+d.text+"\"");
            }
            return true;
        }
/*:175*/
#line 7363 "annoyance-filter.w"
        ;
        token= "";
        string::size_type necount= 0;
        if(source->isByteStream()){
/*178:*/
#line 7577 "annoyance-filter.w"
            int b;
            while((b= source->nextByte())>=0){
                if(!(btd->isTokenMember(b))){
                    continue;
                }
                if(btd->isTokenNotAtEnd(b)){
                    continue;
                }
                // A new candidate starts here.  Clear whatever an earlier,
                // rejected candidate left behind so that its text and its
                // "not exclusively" count cannot leak into this one.
                token= "";
                necount= 0;
                if(btd->isTokenNotExclusively(b)){
                    necount++;
                }
                token+= static_cast<char> (b);
                while(((b= source->nextByte())>=0)&&
                      btd->isTokenMember(b)
                      ){
                    if(btd->isTokenNotExclusively(b)){
                        necount++;
                    }
                    token+= static_cast<char> (b);
                }
                while((token.length()> 0)&&
                      btd->isTokenNotAtEnd(ChIx(token[token.length()-1]))
                      ){
                    token.erase(token.length()-1);
                }
                if(!(btd->isTokenLengthAcceptable(token))){
                    token= "";
                    continue;
                }
                if(necount==token.length()){
                    token= "";
                    continue;
                }
                d.set(token);
                d.toLower();
/*180:*/
#line 7674 "annoyance-filter.w"
                if(assemblePhrases){
                    assembleAllPhrases(d);
                    // Leave the byte loop so the outer loop can deliver
                    // the phrases just queued; a "continue" here would
                    // resume the byte loop and withhold them until the
                    // byte stream ended.
                    break;
                }
/*:180*/
#line 7631 "annoyance-filter.w"
                ;
                if(pTokenTrace&&saveMessage){
                    messageQueue.push_back(string(" \"")+d.text+"\"");
                }
                return true;
            }
            continue;
/*:178*/
#line 7369 "annoyance-filter.w"
            ;
        }
        // Skip characters which cannot begin a token, keeping the HTML
        // and HTML comment state up to date as we go.
        while((clp<cl.length())&&
              (inHTMLcomment||
               (!(td->isTokenMember(ChIx(cl[clp]))))
               )){
/*176:*/
#line 7482 "annoyance-filter.w"
            if(inHTML&&!inHTMLcomment&&(cl.substr(clp,4)==HTMLCommentBegin)){
                inHTMLcomment= true;
                clp+= 4;
#ifdef HTML_COMMENT_DEBUG
                cout<<"------------------------------ HTML Comment begin: "<<cl<<endl;
#endif
                continue;
            }
            if(inHTML&&inHTMLcomment&&(cl.substr(clp,3)==HTMLCommentEnd)){
                inHTMLcomment= false;
                clp+= 3;
#ifdef HTML_COMMENT_DEBUG
                cout<<"------------------------------ HTML Comment end: "<<cl<<endl;
#endif
                continue;
            }
#ifdef HTML_COMMENT_DEBUG
            if(inHTMLcomment){
                cout<<cl[clp];
                if(clp==(cl.length()-1)){
                    cout<<endl;
                }
            }
#endif
/*:176*/
#line 7377 "annoyance-filter.w"
            ;
/*177:*/
#line 7516 "annoyance-filter.w"
            // "<HTML>" or "<HTML " needs six characters starting at clp.
            // The bound is written as an addition because cl.length()-6
            // wraps around for lines shorter than six characters.
            if(cl[clp]=='<'&&((clp+6)<=cl.length())){
                if((cl[clp+1]=='H'||cl[clp+1]=='h')&&
                   (cl[clp+5]=='>'||cl[clp+5]==' ')){
                    string tag;
                    for(int i= 1;i<5;i++){
                        tag+= (islower(cl[clp+i]))?toupper(cl[clp+i]):cl[clp+i];
                    }
                    if(tag=="HTML"){
                        inHTML= true;
#ifdef HTML_COMMENT_DEBUG
                        cout<<"------------------------------ In HTML: "<<cl<<endl;
#endif
                    }
                }
            }
            // "</HTML>" needs seven characters starting at clp.
            if(cl[clp]=='<'&&((clp+7)<=cl.length())){
                if((cl[clp+1]=='/')&&(cl[clp+2]=='H'||cl[clp+2]=='h')&&
                   (cl[clp+6]=='>')){
                    string tag;
                    for(int i= 2;i<6;i++){
                        tag+= (islower(cl[clp+i]))?toupper(cl[clp+i]):cl[clp+i];
                    }
                    if(tag=="HTML"){
                        inHTML= false;
#ifdef HTML_COMMENT_DEBUG
                        cout<<"------------------------------ Out of HTML: "<<cl<<endl;
#endif
                    }
                }
            }
/*:177*/
#line 7378 "annoyance-filter.w"
            ;
            clp++;
        }
        if(clp>=cl.length()){
            nextLine();
            continue;
        }
        if(td->isTokenNotAtEnd(ChIx(cl[clp]))){
            clp++;
            continue;
        }
        if(td->isTokenNotExclusively(ChIx(cl[clp]))){
            necount++;
        }
        token+= cl[clp++];
        // Accumulate the rest of the token.  An HTML comment embedded in
        // the middle of a word is skipped without ending the token.
        while((clp<cl.length())){
            if((!inHTMLcomment)&&(td->isTokenMember(ChIx(cl[clp])))){
                if(td->isTokenNotExclusively(ChIx(cl[clp]))){
                    necount++;
                }
                token+= cl[clp++];
            }else{
/*176:*/
#line 7482 "annoyance-filter.w"
                if(inHTML&&!inHTMLcomment&&(cl.substr(clp,4)==HTMLCommentBegin)){
                    inHTMLcomment= true;
                    clp+= 4;
#ifdef HTML_COMMENT_DEBUG
                    cout<<"------------------------------ HTML Comment begin: "<<cl<<endl;
#endif
                    continue;
                }
                if(inHTML&&inHTMLcomment&&(cl.substr(clp,3)==HTMLCommentEnd)){
                    inHTMLcomment= false;
                    clp+= 3;
#ifdef HTML_COMMENT_DEBUG
                    cout<<"------------------------------ HTML Comment end: "<<cl<<endl;
#endif
                    continue;
                }
#ifdef HTML_COMMENT_DEBUG
                if(inHTMLcomment){
                    cout<<cl[clp];
                    if(clp==(cl.length()-1)){
                        cout<<endl;
                    }
                }
#endif
/*:176*/
#line 7407 "annoyance-filter.w"
                ;
                if(inHTMLcomment){
                    clp++;
                    continue;
                }
                break;
            }
        }
        // Trim trailing characters which may not end a token.
        while((token.length()> 0)&&
              td->isTokenNotAtEnd(ChIx(token[token.length()-1]))
              ){
            token.erase(token.length()-1);
        }
        if(!(td->isTokenLengthAcceptable(token))){
            continue;
        }
        if(necount==token.length()){
            continue;
        }
        d.set(token);
        d.toLower();
/*180:*/
#line 7674 "annoyance-filter.w"
        if(assemblePhrases){
            assembleAllPhrases(d);
            continue;
        }
/*:180*/
#line 7443 "annoyance-filter.w"
        ;
        if(pTokenTrace&&saveMessage){
            messageQueue.push_back(string(" \"")+d.text+"\"");
        }
        return true;
    }
    return false;
}
/*:174*//*181:*/
#line 7690 "annoyance-filter.w"
// Add the text of d to the sliding window of recent words (phraseQueue,
// which holds at most phraseMax words) and queue on pendingPhrases every
// phrase of phraseMin..phraseMax words which ends with the new word.
// Words within a phrase are separated by a single space.  A phrase
// longer than phraseLimit characters is dropped unless phraseLimit is 0.
void tokenParser::assembleAllPhrases(dictionaryWord &d)
{
    phraseQueue.push_back(d.text);
    if (phraseQueue.size() > phraseMax) {
        phraseQueue.pop_front();
        assert(phraseQueue.size() == phraseMax);
    }

    for (unsigned int words = phraseMin; words <= phraseMax; words++) {
        if (words > phraseQueue.size()) {
            continue;
        }

        // Walk backwards from the newest word, prepending as we go.
        string phrase = "";
        deque<string>::const_reverse_iterator word = phraseQueue.rbegin();
        for (unsigned int taken = 0; taken < words; taken++, word++) {
            const char *separator = (phrase == "") ? "" : " ";
            phrase = (*word) + separator + phrase;
        }

        if ((phraseLimit == 0) || (phrase.length() <= phraseLimit)) {
            pendingPhrases.push_back(phrase);
        }
    }
}
/*:181*//*185:*/
#line 7823 "annoyance-filter.w"
// Classify the next message in the mail folder and return its junk
// probability.
//
// The unique tokens of the message are collected and ranked by the
// distance of their junk probability from 0.5.  The nExtremal most
// extreme tokens are combined as probP/(probP+probQ), where probP is the
// product of their probabilities and probQ the product of the
// complements.  Probabilities come from the fast dictionary (fd) when
// one is loaded and from the regular dictionary (d) otherwise; words
// with no usable probability are given unknownWordProbability.
//
// When the token parser is saving the message, the parser's message
// queue is annotated and written to pDiagFilename.  When
// createTranscript is true or a transcript file name is set, annotation
// header lines are inserted into the message transcript, which is
// written to transcriptFilename if that is not empty.
double classifyMessage::classifyThis(bool createTranscript){
    dictionaryWord dw;
    double junkProb= -1;
    if(createTranscript||(transcriptFilename!="")){
        mf->setTranscriptList(&messageTranscript);
        if(Annotate('p')||Annotate('d')){
            saveParserDiagnostics= true;
        }
    }
/*187:*/
#line 7875 "annoyance-filter.w"
    set<string> utokens;
    while(tp.nextToken(dw)){
        utokens.insert(dw.get());
    }
/*:187*/
#line 7835 "annoyance-filter.w"
    ;
/*188:*/
#line 7903 "annoyance-filter.w"
    // Key each token by |p - 0.5| so that walking the multimap in
    // reverse visits the most significant tokens first.
    multimap<double,string> rtokens;
    for(set<string> ::iterator t= utokens.begin();t!=utokens.end();t++){
        double pdiff;
        dictionary::iterator dp;
        if(fd->isDictionaryLoaded()){
            pdiff= fd->find(*t);
            if(pdiff<0){
                pdiff= unknownWordProbability;
            }
            pdiff= abs(pdiff-0.5);
        }else{
            if(((dp= d->find(*t))!=d->end())&&
               (dp->second.getJunkProbability()>=0)){
                pdiff= abs(dp->second.getJunkProbability()-0.5);
            }else{
                pdiff= abs(unknownWordProbability-0.5);
            }
        }
        rtokens.insert(make_pair(pdiff,*t));
    }
/*:188*/
#line 7837 "annoyance-filter.w"
    ;
/*189:*/
#line 7937 "annoyance-filter.w"
    unsigned int n= min(static_cast<multimap<double,string> ::size_type> (nExtremal),rtokens.size());
    multimap<double,string> ::const_reverse_iterator rp= rtokens.rbegin();
    double probP= 1,probQ= 1;
    if(verbose){
        cerr<<"Rank Probability Token"<<endl;
    }
    for(unsigned int i= 0;i<n;i++){
        double p;
        if(fd->isDictionaryLoaded()){
            p= fd->find(rp->second);
            if(p<0){
                p= unknownWordProbability;
            }
        }else{
            dictionary::iterator dp= d->find(rp->second);
            p= ((dp==d->end())||(dp->second.getJunkProbability()<0))?
                unknownWordProbability:dp->second.getJunkProbability();
        }
        if(verbose){
            cerr<<setw(3)<<setiosflags(ios::right)<<(i+1)<<" "<<
                setw(9)<<setprecision(5)<<setiosflags(ios::left)<<p<<
                " "<<rp->second<<endl;
        }
        probP*= p;
        probQ*= (1-p);
        rp++;
    }
    junkProb= probP/(probP+probQ);
    if(verbose){
        cerr<<"ProbP = "<<probP<<", ProbQ = "<<probQ<<endl;
    }
/*:189*/
#line 7839 "annoyance-filter.w"
    ;
    if(tp.getSaveMessage()){
/*190:*/
#line 7979 "annoyance-filter.w"
        // Insert the annotations ahead of the first empty line of the
        // saved message.
        ostringstream os;
        list<string> ::iterator p;
        for(p= tp.messageQueue.begin();p!=tp.messageQueue.end();p++){
            if(p->length()==0){
                break;
            }
        }
        os<<Xfile<<"-Junk-Probability: "<<setprecision(5)<<junkProb;
        tp.messageQueue.insert(p,os.str());
        os.str("");
        // p is an iterator into tp.messageQueue, so that is the list the
        // significant word lines must be inserted into; pairing it with
        // a different list is undefined behaviour.
        addSignificantWordDiagnostics(tp.messageQueue,p,rtokens);
/*:190*/
#line 7842 "annoyance-filter.w"
        ;
        ofstream mdump(pDiagFilename.c_str());
        tp.writeMessageQueue(mdump);
        mdump.close();
    }
    if(createTranscript||(transcriptFilename!="")){
/*191:*/
#line 8020 "annoyance-filter.w"
        // Find the first empty line of the transcript, noting on the way
        // whether lines end in a carriage return so that inserted lines
        // can be given the same ending.
        ostringstream os;
        list<string> ::iterator p;
        string transEndl= "";
        for(p= messageTranscript.begin();p!=messageTranscript.end();p++){
            if(p->length()==0){
                break;
            }
            if(*p=="\r"){
                transEndl= "\r";
                break;
            }
        }
        double jp= junkProb;
        if(jp<0.001){
            jp= 0;
        }
        os<<Xfile<<"-Junk-Probability: "<<setprecision(3)<<jp<<transEndl;
        messageTranscript.insert(p,os.str());
        os.str("");
        os<<Xfile<<"-Classification: ";
        if(junkProb>=junkThreshold){
            os<<"Junk";
        }else if(junkProb<=mailThreshold){
            os<<"Mail";
        }else{
            os<<"Indeterminate";
        }
        os<<transEndl;
        messageTranscript.insert(p,os.str());
        if(Annotate('w')){
            addSignificantWordDiagnostics(messageTranscript,p,rtokens,transEndl);
        }
        if(Annotate('p')||Annotate('d')){
            while(!parserDiagnostics.empty()){
                ostringstream os;
                os<<Xfile<<"-Parser-Diagnostic: "<<parserDiagnostics.front()<<transEndl;
                messageTranscript.insert(p,os.str());
                parserDiagnostics.pop();
            }
        }
/*:191*/
#line 7849 "annoyance-filter.w"
        ;
        if(transcriptFilename!=""){
            mf->writeMessageTranscript(transcriptFilename);
        }
    }
    return junkProb;
}
/*:185*//*192:*/
#line 8079 "annoyance-filter.w"
// Insert one "-Significant-Word:" annotation line into list l, ahead of
// position where, for each of the (at most nExtremal) most significant
// tokens in rtokens.  Each line gives the token's rank, its junk
// probability and the token itself, and is terminated by endLine.
//
// The probability is looked up the same way classifyThis() does it: in
// the fast dictionary when one is loaded, otherwise in the regular
// dictionary, with unknownWordProbability substituted for words that
// have no usable probability.  This keeps the figures shown here in
// step with the ones which actually determined the classification.
void classifyMessage::addSignificantWordDiagnostics(list<string> &l,
list<string> ::iterator where,
multimap<double,string> &rtokens,string endLine){
    unsigned int n= min(static_cast<multimap<double,string> ::size_type> (nExtremal),rtokens.size());
    multimap<double,string> ::const_reverse_iterator rp= rtokens.rbegin();
    for(unsigned int i= 0;i<n;i++){
        double wp;
        if(fd->isDictionaryLoaded()){
            wp= fd->find(rp->second);
            if(wp<0){
                wp= unknownWordProbability;
            }
        }else{
            dictionary::iterator dp= d->find(rp->second);
            wp= ((dp==d->end())||((dp->second.getJunkProbability()<0)))?
                unknownWordProbability:dp->second.getJunkProbability();
        }
        ostringstream os;
        os<<Xfile<<"-Significant-Word: "<<
            setw(3)<<setiosflags(ios::right)<<(i+1)<<" "<<
            setw(8)<<setprecision(5)<<setiosflags(ios::left)<<wp<<
            " \""<<rp->second<<"\""<<endLine;
        l.insert(where,os.str());
        rp++;
    }
}
/*:192*//*197:*/
#line 8226 "annoyance-filter.w"
#ifdef POP3_PROXY_SERVER
// Create the socket on which the proxy listens for client connections:
// an AF_INET stream socket bound to popProxyPort on any local address,
// listening with a backlog of maxBacklog.  SIGPIPE is routed to
// absentPlumber.
//
// Returns true (and sets opened) on success.  On failure the error is
// reported with perror(), listenSocket is left at -1 and false is
// returned.
bool POP3Proxy::acceptConnections(int maxBacklog){
    struct sockaddr_in name;
    listenSocket= socket(AF_INET,SOCK_STREAM,0);
    if(listenSocket<0){
        perror("POP3Proxy opening socket to listen for connections");
        listenSocket= -1;
        return false;
    }
    // Clear the whole structure so no field is passed to bind()
    // uninitialised.
    memset(&name,0,sizeof name);
    name.sin_family= AF_INET;
    name.sin_addr.s_addr= INADDR_ANY;
    name.sin_port= htons(popProxyPort);
    if(bind(listenSocket,(struct sockaddr*)&name,sizeof name)<0){
        // Report before closing: close() may overwrite errno.
        perror("POP3Proxy binding socket to listen for connections");
        close(listenSocket);
        listenSocket= -1;
        return false;
    }
    if(listen(listenSocket,maxBacklog)<0){
        perror("POP3Proxy calling listen for connection socket");
        close(listenSocket);
        listenSocket= -1;
        return false;
    }
    signal(SIGPIPE,absentPlumber);
    opened= true;
    return opened;
}
#endif
/*:197*//*198:*/
#line 8268 "annoyance-filter.w"
#ifdef POP3_PROXY_SERVER
// Accept one client connection on listenSocket and proxy its POP3
// session to the configured server.
//
// After connecting to the server and relaying its greeting, each client
// request is forwarded to the server, the server's status line is read,
// and, for commands which have a multi-line reply, the reply body is
// read up to its terminating ".\r\n" line.  The status line and body are
// passed through filterFunction (if set) before being relayed to the
// client.  The session ends on "quit", on any socket error, or when
// either side closes its connection.
//
// Returns false if no client could be accepted or the server name cannot
// be resolved; otherwise true once the session has ended and both
// sockets have been closed.
bool POP3Proxy::serviceConnection(void){
    assert(opened);
    int clientSocket;
    struct sockaddr_in from;
    socklen_t fromlen;
/*199:*/
#line 8291 "annoyance-filter.w"
    errno= 0;
    do{
        fromlen= sizeof from;
        clientSocket= accept(listenSocket,(struct sockaddr*)&from,&fromlen);
        if(clientSocket>=0){
            break;
        }
    }while(errno==EINTR);
    if(clientSocket<0){
        perror("POP3Proxy accepting connection from client");
        return false;
    }
    if(verbose){
        cout<<"Accepting POP3 connection from "<<inet_ntoa(from.sin_addr)<<endl;
    }
/*:199*/
#line 8277 "annoyance-filter.w"
    ;
/*200:*/
#line 8312 "annoyance-filter.w"
    int clientLength,serverLength;
    char clientBuffer[POP_BUFFER],serverBuffer[POP_BUFFER];
    int serverSocket;
    u_int32_t serverIP;
    struct hostent*h;
    int cstat= -1;
    bool ok= true;
    string command,argument,reply;
/*201:*/
#line 8338 "annoyance-filter.w"
    if(isdigit(serverName[0])&&(serverIP= inet_addr(serverName.c_str()))!=static_cast<u_int32_t> (-1)){
        cstat= 0;
    }else{
        h= gethostbyname(serverName.c_str());
        if(h!=NULL){
            memcpy(&serverIP,h->h_addr,sizeof serverIP);
            cstat= 0;
        }else{
            cerr<<"POP3Proxy: POP3 server "<<serverName.c_str()<<" unknown."<<endl;
            close(clientSocket);
            return false;
        }
    }
/*:201*/
#line 8322 "annoyance-filter.w"
    ;
/*202:*/
#line 8358 "annoyance-filter.w"
    struct sockaddr_in serverHost;
    serverHost.sin_family= AF_INET;
    serverSocket= socket(AF_INET,SOCK_STREAM,0);
    if(serverSocket<0){
        perror("POP3Proxy opening socket to POP server");
        cstat= -1;
    }else{
        if(popProxyTrace){
            cerr<<"POP3: serverSocket opened."<<endl;
        }
        serverHost.sin_port= htons(serverPort);
        memcpy((char*)&serverHost.sin_addr.s_addr,(char*)(&serverIP),
               sizeof serverHost.sin_addr.s_addr);
        errno= 0;
        do{
            cstat= connect(serverSocket,(struct sockaddr*)&(serverHost),sizeof serverHost);
            if(popProxyTrace){
                cerr<<"POP3: serverSocket connected."<<endl;
            }
            if(cstat==0){
                if(popProxyTrace){
                    cerr<<"POP3: Connected to POP server on "<<inet_ntoa(serverHost.sin_addr)<<
                        ":"<<ntohs(serverHost.sin_port)<<endl;
                }
                break;
            }else{
                perror("POP3Proxy connection to POP server failed");
            }
        }while(errno==EINTR);
        if(cstat<0){
            cerr<<"POP3Proxy: Cannot connect to POP3 server "<<serverName.c_str()<<endl;
        }
    }
/*:202*/
#line 8323 "annoyance-filter.w"
    ;
/*203:*/
#line 8402 "annoyance-filter.w"
    if(cstat<0){
        // No server connection: there is no greeting to read, so skip
        // straight to closing the sockets.
        ok= false;
    }else{
        serverLength= recv(serverSocket,serverBuffer,POP_MAX_MESSAGE,0);
        if(serverLength<0){
            perror("POP3Proxy reading greeting from server");
            ok= false;
        }else{
            clientLength= send(clientSocket,serverBuffer,serverLength,0);
            if(clientLength<0){
                perror("POP3Proxy forwarding greeting to client");
                ok= false;
            }
        }
    }
/*:203*/
#line 8324 "annoyance-filter.w"
    ;
/*204:*/
#line 8427 "annoyance-filter.w"
    while(ok){
/*205:*/
#line 8452 "annoyance-filter.w"
        if(popProxyTrace){
            cerr<<"POP3: Reading request from client."<<endl;
        }
        clientLength= recv(clientSocket,clientBuffer,POP_MAX_MESSAGE,0);
        if(popProxyTrace){
            cerr<<"POP3: Read "<<clientLength<<" request bytes from client."<<endl;
        }
        if(clientLength<=0){
            break;
        }
/*:205*/
#line 8430 "annoyance-filter.w"
        ;
/*206:*/
#line 8473 "annoyance-filter.w"
        // Requests which begin with white space are not forwarded.
        if(isspace(clientBuffer[0])){
            continue;
        }
/*:206*/
#line 8431 "annoyance-filter.w"
        ;
/*207:*/
#line 8481 "annoyance-filter.w"
        serverLength= send(serverSocket,clientBuffer,clientLength,0);
        if(serverLength!=clientLength){
            perror("POP3Proxy forwarding request to server");
            break;
        }
/*:207*/
#line 8432 "annoyance-filter.w"
        ;
/*208:*/
#line 8497 "annoyance-filter.w"
        // Split the request into a lower case command and its argument.
        while((clientLength> 0)&&isspace(clientBuffer[clientLength-1])){
            clientLength--;
        }
        command= argument= "";
        int i;
        for(i= 0;i<clientLength;i++){
            if(isspace(clientBuffer[i])){
                break;
            }
            char ch= clientBuffer[i];
            if(isalpha(ch)&&isupper(ch)){
                ch= tolower(ch);
            }
            command+= ch;
        }
        while((i<clientLength)&&isspace(clientBuffer[i])){
            i++;
        }
        if(i<clientLength){
            argument= string(clientBuffer+i,clientLength-i);
        }
        if(popProxyTrace){
            cerr<<"POP3: Client command ("<<command<<") Argument ("<<argument<<")"<<endl;
        }
/*:208*/
#line 8433 "annoyance-filter.w"
        ;
/*209:*/
#line 8531 "annoyance-filter.w"
        // Read the status line one byte at a time so that none of a
        // following multi-line reply is consumed along with it.
        serverLength= 0;
        int rl= -1;
        while(true){
            rl= recv(serverSocket,serverBuffer+serverLength,1,0);
            if(rl<0){
                perror("POP3Proxy reading request status from server");
                break;
            }
            if(rl==0){
                // recv() returns 0 when the server has closed the
                // connection; nothing was stored in the buffer.
                cerr<<"POP3Proxy: server closed connection while sending request status."<<endl;
                rl= -1;
                break;
            }
            serverLength++;
            if(serverBuffer[serverLength-1]=='\n'){
                break;
            }
            if(serverLength>=POP_MAX_MESSAGE){
                cerr<<"POP3Proxy reply from server too long."<<endl;
                rl= -1;
                break;
            }
        }
        if(rl<0){
            break;
        }
        if(popProxyTrace){
            cerr<<"POP3: Server reply is "<<serverLength<<" bytes"<<endl;
        }
/*:209*/
#line 8434 "annoyance-filter.w"
        ;
/*210:*/
#line 8568 "annoyance-filter.w"
        reply= "";
        if((serverBuffer[0]=='+')&&
           ((multiLine.find(command)!=multiLine.end())||
            ((argument=="")&&(cMultiLine.find(command)!=cMultiLine.end())))){
            int bll;
            char bp[POP_BUFFER];
            if(popProxyTrace){
                cerr<<"POP3: Reading multi-line reply from server."<<endl;
            }
            do{
                bll= recv(serverSocket,bp,POP_MAX_MESSAGE,0);
                if(bll<0){
                    perror("POP3Proxy reading multi-line reply to request from server");
                    break;
                }
                if(bll==0){
                    // Server closed the connection before sending the
                    // terminator; without this test the loop would spin
                    // forever appending nothing.
                    cerr<<"POP3Proxy: server closed connection during multi-line reply."<<endl;
                    break;
                }
#ifdef POP3_TRACE_TRANSFER_DETAIL
                if(popProxyTrace){
                    cerr<<"POP3: Appending "<<bll<<" bytes to multi-line reply."<<endl;
                }
#endif
                reply+= string(bp,bll);
                // Keep reading until the reply is exactly ".\r\n" or ends
                // with "\r\n.\r\n".  The tail is examined only when at
                // least five characters are present, since a start
                // position computed from a shorter reply would wrap
                // around.
            }while((reply.length()<3)||
                   ((reply!=".\r\n")&&
                    ((reply.length()<5)||
                     (reply.compare(reply.length()-5,5,"\r\n.\r\n")!=0))));
            if(bll<=0){
                // The reply is incomplete; abandon the session rather
                // than relay a body with no terminator.
                break;
            }
        }
/*:210*/
#line 8435 "annoyance-filter.w"
        ;
/*211:*/
#line 8605 "annoyance-filter.w"
        if(popProxyTrace){
            cerr<<"POP3: Calling filter function."<<endl;
        }
        if(filterFunction!=NULL){
            serverBuffer[serverLength]= 0;
            filterFunction(command,argument,serverBuffer,&serverLength,reply);
        }
        if(popProxyTrace){
            cerr<<"POP3: Returned from filter function."<<endl;
        }
/*:211*/
#line 8437 "annoyance-filter.w"
        ;
/*212:*/
#line 8623 "annoyance-filter.w"
        clientLength= send(clientSocket,serverBuffer,serverLength,0);
        if(clientLength!=serverLength){
            perror("POP3Proxy relaying status of request to client");
            break;
        }
        if(popProxyTrace){
            cerr<<"POP3: Relaying "<<serverLength<<
                " byte status line to client: "<<serverBuffer;
            if((serverLength==0)||(serverBuffer[serverLength-1])!='\n'){
                cerr<<endl;
            }
        }
/*:212*/
#line 8439 "annoyance-filter.w"
        ;
/*213:*/
#line 8644 "annoyance-filter.w"
        if(reply!=""){
            if(popProxyTrace){
                cerr<<"POP3: Relaying "<<reply.length()<<" byte multi-line reply to client."<<endl;
            }
#ifdef POP3_MAX_CLIENT_WRITE
            clientLength= 0;
            int rpl= reply.length();
            while(clientLength<((int)reply.length())){
                int bcl,pcl;
                bcl= min(rpl,POP3_MAX_CLIENT_WRITE);
#ifdef POP3_TRACE_TRANSFER_DETAIL
                if(popProxyTrace){
                    cerr<<"POP3: Writing "<<bcl<<" bytes of multi-line reply to client."<<endl;
                }
#endif
                pcl= send(clientSocket,reply.data()+clientLength,bcl,0);
                if(pcl!=bcl){
                    if(popProxyTrace){
                        cerr<<"POP3: Error writing "<<bcl<<" bytes: wrote "<<pcl<<" bytes."<<endl;
                    }
                    break;
                }
                clientLength+= pcl;
                rpl-= pcl;
            }
#else
            clientLength= send(clientSocket,reply.data(),reply.length(),0);
#endif
            if(clientLength!=static_cast<int> (reply.length())){
                perror("POP3Proxy relaying multi-line reply to request to client");
                break;
            }
#ifdef POP3_TRACE_TRANSFER_DETAIL
            if(popProxyTrace){
                cerr<<"POP3: <<<<<< Relaying "<<reply.length()<<" byte multi-line reply body to client. >>>>>>"<<endl;
                cerr<<reply;
                cerr<<"POP3: <<<<<< End multi-line reply body. >>>>>>"<<endl;
            }
#endif
        }
/*:213*/
#line 8440 "annoyance-filter.w"
        ;
        if(command=="quit"){
            break;
        }
    }
/*:204*/
#line 8325 "annoyance-filter.w"
    ;
/*214:*/
#line 8696 "annoyance-filter.w"
    close(clientSocket);
    close(serverSocket);
    if(verbose){
        cerr<<"Closing POP3 connection from "<<inet_ntoa(from.sin_addr)<<endl;
    }
/*:214*/
#line 8326 "annoyance-filter.w"
    ;
/*:200*/
#line 8279 "annoyance-filter.w"
    ;
    return true;
}
#endif
/*:198*//*215:*/
#line 8709 "annoyance-filter.w"
#ifdef POP3_PROXY_SERVER
// Run the proxy: start listening with the given backlog, then service
// client connections one after another until serviceConnection()
// reports failure.  Always returns false.
bool POP3Proxy::operateProxyServer(int maxBacklog)
{
    if (!acceptConnections(maxBacklog)) {
        return false;
    }
    while (serviceConnection()) {
        // Each call handles one complete client session.
    }
    return false;
}
#endif
/*:215*/
#line 9965 "annoyance-filter.w"
/*225:*/
#line 8921 "annoyance-filter.w"
static dictionary dict; // Master dictionary: filled by addFolder(), consulted when classifying
static fastDictionary fDict; // Fast dictionary; classification accepts it in place of dict once loaded
/*:225*//*249:*/
#line 9882 "annoyance-filter.w"
static tokenDefinition isoToken; // Token definition set up in main() with setISO_8859defaults()
static tokenDefinition asciiToken; // Token definition set up in main() with setUS_ASCIIdefaults()
/*:249*/
#line 9966 "annoyance-filter.w"
/*184:*/
#line 7800 "annoyance-filter.w"
// Build a classifier which reads messages from mail folder m.
//
//   dt    regular dictionary used for word probabilities
//   fdt   fast dictionary
//   nExt  number of most significant words used to classify a message
//   uwp   probability assigned to words with no dictionary probability
//
// The embedded token parser is attached to m and configured from the
// global token definitions and token length limits.  It is told to save
// the message when a parser diagnostic file name has been set.
classifyMessage::classifyMessage(mailFolder &m,
                                 dictionary &dt, fastDictionary *fdt,
                                 unsigned int nExt, double uwp)
{
    // Classification parameters.
    d = &dt;
    fd = fdt;
    nExtremal = nExt;
    unknownWordProbability = uwp;

    // Message source and token parser.
    mf = &m;
    tp.setSource(m);
    tp.setTokenDefinition(isoToken, asciiToken);
    tp.setTokenLengthLimits(maxTokenLength, minTokenLength,
                            streamMaxTokenLength, streamMinTokenLength);
    if (!pDiagFilename.empty()) {
        tp.setSaveMessage(true);
    }
}
/*:184*//*229:*/
#line 9012 "annoyance-filter.w"
// Recompute the junk probability of every word in the global dictionary
// from the current mail and junk message counts, the mail bias and the
// minimum occurrence threshold, then clear singleDictionaryRead.
static void updateProbability(void)
{
    dict.computeJunkProbability(
        messageCount[dictionaryWord::Mail],
        messageCount[dictionaryWord::Junk],
        mailBias,
        minOccurrences);

    singleDictionaryRead = false;
}
/*:229*//*230:*/
#line 9024 "annoyance-filter.w"
// Print the global dictionary on os (standard output by default): a
// header giving the number of unique tokens, followed by the description
// of each word.  Probabilities are brought up to date first.
static void printDictionary(ostream &os = cout)
{
    updateProbability();

    os << "Dictionary contains " << dict.size() << " unique tokens." << endl;

    dictionary::iterator entry = dict.begin();
    while (entry != dict.end()) {
        entry->second.describe(os);
        entry++;
    }
}
/*:230*//*231:*/
#line 9038 "annoyance-filter.w"
// Classify the first message in file fname and return its junk
// probability.  If neither the regular nor the fast dictionary has been
// loaded, an error is printed and 0.5 is returned.  The count of tested
// messages (nTested) is incremented in either case.
static double classifyMessages(const char *fname)
{
    double jp;
    const bool haveDictionary = !dict.empty() || fDict.isDictionaryLoaded();

    if (haveDictionary) {
        mailFolder mf(fname, dictionaryWord::Mail);
        classifyMessage cm(mf, dict, &fDict, significantWords, novelWordProbability);
        jp = cm.classifyThis();
        if (verbose) {
            cerr << "Message junk probability: " << setprecision(5) << jp << endl;
        }
    } else {
        cerr << "You cannot --classify or --test a message "
                "unless you have first loaded a dictionary." << endl;
        jp = 0.5;
    }

    nTested++;
    return jp;
}
/*:231*//*242:*/
#line 9266 "annoyance-filter.w"
// Print the command line help text on standard output.  Options which
// depend on optional capabilities (Jig, HAVE_PLOT_UTILITIES,
// POP3_PROXY_SERVER) are listed only when those are configured.
static void usage(void)
{
    cout<<PRODUCT<<" -- Annoyance Filter. Call"<<endl
        <<" with "<<PRODUCT<<" [options]"<<endl
        <<""<<endl
        <<"Options:"<<endl
        <<" --annotate options Specify optional annotations in --transcript"<<endl
        <<" --autoprune n Automatically prune unique words when dictionary exceeds n bytes"<<endl
        <<" --biasmail n Set frequency bias for words and phrases in legitimate mail to n"<<endl
        <<" --binword n Scan binary streams for words >= n characters (0 = none)"<<endl
        <<" --bsdfolder Next --mail or --junk folder uses BSD \"From \" separator"<<endl
        <<" --classify fname Classify first message in fname"<<endl
        <<" --clearjunk Clear junk counts in dictionary"<<endl
        <<" --clearmail Clear mail counts in dictionary"<<endl
        <<" --copyright Print copyright information"<<endl
        <<" --csvread fname Import dictionary from fname in CSV format"<<endl
        <<" --csvwrite fname Export dictionary to fname in CSV format"<<endl
        <<" --fread fname Load fast dictionary from fname"<<endl
        <<" --fwrite fname Write fast dictionary to fname"<<endl
        <<" --help, -u Print this message"<<endl;
#ifdef Jig
    cout<<" --jig Test component in temporary jig"<<endl;
#endif
    cout<<" --junk, -j folder Add folder contents to junk mail dictionary"<<endl
        <<" --list Print dictionary on standard output"<<endl
        <<" --mail, -m folder Add folder contents to legitimate mail dictionary"<<endl
        <<" --newword n Set probability for words not in dictionary to n"<<endl
        <<" --pdiag fname Print parser diagnostics to fname"<<endl
        <<" --phraselimit n Set phrase maximum length to n characters"<<endl
        <<" --phrasemax n Set phrase maximum to n words"<<endl
        <<" --phrasemin n Set phrase minimum to n words"<<endl;
#ifdef HAVE_PLOT_UTILITIES
    cout<<" --plot fname Plot histogram of word probabilities in dictionary"<<endl;
#endif
#ifdef POP3_PROXY_SERVER
    cout<<" --pop3port n Listen for POP3 proxy requests on port n (default 9110)"<<endl
        <<" --pop3server serv[:p] Operate POP3 proxy for server, port p (default 110)"<<endl
        <<" --pop3trace Trace POP3 proxy traffic on standard error"<<endl;
#endif
    cout<<" --prune Prune infrequently used words from dictionary"<<endl
        <<" --ptrace Include detailed trace in --pdiag output"<<endl
        <<" --read, -r fname Import dictionary from fname"<<endl
        <<" --sigwords n Classify message based on n most significant words"<<endl
        <<" --sloppyheaders Accept messages with malformed MIME part separators"<<endl
        <<" --statistics Print statistics of dictionary"<<endl
        <<" --test, -t fname Test first message in fname"<<endl
        <<" --threshjunk n Set junk threshold to n"<<endl
        <<" --threshmail n Set mail threshold to n"<<endl
        <<" --transcript fname Write annotated message transcript to fname"<<endl
        <<" --verbose, -v Print processing information"<<endl
        <<" --version Print version number"<<endl
        <<" --write fname Export dictionary to fname"<<endl
        <<""<<endl
        <<"by John Walker"<<endl
        <<"http://www.fourmilab.ch/"<<endl;
}
/*:242*/
#line 9967 "annoyance-filter.w"
/*219:*/
#line 8770 "annoyance-filter.w"
#ifdef POP3_PROXY_SERVER
// Filter function installed in the POP3 proxy.  For a successful "retr"
// command (status line beginning with '+') the message body in reply is
// classified and replaced by the annotated message transcript, and the
// status line in replyBuffer / *replyLength is rewritten to
// "+OK <n> octets".  All other commands pass through unchanged.
//
//   command      lower case POP3 command
//   argument     argument to the command (not used here)
//   replyBuffer  status line from the server, rewritten in place
//   replyLength  length of the status line, updated to match
//   reply        multi-line reply body, replaced by the transcript
void popFilter(const string command,const string argument,char*replyBuffer,int*replyLength,string&reply){
    if((command=="retr")&&((*replyLength)> 0)&&(replyBuffer[0]=='+')){
/*220:*/
#line 8836 "annoyance-filter.w"
        istrstream is(reply.data(),reply.length());
        mailFolder mf(is,dictionaryWord::Mail);
        mf.forceInHeader();
/*:220*/
#line 8775 "annoyance-filter.w"
        ;
/*221:*/
#line 8850 "annoyance-filter.w"
        classifyMessage cm(mf,dict,&fDict,significantWords,novelWordProbability);
        double jp= cm.classifyThis(true);
        if(verbose){
            cerr<<"Message junk probability: "<<setprecision(5)<<jp<<endl;
        }
/*:221*/
#line 8776 "annoyance-filter.w"
        ;
#define not_POPFILTER_TRACE
#ifdef POPFILTER_TRACE
        cerr<<"Classification done."<<endl;
#endif
#ifdef OLDWAY
        ostringstream os;
#else
        // Write the transcript into a buffer sized from the predicted
        // transcript length, with a little slack.
        unsigned int mtl= mf.sizeMessageTranscript();
#ifdef POPFILTER_TRACE
        cerr<<"Message transcript predicted size: "<<mtl<<endl;
#endif
        char*mtbuf= new char[mtl+16];
        ostrstream os(mtbuf,mtl+16);
#endif
        mf.writeMessageTranscript(os);
#ifdef POPFILTER_TRACE
        cerr<<"Transcript written."<<endl;
#endif
        mf.clearMessageTranscript();
#ifdef POPFILTER_TRACE
        cerr<<"Transcript cleared."<<endl;
        cerr<<"Message transcript actual size: "<<os.tellp()<<endl;
#endif
        reply.erase();
#ifndef OLDWAY
        os<<'\0';
        // Guarantee termination even if the stream filled the buffer and
        // the terminator above could not be stored.
        mtbuf[mtl+15]= 0;
#endif
        reply= os.str();
#ifdef POPFILTER_TRACE
        cerr<<"Reply string length: "<<reply.length()<<endl;
#endif
#ifndef OLDWAY
        // Allocated with new[], so it must be released with delete[].
        delete[] mtbuf;
#endif
#ifdef POPFILTER_TRACE
        cerr<<"Reply created."<<endl;
#endif
/*222:*/
#line 8874 "annoyance-filter.w"
        ostringstream rs;
        rs<<"+OK "<<(reply.length()-3)<<" octets\r\n";
        memcpy(replyBuffer,rs.str().data(),rs.str().length());
        *replyLength= rs.str().length();
/*:222*/
#line 8815 "annoyance-filter.w"
        ;
#ifdef POPFILTER_TRACE
        cerr<<"Reply length modification done."<<endl;
#endif
    }
}
#endif
/*:219*//*227:*/
#line 8938 "annoyance-filter.w"
// Read every message in mail folder fname and add its tokens to the
// global dictionary under category cat.  The pending --bsdfolder
// setting is applied to this folder and then cleared.  After each token
// the dictionary is automatically pruned if --autoprune is in effect
// and the estimated dictionary size exceeds it.  Finally the folder's
// message count is added to the total for its category.
static void addFolder(const char *fname, dictionaryWord::mailCategory cat)
{
    if (verbose) {
        cerr << "Adding " << (bsdFolder ? "BSD " : "") << "folder "
             << fname << " as " << dictionaryWord::categoryName(cat) << ":" << endl;
    }

    mailFolder mf(fname, cat);
    mf.setBSDmode(bsdFolder);
    bsdFolder = false;

    tokenParser tp;
    tp.setSource(mf);
    tp.setTokenDefinition(isoToken, asciiToken);
    tp.setTokenLengthLimits(maxTokenLength, minTokenLength,
                            streamMaxTokenLength, streamMinTokenLength);
    if (pDiagFilename.length() > 0) {
        tp.setSaveMessage(true);
    }

    dictionaryWord dw;
    unsigned int ntokens;
    for (ntokens = 0; tp.nextToken(dw); ntokens++) {
        dict.add(dw, mf.getCategory());
/*228:*/
#line 8988 "annoyance-filter.w"
        if ((autoPrune == 0) || (dict.estimateMemoryRequirement() <= autoPrune)) {
            continue;
        }
        if (verbose) {
            cerr << "Dictionary size " << dict.estimateMemoryRequirement()
                 << "; starting automatic prune." << endl;
        }
        dict.purge(1);
        // If pruning recovered less than a tenth of the limit, raise the
        // limit so we do not prune again almost immediately.
        if (dict.estimateMemoryRequirement() > ((autoPrune * 9) / 10)) {
            cerr << "Dictionary size after --autoprune is larger than 90%" << endl;
            cerr << "of --autoprune setting of " << autoPrune << " bytes." << endl;
            autoPrune = static_cast<unsigned int>(autoPrune * 1.25);
            cerr << "Increasing --autoprune threshold 25% to " << autoPrune
                 << " to avoid thrashing." << endl;
        }
/*:228*/
#line 8964 "annoyance-filter.w"
        ;
    }

    messageCount[mf.getCategory()] += mf.getMessageCount();
    if (verbose) {
        cerr << " Added " << mf.getMessageCount() << " messages, "
             << ntokens << " tokens in " << mf.getLineCount() << " lines." << endl;
        cerr << " Dictionary contains " << dict.size() << " unique tokens." << endl;
        cerr << " Dictionary size " << dict.estimateMemoryRequirement() << " bytes." << endl;
    }
}
/*:227*//*246:*/
#line 9832 "annoyance-filter.w"
// Records whether the capabilities heading has been printed yet;
// printOptionalCapability only ever raises it from 0 to 1.
static unsigned int nOptionalCaps= 0;

// Write one optional-capability line for s on standard output.  The first
// call also writes the "Optional capabilities configured:" heading ahead
// of the line.
static void printOptionalCapability(const string&s)
{
    const bool headingPending= (nOptionalCaps<1);
    if (headingPending) {
        ++nOptionalCaps;
        cout<<"Optional capabilities configured:"<<endl;
    }
    cout<<" "<<s<<"."<<endl;
}
/*:246*/
#line 9968 "annoyance-filter.w"
/*223:*/
#line 8886 "annoyance-filter.w"
/*253:*/
#line 9943 "annoyance-filter.w"
#ifdef Jig
#endif
/*:253*/
#line 8888 "annoyance-filter.w"
;
int main(int argc,char*argv[])
{
int opt;
/*224:*/
#line 8909 "annoyance-filter.w"
memset(messageCount,0,sizeof messageCount);
isoToken.setISO_8859defaults(minTokenLength,maxTokenLength);
asciiToken.setUS_ASCIIdefaults(streamMinTokenLength,streamMaxTokenLength);
/*:224*/
#line 8894 "annoyance-filter.w"
;
/*243:*/
#line 9332 "annoyance-filter.w"
static const struct option long_options[]= {
{"annotate",1,NULL,222},
{"autoprune",1,NULL,232},
{"biasmail",1,NULL,225},
{"binword",1,NULL,221},
{"bsdfolder",0,NULL,231},
{"classify",1,NULL,209},
{"clearjunk",0,NULL,215},
{"clearmail",0,NULL,216},
{"copyright",0,NULL,200},
{"csvread",1,NULL,205},
{"csvwrite",1,NULL,207},
{"fread",1,NULL,228},
{"fwrite",1,NULL,229},
{"help",0,NULL,'u'},
#ifdef Jig
{"jig",0<