/*
 * Decompiled with CFR 0.152.
 */
package com.caucho.web.search;

import com.caucho.util.CharBuffer;
import com.caucho.util.IntMap;
import com.caucho.vfs.Path;
import com.caucho.vfs.ReadStream;
import com.caucho.web.search.Spider;
import com.caucho.xml.LooseHtml;
import com.caucho.xml.XmlUtil;
import com.caucho.xpath.XPath;
import java.io.InputStream;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;

/**
 * The searchable view of one HTML document visited by a {@link Spider}.
 *
 * <p>{@link #analyze} parses the document, then extracts a title, a short
 * description, a table of scored words and the list of outgoing links.
 * Words are scored by where they occur (title, keywords, description,
 * plain text); words found inside an anchor are credited to the page the
 * anchor points at, through the spider's store.
 *
 * <p>The explicit upcasts on calls into {@code com.caucho.*}
 * ({@code (InputStream)}, {@code (Node)}, {@code (Object)}) are kept on
 * purpose: they select the same overloads the original code selected.
 */
public class Page {
    /** Score added for each word of the {@code <title>}. */
    private static final int TITLE_SCORE = 200;
    /** Score added for each word of the {@code keywords} meta tag. */
    private static final int KEYWORD_SCORE = 100;
    /** Score added for each word of the {@code description} meta tag. */
    private static final int DESCRIPTION_SCORE = 10;
    // The heading scores are declared but not referenced by this class.
    private static final int H1_SCORE = 50;
    private static final int H2_SCORE = 25;
    private static final int H3_SCORE = 10;
    private static final int H4_SCORE = 5;
    /** Score credited to a link target for each word of the anchor text. */
    private static final int A_SCORE = 10;
    /** Score added for each word of ordinary text nodes. */
    private static final int TEXT_SCORE = 1;
    /** Maximum number of characters kept in the description. */
    private static final int DESC_LENGTH = 255;
    /** Tokens shorter than this are not recorded as words. */
    private static final int MIN_WORD_LENGTH = 3;

    /** Stop words, stored as key -> 1; filled by the static initializer. */
    private static IntMap stopWords;
    /** Source list for {@link #stopWords}. */
    static String[] stop;

    static {
        stop = new String[]{"and", "are", "all", "also", "can", "for", "from", "has", "have", "how", "may", "might", "must", "not", "only", "other", "should", "the", "that", "this", "than", "there", "use", "you", "your", "was", "with", "were", "what", "when", "where", "who", "why"};
        stopWords = new IntMap();
        for (int i = 0; i < stop.length; i++) {
            stopWords.put((Object) stop[i], 1);
        }
    }

    /** Resolved targets ({@link Path}) of the anchors found on the page. */
    private ArrayList links;
    private Spider spider;
    private Path path;
    /** {@code path.getPath()}, plus {@code "?" + query} when a query exists. */
    private String url;
    private String title;
    private String description;
    private Document doc;
    /** Normalized word text -> {@link Word}. */
    private HashMap words;

    /**
     * Creates an empty page for an already parsed document. No analysis is
     * performed here; see {@link #analyze}.
     */
    Page(Spider spider, Path path, Document document) {
        this.spider = spider;
        this.path = path;
        this.url = path.getPath();
        String query = path.getQuery();
        if (query != null) {
            this.url = this.url + "?" + query;
        }
        this.doc = document;
        this.words = new HashMap();
        this.links = new ArrayList();
    }

    /**
     * Parses and analyzes one document.
     *
     * @param spider      the spider that owns the crawl; used to validate
     *                    link targets and to reach the score store
     * @param path        location of the document; links are resolved
     *                    against its parent
     * @param readStream  stream the HTML is read from
     * @param contentType content type of the document
     * @return the analyzed page, or {@code null} when the content type is
     *         not exactly {@code "text/html"} or the document head contains
     *         a {@code meta} element whose name is {@code robots}
     * @throws Exception if parsing or an XPath evaluation fails
     */
    public static Page analyze(Spider spider, Path path, ReadStream readStream, String contentType) throws Exception {
        // NOTE(review): exact match only; a value carrying parameters such as
        // "text/html; charset=..." is rejected -- confirm callers strip them.
        if (!"text/html".equals(contentType)) {
            return null;
        }
        Document document = new LooseHtml().parseDocument((InputStream) readStream);
        Page page = new Page(spider, path, document);
        if (page.hasRobotsMeta()) {
            return null;
        }
        page.analyzeTitle();
        page.analyzeDescription();
        page.analyzeWords();
        page.analyzeLinks();
        return page;
    }

    public String getUrl() {
        return this.url;
    }

    public String getTitle() {
        return this.title;
    }

    public String getDescription() {
        return this.description;
    }

    /** Returns an iterator over the {@link Word} entries collected so far. */
    public Iterator getWords() {
        return this.words.values().iterator();
    }

    /** Returns an iterator over the link targets collected by {@link #analyzeLinks}. */
    public Iterator getLinks() throws Exception {
        return this.links.iterator();
    }

    /**
     * Rebuilds the link list from every {@code a} element of the document.
     *
     * <p>The fragment part ({@code #...}) of each href is dropped, empty
     * hrefs are ignored, and the remainder is resolved against the parent of
     * this page's path. For targets the spider accepts as valid pages, each
     * word of the anchor text adds {@link #A_SCORE} to the target's entry in
     * the spider's store.
     *
     * @throws Exception if the XPath selection or a store operation fails
     */
    public void analyzeLinks() throws Exception {
        Path base = this.path.getParent();
        this.links = new ArrayList();
        ArrayList anchorWords = new ArrayList();
        Iterator anchors = this.select("//a");
        while (anchors.hasNext()) {
            Element anchor = (Element) anchors.next();
            String href = anchor.getAttribute("href");
            if (href.equals("")) {
                // Fall back to the upper-case spelling of the attribute.
                href = anchor.getAttribute("HREF");
            }
            int fragment = href.indexOf('#');
            if (fragment >= 0) {
                href = href.substring(0, fragment);
            }
            if (href.length() <= 0) {
                continue;
            }
            Path target = base.lookup(href);
            this.links.add(target);
            if (!this.spider.isValidPage(target)) {
                continue;
            }
            String anchorText = XmlUtil.textValue((Node) anchor);
            anchorWords.clear();
            this.extractWords(anchorWords, anchorText);
            int targetPage = this.spider.getStore().getPage(target.getURL());
            for (int i = anchorWords.size() - 1; i >= 0; i--) {
                this.spider.getStore().addScore(targetPage, (String) anchorWords.get(i), A_SCORE);
            }
        }
    }

    /**
     * Reports whether the head holds a {@code meta} element whose
     * {@code name} equals {@code robots}, ignoring case.
     */
    private boolean hasRobotsMeta() throws Exception {
        // NOTE(review): the content attribute is never inspected, so even a
        // permissive robots tag makes analyze() skip the page -- confirm that
        // this conservative behavior is intended.
        Iterator metas = this.select("/html/head/meta");
        while (metas.hasNext()) {
            Element meta = (Element) metas.next();
            if (meta.getAttribute("name").equalsIgnoreCase("robots")) {
                return true;
            }
        }
        return false;
    }

    /** Sets the title from {@code /html/head/title}, or the URL when that is null or empty. */
    private void analyzeTitle() throws Exception {
        this.title = this.evalString("/html/head/title");
        if (this.title == null || this.title.equals("")) {
            this.title = this.url;
        }
    }

    /**
     * Sets the description, at most {@link #DESC_LENGTH} characters long.
     *
     * <p>A non-empty {@code description} meta tag wins: it is truncated,
     * stored, and its words are scored with {@link #DESCRIPTION_SCORE}.
     * Otherwise the description is assembled from the document's text nodes
     * (skipping those directly inside {@code script}, {@code style} or
     * {@code title}); in that case no words are scored here.
     */
    private void analyzeDescription() throws Exception {
        String meta = this.evalString("/html/head/meta[@name='description']/@content");
        if (meta != null && meta.length() > 0) {
            this.description = meta.length() > DESC_LENGTH ? meta.substring(0, DESC_LENGTH) : meta;
            this.addParagraph(this.description, DESCRIPTION_SCORE);
            return;
        }
        Iterator texts = this.select("//text()");
        CharBuffer summary = new CharBuffer();
        while (texts.hasNext() && summary.length() < DESC_LENGTH) {
            Text text = (Text) texts.next();
            if (isScriptOrStyle(text) || text.getParentNode().getNodeName().equals("title")) {
                continue;
            }
            appendCollapsed(summary, text.getNodeValue(), DESC_LENGTH);
        }
        this.description = summary.toString();
    }

    /**
     * Appends {@code text} to {@code buffer} until the buffer holds
     * {@code limit} characters. Apostrophes are dropped, and a whitespace
     * character is dropped when the buffer already ends in whitespace.
     * Whitespace arriving while the buffer is still empty is kept.
     */
    private static void appendCollapsed(CharBuffer buffer, String text, int limit) {
        // Stopping at the limit gives the same prefix as copying everything
        // and truncating afterwards, without walking the rest of a long node.
        for (int i = 0; i < text.length() && buffer.length() < limit; i++) {
            char c = text.charAt(i);
            if (c == '\'') {
                continue;
            }
            if (Character.isWhitespace(c)
                    && buffer.length() != 0
                    && Character.isWhitespace(buffer.charAt(buffer.length() - 1))) {
                continue;
            }
            buffer.append(c);
        }
    }

    /** Scores the words of the title, the keywords meta tag and every text node. */
    private void analyzeWords() throws Exception {
        // The title is re-read from the document because this.title may have
        // been replaced by the URL in analyzeTitle().
        this.addParagraph(this.evalString("/html/head/title"), TITLE_SCORE);
        this.addParagraph(this.evalString("/html/head/meta[@name='keywords']/@content"), KEYWORD_SCORE);
        this.addSelect("//text()", TEXT_SCORE);
    }

    /**
     * Scores the words of every text node matched by {@code query}, skipping
     * text directly inside {@code script} or {@code style}. The query must
     * select only text nodes, since each result is cast to {@link Text}.
     */
    private void addSelect(String query, int score) throws Exception {
        Iterator texts = this.select(query);
        while (texts.hasNext()) {
            Text text = (Text) texts.next();
            if (isScriptOrStyle(text)) {
                continue;
            }
            this.addParagraph(text.getNodeValue(), score);
        }
    }

    /** Reports whether the parent of {@code text} is named {@code script} or {@code style}. */
    private static boolean isScriptOrStyle(Text text) {
        String parent = text.getParentNode().getNodeName();
        return parent.equals("script") || parent.equals("style");
    }

    /** Adds {@code score} to every word found in {@code text}; a null text is ignored. */
    private void addParagraph(String text, int score) {
        if (text == null) {
            return;
        }
        ArrayList found = new ArrayList();
        this.extractWords(found, text);
        // Walk from last to first, matching the original insertion order
        // into the word table.
        for (int i = found.size() - 1; i >= 0; i--) {
            this.addWord((String) found.get(i), score);
        }
    }

    /**
     * Adds {@code score} to the entry for {@code text}, creating the entry
     * first unless the word is a stop word.
     */
    private void addWord(String text, int score) {
        Word word = (Word) this.words.get(text);
        if (word == null) {
            // Stop words were stored with value 1; this presumably relies on
            // IntMap.get returning a non-positive value for a missing key.
            if (stopWords.get((Object) text) > 0) {
                return;
            }
            word = new Word(text);
            this.words.put(text, word);
        }
        word.addScore(score);
    }

    /**
     * Splits {@code text} into normalized words and appends them to
     * {@code out}. A word starts at a letter and continues over letters,
     * digits, {@code '-'}, {@code '_'}, and a {@code '.'} that is directly
     * followed by a letter or digit. Tokens shorter than
     * {@link #MIN_WORD_LENGTH} are discarded. A null text is ignored.
     */
    private void extractWords(ArrayList out, String text) {
        if (text == null) {
            return;
        }
        CharBuffer token = new CharBuffer();
        int length = text.length();
        int i = 0;
        while (i < length) {
            if (!Character.isLetter(text.charAt(i))) {
                i++;
                continue;
            }
            token.clear();
            // The first character is a letter, so this loop always consumes
            // at least one character and the outer loop makes progress.
            while (i < length && isWordChar(text, i)) {
                token.append(text.charAt(i));
                i++;
            }
            if (token.length() >= MIN_WORD_LENGTH) {
                out.add(this.normalize(token));
            }
        }
    }

    /** Reports whether the character at {@code index} may continue a word. */
    private static boolean isWordChar(String text, int index) {
        char c = text.charAt(index);
        if (Character.isLetterOrDigit(c) || c == '-' || c == '_') {
            return true;
        }
        return c == '.'
                && index + 1 < text.length()
                && Character.isLetterOrDigit(text.charAt(index + 1));
    }

    /** Returns the lower-cased text of {@code token}. */
    private String normalize(CharBuffer token) {
        return token.toLowerCase().toString();
    }

    /** Runs an XPath selection against this page's document. */
    private Iterator select(String query) throws Exception {
        return XPath.select(query, (Node) this.doc);
    }

    /** Evaluates an XPath expression against this page's document as a string. */
    private String evalString(String query) throws Exception {
        return XPath.evalString(query, (Node) this.doc);
    }

    /** A word found on a page together with its accumulated score. */
    public static class Word {
        String word;
        int score;

        Word(String string) {
            this.word = string;
        }

        /** Adds {@code n} to the accumulated score. */
        void addScore(int n) {
            this.score += n;
        }

        public String getWord() {
            return this.word;
        }

        public int getScore() {
            return this.score;
        }
    }
}

