// tidypage (http://kalvis.com?tidypage): a fork of the Readability
// project, http://lab.arc90.com/experiments/readability/

// Version banner; grabArticle() embeds it in the generated footer.
var tidypageVersion = "0.3 A fork of the Readability project http://lab.arc90.com/experiments/readability/";

// Presentation presets applied by guts() as CSS class names:
//   readStyle             -> className of <body> and of the #readOverlay div
//   readSize / readMargin -> combined into the className of the #readInner div
var readStyle='style-newspaper';
var readSize='size-large';
var readMargin='margin-narrow';

// Default "too few words" threshold used by clean(): 30% of the number of
// space-separated pieces in the whole page, measured once at load time.
// A document without a <body> has no text to measure; use 0 there (which
// makes clean() keep everything) instead of throwing before guts() runs.
var bigWords = document.body ? getCharCount(document.body, " ")*0.3 : 0;

/**
 * Replaces the page with a bare reading view.
 *
 * Builds <div id="readOverlay"><div id="readInner">…article…</div></div>,
 * where the article comes from grabArticle(), then empties <body>, applies
 * plain black-on-white styling and inserts the overlay as the only child.
 * Reads the global presets readStyle, readSize and readMargin.
 */
function guts(){
    // For totally hosed HTML there may be no <body> at all. Create it
    // first: everything below, including grabArticle(), dereferences
    // document.body.
    if(document.body == null){
        var body = document.createElement("body");
        document.body = body;
    }
    var objOverlay = document.createElement("div");
    var objinnerDiv = document.createElement("div");
    objOverlay.id = "readOverlay";
    objinnerDiv.id = "readInner";
    // Apply user-selected styling:
    document.body.className = readStyle;
    objOverlay.className = readStyle;
    objinnerDiv.className = readMargin + " " + readSize;
    // Extract the article while the original page is still in place, and
    // nest it: article -> inner div -> overlay.
    objinnerDiv.appendChild(grabArticle());
    objOverlay.appendChild(objinnerDiv);
    // Throw away whatever is left of the original page ...
    document.body.innerHTML = "";
    // ... and show the new content on a plain background.
    document.body.style.backgroundColor = "white";
    document.body.style.color = "black";
    document.body.style.font="12pt Helvetica";
    document.body.style.margin = "7% 7% 7% 7%";
    // The body was just emptied, so appending makes the overlay its
    // first (and only) child.
    document.body.appendChild(objOverlay);
}

/**
 * Locates the element most likely to hold the article, cleans it up and
 * returns it wrapped together with a title and a footer.
 *
 * Side effects on the live page: document.body.innerHTML is rewritten
 * (doubled <br> become paragraph breaks, <font> tags are dropped), every
 * parent of a <p> gets a `tidypage` expando holding its score, linked
 * stylesheets whose URL does not contain "tidypage" are disabled, <style>
 * blocks are emptied (except on IE), and the winning node is moved out of
 * the page into the returned wrapper.
 *
 * @returns {Element} a detached <div> holding an <h1> with the page title,
 *     an <h3> with the page URL (only from the second call on, tracked via
 *     document.alreadySeen), the winning content node and a footer <div>.
 */
function grabArticle(){
    var allParagraphs = document.getElementsByTagName("p");
    var topDiv = null;
    var articleContent = document.createElement("DIV");
    var articleTitle = document.createElement("H1");
    var articleLocation = document.createElement("H3");
    var articleFooter = document.createElement("DIV");
    // Class/id names hinting at boilerplate vs. article containers.
    var unlikelyName = /(comment|meta|footer|footnote)/;
    var likelyClass = /(^|\s)(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)(\s|$)/;
    var likelyId = /^(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)$/;
    var parentNode, className, allNodes, node, styleTags, j, k;

    // Replace all doubled-up <BR> tags with <P> tags, and remove fonts.
    // Regex literals are used so that \s really means "whitespace"; the
    // "i" flag covers serializers that emit upper-case tag names.
    document.body.innerHTML = document.body.innerHTML
        .replace(/<br\s*\/?>\s*<br\s*\/?>/gi, "</p><p>")
        .replace(/<\/?font[^>]*>/gi, '');

    // Use the document title as the article heading. It is inserted as a
    // text node so that "<" or "&" in the title are not parsed as markup.
    articleTitle.appendChild(document.createTextNode(document.title));
    articleContent.appendChild(articleTitle);
    if (document.alreadySeen){
        articleLocation.appendChild(document.createTextNode(document.location.href));
        articleContent.appendChild(articleLocation);
    } else{
        document.alreadySeen = true;
    }

    // Study all the paragraphs and find the chunk that has the best score.
    // A score is determined by things like: Number of <p>'s, commas, special classes, etc.
    for (j = 0; j < allParagraphs.length; j++){
        parentNode = allParagraphs[j].parentNode;
        // Initialize tidypage data the first time this parent is seen.
        if(typeof parentNode.tidypage == 'undefined'){
            parentNode.tidypage = {"contentScore": 0};
            // className is not a plain string on every element type
            // (e.g. SVG), so only inspect it when it is one.
            className = typeof parentNode.className == 'string' ? parentNode.className : '';
            // Look for a special classname
            if(unlikelyName.test(className))
                parentNode.tidypage.contentScore -= 50;
            else if(likelyClass.test(className))
                parentNode.tidypage.contentScore += 25;
            // Look for a special ID
            if(unlikelyName.test(parentNode.id))
                parentNode.tidypage.contentScore -= 50;
            else if(likelyId.test(parentNode.id))
                parentNode.tidypage.contentScore += 25;
        }
        // Add a point for the paragraph found
        if(getInnerText(allParagraphs[j]).length > 10)
            parentNode.tidypage.contentScore++;
        // Add the number of comma-separated pieces in this paragraph
        // (i.e. its comma count plus one).
        parentNode.tidypage.contentScore += getCharCount(allParagraphs[j]);
    }

    // Pick the highest-scoring node; on a tie the earliest one in
    // document order wins. The node list is queried once, not per step.
    allNodes = document.getElementsByTagName('*');
    for (k = 0; k < allNodes.length; k++){
        node = allNodes[k];
        if(typeof node.tidypage != 'undefined' && (topDiv == null || node.tidypage.contentScore > topDiv.tidypage.contentScore))
            topDiv = node;
    }
    if(topDiv == null){
        topDiv = document.createElement('div');
        topDiv.innerHTML = 'Sorry, tidypage was unable to parse this page for content. If you feel like it should have been able to, please <a href="mailto:tidypage@kalvis.com">let us know by submitting a bug report.</a>';
    }

    // Disable every linked stylesheet except tidypage's own.
    for (k = 0; k < document.styleSheets.length; k++) {
        if (document.styleSheets[k].href != null && document.styleSheets[k].href.lastIndexOf("tidypage") == -1){
            document.styleSheets[k].disabled = true;
        }
    }
    // Remove all style tags in head (not doing this on IE):
    if (navigator.appName != "Microsoft Internet Explorer"){
        styleTags = document.getElementsByTagName("style");
        for (j = 0; j < styleTags.length; j++)
            styleTags[j].textContent = "";
    }

    cleanStyles(topDiv);         // Removes all style attributes
    topDiv = killDivs(topDiv);   // Removes DIVs that have more non-<p>
                                 // stuff than <p> stuff
    topDiv = killBreaks(topDiv); // Collapses consecutive <br />'s into
                                 // just one <br />
    // Cleans out junk from the topDiv just in case:
    topDiv = clean(topDiv, "form");
    topDiv = clean(topDiv, "object");
    topDiv = clean(topDiv, "table", 250);
    topDiv = clean(topDiv, "h1");
    topDiv = clean(topDiv, "h2");
    topDiv = clean(topDiv, "iframe");

    // Add the footer and contents:
    articleFooter.id = "readFooter";
    articleFooter.innerHTML = "<div style='color: red' class='footer-right' > <a href='http://news.kalvis.com/'>news&raquo;</a> <span id='comment' onclick='changeComment(this)' class='version'> tidypage " + tidypageVersion + " --- " + new Date().toString().replace(/GMT.*/,'') + "</span> </div>";
    articleContent.appendChild(topDiv);
    articleContent.appendChild(articleFooter);
    return articleContent;
}

/**
 * Lets the user edit the comment shown at the end of the text.
 *
 * Opens a prompt pre-filled with the element's current markup and writes
 * the answer back as HTML. Cancelling the prompt (which yields null)
 * leaves the comment untouched instead of blanking it.
 *
 * @param {Element} pig element whose innerHTML holds the comment
 */
function changeComment(pig){
    var answer = prompt('Change the comment', pig.innerHTML);
    if (answer !== null && typeof answer != 'undefined')
        pig.innerHTML = answer;
}

/**
 * Returns the text of a node, cross-browser.
 *
 * Uses feature detection rather than the browser name: the standard
 * textContent is preferred, innerText is the fallback for engines that
 * lack textContent. Always returns a string, so callers can use
 * .length / .split() without a null check.
 *
 * @param {Node} e node to read
 * @returns {string} the node's text, or "" when neither property is a string
 */
function getInnerText(e){
    if (typeof e.textContent == "string")
        return e.textContent;
    if (typeof e.innerText == "string")
        return e.innerText;
    return "";
}

/**
 * Counts the pieces a node's text falls into when cut at a separator.
 *
 * Note that this is the number of separator occurrences plus one, so
 * text without any separator yields 1.
 *
 * @param {Node} e node whose text (via getInnerText) is examined
 * @param {string} [s] separator; a missing or empty value means ","
 * @returns {number} number of pieces
 */
function getCharCount (e, s){
    var separator = s ? s : ",";
    var pieces = getInnerText(e).split(separator);
    return pieces.length;
}

/**
 * Strips the inline `style` attribute from a node and all elements below it.
 *
 * Each element is visited once, depth-first. Non-element nodes (text,
 * comments, the document itself) carry no attributes and are only
 * traversed, never modified.
 *
 * @param {Node} [e] root of the subtree to clean; defaults to `document`
 */
function cleanStyles(e){
    e = e || document;
    // nodeType 1 == element; only elements have removeAttribute().
    if (e.nodeType == 1)
        e.removeAttribute('style');
    for (var child = e.firstChild; child != null; child = child.nextSibling){
        if (child.nodeType == 1)
            cleanStyles(child);
    }
}

/**
 * Drops <div>s below `e` that look like navigation, ads or media rather
 * than prose.
 *
 * A div is removed when getCharCount() reports fewer than 10
 * comma-separated pieces for it AND at least one of these holds: it has
 * no <p>, it contains an <embed>, or it has more <img>, <li> or <a>
 * elements than <p> elements.
 *
 * @param {Element} e container to prune (modified in place)
 * @returns {Element} the same element `e`
 */
function killDivs(e){
    var divs = e.getElementsByTagName("div");
    // Walk from the last div to the first so that removing a div (and
    // with it any divs nested inside) never disturbs entries still to come.
    var idx = divs.length;
    while (idx-- > 0){
        var div = divs[idx];
        var paragraphs = div.getElementsByTagName("p").length;
        var images = div.getElementsByTagName("img").length;
        var listItems = div.getElementsByTagName("li").length;
        var links = div.getElementsByTagName("a").length;
        var embeds = div.getElementsByTagName("embed").length;
        // Plenty of commas is a good sign: leave such divs alone.
        if (getCharCount(div) >= 10)
            continue;
        var looksLikeJunk = images > paragraphs
            || listItems > paragraphs
            || links > paragraphs
            || paragraphs == 0
            || embeds > 0;
        if (looksLikeJunk)
            div.parentNode.removeChild(div);
    }
    return e;
}

/**
 * Collapses every run of <br> tags into a single "<br />".
 *
 * A run is one or more <br> tags (written as <br>, <br/> or <br />, in any
 * letter case), each optionally followed by whitespace or &nbsp; entities;
 * that trailing filler is swallowed as well. The element's innerHTML is
 * rewritten in place.
 *
 * @param {Element} e element whose markup is tidied
 * @returns {Element} the same element `e`
 */
function killBreaks(e){
    e.innerHTML = e.innerHTML.replace(/(<br\s*\/?>(\s|&nbsp;?)*)+/gi, '<br />');
    return e;
}

/**
 * Removes every <tags> element below `e` that carries too little text.
 *
 * "Too little" means getCharCount(element, " ") - the number of
 * space-separated pieces of its text - is below `minWords`.
 *
 * @param {Element} e container to prune (modified in place)
 * @param {string} tags tag name to look for, e.g. "form" or "table"
 * @param {number} [minWords] threshold; when omitted (or 0) the global
 *     bigWords is used
 * @returns {Element} the same element `e`
 */
function clean(e, tags, minWords){
    var targetList = e.getElementsByTagName(tags);
    minWords = minWords || bigWords;
    // getElementsByTagName returns a live list: removing an entry shifts
    // all later ones down. Walking backwards keeps the indices still to
    // be visited stable, so no element is skipped.
    for (var y = targetList.length - 1; y >= 0; y--){
        var target = targetList[y];
        // If the text content isn't laden with words, remove the child:
        if (getCharCount(target, " ") < minWords){
            target.parentNode.removeChild(target);
        }
    }
    return e;
}

// Entry point. document.kalvjaTidy remembers whether this script has
// already tidied the current page.
if (!document.kalvjaTidy){
    // First run on this page: mark it and build the reading view. guts()
    // is invoked twice in a row, so the second pass works on the output
    // of the first.
    document.kalvjaTidy = true;
    guts();
    guts();
} else {
    // Repeat run: the page is already tidy, so just enlarge the body
    // font by 20% (the size is read and written in points).
    document.body.style.fontSize = (document.body.style.fontSize.replace(/pt/, "") * 1.2) + "pt";
}

//Local Variables:
//mode: java
//End:
