<%@ page language="java" import="java.util.*,java.text.*,java.io.*" %>
<%@page import="org.apache.xerces.parsers.*" %>
<%@page import="org.w3c.dom.*" %>
<%@page import="org.xml.sax.SAXException" %>
<%!
/***********************************************UNWANTED LINES **********************************************************************************/
/** Debug level: most verbose tracing. */
static final int DETAIL = 2;
/** Debug level: intermediate tracing. */
static final int CLEAN = 1;
/** Debug level: tracing switched off. */
static final int NO = 0;

/** Active debug level; assign NO, CLEAN or DETAIL to change how much is traced. */
int debug = NO;

/** Separator characters handed to StringTokenizer when a line is split into words. */
String delimiter = " \t\n\r\f~`!@#$%^&*()_-+={}[]|\\:;\"',.?>";

/** Error text accumulated by setError; empty while no error has been recorded. */
String str_error_detail = "";
/**
 * Splits a list of field names into its individual entries.
 *
 * @param str_fields field names separated by commas and/or spaces; may be null
 * @return a Vector of the names in order of appearance, or null when
 *         str_fields is null or empty
 */
Vector parseFields(String str_fields) {
    // Nothing to split: callers receive null, not an empty Vector.
    if (str_fields == null || str_fields.length() == 0) {
        return null;
    }
    Vector names = new Vector();
    for (StringTokenizer tokens = new StringTokenizer(str_fields, ", "); tokens.hasMoreTokens(); ) {
        names.add(tokens.nextToken());
    }
    return names;
}
void setError(String msg)
{
if ( !str_error_detail.equals("") )
str_error_detail += "
";
str_error_detail += msg;
}
/**
 * Reports whether a line contains a word equal to key, ignoring case.
 * The line is first passed through removeHTMLTags and then split into words
 * using the characters in delimiter.
 *
 * A line that (after tag removal) contains "href", "src=", "title" or
 * "window." never matches.
 *
 * @param line the text line to search
 * @param key  the word to look for
 * @return true when some word of the cleaned line equals key ignoring case
 */
boolean findExact(String line, String key) {
    line = removeHTMLTags(line);
    StringTokenizer words = new StringTokenizer(line, delimiter);

    // A line with no words can never match; returning here also keeps the
    // exclusion checks below from running on such lines, as before.
    if (!words.hasMoreTokens()) {
        return false;
    }

    // These checks depend only on the line, not on the current word, so they
    // are evaluated once instead of once per token.
    // NOTE(review): the boolean passed to indexOf presumably requests a
    // case-insensitive search -- confirm against the helper.
    if (line.indexOf("href") != -1
            || indexOf(line, "src=", true) != -1
            || indexOf(line, "title", true) != -1
            || indexOf(line, "window.", true) != -1) {
        return false;
    }

    while (words.hasMoreTokens()) {
        if (words.nextToken().equalsIgnoreCase(key)) {
            return true;
        }
    }
    return false;
}
/**
 * Scans a text file line by line and reports whether find(line, key)
 * succeeds for any line. Reading stops at the first matching line.
 *
 * @param fileName path of the file to read
 * @param key      the search key handed to find
 * @return true when at least one line matches
 * @throws Exception when the file cannot be opened or read; a read failure
 *                   is wrapped with the original IOException as its cause
 */
boolean fileFind(String fileName, String key) throws Exception {
    boolean found = false;
    BufferedReader file = new BufferedReader(new FileReader(fileName));
    try {
        String line;
        // Stop reading as soon as a line matches.
        while (!found && (line = file.readLine()) != null) {
            found = find(line, key);
        }
    } catch (IOException ioe) {
        // Keep the original exception as the cause so the stack trace survives.
        throw new Exception("IOException: " + ioe.getMessage(), ioe);
    } finally {
        file.close();
    }
    return found;
}
/**
 * Counts the words of a line that equal key, ignoring case.
 * Words are obtained by splitting the line on the characters in delimiter.
 *
 * @param line the text to split into words
 * @param key  the word to count
 * @return the number of matching words
 */
int count(String line, String key) {
    int matches = 0;
    for (StringTokenizer tokens = new StringTokenizer(line, delimiter); tokens.hasMoreTokens(); ) {
        String candidate = tokens.nextToken();
        matches += candidate.equalsIgnoreCase(key) ? 1 : 0;
    }
    return matches;
}
/**
 * Sums count(line, key) over every line of a text file.
 *
 * @param fileName path of the file to read
 * @param key      the word to count
 * @return the total number of occurrences across all lines
 * @throws Exception when the file cannot be opened or read; a read failure
 *                   is wrapped with the original IOException as its cause
 */
int fileCount(String fileName, String key) throws Exception {
    int total = 0;
    BufferedReader file = new BufferedReader(new FileReader(fileName));
    try {
        String line;
        while ((line = file.readLine()) != null) {
            total += count(line, key);
        }
    } catch (IOException ioe) {
        // Keep the original exception as the cause so the stack trace survives.
        throw new Exception("IOException: " + ioe.getMessage(), ioe);
    } finally {
        file.close();
    }
    return total;
}
/**
 * Reads a file and returns the text found between its opening and closing
 * head tags. Lines are concatenated without any separator, and reading stops
 * at the first line that contains the closing tag.
 *
 * @param fileName path of the HTML file to read
 * @return the head content, or an empty string when no opening tag was seen
 * @throws Exception when the file cannot be opened or read; a read failure
 *                   is wrapped with the original IOException as its cause
 */
String getHead(String fileName) throws Exception {
    StringBuffer entireHead = new StringBuffer();
    BufferedReader file = new BufferedReader(new FileReader(fileName));
    try {
        String line = null;
        boolean startHead = false;
        boolean endHead = false;
        while (!endHead && (line = file.readLine()) != null) {
            int start = 0;
            int end = line.length();

            // NOTE(review): both search strings were empty in the source; the
            // +6 offset below matches the length of "<head>", so the tag
            // literals are restored here -- confirm against the original page.
            int headIndex = indexOf(line, "<head>", true);
            if (headIndex != -1) {
                start = headIndex + 6; // skip past the opening tag itself
                startHead = true;
            }

            int endHeadIndex = indexOf(line, "</head>", true);
            if (endHeadIndex != -1) {
                end = endHeadIndex; // keep only the text before the closing tag
                endHead = true;
            }

            // Lines before the opening tag are ignored.
            if (startHead) {
                entireHead.append(line.substring(start, end));
            }
        }
    } catch (IOException ioe) {
        // Keep the original exception as the cause so the stack trace survives.
        throw new Exception("IOException: " + ioe.getMessage(), ioe);
    } finally {
        file.close();
    }
    return entireHead.toString();
}
/**
 * Extracts the text between the opening and closing title tags of a head
 * section.
 *
 * @param head the head content to search
 * @return the title text, or an empty string when either tag is missing or
 *         the closing tag does not come after the opening tag
 */
String getTitle(String head) {
    // NOTE(review): both search strings were empty in the source; the +7
    // offset matches the length of "<title>", so the tag literals are
    // restored here -- confirm against the original page.
    int start = indexOf(head, "<title>", true);
    int end = indexOf(head, "</title>", true);

    int textStart = start + 7; // first character after the opening tag

    // Checked explicitly rather than relying on substring throwing: with a
    // missing opening tag (start == -1) substring(6, end) would not throw
    // when end >= 6 and would return unrelated text.
    if (start == -1 || end < textStart) {
        return "";
    }
    return head.substring(textStart, end);
}
/**
 * Collects the name/content pairs of the meta tags found in a head section.
 *
 * Only tags whose text contains both "NAME=" and "CONTENT=" are considered,
 * and a pair is recorded only when NAME is the first attribute and CONTENT
 * the second.
 *
 * @param head the head content to scan
 * @return a HashMap from upper-cased meta name to its content string; empty
 *         when no usable meta tag is found
 */
HashMap getMETA(String head) {
    HashMap metas = new HashMap();
    String left = head; // the part of the head that has not been scanned yet
    do {
        // NOTE(review): this lookup was corrupted in the source ("start" was
        // used in its own initializer and "end" was never declared). It is
        // reconstructed from the +5 offset below (length of "<meta") and the
        // four-argument indexOf call that remained; the ">" terminator is an
        // assumption -- confirm against the original page.
        int start = indexOf(left, "<meta", true);
        if (start == -1) {
            break; // got all metas
        }
        int end = indexOf(left, ">", start, true);
        if (end == -1) {
            break; // unterminated tag: nothing more to parse
        }

        String meta = left.substring(start + 5, end);
        if (indexOf(meta, "NAME=", true) != -1 && indexOf(meta, "CONTENT=", true) != -1) {
            // Splitting on '=' and '"' yields: attribute, value, attribute, value, ...
            StringTokenizer st = new StringTokenizer(meta, "=\"");
            String name = null;
            String content = null;
            if (st.hasMoreTokens()) {
                String word = st.nextToken().trim();
                if (word.equalsIgnoreCase("NAME")) {
                    if (st.hasMoreTokens()) {
                        name = st.nextToken().trim();
                    }
                }
                if (st.hasMoreTokens()) {
                    String word2 = st.nextToken().trim();
                    if (word2.equalsIgnoreCase("CONTENT")) {
                        if (st.hasMoreTokens()) {
                            content = st.nextToken().trim();
                        }
                    }
                }
            }
            if (name != null && content != null) {
                metas.put(name.toUpperCase(), content);
            }
        }

        // Continue scanning after this tag.
        left = left.substring(end);
    } while (left.length() > 0);
    return metas;
}
/**
 * Computes the share of true entries among the values of a result map,
 * as a whole-number percentage.
 *
 * @param result map whose values are all Boolean objects
 * @return the percentage of values that are true, rounded to the nearest integer
 */
int getPercentageInANDFind (HashMap result)
{
    // Accumulated as a float so the division below is floating point.
    float hits = 0;
    Iterator flags = result.values().iterator();
    while (flags.hasNext()) {
        Boolean flag = (Boolean) flags.next();
        if (flag.booleanValue()) {
            hits++;
        }
    }
    return Math.round(100 * hits / result.size());
}
/*******************************************************************************************************************************************/
/**************************************************
COMMON Functions
Recursively remove HTML Tags (Supriya)
****************************************************/
String removeHTMLTags(String line)
{
if(line.indexOf("href")!=-1)
return "";
if(line.indexOf("RZ")!=-1)
return "";
if(line.indexOf("