components, check for keyword in
for (int i = 0; i < files.getLength(); i++)
{
String titleContent = "";
String desc = "";
String fileName = "";
Element file = (Element) files.item(i);
fileName = file.getAttribute("url");
if (fileName.length()==0)
fileName = "Filename Not Specfied";
NodeList titleList = file.getElementsByTagName("title");
NodeList metaList = file.getElementsByTagName("meta");
Element title = (Element)titleList.item(0);
if (title != null)
titleContent = getText(title);
else
titleContent = fileName;
boolean keywordMatch = false;
//----- Extract meta content (only keywords and description currently supported)
// TODO: build vector of all requested meta tags
for (int j = 0; j < metaList.getLength(); j++)
{
Element meta = (Element) metaList.item(j);
String metaName = meta.getAttribute("name");
if (metaName == null || metaName.length() == 0)
throw new IllegalArgumentException("Missing meta name attribute or value");
if (metaName.equalsIgnoreCase("keywords"))
{
String keywords = getText(meta);
if(keywords.indexOf(keyword) != -1) keywordMatch = true;
}
else if (metaName.equalsIgnoreCase("description"))
desc = meta.getAttribute("content");
}
//----- keyword match found in keywords
if (keywordMatch)
{
HashMap fields = new HashMap();
fields.put("TITLE", titleContent);
fields.put("DESCRIPTION", desc);
fields.put("PERCENTAGE", new Integer(100));
fields.put("URL", location+fileName); //base url appended later
matchingsInXMLFile.put(fileName, fields);
}
} // end files loop
}
// Collects the text content of an XML node and of everything nested below it.
//   node - the DOM node to read; must not be null
//   returns the node's own value (if it has one) followed by the text of each
//           child, in document order; "" for comments and processing instructions
String getText(Node node)
{
    // Text is gathered from elements, entity references, CDATA sections and
    // text nodes. Comments and processing instructions contribute nothing.
    switch (node.getNodeType())
    {
        case Node.COMMENT_NODE:
        case Node.PROCESSING_INSTRUCTION_NODE:
            return "";
        default:
            break;
    }

    StringBuffer collected = new StringBuffer();

    // Nodes without a value of their own (e.g. elements) report null here.
    String ownValue = node.getNodeValue();
    if (ownValue != null)
    {
        collected.append(ownValue);
    }

    // Walk the children in document order and recurse into each of them.
    for (Node kid = node.getFirstChild(); kid != null; kid = kid.getNextSibling())
    {
        collected.append(getText(kid));
    }
    return collected.toString();
}
////////////////////////// end of declarations ////////////////////////////
%>
<%
////////////////////////// start of main line /////////////////////////////
// Caching is deliberately left enabled so the browser's back button still
// works after the user follows a result link.

// Reset the error text left over from a previous run (variable declared outside this section).
str_error_detail = ""; //clear prior error messages

//----- Read the raw search parameters from the request.
//      Anything not supplied is null at this point.

// What to look for and how to combine the terms
String str_keywords     = request.getParameter("RZkeywords");
String str_option       = request.getParameter("RZoption");
String str_scope        = request.getParameter("RZscope");
String str_subscope     = request.getParameter("RZsubscope");
String str_metaName     = request.getParameter("RZmeta_name");

// Where to look
String str_pathOption   = request.getParameter("RZpath_option");
String str_base_url     = request.getParameter("RZbase_url_override");
String str_base_dir     = request.getParameter("RZbase_dir_override");
String str_location     = request.getParameter("RZlocation_override");
if (str_location == null)
{
    // no override supplied: fall back to "/"
    str_location = "/";
}

// Which files take part
String str_filenames    = request.getParameter("RZfilenames");     //(e.g. "*.html,*.htm")
String str_excludeFiles = request.getParameter("RZexclude_files"); //(e.g. "*.gif,*.jpg")
String str_excludeDirs  = request.getParameter("RZexclude_dirs");  //(e.g. "images,docs")

// Presentation / traversal switches
String str_linkTarget   = request.getParameter("RZtarget");
String str_subfolders   = request.getParameter("RZsubfolders");
//MVW added 11/4/04 to default search to location of this file

//----- No base URL supplied: use the request URL up to (not including) its last "/"
if (str_base_url == null || str_base_url.equals("")) {
    String requestURL = request.getRequestURL().toString();
    int separatorIndex = requestURL.lastIndexOf("/");
    if (separatorIndex != -1) {
        str_base_url = requestURL.substring(0, separatorIndex);
    }
}

//----- No base dir supplied: derive it from the web application's real root
//      plus the directory part of the servlet path.
// The null/empty test is parenthesised on purpose: && binds tighter than ||,
// so without the parentheses a null str_base_dir skipped the servlet-path
// null guard entirely.
if ((str_base_dir == null || str_base_dir.equals("")) && request.getServletPath() != null) {
    String servletPath = request.getServletPath();
    int separatorIndex = servletPath.lastIndexOf("/");
    if (separatorIndex >= 0) {
        // keep only the directory part of the servlet path
        servletPath = servletPath.substring(0, separatorIndex);
    }
    servletPath = replaceAll(servletPath, "/", File.separator);

    // getRealPath may return null (e.g. when the application is not served
    // from the file system). In that case leave str_base_dir unset instead
    // of building a path out of the text "null".
    String realRoot = application.getRealPath("/");
    if (realRoot != null) {
        // Make sure exactly one separator sits between the root and the sub-path.
        if (!realRoot.endsWith(File.separator) && !realRoot.endsWith("/")) {
            realRoot += File.separator;
        }
        // substring(1) drops the leading separator of the servlet directory
        str_base_dir = realRoot
            + (servletPath.length() > 1 ? servletPath.substring(1) : "");
    }
}
//End MVW additions

///////////////// added 12/08/02 for searching xml file ////////////
// new option - search xml file for keywords
String str_xmlfile = request.getParameter("RZxmlfile");
////////////////////////////////////////////////////////////////////
//----- Debug aid: echo the raw request parameters into the page.
// Runs before defaults are applied, so missing parameters print as "null".
// NOTE(review): the values are written to the response unescaped; keep this
// debug level switched off on any publicly reachable installation.
if(debug >= CLEAN) {
    // A Java string literal may not span a physical line break, so the line
    // breaks that were embedded in these literals are written as \n escapes.
    out.println("Searching parameters\n");
    out.println("");
    out.println("keywords >>> " +str_keywords );
    out.println("scope >>> " +str_scope );
    out.println("subscope >>> " +str_subscope );
    out.println("metaName >>> " +str_metaName );
    out.println("option >>> " +str_option );
    out.println("pathOption >>> " +str_pathOption );
    out.println("base_url >>> " +str_base_url );
    out.println("base_dir >>> " +str_base_dir );
    out.println("location >>> " +str_location );
    out.println("filenames >>> " +str_filenames );
    out.println("excludeFiles >>> " +str_excludeFiles );
    out.println("excludeDirs >>> " +str_excludeDirs );
    out.println("linkTarget >>> " +str_linkTarget );
    out.println("subfolders >>> " +str_subfolders );
    out.println("xmlfile >>> " +str_xmlfile );
    out.println("\n");
    out.println("\n");
}
//----- Set Intelligent Defaults

// Step 1: a parameter that was not supplied becomes the empty string.
str_keywords     = (str_keywords     != null) ? str_keywords     : "";
str_scope        = (str_scope        != null) ? str_scope        : "";
str_subscope     = (str_subscope     != null) ? str_subscope     : "";
str_metaName     = (str_metaName     != null) ? str_metaName     : "";
str_option       = (str_option       != null) ? str_option       : "";
str_pathOption   = (str_pathOption   != null) ? str_pathOption   : "";
str_base_url     = (str_base_url     != null) ? str_base_url     : "";
str_base_dir     = (str_base_dir     != null) ? str_base_dir     : "";
str_location     = (str_location     != null) ? str_location     : "";
str_filenames    = (str_filenames    != null) ? str_filenames    : "";
str_excludeFiles = (str_excludeFiles != null) ? str_excludeFiles : "";
str_excludeDirs  = (str_excludeDirs  != null) ? str_excludeDirs  : "";
str_linkTarget   = (str_linkTarget   != null) ? str_linkTarget   : "";
str_subfolders   = (str_subfolders   != null) ? str_subfolders   : "";
str_xmlfile      = (str_xmlfile      != null) ? str_xmlfile      : "";

// Step 2: use forward slashes throughout the path-like values.
str_base_url  = str_base_url.replace('\\', '/');
str_base_dir  = str_base_dir.replace('\\', '/');
str_location  = str_location.replace('\\', '/');
str_filenames = str_filenames.replace('\\', '/');
str_xmlfile   = str_xmlfile.replace('\\', '/');

// Step 3: non-empty base URL, base dir and location always end with "/".
if (!str_base_url.equals("") && !str_base_url.endsWith("/"))
{
    str_base_url = str_base_url + "/";
}
if (!str_base_dir.equals("") && !str_base_dir.endsWith("/"))
{
    str_base_dir = str_base_dir + "/";
}
if (!str_location.equals("") && !str_location.endsWith("/"))
{
    str_location = str_location + "/";
}

// Step 4: fallback values for options the caller left blank.
if (str_option.equals(""))     { str_option     = "AND";      }
if (str_pathOption.equals("")) { str_pathOption = "URL";      }
if (str_scope.equals(""))      { str_scope      = "FILE";     }
if (str_subscope.equals(""))   { str_subscope   = "KEYWORDS"; }
if (str_linkTarget.equals("")) { str_linkTarget = "_self";    }
//----- Subscope only applies to HEAD searches: "OTHER" means the
//      caller-supplied meta name, anything else is used as given.
String subscope = null;
if (str_scope.equals("HEAD"))
{
    subscope = str_subscope.equals("OTHER") ? str_metaName : str_subscope;
}

//----- Subfolders are included whenever the RZsubfolders parameter is non-empty
boolean includeSubDir = (str_subfolders.length() != 0);

//----- For an EXACT search the whole (trimmed) keyword string is the search term;
//      for every other option it stays null.
String exactKeyword = str_option.equals("EXACT") ? str_keywords : null;
if (exactKeyword != null)
{
    exactKeyword = exactKeyword.trim();
}
//----- Work out the real path to search.
//      searchPath stays null when no location is given, or when the URL
//      path option is in effect but no base directory is known.
String searchPath = null;
if (!str_location.equals(""))
{
    String trimmedLocation = str_location.trim();
    boolean locationIsUrl = str_pathOption.toUpperCase().equals("URL");

    if (!locationIsUrl)
    {
        // location is used as-is
        searchPath = trimmedLocation;
    }
    else if (str_base_dir.length() == 0)
    {
        // a URL-style location cannot be resolved without a base directory
        setError("Invalid Search Configuration - Base server dir not specified.");
    }
    else if (trimmedLocation.equals("/"))
    {
        //treat as absolute reference: the base directory itself
        searchPath = str_base_dir;
    }
    else
    {
        searchPath = str_base_dir + trimmedLocation;
    }
}
//----- Resolve the excluded directory names against the search path.
//      Existing directories are collected; every missing one is reported via setError.
Vector excludeDirs = parseFields(str_excludeDirs);
Vector excludeRealDirs = null;
if (searchPath != null && excludeDirs != null)
{
    excludeRealDirs = new Vector();
    for (Iterator dirNames = excludeDirs.iterator(); dirNames.hasNext(); )
    {
        String dirName = (String) dirNames.next();
        String resolved = constructRealPath(searchPath, dirName);
        if (!new File(resolved).exists())
        {
            setError("The excluded directory does not exist: " + resolved);
        }
        else
        {
            excludeRealDirs.add(resolved);
        }
    }
}

//----- Not yet used (keep for reference in case needed)
String separator = System.getProperty("file.separator");

//----- Split the remaining list-valued parameters into vectors
Vector rawKeywords  = parseFields(str_keywords);
Vector fileNames    = parseFields(str_filenames);
Vector excludeFiles = parseFields(str_excludeFiles);
//----- Debug aid: log the effective parameters and derived search variables to stdout
if (debug >= CLEAN)
{
    // label/value pairs, printed as label immediately followed by value
    String[][] paramDump = {
        { "keywords >>> ",     str_keywords },
        { "scope >>> ",        str_scope },
        { "subscope >>> ",     str_subscope },
        { "metaName >>> ",     str_metaName },
        { "option >>> ",       str_option },
        { "pathOption >>> ",   str_pathOption },
        { "base_url >>> ",     str_base_url },
        { "base_dir >>> ",     str_base_dir },
        { "location >>> ",     str_location },
        { "filenames >>> ",    str_filenames },
        { "excludeFiles >>> ", str_excludeFiles },
        { "excludeDirs >>> ",  str_excludeDirs },
        { "linkTarget >>> ",   str_linkTarget },
        { "subfolders >>> ",   str_subfolders },
        { "xmlfile >>> ",      str_xmlfile }
    };
    System.out.println("-------------- PARAMETERS (Defaults Set) ------------");
    for (int row = 0; row < paramDump.length; row++)
    {
        System.out.println(paramDump[row][0] + paramDump[row][1]);
    }

    String[] computedLabels = {
        "rawKeywords:", "exactKeyword:", "searchPath:",
        "filenames:", "excludeFiles:", "excludeDirs:"
    };
    Object[] computedValues = {
        rawKeywords, exactKeyword, searchPath,
        fileNames, excludeFiles, excludeDirs
    };
    System.out.println("-------------- Computed SEARCH Variables ------------");
    for (int row = 0; row < computedLabels.length; row++)
    {
        System.out.println(computedLabels[row] + computedValues[row]);
    }
}
///////////////////////////// start file search ///////////////////////////////////
long startTime = System.currentTimeMillis();
long numOfFilesProcessed = 0; // not implemented yet
HashMap finalMatchings = new HashMap();

//----- The file search runs when no XML file was requested, or when file name
//      patterns were given anyway; it also needs keywords and a search path.
if ( (str_xmlfile.length() == 0 || str_filenames.length() > 0)
     && rawKeywords != null
     && searchPath != null )
{
    if (!str_option.equals("EXACT"))
    {
        //-------- Split the raw keywords: a term that follows the word NOT is
        //         an exclusion term, every other term is a wanted term.
        //         A trailing NOT with nothing after it is dropped.
        Vector wanted = new Vector();
        Vector unwanted = new Vector();
        int pos = 0;
        while (pos < rawKeywords.size())
        {
            String term = (String) rawKeywords.elementAt(pos);
            if (term.equals("NOT"))
            {
                pos++;
                if (pos < rawKeywords.size())
                {
                    String excludedTerm = (String) rawKeywords.elementAt(pos);
                    unwanted.add(excludedTerm);
                }
            }
            else
            {
                wanted.add(term);
            }
            pos++;
        }
        if (debug >= CLEAN) System.out.println("keywords:" + wanted);
        if (debug >= CLEAN) System.out.println("notKeywords:" + unwanted);

        HashMap hits = null;
        HashMap hitsNOT = null;

        if (!wanted.isEmpty() || !unwanted.isEmpty())
        {
            numOfFilesProcessed = getNumOfFiles(searchPath, true, includeSubDir);
        }

        //----- Search and weight the good keywords
        if (!wanted.isEmpty())
        {
            if (debug >= CLEAN) System.out.println("searching keywords.........................");
            try
            {
                if (str_scope.equals("FILE"))
                {
                    // whole-file search honours AND / OR; any other option finds nothing
                    if (str_option.equals("AND"))
                    {
                        hits = filePathANDFind(searchPath, wanted, fileNames, excludeFiles, excludeRealDirs, true, includeSubDir);
                    }
                    else if (str_option.equals("OR"))
                    {
                        hits = filePathORFind(searchPath, wanted, fileNames, excludeFiles, excludeRealDirs, true, includeSubDir);
                    }
                }
                else if (str_scope.equals("HEAD"))
                {
                    // HEAD searches use the AND finder whatever the option is
                    hits = headPathANDFind(searchPath, subscope, wanted, fileNames, excludeFiles, excludeRealDirs, true, includeSubDir);
                }
                if (debug >= CLEAN) System.out.println("matchings:" + hits);
            }
            catch (Exception e)
            {
                if (debug >= CLEAN) e.printStackTrace();
                setError("The path you specified to search does not exist.");
            }
        }

        //----- Search and weight the not keywords
        if (!unwanted.isEmpty())
        {
            if (debug >= CLEAN) System.out.println("searching NOT keywords.........................");
            try
            {
                if (str_scope.equals("FILE"))
                {
                    hitsNOT = filePathNOTFind(searchPath, unwanted, fileNames, excludeFiles, excludeRealDirs, true, includeSubDir);
                }
                else if (str_scope.equals("HEAD"))
                {
                    hitsNOT = headPathNOTFind(searchPath, subscope, unwanted, fileNames, excludeFiles, excludeRealDirs, true, includeSubDir);
                }
                if (debug >= CLEAN) System.out.println("matchingsNOT:" + hitsNOT);
            }
            catch (Exception e)
            {
                if (debug >= CLEAN) e.printStackTrace();
                setError("The path you specified to search does not exist.");
            }
        }

        //----- Combine the matching files
        if (str_option.equals("AND"))
        {
            finalMatchings = mergeMatchings(hits, wanted.size(), hitsNOT, unwanted.size());
        }
        else if (str_option.equals("OR"))
        {
            if (hits != null)
            {
                finalMatchings.putAll(hits);
            }
            if (hitsNOT != null)
            {
                finalMatchings.putAll(hitsNOT);
            }
        }
    }
    else
    {
        //-------- EXACT: look for the whole keyword string as one term
        try
        {
            boolean wholeFileScope = str_scope.equals("FILE");
            if (wholeFileScope || str_scope.equals("HEAD"))
            {
                numOfFilesProcessed = getNumOfFiles(searchPath, true, includeSubDir);
                if (wholeFileScope)
                {
                    finalMatchings = filePathEXACTFind(searchPath, exactKeyword, fileNames, excludeFiles, excludeRealDirs, true, includeSubDir);
                }
                else
                {
                    finalMatchings = headPathEXACTFind(searchPath, subscope, exactKeyword, fileNames, excludeFiles, excludeRealDirs, true, includeSubDir);
                }
            }
        }
        catch (Exception e)
        {
            if (debug >= CLEAN) e.printStackTrace();
            setError("The location specified to search does not exist: " + searchPath);
        }
    }
    if (debug >= CLEAN) System.out.println("final matchings:" + finalMatchings);
}
/////////////////////////////// end file search ////////////////////////////////
/////////////////////////////////////////////////////////////
// added 12/08/02 for searching xml file
//---------- Search XML file -----------//
// RZxmlfile may name several files separated by "|". Each one is addressed
// as a file: URL built from base dir + location + name and searched in turn.
matchingsInXMLFile = new HashMap();
if (!str_xmlfile.equals(""))
{
    for (StringTokenizer xmlNames = new StringTokenizer(str_xmlfile, "|"); xmlNames.hasMoreTokens(); )
    {
        String xmlPathFilename = "file:///" + str_base_dir + str_location + xmlNames.nextToken();
        try
        {
            // results are delivered through the matchingsInXMLFile HashMap
            searchXMLFile(xmlPathFilename, str_location, str_keywords);
        }
        catch (Exception e)
        {
            // a failing file is reported and the remaining files are still searched
            if (debug >= CLEAN) e.printStackTrace();
            setError("Error in searching xml file: " + e.getMessage());
        }
    }
}

//---------- put the two search results together ----------//
// XML matches are added on top of the file-search matches.
if (matchingsInXMLFile != null && !matchingsInXMLFile.isEmpty())
{
    if (finalMatchings == null)
    {
        finalMatchings = new HashMap();
    }
    finalMatchings.putAll(matchingsInXMLFile);
}
/////////////////////////////////////////////////////////////
//---------- Construct the result ----------//
// Post-processes every match in finalMatchings:
//   * OR searches: the score is forced to 100.
//   * URL path option (with both base URL and base dir known): the "URL"
//     field is rebuilt as base URL + the file path relative to the base dir.
if (finalMatchings != null)
{
    boolean forceFullScore = str_option.equals("OR");
    // Compared case-insensitively, matching how the search path was built
    // from the same option earlier on this page.
    boolean urlPathOption = str_pathOption.equalsIgnoreCase("URL");
    boolean rebuildUrls = urlPathOption
        && str_base_url.length() > 0
        && str_base_dir.length() > 0;

    if (forceFullScore || urlPathOption)
    {
        for (Iterator i = finalMatchings.entrySet().iterator(); i.hasNext(); )
        {
            Map.Entry entry = (Map.Entry) i.next();
            String filePathName = (String) entry.getKey();
            HashMap result = (HashMap) entry.getValue();

            // ignore the percentage of AND if search option is OR
            if (forceFullScore)
            {
                result.put("PERCENTAGE", new Integer(100));
            }

            // construct URL if search location is URL
            if (rebuildUrls)
            {
                String fileURI = filePathName.replace('\\', '/');
                // If the path contains the base directory, keep only what
                // follows it. Cutting at the position where the base
                // directory was actually found avoids chopping arbitrary
                // leading characters when it does not sit at index 0.
                int baseAt = fileURI.indexOf(str_base_dir);
                if (baseAt != -1)
                {
                    fileURI = fileURI.substring(baseAt + str_base_dir.length());
                }
                // prefix the base url
                result.put("URL", str_base_url + fileURI);
            }
        }
    }
}
//---------- sort the result by percentage/score ----------//
// Only AND searches are sorted. The null guard keeps a missing result vector
// from raising a NullPointerException here.
Vector sortedMatchings = getNodeVector(finalMatchings);
if (sortedMatchings != null && str_option.equals("AND")) {
    sort(sortedMatchings, 0, sortedMatchings.size() - 1);
}
//---------- Clean up results variables ----------//
/*
if (str_keywords == null || str_keywords.equals(""))
str_keywords = "** No search keywords **";*/
// Number of matches. finalMatchings itself may be null at this point (the
// code above allows for that), so it is checked before being dereferenced.
int int_count = 0;
if (sortedMatchings != null && finalMatchings != null) int_count = finalMatchings.size();
long endTime = System.currentTimeMillis();
double processTime = (endTime - startTime)/1000.0; // in seconds
/*TODO:
String str_search_status = numOfFilesProcessed.toString() + " files searched in ";
+ processTime.toString() + " seconds";
if(debug >= CLEAN)
System.out.println(str_search_status);
*/
%>