Get all pages, */
if(!$singlepage) {
/* While there is an unread file in our resource */
while (($file = readdir($dir)) !== false) {
/* Skip all files which aren't interesting */
if((strstr($file,$suffix))&&($file!=".")&&($file!="..")&&(strstr($file,$prefix))){
/* Collect informations */
$str[$file]=array();
$str[$file]['name'] = $file;
$str[$file]['size'] = filesize($basedir.$file);
/* Read file content too? */
if(!$onlyIndex){
$str[$file]['content'] = remove_unwanted_tags(linkwrapper(getcontents($basedir.$file),""),$replacements);
$str[$file]['headline'] = getheader_from_content($str[$file]['content']);
}
/* Include file status, for debugging, not used in script yet */
$str[$file]['stat'] = stat($basedir.$file);
$cnt++;
}
}
/* Only get one file */
}else{
/* Pages read = 1 */
$cnt = 1;
/* Prepare result*/
$file = $singlepage;
$str[$file] = array();
$str[$file]['name'] = $file;
$str[$file]['size'] = filesize($basedir.$file);
/* If onlyIndex == true skip reading content */
if(!$onlyIndex){
$str[$file]['content'] = remove_unwanted_tags(linkwrapper(getcontents($basedir.$file),""),$replacements);
$str[$file]['headline'] = getheader_from_content($str[$file]['content']);
}
/* Include file status, for debugging, not used in script yet */
$str[$file]['stat'] = stat($basedir.$file);
}
/* Sort to right order */
asort($str);
/* Endtime for Benchmark*/
$end = (time()+microtime());
$str['global']['cmptime'] = $end-$start;
/* Number of pages read */
$str['global']['numpages']= $cnt;
closedir($dir);
return($str);
}
/**
 * Read a whole file into a string.
 *
 * file_get_contents() is used so that the underlying handle is always
 * released and so that content which evaluates to false in a boolean
 * context (for example a file holding only "0") is returned intact.
 *
 * @param string $file Path of the file to read.
 * @return string|false The file contents, or false when the file cannot be opened or read.
 */
function getcontents($file)
{
    return(file_get_contents($file));
}
/**
 * Run the configured pattern replacements over a piece of content.
 *
 * @param string $str          Content to clean up.
 * @param array  $replacements Array with the entries 'from' (pattern or list of patterns)
 *                             and 'to' (replacement or list of replacements); both are
 *                             handed to preg_replace() unchanged.
 * @return mixed Whatever preg_replace() returns for the given subject.
 */
function remove_unwanted_tags($str,$replacements)
{
    $patterns    = $replacements['from'];
    $substitutes = $replacements['to'];

    return preg_replace($patterns, $substitutes, $str);
}
/**
 * Rewrite the links of a page so that navigation stays inside the script.
 *
 * Two passes are applied in this order:
 *  1. every HREF="http... (any letter case) gets target="_blank" and a lower-case href,
 *  2. every remaining upper-case HREF=" is pointed at "<link>?pg=<original target>".
 * Links already rewritten by the first pass are lower-case and therefore
 * not touched by the second, case-sensitive pass.
 *
 * @param string $str  Page content.
 * @param string $link Script path that is put in front of "?pg=".
 * @return mixed Result of preg_replace() on the content.
 */
function linkwrapper($str,$link)
{
    /* preg_replace() processes array patterns one after the other */
    $patterns = array(
        "/HREF=\"http/i",
        "/HREF=\"/",
    );
    $substitutes = array(
        "target=\"_blank\" href=\"http",
        "href=\"".$link."?pg=",
    );

    return preg_replace($patterns, $substitutes, $str);
}
/**
 * Search the content of all pages for the words of a search phrase.
 *
 * The phrase is cleaned with the global $allowed_chars_in_searchword pattern
 * (everything that pattern matches is removed), split on spaces and "+"
 * signs, and words shorter than the global $minwordlength are dropped.
 * Every remaining word is searched case-insensitively, as literal text, in
 * each page; hits for which is_in_tag() returns true are discarded.
 *
 * Side effects: $_SESSION['parsed_search_keyword'] receives the accepted
 * words (each followed by a space) and $_SESSION['lastresults'] receives
 * the returned array.
 *
 * @param array  $arr  Pages keyed by name, each with a 'content' entry.
 *                     The key 'global' holds summary data and is skipped.
 * @param string $word Raw search phrase.
 * @return array ['global']['maxhit'] = highest hit count of any page, and per page:
 *               ['hits'][<word>]   = hits for that word,
 *               ['hits']['overall'] = hits for all words,
 *               ['match'][<word>]  = remaining matches as array(text, offset).
 */
function search($arr,$word)
{
    global $minwordlength,$allowed_chars_in_searchword;

    /* Prepare Vars */
    $result       = array(); // Search result: page name => hits + matches
    $useablewords = array(); // Words that are long enough to be searched
    $result['global']['maxhit'] = 0;

    /* Forget everything remembered from a previous search */
    unset($_SESSION['lastresults']);
    $_SESSION['parsed_search_keyword'] = "";

    /* Prepare search words: strip unusable chars, treat "+" like a space */
    $word  = trim($word);
    $word  = preg_replace($allowed_chars_in_searchword,"",$word);
    /* explode() instead of split(): split() no longer exists since PHP 7 */
    $words = explode(" ",str_replace("+"," ",$word));

    /* Keep only words that reach the minimum length */
    foreach($words as $tryword){
        $tryword = trim($tryword);
        if(strlen($tryword) >= $minwordlength){
            $_SESSION['parsed_search_keyword'] .= $tryword." ";
            $useablewords[] = $tryword;
        }
    }

    /* Use words to search the content */
    foreach($arr as $key=>$val)
    {
        /* 'global' contains no file data - it is a summary info.
           Strict comparison so that a numeric page name is never mistaken for it. */
        if($key === "global"){
            continue;
        }

        /* A page that was indexed without content simply yields no hits */
        $content = isset($val['content']) ? (string)$val['content'] : "";

        /* overallhits counts hits per page */
        $overallhits = 0;

        foreach($useablewords as $needle)
        {
            /* Quote the word so regex meta characters are matched literally */
            $matches = array();
            preg_match_all("/".preg_quote($needle,"/")."/i",$content,$matches,PREG_OFFSET_CAPTURE);
            $hits = isset($matches[0]) ? $matches[0] : array();

            /* Drop hits that are located inside a tag (keys are kept as they were) */
            foreach($hits as $num=>$hit){
                if(is_in_tag($content,$hit[1])){
                    unset($hits[$num]);
                }
            }

            /* Count matches and save collected data */
            $overallhits = $overallhits + count($hits);
            $result[$key]['hits'][$needle]   = count($hits);
            $result[$key]['hits']['overall'] = $overallhits;

            /* Save max hits over all pages */
            if($overallhits > $result['global']['maxhit']){
                $result['global']['maxhit'] = $overallhits;
            }

            /* Add matches for this word to the return value */
            $result[$key]['match'][$needle] = $hits;
        }
    }

    /* Save result in session, so words can be marked later or the
       result list can be shown again without searching again */
    $_SESSION['lastresults'] = $result;
    return($result);
}
/**
 * Build the HTML result list for a search.
 *
 * Pages with at least one hit are ranked by their overall hit count
 * (highest first), cut down to $maxresults entries and rendered one by one
 * with createResultEntry(). A footer line with the number of listed
 * results, the search string and the interpreted keywords is appended.
 *
 * @param array $arr        All pages, keyed by page name.
 * @param array $res        Search result as returned by search(), including the 'global' entry.
 * @param int   $maxresults Maximum number of entries to list.
 * @return string The rendered list followed by the footer message.
 */
function searchlist($arr,$res,$maxresults)
{
    $global = $res['global'];
    $topten = array(); // page name => overall hits
    $ret    = "";      // return value
    unset($res['global']);

    /* Collect all pages that have hits, skip the rest */
    foreach($res as $key=>$val){
        if(isset($val['hits']['overall']) && $val['hits']['overall'] > 0){
            $topten[$key] = $val['hits']['overall'];
        }
    }

    /* Rank by hit count, best first. Keys are preserved explicitly,
       otherwise numeric page names would be renumbered and lost. */
    asort($topten);
    $topten = array_reverse($topten,true);
    $topten = array_slice($topten,0,$maxresults,true);

    /* Render an entry for every page that made it into the list */
    foreach($topten as $name => $hits){
        $ret.= createResultEntry($arr[$name],$res[$name],$name,$global['maxhit']);
    }

    /* NOTE(review): the search string is inserted as stored in the session;
       confirm it is HTML-escaped before it gets there. */
    $searchstring = isset($_SESSION['search_string']) ? $_SESSION['search_string'] : "";
    $parsed       = isset($_SESSION['parsed_search_keyword']) ? $_SESSION['parsed_search_keyword'] : "";

    /* appending footer message for resultlist */
    $ret.= "
".sprintf(_("%s results for your search with the keyword %s interpreted as %s"),
        count($topten),
        $searchstring,
        $parsed);
    $ret.="
";
    return($ret);
}
/**
 * Highlight all search hits of one page.
 *
 * Every hit is wrapped in the global $pre_mark / $suf_mark strings. The
 * page is rebuilt in a single pass from the original offsets, so inserted
 * markers never shift the position of later hits. When two hits start at
 * the same offset the longer one is used, and a hit that begins inside an
 * already marked hit is skipped - marking it as well would cut the text
 * and the markers apart.
 *
 * @param string $arr Page content (the text the offsets in $res refer to).
 * @param array  $res Search result of this page; $res['match'][<word>] is a
 *                    list of array(matched text, offset).
 * @return string The content with all hits marked.
 */
function markup_page($arr,$res)
{
    global $pre_mark,$suf_mark;

    /* Nothing to mark */
    if(empty($res['match'])){
        return($arr);
    }

    /* Collect one hit per start offset, the longest one wins */
    $repl = array();
    foreach($res['match'] as $matches){
        foreach($matches as $match){
            $position = $match[1];
            if(!isset($repl[$position]) || strlen($match[0]) > strlen($repl[$position])){
                $repl[$position] = $match[0];
            }
        }
    }
    ksort($repl);

    /* Rebuild the page: untouched text, marked hit, untouched text, ... */
    $ret    = "";
    $cursor = 0; // offset in $arr up to which the text is already copied
    foreach($repl as $position=>$word){
        if($position < $cursor){
            /* starts inside the previously marked hit */
            continue;
        }
        $ret   .= substr($arr,$cursor,$position-$cursor).$pre_mark.$word.$suf_mark;
        $cursor = $position + strlen($word);
    }
    $ret .= substr($arr,$cursor);

    return($ret);
}
/**
 * Wrap one word of a string in a prefix and a suffix.
 *
 * The text in front of $position is kept, then prefix, word and suffix are
 * emitted, then the text that follows the word (strlen($word) bytes after
 * $position) is appended.
 *
 * @param string $string   Text that contains the word.
 * @param int    $position Byte offset at which the word starts.
 * @param string $word     The word to mark.
 * @param string $prefix   Text put in front of the word.
 * @param string $suffix   Text put behind the word.
 * @return string The text with the marked word.
 */
function markword($string,$position,$word,$prefix,$suffix)
{
    $tailstart = $position + strlen($word);

    $head = substr($string,0,$position);
    $tail = substr($string,$tailstart,strlen($string));

    return $head.$prefix.$word.$suffix.$tail;
}
/* Creates HTML output for a single search result entry */
function createResultEntry($entry,$res,$name,$max)
{
$percentage = (int)(($res['hits']['overall'] / $max) * 100) ;
$color = dechex($percentage+150);
$color2 = dechex(150 - $percentage);
$entry['content'] = preg_replace("\"".$entry['headline']."\"","",$entry['content'],1);
if(strlen($color)==1) $color = "0".$color;
/* the object tag is needed for W3c */
$str = "
";
$str.= "