Get all pages, */ if(!$singlepage) { /* While theres is an unreaded file in our resource */ while (($file = readdir($dir)) !== false) { /* Filter all files which arn't intressting */ if((strstr($file,$suffix))&&($file!=".")&&($file!="..")&&(strstr($file,$prefix))){ /* Collect informations */ $str[$file]=array(); $str[$file]['name'] = $file; $str[$file]['size'] = filesize($basedir.$file); /* Readfile conent too ? */ if(!$onlyIndex){ $str[$file]['content'] = remove_unwanted_tags(linkwrapper(getcontents($basedir.$file),""),$replacements); $str[$file]['headline'] = getheader_from_content($str[$file]['content']); } /* Include file status, for debugging, not used in script yet */ $str[$file]['stat'] = stat($basedir.$file); $cnt++; } } /* Only get on file*/ }else{ /* Pages read = 1 */ $cnt = 1; /* Prepare result*/ $file = $singlepage; $str[$file] = array(); $str[$file]['name'] = $file; $str[$file]['size'] = filesize($basedir.$file); /* If onlyIndex == true skip reading content */ if(!$onlyIndex){ $str[$file]['content'] = remove_unwanted_tags(linkwrapper(getcontents($basedir.$file),""),$replacements); $str[$file]['headline'] = getheader_from_content($str[$file]['content']); } /* Include file status, for debugging, not used in script yet */ $str[$file]['stat'] = stat($basedir.$file); } /* Sort to right order */ asort($str); /* Endtime for Benchmark*/ $end = (time()+microtime()); $str['global']['cmptime'] = $end-$start; /* Number of pages readed */ $str['global']['numpages']= $cnt; closedir($dir); return($str); } /* Read filecontent */ function getcontents($file) { $str = "" ; // Temporary variable for file contents $tmp = "" ; // Temporary varibale for partitial file contents /* open file and read*/ $fp = fopen($file,"r"); if($fp) { while($tmp = fread($fp,512)) { $str.= $tmp; } }else{ return(false); } return($str); } /*Remove tags */ function remove_unwanted_tags($str,$replacements) { $str=preg_replace($replacements['from'],$replacements['to'],$str); return($str); } /*Converts the all links to specified path, is needed to get simple navigation */ function linkwrapper($str,$link) { $str = preg_replace("/HREF=\"http/i","target=\"_blank\" href=\"http",$str); $str = preg_replace("/HREF=\"/","href=\"".$link."?pg=",$str); // $str=str_replace("HREF=\"","href=\"".$link."?pg=",$str); return($str); } /* Search content */ function search($arr,$word) { global $minwordlength,$allowed_chars_in_searchword; /* Prepare Vars */ $result =array(); // Search result, filename, + hits + hits per word + matches $words =array(); // Temporary searchword handling $useablewords =array(); // Temporary searchword handling $tryword = ""; // Temporary searchword handling $result['global']['maxhit'] = 0; unset($_SESSION['lastresults']); unset($_SESSION['parsed_search_keyword']); $_SESSION['parsed_search_keyword']=""; /* prepare searchwords */ $word = trim($word); /* Filter all unusable chars */ $word = preg_replace($allowed_chars_in_searchword,"",$word); $words = split(" ",str_replace("+"," ",$word)); /* Check all wordlengths */ foreach($words as $tryword){ $tryword = trim($tryword); /* Filter words smaler than 3 chars */ if(strlen($tryword)>=$minwordlength) { $_SESSION['parsed_search_keyword'].=$tryword." "; $useablewords[]=$tryword; } } /* Use words to search the content */ foreach($arr as $key=>$val) { /* overallhits counts hits per page */ $overallhits=0; /* Search all words */ foreach($useablewords as $word) { /* Skip key global, it contains no file data - it is a summary info*/ if($key!="global") { /* Get all hits for the word in $matches*/ preg_match_all("/".$word."/i",$arr[$key]['content'], $matches,PREG_OFFSET_CAPTURE); /* Filter in Tag results*/ if(count($matches[0])){ foreach($matches[0] as $num=>$hit){ if(is_in_tag($arr[$key]['content'],$hit[1])) { unset($matches[0][$num]); } } } /* Count matches */ $overallhits=$overallhits + count($matches[0]); /* Save collected data */ $result[$key]['hits'][$word] = count($matches[0]); $result[$key]['hits']['overall']= $overallhits; /* Save max hits for page */ if($overallhits > $result['global']['maxhit']){ $result['global']['maxhit']=$overallhits; } /* Add results for word to return value*/ $result[$key]['match'][$word]=array(); $result[$key]['match'][$word]=$matches[0]; } } } /* Save result in Session, so we can mark words later, or go back to search, without searching again*/ $_SESSION['lastresults'] = $result; return($result); } /* Detect 10 Best result entries, sort and call createResultEntry to create HTML output for complete list */ function searchlist($arr,$res,$maxresults) { $global = $res['global']; $topten = array(); // To detect 10 best solutions $ret = ""; // return value unset($res['global']); /* Detect 10 best Sites */ foreach($res as $key=>$val){ /* Skip results with no hits */ if($val['hits']['overall']>0){ $topten[$key] = $val['hits']['overall']; } } /* Sort by hit position in content, to easier mark words */ asort($topten); $topten = array_reverse($topten); $topten = (array_slice($topten,0,$maxresults)); /* We have a result, an array with all content, an array with hits and position and we have the 10 best hits */ /* Foreach */ foreach($topten as $name => $hits) { $ret.= createResultEntry($arr[$name],$res[$name],$name,$global['maxhit']); } /* appending footer message for resultlist */ $ret.= "
".sprintf(_("%s results for your search with the keyword %s interpreted as %s"), "".count($topten)."", "".($_SESSION['search_string'])."", "".$_SESSION['parsed_search_keyword'].""); $ret.="

"; return($ret); } /* This function marks a string with the given search result for this string*/ function markup_page($arr,$res) { global $pre_mark,$suf_mark; $ret = ""; // return value $repl = array(); $posadd = 0; foreach($res['match'] as $word => $matches) { foreach($matches as $matchnr=>$match) { $repl[$match[1]]=$match[0]; } } ksort($repl); foreach($repl as $position=>$word) { $pos1 = strlen($arr); $arr= markword($arr,($position+$posadd),$word,$pre_mark,$suf_mark); $pos2 = strlen($arr); $posadd =$posadd + ($pos2 - $pos1); } return($arr); } /* This function marks a single word with the specified prefix and suffix */ function markword($string,$position,$word,$prefix,$suffix) { $wordlength = strlen($word); $wholelength = strlen($string); $first = substr($string,0,$position); $last = substr($string,($position+$wordlength),$wholelength); return($first.$prefix.$word.$suffix.$last); } /* Creates HTML output for a single search result entry */ function createResultEntry($entry,$res,$name,$max) { $percentage = (int)(($res['hits']['overall'] / $max) * 100) ; $color = dechex($percentage+150); $color2 = dechex(150 - $percentage); if(strlen($color)==1) $color = "0".$color; /* the object tag is needed for W3c */ $str = "
".$entry['headline']." -".htmlentities( substr(strip_tags($entry['content']),0,120))."...
".htmlentities(sprintf(_("%s%% hit rate in file %s"),$percentage,$name))."
"; $str.= "
"; return($str); } /*Simple function to detect if we prepare to change a tag or visible text */ function is_in_tag($string,$pos) { $pos1 = strpos($string,"<",$pos); $pos2 = strpos($string,">",$pos); if ($pos1 > $pos2) { return(true); }else{ return(false); } } /*Returns frist line of readable text, it should be the headline */ function getheader_from_content($str) { $str = strip_tags($str); $pos = 0; $arr = split("\n",$str); foreach($arr as $possibleheadline){ if(strlen($possibleheadline)>=3){ return $possibleheadline; } } } // vim:tabstop=2:expandtab:shiftwidth=2:filetype=php:syntax:ruler: ?>