1 <?php
2 /*
3 This code is part of GOsa (https://gosa.gonicus.de)
4 Copyright (C) 2005, Fabian Hickert
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
/*******************************************
 The tag-replacement table and the function
 definitions follow below
 *******************************************/

/* Define which tags must be deleted: header, navigation, banner */
/* Search patterns ('from') and their substitutes ('to') that are handed to
   preg_replace() by remove_unwanted_tags(). Both lists are positional: the
   n-th pattern is replaced by the n-th substitute.
   NOTE(review): several patterns use a greedy ".*" (border, code, TABLE);
   confirm that this is intended for the processed HTML. */
$replacements = array(
    'from' => array(
        "@<!DOC.*<BODY >@si",
        "/border=\".*\"/i",
        "'<code.*code>'",
        /* removal of alt="..." attributes is disabled */
        "/<HR>/",
        "@<ADDRESS[^>]*?>.*?ADDRESS>@si",
        "@<\/BODY[^>]*?>.*?HTML>@si",
        "'<TABLE.*>'",
        "/src.*icons/i",
        "/src=\"/i",
        "/<H1 ALIGN=\"CENTER\">/",
        /* picture replacements would go here */
    ),
    'to' => array(
        "",
        " border=\"0\" ",
        "",
        /* (substitute for the disabled alt pattern) */
        "",
        "",
        "",
        "<table border=1 cellspacing=0 bgcolor=\"#E0E0E0\" width=\"95%\" align=\"center\" cellpadding=\"3\" summary=\"\">",
        "src=\"",
        "src=\"images/",
        "<H1>",
        /* picture replacements would go here */
    ),
);
/**
 * Read help pages from a directory into an array keyed by file name.
 *
 * Every page entry holds 'name', 'size' and 'stat'; unless $onlyIndex is set
 * it also holds the cleaned 'content' and the 'headline' derived from it.
 * The additional key 'global' carries 'start', 'basedir', 'cmptime' (seconds
 * spent in this function) and 'numpages'.
 *
 * @param string      $basedir    Directory containing the pages. It is concatenated
 *                                directly with the file name, so pass it with a
 *                                trailing slash.
 * @param string      $prefix     File names must start with this string (e.g. "node").
 * @param string      $suffix     File names must end with this string (e.g. ".html").
 * @param bool        $onlyIndex  true: do not read the file contents.
 * @param string|bool $singlepage false: read every matching file; otherwise the name
 *                                of the one file to read (prefix/suffix are not
 *                                checked in that case).
 *
 * @return array Page entries plus the 'global' summary.
 */
function readfiles($basedir,$prefix,$suffix,$onlyIndex,$singlepage=false)
{
    $str = array();   // Result
    $cnt = 0;         // Number of pages read

    $str['global']['start']   = $cnt;
    $str['global']['basedir'] = $basedir;

    /* Start time for benchmark; microtime(true) returns seconds as float */
    $start = microtime(true);

    if (!$singlepage) {

        /* The directory is only needed when listing it. A handle that could
           not be opened must never reach readdir()/closedir(). */
        $dir = is_dir($basedir) ? opendir($basedir) : false;

        if ($dir !== false) {
            while (($file = readdir($dir)) !== false) {

                /* Skip everything that is not one of our pages */
                if ($file === "." || $file === ".." || !readfiles_name_matches($file, $prefix, $suffix)) {
                    continue;
                }

                $str[$file] = readfiles_entry($basedir, $file, $onlyIndex);
                $cnt++;
            }
            closedir($dir);
        }

    } else {

        /* Exactly one page was requested.
           NOTE(review): $singlepage is appended to $basedir unchecked -
           confirm that callers validate it before passing it in. */
        $cnt  = 1;
        $file = $singlepage;
        $str[$file] = readfiles_entry($basedir, $file, $onlyIndex);
    }

    /* asort() orders the entries by comparing the entry arrays; keys are kept */
    asort($str);

    /* End time for benchmark */
    $str['global']['cmptime']  = microtime(true) - $start;

    /* Number of pages read */
    $str['global']['numpages'] = $cnt;

    return($str);
}


/* Helper of readfiles(): true if $file starts with $prefix and ends with $suffix
   (an empty prefix or suffix matches every name). */
function readfiles_name_matches($file,$prefix,$suffix)
{
    $prefix = (string)$prefix;
    $suffix = (string)$suffix;

    if ($prefix !== "" && strncmp($file, $prefix, strlen($prefix)) !== 0) {
        return(false);
    }
    if ($suffix !== "" && substr($file, -strlen($suffix)) !== $suffix) {
        return(false);
    }
    return(true);
}


/* Helper of readfiles(): collect name, size, (optionally) content + headline
   and stat information for a single file below $basedir. */
function readfiles_entry($basedir,$file,$onlyIndex)
{
    global $replacements;

    $path  = $basedir.$file;
    $entry = array();
    $entry['name'] = $file;
    $entry['size'] = filesize($path);

    /* Read the file content too? */
    if (!$onlyIndex) {
        $entry['content']  = remove_unwanted_tags(linkwrapper(getcontents($path),""),$replacements);
        $entry['headline'] = getheader_from_content($entry['content']);
    }

    /* File status, kept for debugging */
    $entry['stat'] = stat($path);

    return($entry);
}
/**
 * Read a complete file into a string.
 *
 * file_get_contents() is used instead of a manual fopen()/fread() loop: it
 * releases the file handle again and does not stop early when a chunk of the
 * file happens to be the string "0".
 *
 * @param string $file Path of the file to read.
 *
 * @return string|bool The file contents, or false if the file could not be read.
 */
function getcontents($file)
{
    $str = file_get_contents($file);

    if ($str === false) {
        return(false);
    }
    return($str);
}
/**
 * Apply a replacement table to a string.
 *
 * @param string $str          Text to clean up.
 * @param array  $replacements Table with the keys 'from' (regular expressions)
 *                             and 'to' (their substitutes), as passed to preg_replace().
 *
 * @return string The text after all replacements.
 */
function remove_unwanted_tags($str,$replacements)
{
    $patterns    = $replacements['from'];
    $substitutes = $replacements['to'];

    return preg_replace($patterns, $substitutes, $str);
}
/**
 * Rewrite the links of a page so that navigation stays inside the viewer.
 *
 * Links starting with "http" (matched case-insensitively) get a
 * target="_blank" and a lower-case href. Every remaining upper-case HREF="
 * is then pointed at $link with the old target appended as the "pg" parameter.
 *
 * @param string $str  HTML to rewrite.
 * @param string $link Script the internal links should point to.
 *
 * @return string The rewritten HTML.
 */
function linkwrapper($str,$link)
{
    /* preg_replace() applies the patterns one after another, so the second
       one only sees what the first one left in upper case. */
    $patterns = array(
        "/HREF=\"http/i",
        "/HREF=\"/",
    );
    $substitutes = array(
        "target=\"_blank\" href=\"http",
        "href=\"".$link."?pg=",
    );

    return preg_replace($patterns, $substitutes, $str);
}
/**
 * Search the content of all pages for the words of a search string.
 *
 * The search string is trimmed, filtered with the global pattern
 * $allowed_chars_in_searchword and split at blanks and "+". Words shorter
 * than the global $minwordlength are ignored. Hits that is_in_tag() reports
 * as lying inside an HTML tag are dropped.
 *
 * Side effects: $_SESSION['parsed_search_keyword'] receives the words that
 * were really used and $_SESSION['lastresults'] the complete result.
 *
 * @param array  $arr  Pages keyed by name, each with a 'content' element; the
 *                     key 'global' is skipped.
 * @param string $word Search string as entered.
 *
 * @return array Per page: 'hits' (count per word plus 'overall') and 'match'
 *               (per word the matches as array(text, offset)); additionally
 *               ['global']['maxhit'] with the highest overall count.
 */
function search($arr,$word)
{
    global $minwordlength,$allowed_chars_in_searchword;

    /* Prepare vars */
    $result       = array();  // Search result: page name => hits and matches
    $useablewords = array();  // Search words that are long enough
    $result['global']['maxhit'] = 0;

    unset($_SESSION['lastresults']);
    $_SESSION['parsed_search_keyword'] = "";

    /* Prepare search words */
    $word = trim($word);

    /* Filter all unusable chars, if a filter pattern is configured */
    if (!empty($allowed_chars_in_searchword)) {
        $word = preg_replace($allowed_chars_in_searchword,"",$word);
    }

    /* explode() replaces split(), which no longer exists since PHP 7 */
    $words = explode(" ",str_replace("+"," ",(string)$word));

    /* Keep only the words that have at least $minwordlength chars */
    foreach ($words as $tryword) {
        $tryword = trim($tryword);

        if (strlen($tryword) >= $minwordlength) {
            $_SESSION['parsed_search_keyword'] .= $tryword." ";
            $useablewords[] = $tryword;
        }
    }

    /* Use the words to search the content */
    foreach ($arr as $key => $val) {

        /* Skip key global, it contains no file data - it is a summary info.
           Compared as string so that an integer key is never mistaken for it. */
        if ((string)$key === "global") {
            continue;
        }

        $content     = isset($val['content']) ? (string)$val['content'] : "";
        $overallhits = 0;   // Hits per page

        foreach ($useablewords as $searchword) {

            /* Get all hits for the word; the word is quoted so that it is
               matched literally and cannot break the expression */
            $matches = array();
            preg_match_all("/".preg_quote($searchword,"/")."/i",$content,$matches,PREG_OFFSET_CAPTURE);

            /* Drop hits that lie inside a tag */
            foreach ($matches[0] as $num => $hit) {
                if (is_in_tag($content,$hit[1])) {
                    unset($matches[0][$num]);
                }
            }

            /* Count matches */
            $overallhits = $overallhits + count($matches[0]);

            /* Save collected data */
            $result[$key]['hits'][$searchword] = count($matches[0]);
            $result[$key]['hits']['overall']   = $overallhits;

            /* Save max hits over all pages */
            if ($overallhits > $result['global']['maxhit']) {
                $result['global']['maxhit'] = $overallhits;
            }

            /* Add the matches for this word to the return value */
            $result[$key]['match'][$searchword] = $matches[0];
        }
    }

    /* Save result in session, so words can be marked later or the result
       list can be shown again without searching again */
    $_SESSION['lastresults'] = $result;
    return($result);
}
/**
 * Build the HTML result list for a search.
 *
 * Pages without hits are dropped, the remaining ones are ordered by their
 * overall hit count (highest first) and cut down to $maxresults entries.
 * Each entry is rendered by createResultEntry(); a footer with the number of
 * results and the search words is appended.
 *
 * The search string and the parsed keywords are read from $_SESSION and are
 * HTML-escaped before they are put into the footer.
 * NOTE(review): if callers already store them escaped, this double-encodes - confirm.
 *
 * @param array $arr        Pages as returned by readfiles().
 * @param array $res        Result as returned by search().
 * @param int   $maxresults Maximum number of entries to show.
 *
 * @return string HTML of the result list.
 */
function searchlist($arr,$res,$maxresults)
{
    $global = isset($res['global']) ? $res['global'] : array();
    $maxhit = isset($global['maxhit']) ? $global['maxhit'] : 0;
    $topten = array();  // Best pages: name => overall hits
    $ret    = "";       // Return value
    unset($res['global']);

    /* Collect all pages that have at least one hit */
    foreach ($res as $key => $val) {
        if (isset($val['hits']['overall']) && $val['hits']['overall'] > 0) {
            $topten[$key] = $val['hits']['overall'];
        }
    }

    /* Order by number of hits, best first. Keys are preserved explicitly,
       otherwise numeric page names would be renumbered. */
    asort($topten);
    $topten = array_reverse($topten,true);
    $topten = array_slice($topten,0,$maxresults,true);

    /* Render one entry per page */
    foreach ($topten as $name => $hits) {
        $ret .= createResultEntry($arr[$name],$res[$name],$name,$maxhit);
    }

    /* Both strings are shown to the user, so escape them for HTML */
    $searchstring = isset($_SESSION['search_string']) ? (string)$_SESSION['search_string'] : "";
    $parsed       = isset($_SESSION['parsed_search_keyword']) ? (string)$_SESSION['parsed_search_keyword'] : "";

    /* Append footer message for the result list */
    $ret .= "<br>\n".sprintf(_("%s results for your search with the keyword %s interpreted as %s"),
        "<b>".count($topten)."</b>",
        "<b>".htmlspecialchars($searchstring,ENT_QUOTES)."</b>",
        "<b>".htmlspecialchars($parsed,ENT_QUOTES)."</b>");
    $ret .= "<br>\n<br>";
    return($ret);
}
/**
 * Mark all search hits in a page.
 *
 * The hits of all words are collected by their offset and processed from the
 * front to the back. Every inserted marker makes the text longer, so the
 * growth is measured after each step and added to the following offsets.
 *
 * The markers are taken from the globals $pre_mark and $suf_mark.
 *
 * @param string $arr Page content.
 * @param array  $res Search result of this page; $res['match'] holds per
 *                    word a list of array(text, offset).
 *
 * @return string The content with all hits wrapped in the markers.
 */
function markup_page($arr,$res)
{
    global $pre_mark,$suf_mark;

    /* offset => matched text, over all words */
    $hits = array();
    foreach ($res['match'] as $matches) {
        foreach ($matches as $match) {
            $hits[$match[1]] = $match[0];
        }
    }
    ksort($hits);

    /* Number of chars the text has grown so far */
    $shift = 0;
    foreach ($hits as $offset => $text) {
        $before = strlen($arr);
        $arr    = markword($arr,($offset + $shift),$text,$pre_mark,$suf_mark);
        $shift  = $shift + (strlen($arr) - $before);
    }

    return($arr);
}
/**
 * Wrap one word of a string into a prefix and a suffix.
 *
 * The strlen($word) chars starting at $position are replaced by
 * $prefix.$word.$suffix.
 *
 * @param string $string   Text to work on.
 * @param int    $position Offset of the word inside $string.
 * @param string $word     The word to mark.
 * @param string $prefix   Text put in front of the word.
 * @param string $suffix   Text put behind the word.
 *
 * @return string The text with the marked word.
 */
function markword($string,$position,$word,$prefix,$suffix)
{
    $head   = substr($string,0,$position);
    $tail   = substr($string,$position + strlen($word));
    $marked = $prefix.$word.$suffix;

    return $head.$marked.$tail;
}
/**
 * Create the HTML output for a single search result entry.
 *
 * The entry shows the headline, the first 120 chars of the page text (with
 * the first occurrence of the headline removed), a progress bar and the hit
 * rate relative to the best page.
 *
 * The headline is removed by plain string search. It used to be put into a
 * regular expression unquoted, which failed for headlines containing chars
 * such as "(", "?" or a double quote.
 *
 * @param array  $entry Page data with the elements 'headline' and 'content'.
 * @param array  $res   Search result of this page ($res['hits']['overall']).
 * @param string $name  Page name, used for the link.
 * @param int    $max   Highest overall hit count of all pages; 0 yields 0%.
 *
 * @return string HTML of the entry.
 */
function createResultEntry($entry,$res,$name,$max)
{
    /* Hit rate in percent; avoid dividing by zero when nothing was found */
    $hits       = isset($res['hits']['overall']) ? $res['hits']['overall'] : 0;
    $percentage = ($max > 0) ? (int)(($hits / $max) * 100) : 0;

    $headline = isset($entry['headline']) ? (string)$entry['headline'] : "";
    $content  = isset($entry['content'])  ? (string)$entry['content']  : "";

    /* Remove the first occurrence of the headline, it is shown separately */
    if ($headline !== "") {
        $at = strpos($content,$headline);
        if ($at !== false) {
            $content = substr_replace($content,"",$at,strlen($headline));
        }
    }

    $teaser = htmlentities(substr(strip_tags($content),0,120));

    /* the object tag is needed for W3c */
    $str = "<a href=\"?pg=".$name."&mark=1\" title=\"".$percentage."% ".$headline."\">\n"
         . "<object>\n"
         . "<table summary=\"\" width=\"98%\" align=\"center\">\n"
         . "<tr>\n"
         . "<td height=15>\n"
         . "<b>".$headline."</b> -".$teaser."...\n"
         . "</td>\n"
         . "<td width=50 valign=\"top\">".progressbar($percentage,50,8)."</td>\n"
         . "</tr>\n"
         . "<tr>\n"
         . "<td colspan=2>\n"
         . "<b>\n"
         . htmlentities(sprintf(_("%s%% hit rate in file %s"),$percentage,$name))."\n"
         . "</b>\n"
         . "</td>\n"
         . "</tr>\n"
         . "</table>\n"
         . "</object></a>\n";
    $str .= "<hr size=\"1\">";

    return($str);
}
/**
 * Detect whether a position of an HTML string lies inside a tag.
 *
 * A position counts as inside a tag when the next ">" at or behind it comes
 * before the next "<". A missing "<" or ">" is handled explicitly, because
 * strpos() returns false in that case and false must not be compared like a
 * position.
 *
 * @param string $string HTML text.
 * @param int    $pos    Offset to check.
 *
 * @return bool true if the position is inside a tag, false if it is in
 *              visible text or outside of the string.
 */
function is_in_tag($string,$pos)
{
    /* strpos() does not accept offsets outside of the string */
    $length = strlen($string);
    if ($pos > $length || $pos < -$length) {
        return(false);
    }

    $nextopen  = strpos($string,"<",$pos);
    $nextclose = strpos($string,">",$pos);

    /* No closing bracket follows: we cannot be inside a tag */
    if ($nextclose === false) {
        return(false);
    }

    /* A closing bracket follows, but no opening one: this is the last tag */
    if ($nextopen === false) {
        return(true);
    }

    return($nextopen > $nextclose);
}
/**
 * Return the first line of readable text, which should be the headline.
 *
 * All tags are stripped, then the first line that has at least 3 chars
 * besides surrounding whitespace is returned unchanged.
 *
 * @param string $str HTML content of a page.
 *
 * @return string|null The headline line, or null if no line qualifies.
 */
function getheader_from_content($str)
{
    $str = strip_tags((string)$str);

    /* explode() replaces split(), which no longer exists since PHP 7 */
    $lines = explode("\n",$str);

    foreach ($lines as $possibleheadline) {
        /* Lines that only consist of whitespace are no headline */
        if (strlen(trim($possibleheadline)) >= 3) {
            return $possibleheadline;
        }
    }

    return(null);
}
406 // vim:tabstop=2:expandtab:shiftwidth=2:filetype=php:syntax:ruler:
407 ?>