1 <?php
2 /*
3 This code is part of GOsa (https://gosa.gonicus.de)
4 Copyright (C) 2005, Fabian Hickert
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
/*******************************************
 Only function definitions will follow here
 *******************************************/
/* Reads all files in the specified directory, with their contents and some information about each file */
/* Read all files with contents */
/* |Folder="/var/ww...",
   | |Fileprefix="node"
   | | |Filesuffix=".html"
   | | | |WithoutContent=false (this means: read content)
   | | | | |Singlepage=false (means read all; to read a single page, specify its filename) */
/**
 * Collects the help pages found in $basedir into one array keyed by file name.
 *
 * Every page entry holds 'name', 'size' and 'stat'; unless $onlyIndex is set it
 * also holds the post-processed 'content' and the 'headline' extracted from it.
 * The extra key 'global' carries summary data: 'start', 'basedir', 'cmptime'
 * (seconds spent in this function) and 'numpages' (number of pages collected).
 *
 * @param string      $basedir    Directory to read; it is concatenated directly
 *                                with the file name, so it needs a trailing slash.
 * @param string      $prefix     Text the file name must contain (matched anywhere).
 * @param string      $suffix     Text the file name must contain (matched anywhere).
 * @param bool        $onlyIndex  true = skip reading the file contents.
 * @param string|bool $singlepage false = read every matching file, otherwise the
 *                                name of the single file to read.
 * @return array see description above
 */
function readfiles($basedir,$prefix,$suffix,$onlyIndex,$singlepage=false)
{
  global $replacements;

  $str = array();   // Result, keyed by file name plus 'global'
  $cnt = 0;         // Number of pages collected

  $str['global']['start']   = $cnt;       // collect basic information - start page
  $str['global']['basedir'] = $basedir;   // collect basic information - base directory

  /* Start time for benchmark; microtime(true) yields float seconds, whereas
     adding the "msec sec" string form to time() relies on string coercion */
  $start = microtime(true);

  if(!$singlepage) {

    /* Only open the directory when it is actually listed, and skip the listing
       when it cannot be opened, so the summary is still returned to the caller */
    $dir = is_dir($basedir) ? opendir($basedir) : false;
    if($dir !== false) {
      while (($file = readdir($dir)) !== false) {

        /* Keep files whose name contains both prefix and suffix.
           strpos() !== false also accepts a match whose remainder is "0". */
        if(($file!=".")&&($file!="..")
            &&(strpos($file,$suffix)!==false)&&(strpos($file,$prefix)!==false)){
          $str[$file] = readfiles_collect_entry($basedir,$file,$onlyIndex,$replacements);
          $cnt++;
        }
      }
      closedir($dir);
    }
  }else{

    /* Exactly one page is read.
       NOTE(review): $singlepage is appended to $basedir unchecked - confirm that
       callers validate it before passing request data. */
    $cnt = 1;
    $str[$singlepage] = readfiles_collect_entry($basedir,$singlepage,$onlyIndex,$replacements);
  }

  /* Sort to right order */
  asort($str);

  /* End time for benchmark */
  $str['global']['cmptime'] = microtime(true)-$start;

  /* Number of pages read */
  $str['global']['numpages']= $cnt;
  return($str);
}

/* Helper for readfiles(): builds the entry (name, size, optional content and headline, stat) for one file */
function readfiles_collect_entry($basedir,$file,$onlyIndex,$replacements)
{
  $path  = $basedir.$file;
  $entry = array();
  $entry['name'] = $file;
  $entry['size'] = filesize($path);

  /* Read file content too? */
  if(!$onlyIndex){
    $entry['content']  = remove_unwanted_tags(linkwrapper(getcontents($path),""),$replacements);
    $entry['headline'] = getheader_from_content($entry['content']);
  }

  /* Include file status, for debugging */
  $entry['stat'] = stat($path);
  return($entry);
}
111 /* Read filecontent */
/**
 * Reads a whole file into a string.
 *
 * @param string $file Path of the file to read.
 * @return string|bool The file contents, or false if the file could not be opened.
 */
function getcontents($file)
{
  $fp = fopen($file,"r");
  if($fp === false) {
    return(false);
  }

  $str = "";   // Collected file contents
  while(!feof($fp)) {
    $tmp = fread($fp,512);

    /* Stop on a read error or when nothing more is delivered. The chunk must
       not be tested for truthiness: a chunk consisting of "0" is valid data. */
    if(($tmp === false)||($tmp === "")) {
      break;
    }
    $str.= $tmp;
  }

  /* Release the handle */
  fclose($fp);
  return($str);
}
131 /*Remove tags */
/**
 * Applies the configured regular-expression replacements to a string.
 *
 * @param string $str          Text to clean up.
 * @param array  $replacements Array with the keys 'from' (patterns) and 'to'
 *                             (replacements), both handed to preg_replace().
 * @return string Result of preg_replace()
 */
function remove_unwanted_tags($str,$replacements)
{
  $patterns    = $replacements['from'];
  $substitutes = $replacements['to'];
  return(preg_replace($patterns,$substitutes,$str));
}
/* Converts all links to the specified path; this is needed to get simple navigation */
/**
 * Rewrites the HREF attributes of a page.
 *
 * Links starting with "http" (any letter case of HREF) get target="_blank";
 * every remaining upper-case HREF=" is redirected to "$link?pg=<old target>".
 *
 * @param string $str  HTML to rewrite.
 * @param string $link Script name placed in front of "?pg=".
 * @return string Rewritten HTML
 */
function linkwrapper($str,$link)
{
  /* preg_replace() applies the pairs one after the other, so external links
     are already lower-case "href" when the second, case-sensitive pattern runs */
  $patterns = array(
      "/HREF=\"http/i",
      "/HREF=\"/");
  $substitutes = array(
      "target=\"_blank\" href=\"http",
      "href=\"".$link."?pg=");

  return(preg_replace($patterns,$substitutes,$str));
}
149 /* Search content */
/**
 * Searches the content of all pages for the given search string.
 *
 * The search string is trimmed, filtered with the global pattern
 * $allowed_chars_in_searchword (everything the pattern matches is removed) and
 * split at blanks and "+". Words shorter than the global $minwordlength are
 * dropped. Hits for which is_in_tag() reports a position inside a tag are discarded.
 *
 * Side effects: $_SESSION['parsed_search_keyword'] receives the accepted words
 * (each followed by a blank) and $_SESSION['lastresults'] the returned array.
 *
 * @param array  $arr  Pages keyed by file name, each with a 'content' entry;
 *                     the key 'global' is skipped.
 * @param string $word Search string as entered.
 * @return array ['global']['maxhit'] is the highest hit count of a page; per page
 *               ['hits'][<word>], ['hits']['overall'] and ['match'][<word>]
 *               (list of array(text, offset) as delivered by preg_match_all()).
 */
function search($arr,$word)
{
  global $minwordlength,$allowed_chars_in_searchword;

  /* Prepare vars */
  $result       = array();   // Search result, filename, + hits + hits per word + matches
  $useablewords = array();   // Words that are long enough to be searched
  $result['global']['maxhit'] = 0;
  unset($_SESSION['lastresults']);
  $_SESSION['parsed_search_keyword']="";

  /* Prepare search words: filter all unusable chars, treat "+" like a blank */
  $word = trim($word);
  if(!empty($allowed_chars_in_searchword)) {
    $word = preg_replace($allowed_chars_in_searchword,"",$word);
  }

  /* explode() instead of split(): split() no longer exists since PHP 7 */
  $words = explode(" ",str_replace("+"," ",$word));

  /* Check all word lengths */
  foreach($words as $tryword){
    $tryword = trim($tryword);

    /* Filter words that are too short */
    if(strlen($tryword)>=$minwordlength) {
      $_SESSION['parsed_search_keyword'].=$tryword." ";
      $useablewords[]=$tryword;
    }
  }

  /* Use words to search the content */
  foreach($arr as $key=>$val)
  {
    /* Skip key global, it contains no file data - it is a summary info.
       Strict comparison, so that a numeric key can never equal "global". */
    if($key === "global") {
      continue;
    }

    $content = isset($val['content']) ? $val['content'] : "";

    /* overallhits counts hits per page */
    $overallhits = 0;

    /* Search all words */
    foreach($useablewords as $useword)
    {
      /* Get all hits for the word in $matches; the word is quoted so that it
         is always searched literally and cannot break the pattern */
      $matches = array();
      preg_match_all("/".preg_quote($useword,"/")."/i",$content,$matches,PREG_OFFSET_CAPTURE);

      /* Filter results located inside a tag */
      foreach($matches[0] as $num=>$hit){
        if(is_in_tag($content,$hit[1])) {
          unset($matches[0][$num]);
        }
      }

      /* Count matches */
      $hitcount    = count($matches[0]);
      $overallhits = $overallhits + $hitcount;

      /* Save collected data */
      $result[$key]['hits'][$useword]  = $hitcount;
      $result[$key]['hits']['overall'] = $overallhits;

      /* Save max hits for page */
      if($overallhits > $result['global']['maxhit']){
        $result['global']['maxhit']=$overallhits;
      }

      /* Add results for word to return value */
      $result[$key]['match'][$useword] = $matches[0];
    }
  }

  /* Save result in session, so we can mark words later, or go back to search, without searching again */
  $_SESSION['lastresults'] = $result;
  return($result);
}
230 /* Detect 10 Best result entries, sort and call createResultEntry to create HTML output for complete list */
/**
 * Creates the HTML result list for the best-scoring pages of a search.
 *
 * Pages without hits are dropped, the rest is ordered by hit count (highest
 * first) and cut to $maxresults entries; createResultEntry() renders each of
 * them. A summary line built from $_SESSION['search_string'] and
 * $_SESSION['parsed_search_keyword'] is appended.
 *
 * @param array $arr        Pages keyed by file name (passed on to createResultEntry()).
 * @param array $res        Search result with ['global']['maxhit'] and, per page,
 *                          ['hits']['overall'].
 * @param int   $maxresults Maximum number of entries to list.
 * @return string HTML output
 */
function searchlist($arr,$res,$maxresults)
{
  $maxhit = isset($res['global']['maxhit']) ? $res['global']['maxhit'] : 0;
  $topten = array();   // page name => overall hits
  $ret    = "";        // return value
  unset($res['global']);

  /* Detect the best pages, skip results with no hits */
  foreach($res as $key=>$val){
    if(isset($val['hits']['overall']) && ($val['hits']['overall']>0)){
      $topten[$key] = $val['hits']['overall'];
    }
  }

  /* Order by hit count, best first. Keys have to be preserved: without the
     preserve flag integer-like page names would be renumbered to 0,1,2,... */
  asort($topten);
  $topten = array_reverse($topten,true);
  $topten = array_slice($topten,0,$maxresults,true);

  /* Create the output for every listed page */
  foreach($topten as $name => $hits) {
    $ret.= createResultEntry($arr[$name],$res[$name],$name,$maxhit);
  }

  /* The search string is user input, escape it before writing it into the page */
  $searchstring = isset($_SESSION['search_string']) ? $_SESSION['search_string'] : "";
  $parsedstring = isset($_SESSION['parsed_search_keyword']) ? $_SESSION['parsed_search_keyword'] : "";

  /* Appending footer message for result list */
  $ret.= "<br>
    ".sprintf(_("%s results for your search with the keyword %s interpreted as %s"),
        "<b>".count($topten)."</b>",
        "<b>".htmlentities($searchstring)."</b>",
        "<b>".htmlentities($parsedstring)."</b>");
  $ret.="<br>
    <br>";
  return($ret);
}
270 /* This function marks a string with the given search result for this string*/
/**
 * Marks every search hit inside a page.
 *
 * All hits of all words are merged into one list ordered by offset. Each hit is
 * then wrapped by markword() with the global $pre_mark / $suf_mark; the growth
 * of the text caused by earlier marks is added to the offsets of later hits.
 *
 * @param string $arr Text to mark.
 * @param array  $res Search result of the page; $res['match'][<word>] is a list
 *                    of array(text, offset).
 * @return string Marked text
 */
function markup_page($arr,$res)
{
  global $pre_mark,$suf_mark;

  /* offset => matched text; a later word overwrites an earlier one at the same offset */
  $marks = array();
  foreach($res['match'] as $hits) {
    foreach($hits as $hit) {
      $marks[$hit[1]] = $hit[0];
    }
  }
  ksort($marks);

  /* Number of characters inserted so far */
  $shift = 0;
  foreach($marks as $offset => $text) {
    $before = strlen($arr);
    $arr    = markword($arr,($offset+$shift),$text,$pre_mark,$suf_mark);
    $shift  = $shift + (strlen($arr) - $before);
  }
  return($arr);
}
297 /* This function marks a single word with the specified prefix and suffix */
/**
 * Wraps one word of a string with a prefix and a suffix.
 *
 * The strlen($word) characters starting at $position are replaced by
 * $prefix.$word.$suffix.
 *
 * @param string $string   Text containing the word.
 * @param int    $position Offset of the word inside $string.
 * @param string $word     Word to write between prefix and suffix.
 * @param string $prefix   Text placed in front of the word.
 * @param string $suffix   Text placed behind the word.
 * @return string Text with the marked word
 */
function markword($string,$position,$word,$prefix,$suffix)
{
  /* Everything in front of the word ... */
  $head = substr($string,0,$position);

  /* ... and everything behind it */
  $tail = substr($string,$position+strlen($word));

  $marked = $prefix.$word.$suffix;
  return($head.$marked.$tail);
}
309 /* Creates HTML output for a single search result entry */
/**
 * Creates the HTML output for a single search result entry.
 *
 * The entry links to "?pg=<name>&mark=1" and shows the headline, the first 120
 * characters of the tag-free content (without the headline), a progress bar and
 * the hit rate, which is the page's overall hits relative to $max.
 *
 * @param array  $entry Page data with the keys 'headline' and 'content'.
 * @param array  $res   Search result of the page, ['hits']['overall'] is used.
 * @param string $name  File name of the page.
 * @param int    $max   Highest overall hit count of all pages.
 * @return string HTML output
 */
function createResultEntry($entry,$res,$name,$max)
{
  /* Guard against a division by zero when there was no hit at all */
  $percentage = 0;
  if($max > 0) {
    $percentage = (int)(($res['hits']['overall'] / $max) * 100);
  }

  /* Remove the first occurrence of the headline from the content. This is a
     plain text operation: used as a regular expression, a headline containing
     characters like ( ) ? or " would not match or would break the pattern. */
  $headline = (string)$entry['headline'];
  if($headline !== "") {
    $found = strpos($entry['content'],$headline);
    if($found !== false) {
      $entry['content'] = substr_replace($entry['content'],"",$found,strlen($headline));
    }
  }

  /* the object tag is needed for W3c */
  $str = "<a href=\"?pg=".$name."&mark=1\" title=\"".$percentage."% ".$entry['headline']."\">
    <object>
    <table summary=\"\" width=\"98%\" align=\"center\">
      <tr>
        <td height=15>
          <b>".$entry['headline']."</b> -".htmlentities(substr(strip_tags($entry['content']),0,120))."...
        </td>
        <td width=50 valign=\"top\">".progressbar($percentage,50,8)."</td>
      </tr>
      <tr>
        <td colspan=2>
          <b>
          ".htmlentities(sprintf(_("%s%% hit rate in file %s"),$percentage,$name))."
          </b>
        </td>
      </tr>
    </table>
    </object></a>
    ";
  $str.= "<hr size=\"1\">";

  return($str);
}
348 /*Simple function to detect if we prepare to change a tag or visible text */
/**
 * Detects whether a position of a string lies inside a tag or in visible text.
 *
 * A position counts as inside a tag when, looking forward from it, a ">" is
 * reached before the next "<" (or no further "<" exists at all).
 *
 * @param string $string Text to inspect.
 * @param int    $pos    Offset to test.
 * @return bool true if the position is inside a tag
 */
function is_in_tag($string,$pos)
{
  /* An offset outside of the string cannot be inside a tag */
  if(($pos < 0)||($pos > strlen($string))) {
    return(false);
  }

  /* Without a closing bracket behind the position there is no tag to be in */
  $nextclose = strpos($string,">",$pos);
  if($nextclose === false) {
    return(false);
  }

  /* strpos() returns false when nothing is found; that has to be checked on
     its own, because comparing false with an offset gives no usable answer */
  $nextopen = strpos($string,"<",$pos);
  if($nextopen === false) {
    return(true);
  }
  return($nextopen > $nextclose);
}
/* Returns the first line of readable text; it should be the headline */
/**
 * Returns the first line of readable text, which should be the headline.
 *
 * All tags are stripped, then the first line containing at least three
 * characters (surrounding whitespace not counted) is returned unchanged.
 *
 * @param string $str Page content.
 * @return string The headline, or an empty string if no line qualifies.
 */
function getheader_from_content($str)
{
  /* explode() instead of split(): split() no longer exists since PHP 7 */
  $lines = explode("\n",strip_tags((string)$str));

  foreach($lines as $possibleheadline){
    /* Lines consisting of whitespace only are no readable text */
    if(strlen(trim($possibleheadline))>=3){
      return $possibleheadline;
    }
  }
  return "";
}
374 // vim:tabstop=2:expandtab:shiftwidth=2:filetype=php:syntax:ruler:
375 ?>