96f990f4fec50d00bbbdbdc3b65f00ed2c3f31a1
1 <?php
2 /*
3 This code is part of GOsa (https://gosa.gonicus.de)
4 Copyright (C) 2003 Cajus Pollmeier, Fabian Hickert
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
/*******************************************
Only function definitions will follow here
*******************************************/
/**
 * Collects information about help pages below a directory and, optionally,
 * their processed contents.
 *
 * The returned array is keyed by file name. Every file entry contains
 *   'name'    - the file name,
 *   'size'    - result of filesize(),
 *   'content' - file contents passed through linkwrapper() and
 *               remove_unwanted_tags() (only when $onlyIndex is false),
 *   'stat'    - result of stat(), kept for debugging.
 * The additional key 'global' holds 'start' (always 0), 'basedir',
 * 'cmptime' (seconds spent in this function) and 'numpages' (number of
 * file entries collected).
 *
 * @param string       $basedir    Directory to read. It is concatenated with the
 *                                 file name as is, so it has to end with a
 *                                 directory separator.
 * @param string       $prefix     Text that must occur in the file name. This is
 *                                 a substring test, it is not anchored to the
 *                                 beginning. An empty value disables the filter.
 * @param string       $suffix     Text that must occur in the file name (also a
 *                                 substring test). An empty value disables it.
 * @param bool         $onlyIndex  true: skip reading the file contents.
 * @param string|false $singlepage false: collect every matching file of the
 *                                 directory, otherwise the name of the single
 *                                 file to collect (no filtering is applied).
 * @return array
 */
function readfiles($basedir,$prefix,$suffix,$onlyIndex,$singlepage=false)
{
  global $replacements;

  $str   = array();   // Result
  $files = array();   // Names of the files that will be collected

  $str['global']['start']   = 0;          // collect basic informations - Startpage
  $str['global']['basedir'] = $basedir;   // collect basic informations - Basedirectory

  /* Start time for benchmark. microtime(true) returns the seconds as float;
     the plain string form "msec sec" must not be used in arithmetic. */
  $start = microtime(true);

  if(!$singlepage) {

    /* Only touch the directory when we really have to list it and tolerate
       a missing/unreadable directory by returning an empty page list */
    $dir = is_dir($basedir) ? opendir($basedir) : false;
    if($dir !== false) {
      while (($file = readdir($dir)) !== false) {

        /* Filter all files which aren't interesting */
        if(($file === ".") || ($file === "..")) {
          continue;
        }
        if(((string)$suffix !== "") && (strpos($file,(string)$suffix) === false)) {
          continue;
        }
        if(((string)$prefix !== "") && (strpos($file,(string)$prefix) === false)) {
          continue;
        }
        $files[] = $file;
      }
      closedir($dir);
    }
  }else{
    /* NOTE(review): $singlepage is appended to $basedir unchanged. If callers
       pass request data here, they have to validate it - TODO confirm. */
    $files[] = $singlepage;
  }

  /* Collect informations for every selected file */
  foreach($files as $file) {
    $path  = $basedir.$file;
    $entry = array();
    $entry['name'] = $file;
    $entry['size'] = filesize($path);

    /* Read file content too ? */
    if(!$onlyIndex){
      $entry['content'] = remove_unwanted_tags(linkwrapper(getcontents($path),""),$replacements);
    }

    /* Include file status, for debugging */
    $entry['stat'] = stat($path);
    $str[$file]    = $entry;
  }

  /* Sort to right order (done before the remaining summary values are added) */
  asort($str);

  /* End time for benchmark */
  $str['global']['cmptime']  = microtime(true) - $start;

  /* Number of pages read */
  $str['global']['numpages'] = count($files);
  return($str);
}
/**
 * Reads the complete contents of a file.
 *
 * file_get_contents() is used instead of a manual fopen()/fread() loop: it
 * releases the file handle again and it does not stop early when a chunk
 * happens to be the string "0" (which PHP treats as false).
 *
 * @param string $file Path of the file to read.
 * @return string|false The file contents, or false if the file could not be opened.
 */
function getcontents($file)
{
  $str = file_get_contents($file);
  if($str === false) {
    return(false);
  }
  return($str);
}
/**
 * Applies the configured regular expression replacements to a text.
 *
 * @param string $str          Text to clean up.
 * @param array  $replacements Array with the keys 'from' (pattern or list of
 *                             patterns) and 'to' (replacement or list of
 *                             replacements); both are handed to preg_replace().
 * @return string|null Result of preg_replace().
 */
function remove_unwanted_tags($str,$replacements)
{
  $patterns    = $replacements['from'];
  $substitutes = $replacements['to'];

  return(preg_replace($patterns,$substitutes,$str));
}
/**
 * Rewrites links so that they point to $link with the original target passed
 * in the "pg" parameter. Only attributes written exactly as HREF=" (upper
 * case) are changed; they become href="<link>?pg=...
 *
 * @param string $str  Text containing the links.
 * @param string $link Script the links should point to.
 * @return string Text with rewritten links.
 */
function linkwrapper($str,$link)
{
  $needle      = "HREF=\"";
  $replacement = "href=\"".$link."?pg=";

  return(str_replace($needle,$replacement,$str));
}
/**
 * Searches the contents of all pages for the words of a search string.
 *
 * The search string is trimmed, filtered with the pattern in the global
 * $allowed_chars_in_searchword (every match of that pattern is removed) and
 * split at blanks and "+". Words shorter than the global $minwordlength are
 * ignored. Matching is case insensitive; hits for which is_in_tag() reports
 * true are dropped.
 *
 * Side effects: $_SESSION['parsed_search_keyword'] receives the words that
 * were really used (each followed by a blank) and $_SESSION['lastresults']
 * receives the returned array.
 *
 * Result layout:
 *   ['global']['maxhit']        highest number of hits found on a single page
 *   [page]['hits'][word]        number of hits of the word on that page
 *   [page]['hits']['overall']   number of hits of all words on that page
 *   [page]['match'][word]       hits as array(matched text, offset), see
 *                               PREG_OFFSET_CAPTURE
 *
 * @param array  $arr  Pages as returned by readfiles(); the key 'global' is skipped.
 * @param string $word Search string as entered by the user.
 * @return array
 */
function search($arr,$word)
{
  global $minwordlength,$allowed_chars_in_searchword;

  /* Prepare Vars */
  $result       = array();   // Search result, filename, + hits + hits per word + matches
  $useablewords = array();   // Words that are long enough to be searched for
  $result['global']['maxhit'] = 0;

  unset($_SESSION['lastresults']);

  /* Start with an empty string, appending to an unset entry raises a notice
     and leaves the entry undefined when no word is usable */
  $_SESSION['parsed_search_keyword'] = "";

  /* Prepare searchwords */
  $word = trim((string)$word);

  /* Filter all unusable chars */
  if(!empty($allowed_chars_in_searchword)) {
    $word = (string)preg_replace($allowed_chars_in_searchword,"",$word);
  }

  /* split() does not exist anymore in PHP 7, explode() does the same for a
     plain blank */
  $words = explode(" ",str_replace("+"," ",$word));

  /* Check all wordlengths */
  foreach($words as $tryword){
    $tryword = trim($tryword);

    /* Filter empty words and words smaller than the configured minimum */
    if(($tryword !== "") && (strlen($tryword) >= $minwordlength)) {
      $_SESSION['parsed_search_keyword'].= $tryword." ";
      $useablewords[] = $tryword;
    }
  }

  /* Use words to search the content */
  foreach($arr as $key=>$val)
  {
    /* Skip key global, it contains no file data - it is a summary info.
       Compared as string, so a numeric page name can't match by accident */
    if((string)$key === "global") {
      continue;
    }

    /* Pages collected without content simply produce no hits */
    $content = (is_array($val) && isset($val['content'])) ? (string)$val['content'] : "";

    /* overallhits counts hits per page */
    $overallhits = 0;

    /* Search all words */
    foreach($useablewords as $needle)
    {
      /* Get all hits for the word in $matches. The word is quoted, it has
         to be taken literally and must not break the pattern */
      $matches = array();
      preg_match_all("/".preg_quote($needle,"/")."/i",$content,$matches,PREG_OFFSET_CAPTURE);
      if(!isset($matches[0]) || !is_array($matches[0])) {
        $matches[0] = array();
      }

      /* Filter in Tag results */
      foreach($matches[0] as $num=>$hit){
        if(is_in_tag($content,$hit[1])) {
          unset($matches[0][$num]);
        }
      }

      /* Count matches */
      $overallhits = $overallhits + count($matches[0]);

      /* Save collected data */
      $result[$key]['hits'][$needle]   = count($matches[0]);
      $result[$key]['hits']['overall'] = $overallhits;

      /* Save max hits for page */
      if($overallhits > $result['global']['maxhit']){
        $result['global']['maxhit'] = $overallhits;
      }

      /* Add results for word to return value */
      $result[$key]['match'][$needle] = $matches[0];
    }
  }

  /* Save result in Session, so we can mark words later, or go back to search, without searching again */
  $_SESSION['lastresults'] = $result;
  return($result);
}
/**
 * Creates the HTML result list for a search.
 *
 * Pages without hits are dropped, the remaining pages are ordered by their
 * number of hits (highest first) and cut down to $maxresults entries. Every
 * entry is rendered by createResultEntry(); a footer line with the number of
 * listed pages, $_SESSION['search_string'] and
 * $_SESSION['parsed_search_keyword'] is appended.
 *
 * @param array $arr        Pages as returned by readfiles().
 * @param array $res        Search result as returned by search().
 * @param int   $maxresults Maximum number of entries to list.
 * @return string HTML code of the result list.
 */
function searchlist($arr,$res,$maxresults)
{
  $global = isset($res['global']) ? $res['global'] : array('maxhit' => 0);
  $topten = array();   // page name => overall hits of the best pages
  $ret    = "";        // return value
  unset($res['global']);

  /* Detect best sites */
  foreach($res as $key=>$val){

    /* Skip results with no hits */
    if(isset($val['hits']['overall']) && ($val['hits']['overall'] > 0)){
      $topten[$key] = $val['hits']['overall'];
    }
  }

  /* Sort by number of hits, best page first. The keys are the page names,
     so they have to be preserved; without the flag numeric page names would
     be renumbered by array_reverse()/array_slice() */
  asort($topten);
  $topten = array_reverse($topten,true);
  $topten = array_slice($topten,0,$maxresults,true);

  /* We have a result, an array with all content, an array with hits and position and we have the best hits */
  foreach($topten as $name => $hits) {
    $entry = isset($arr[$name]) ? $arr[$name] : array('content' => "");
    $ret  .= createResultEntry($entry,$res[$name],$name,$global['maxhit']);
  }

  /* Appending footer message for resultlist. Both strings originate from the
     user, so both of them are escaped */
  $searchstring = isset($_SESSION['search_string'])         ? $_SESSION['search_string']         : "";
  $parsed       = isset($_SESSION['parsed_search_keyword']) ? $_SESSION['parsed_search_keyword'] : "";
  $ret.= "<br> ".count($topten)." - "._("Results for your search with the keyword")." <b>".htmlentities($searchstring)."</b>"._(" interpreted as ")."<b>".htmlentities($parsed)."</b>";

  return($ret);
}
/**
 * Marks all search hits of a page.
 *
 * Every hit gets the global $pre_mark in front of it and the global
 * $suf_mark behind it (see markword()).
 *
 * Hits are processed in the order of their position. A hit that starts
 * inside a hit which was already marked is skipped; marking it as well would
 * use an outdated position and damage the text. If several hits start at the
 * same position, the longest one is used.
 *
 * @param string $arr Content of the page.
 * @param array  $res Search result of this page; $res['match'][word] is a
 *                    list of array(matched text, offset).
 * @return string The page content with marked hits. The content is returned
 *                unchanged if $res contains no matches.
 */
function markup_page($arr,$res)
{
  global $pre_mark,$suf_mark;

  /* Nothing found on this page */
  if(!isset($res['match']) || !is_array($res['match'])) {
    return($arr);
  }

  /* Collect position => matched text for all words */
  $repl = array();
  foreach($res['match'] as $matches) {
    if(!is_array($matches)) {
      continue;
    }
    foreach($matches as $match) {
      $position = $match[1];
      if(!isset($repl[$position]) || (strlen($match[0]) > strlen($repl[$position]))) {
        $repl[$position] = $match[0];
      }
    }
  }

  ksort($repl);

  $posadd      = 0;   // Number of chars inserted so far, shifts all following positions
  $markeduntil = 0;   // First position (in the unmarked text) behind the last marked hit

  foreach($repl as $position=>$word) {

    /* Skip hits overlapping the previous one */
    if($position < $markeduntil) {
      continue;
    }

    $before      = strlen($arr);
    $arr         = markword($arr,($position+$posadd),$word,$pre_mark,$suf_mark);
    $posadd      = $posadd + (strlen($arr) - $before);
    $markeduntil = $position + strlen($word);
  }
  return($arr);
}
/**
 * Wraps the word found at a given position of a string into a prefix and a
 * suffix.
 *
 * The string is cut at $position, strlen($word) characters are left out and
 * replaced by $prefix.$word.$suffix.
 *
 * @param string $string   Text to change.
 * @param int    $position Offset of the word within $string.
 * @param string $word     Word to insert between prefix and suffix.
 * @param string $prefix   Text put in front of the word.
 * @param string $suffix   Text put behind the word.
 * @return string The changed text.
 */
function markword($string,$position,$word,$prefix,$suffix)
{
  $head = substr($string,0,$position);
  $tail = substr($string,($position+strlen($word)),strlen($string));

  return(implode("",array($head,$prefix,$word,$suffix,$tail)));
}
/**
 * Creates the HTML output for a single search result entry: a link to the
 * page showing the hit rate, followed by the first 200 bytes of the page
 * content without tags and a horizontal rule.
 *
 * @param array  $entry Page as returned by readfiles(); 'content' is used.
 * @param array  $res   Search result of the page; ['hits']['overall'] is used.
 * @param string $name  File name of the page.
 * @param int    $max   Highest number of hits of a single page; the hit rate
 *                      is calculated relative to this value.
 * @return string HTML code of the entry.
 */
function createResultEntry($entry,$res,$name,$max)
{
  $hits    = isset($res['hits']['overall']) ? $res['hits']['overall'] : 0;
  $content = isset($entry['content'])       ? (string)$entry['content'] : "";

  /* Without a maximum there is nothing to compare with (and no division by
     zero). Multiply first: (29/100)*100 is 28.99... and would be cut to 28 */
  if($max > 0) {
    $percentage = (int)(($hits * 100) / $max);
  }else{
    $percentage = 0;
  }

  /* The name is encoded for use in the query string and for display */
  $str = "<b><a href=\"?pg=".urlencode($name)."&mark=1\">".$percentage."% "._("hit rate in following file ").htmlentities($name)."</a></b><br>" ;
  $str.= substr(strip_tags($content),0,200);
  $str.= "<hr>";

  return($str);
}
/**
 * Detects whether a position of a string lies inside a tag or in visible
 * text.
 *
 * The decision is made by looking at the next "<" and the next ">" found at
 * or behind $pos: the position is inside a tag if a ">" follows before the
 * next "<" does.
 *
 * strpos() returns false if a character is not found. These cases are
 * handled explicitly, because false would otherwise be compared like a
 * number and give wrong answers.
 *
 * @param string $string Text to inspect.
 * @param int    $pos    Offset within $string.
 * @return bool true if the position is inside a tag, false otherwise (also
 *              for offsets outside of the string).
 */
function is_in_tag($string,$pos)
{
  /* strpos() does not accept offsets outside of the string */
  if(($pos < 0) || ($pos > strlen($string))) {
    return(false);
  }

  $open  = strpos($string,"<",$pos);
  $close = strpos($string,">",$pos);

  /* No closing bracket behind us, there is no tag we could be in */
  if($close === false) {
    return(false);
  }

  /* A closing bracket, but no opening one before it: we are in the last tag */
  if($open === false) {
    return(true);
  }

  return($open > $close);
}
316 // vim:tabstop=2:expandtab:shiftwidth=2:filetype=php:syntax:ruler:
317 ?>