include/functions_helpviewer.inc

   1 <?php
   2 /*
   3    This code is part of GOsa (https://gosa.gonicus.de)
   4    Copyright (C) 2005, Fabian Hickert
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software
  18    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19  */
  20
  21
   22 /*******************************************
   23   Only function definitions will follow here
   24  *******************************************/
  25
  26
   27 /* Reads all files in the specified directory, with their contents and some information about each file */
   28 /* Read all files with contents */
   29 /*                 |Folder="/var/ww...",
   30                    |        |Fileprefix="node"
   31                    |        |       |Filesuffix=".html"
   32                    |        |       |       |WithoutContent=false (this means: read content)
   33                    |        |       |       |          |Singlepage=false (means: read all; to read a single page, specify its filename) */
  34 function readfiles($basedir,$prefix,$suffix,$onlyIndex,$singlepage=false)
  35 {
  36   global $replacements;
  37
  38   $str    = array();  // Temporary variable
  39   $cnt    = 0;        // Array index creation
  40   $file   = "";       // Contains Filename
  41
  42   $dir = opendir($basedir);
  43
  44   $str['global']['start']       = $cnt;     // collect basic informations - Startpage
  45   $str['global']['basedir']     = $basedir; // collect basic informations - Basedirectory
  46
  47   /* Startime for Benchmark */
  48   $start =   (time()+microtime());
  49
  50   /* if singlepage == false -> Get all pages, */
  51   if(!$singlepage) {
  52
  53     /* While theres is an unreaded file in our resource */
  54     while (($file = readdir($dir)) !== false) {
  55
  56       /* Filter all files which arn't intressting */
  57       if((strstr($file,$suffix))&&($file!=".")&&($file!="..")&&(strstr($file,$prefix))){
  58
  59         /* Collect informations */
  60         $str[$file]=array();
  61         $str[$file]['name']   = $file;
  62         $str[$file]['size']   = filesize($basedir.$file);
  63
  64         /* Readfile conent too ? */
  65         if(!$onlyIndex){
  66           $str[$file]['content']  = remove_unwanted_tags(linkwrapper(getcontents($basedir.$file),""),$replacements);
  67           $str[$file]['headline'] = getheader_from_content($str[$file]['content']);
  68         }
  69
  70         /* Include file status, for debugging, not used in script yet */
  71         $str[$file]['stat']   = stat($basedir.$file);
  72         $cnt++;
  73       }
  74     }
  75
  76     /* Only get on file*/
  77   }else{
  78     /* Pages read = 1 */
  79     $cnt = 1;
  80
  81     /* Prepare result*/
  82     $file                 = $singlepage;
  83     $str[$file]           = array();
  84     $str[$file]['name']   = $file;
  85     $str[$file]['size']   = filesize($basedir.$file);
  86
  87     /* If onlyIndex == true skip reading content */
  88     if(!$onlyIndex){
  89       $str[$file]['content']  = remove_unwanted_tags(linkwrapper(getcontents($basedir.$file),""),$replacements);
  90       $str[$file]['headline'] = getheader_from_content($str[$file]['content']);
  91     }
  92
  93     /* Include file status, for debugging, not used in script yet */
  94     $str[$file]['stat']   = stat($basedir.$file);
  95   }
  96
  97   /* Sort to  right order */
  98   asort($str);
  99
 100   /* Endtime for Benchmark*/
 101   $end = (time()+microtime());
 102   $str['global']['cmptime'] = $end-$start;
 103
 104   /* Number of pages readed */
 105   $str['global']['numpages']= $cnt;
 106   closedir($dir);
 107   return($str);
 108 }
 109
 110
 111 /* Read filecontent */
 112 function getcontents($file)
 113 {
 114   $str = "" ;   // Temporary variable for file contents
 115   $tmp = "" ;   // Temporary varibale for partitial file contents
 116
 117   /* open file and read*/
 118   $fp = fopen($file,"r");
 119   if($fp) {
 120     while($tmp = fread($fp,512))
 121     {
 122       $str.=  $tmp;
 123     }
 124   }else{
 125     return(false);
 126   }
 127   return($str);
 128 }
 129
 130
 131 /*Remove tags */
 132 function remove_unwanted_tags($str,$replacements)
 133 {
 134   $str=preg_replace($replacements['from'],$replacements['to'],$str);
 135   return($str);
 136 }
 137
 138
  139 /* Converts all links to the specified path; this is needed to get simple navigation */
 140 function linkwrapper($str,$link)
 141 {
 142   $str = preg_replace("/HREF=\"http/i","target=\"_blank\" href=\"http",$str);
 143   $str = preg_replace("/HREF=\"/","href=\"".$link."?pg=",$str);
 144 //  $str=str_replace("HREF=\"","href=\"".$link."?pg=",$str);
 145   return($str);
 146 }
 147
 148
 149 /* Search content */
 150 function search($arr,$word)
 151 {
 152   global $minwordlength,$allowed_chars_in_searchword;
 153   /* Prepare Vars */
 154   $result                     =array(); // Search result, filename, + hits + hits per word + matches
 155   $words                      =array(); // Temporary searchword handling
 156   $useablewords               =array(); // Temporary searchword handling
 157   $tryword                    = "";     // Temporary searchword handling
 158   $result['global']['maxhit'] = 0;
 159   unset($_SESSION['lastresults']);
 160   unset($_SESSION['parsed_search_keyword']);
 161   $_SESSION['parsed_search_keyword']="";
 162
 163   /* prepare searchwords */
 164   $word   = trim($word);
 165
 166   /* Filter all unusable chars */
 167   $word   = preg_replace($allowed_chars_in_searchword,"",$word);
 168   $words  = split(" ",str_replace("+"," ",$word));
 169
 170   /* Check all wordlengths */
 171   foreach($words as $tryword){
 172     $tryword = trim($tryword);
 173
 174     /* Filter words smaler than 3 chars */
 175     if(strlen($tryword)>=$minwordlength) {
 176       $_SESSION['parsed_search_keyword'].=$tryword." ";
 177       $useablewords[]=$tryword;
 178     }
 179   }
 180
 181   /* Use words to search the content */
 182   foreach($arr as $key=>$val)
 183   {
 184     /* overallhits counts hits per page */
 185     $overallhits=0;
 186
 187     /* Search all words */
 188     foreach($useablewords as $word)
 189     {
 190       /* Skip key global, it contains no file data - it is a summary info*/
 191       if($key!="global")
 192       {
 193         /* Get all hits for the word in $matches*/
 194         preg_match_all("/".$word."/i",$arr[$key]['content'], $matches,PREG_OFFSET_CAPTURE);
 195
 196         /* Filter in Tag results*/
 197         if(count($matches[0])){
 198           foreach($matches[0] as $num=>$hit){
 199             if(is_in_tag($arr[$key]['content'],$hit[1]))  {
 200               unset($matches[0][$num]);
 201             }
 202           }
 203         }
 204
 205         /* Count matches */
 206         $overallhits=$overallhits + count($matches[0]);
 207
 208         /* Save collected data */
 209         $result[$key]['hits'][$word]    = count($matches[0]);
 210         $result[$key]['hits']['overall']= $overallhits;
 211
 212         /* Save max hits for page */
 213         if($overallhits > $result['global']['maxhit']){
 214           $result['global']['maxhit']=$overallhits;
 215         }
 216
 217         /* Add results for word to return value*/
 218         $result[$key]['match'][$word]=array();
 219         $result[$key]['match'][$word]=$matches[0];
 220       }
 221     }
 222   }
 223
 224   /* Save result in Session, so we can mark words later, or go back to search, without searching again*/
 225   $_SESSION['lastresults'] = $result;
 226   return($result);
 227 }
 228
 229
  230 /* Detect the best result entries (up to $maxresults), sort them and call createResultEntry to create the HTML output for the complete list */
 231 function searchlist($arr,$res,$maxresults)
 232 {
 233   $global = $res['global'];
 234   $topten = array();        // To detect 10 best solutions
 235   $ret    = "";             // return value
 236   unset($res['global']);
 237
 238   /* Detect 10 best Sites */
 239   foreach($res as $key=>$val){
 240
 241     /* Skip results with no hits */
 242     if($val['hits']['overall']>0){
 243       $topten[$key] = $val['hits']['overall'];
 244     }
 245   }
 246
 247   /* Sort by hit position in content, to easier mark words */
 248   asort($topten);
 249   $topten = array_reverse($topten);
 250   $topten = (array_slice($topten,0,$maxresults));
 251
 252   /* We have a result, an array with all content, an array with hits and position and we have the 10 best hits */
 253   /* Foreach */
 254   foreach($topten as $name => $hits)  {
 255     $ret.= createResultEntry($arr[$name],$res[$name],$name,$global['maxhit']);
 256   }
 257
 258   /* appending footer message for resultlist */
 259   $ret.= "<br>
 260               ".sprintf(_("%s results for your search with the keyword %s interpreted as %s"),
 261                             "<b>".count($topten)."</b>",
 262                             "<b>".($_SESSION['search_string'])."</b>",
 263                             "<b>".$_SESSION['parsed_search_keyword']."</b>");
 264   $ret.="<br>
 265         <br>";
 266   return($ret);
 267 }
 268
 269
 270 /* This function marks a string with the given search result for this string*/
 271 function markup_page($arr,$res)
 272 {
 273   global $pre_mark,$suf_mark;
 274
 275   $ret    = "";             // return value
 276   $repl   = array();
 277   $posadd = 0;
 278
 279   foreach($res['match'] as $word => $matches)   {
 280     foreach($matches as $matchnr=>$match)   {
 281       $repl[$match[1]]=$match[0];
 282     }
 283   }
 284
 285   ksort($repl);
 286
 287   foreach($repl as $position=>$word)  {
 288     $pos1 = strlen($arr);
 289     $arr= markword($arr,($position+$posadd),$word,$pre_mark,$suf_mark);
 290     $pos2 = strlen($arr);
 291     $posadd =$posadd + ($pos2 - $pos1);
 292   }
 293   return($arr);
 294 }
 295
 296
 297 /* This function marks a single word with the specified prefix and suffix */
 298 function markword($string,$position,$word,$prefix,$suffix)
 299 {
 300   $wordlength   = strlen($word);
 301   $wholelength  = strlen($string);
 302
 303   $first = substr($string,0,$position);
 304   $last  = substr($string,($position+$wordlength),$wholelength);
 305
 306   return($first.$prefix.$word.$suffix.$last);
 307 }
 308
 309 /* Creates HTML output for a single search result entry */
 310 function createResultEntry($entry,$res,$name,$max)
 311 {
 312   $percentage = (int)(($res['hits']['overall'] / $max) * 100) ;
 313   $color  = dechex($percentage+150);
 314   $color2 = dechex(150 - $percentage);
 315
 316   $entry['content'] = preg_replace("\"".$entry['headline']."\"","",$entry['content'],1);
 317
 318   if(strlen($color)==1) $color = "0".$color;
 319
 320
 321
 322   /* the object tag is needed for W3c */
 323   $str =  "<a href=\"?pg=".$name."&amp;mark=1\" title=\"".$percentage."% ".$entry['headline']."\">
 324           <object>
 325           <table summary=\"\"  width=\"98%\" align=\"center\">
 326             <tr>
 327               <td height=15>
 328                 <b>".$entry['headline']."</b> -".htmlentities( substr(strip_tags($entry['content']),0,120))."...
 329               </td>
 330               <td width=50 valign=\"top\">".progressbar($percentage,50,8)."</td>
 331              </tr>
 332              <tr>
 333               <td colspan=2>
 334                 <b>
 335                   ".htmlentities(sprintf(_("%s%% hit rate in file %s"),$percentage,$name))."
 336                 </b>
 337               </td>
 338             </tr>
 339           </table>
 340           </object></a>
 341           ";
 342   $str.=  "<hr size=\"1\">";
 343
 344   return($str);
 345 }
 346
 347
 348 /*Simple function to detect if we prepare to change a tag or visible text */
 349 function is_in_tag($string,$pos)
 350 {
 351   $pos1 = strpos($string,"<",$pos);
 352   $pos2 = strpos($string,">",$pos);
 353
 354   if ($pos1 > $pos2)  {
 355     return(true);
 356   }else{
 357     return(false);
 358   }
 359 }
 360
  361 /* Returns the first line of readable text, it should be the headline */
 362 function getheader_from_content($str)
 363 {
 364   $str = strip_tags($str);
 365   $pos = 0;
 366   $arr = split("\n",$str);
 367   foreach($arr as $possibleheadline){
 368     if(strlen($possibleheadline)>=3){
 369       return $possibleheadline;
 370     }
 371   }
 372 }
 373
 374 // vim:tabstop=2:expandtab:shiftwidth=2:filetype=php:syntax:ruler:
 375 ?>