include/functions_helpviewer.inc

   1 <?php
   2 /*
   3    This code is part of GOsa (https://gosa.gonicus.de)
   4    Copyright (C) 2005, Fabian Hickert
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software
  18    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19  */
  20
  21
/*******************************************
  Only definitions will follow here
 *******************************************/

/* Define which tags must be deleted or rewritten: header, navigation, banner */
  27 $replacements=array();
  28 $replacements['from']=array("@<!DOC.*<BODY >@si",
  29     "/border=\".*\"/i",
  30     "'<code.*code>'",
  31 //    "/alt=\".*\"/i",
  32     "/<HR>/",
  33     "@<ADDRESS[^>]*?>.*?ADDRESS>@si",
  34     "@<\/BODY[^>]*?>.*?HTML>@si",
  35     "'<TABLE.*>'",
  36     "/src.*icons/i",
  37     "/src=\"/i",
  38     "/<H1 ALIGN=\"CENTER\">/",
  39  /* picture replacements */
  40  //  "",
  41     );
  42
  43 $replacements['to']=array("",
  44     " border=\"0\" ",
  45     "",
  46   //  "",
  47     "",
  48     "",
  49     "",
  50     "<table border=1 cellspacing=0 bgcolor=\"#E0E0E0\" width=\"95%\" align=\"center\" cellpadding=\"3\" summary=\"\">",
  51     "src=\"",
  52     "src=\"images/",
  53     "<H1>",
  54  /* picture replacements */
  55 //    "",
  56   );
  57
  58
  59 /* Reads all files in specified directory with contents an some inforations about the file */
  60 /* Read all files with contents*/
  61 /*                 |Folder="/var/ww...",
  62                    |        |Fileprefix="node"
  63                    |        |       |Filesuffix=".html"
  64                    |        |       |       |WithoutContent=false(This means : read content)
  65                    |        |       |       |          |Singlepage=false(Means read all, if w want to read single, specify its filename)"*/
  66 function readfiles($basedir,$prefix,$suffix,$onlyIndex,$singlepage=false)
  67 {
  68   global $replacements;
  69
  70   $str    = array();  // Temporary variable
  71   $cnt    = 0;        // Array index creation
  72   $file   = "";       // Contains Filename
  73
  74   $dir = opendir($basedir);
  75
  76   $str['global']['start']       = $cnt;     // collect basic informations - Startpage
  77   $str['global']['basedir']     = $basedir; // collect basic informations - Basedirectory
  78
  79   /* Startime for Benchmark */
  80   $start =   (time()+microtime());
  81
  82   /* if singlepage == false -> Get all pages, */
  83   if(!$singlepage) {
  84
  85     /* While theres is an unreaded file in our resource */
  86     while (($file = readdir($dir)) !== false) {
  87
  88       /* Filter all files which arn't intressting */
  89       if((strstr($file,$suffix))&&($file!=".")&&($file!="..")&&(strstr($file,$prefix))){
  90
  91         /* Collect informations */
  92         $str[$file]=array();
  93         $str[$file]['name']   = $file;
  94         $str[$file]['size']   = filesize($basedir.$file);
  95
  96         /* Readfile conent too ? */
  97         if(!$onlyIndex){
  98           $str[$file]['content']  = remove_unwanted_tags(linkwrapper(getcontents($basedir.$file),""),$replacements);
  99           $str[$file]['headline'] = getheader_from_content($str[$file]['content']);
 100         }
 101
 102         /* Include file status, for debugging, not used in script yet */
 103         $str[$file]['stat']   = stat($basedir.$file);
 104         $cnt++;
 105       }
 106     }
 107
 108     /* Only get on file*/
 109   }else{
 110     /* Pages read = 1 */
 111     $cnt = 1;
 112
 113     /* Prepare result*/
 114     $file                 = $singlepage;
 115     $str[$file]           = array();
 116     $str[$file]['name']   = $file;
 117     $str[$file]['size']   = filesize($basedir.$file);
 118
 119     /* If onlyIndex == true skip reading content */
 120     if(!$onlyIndex){
 121       $str[$file]['content']  = remove_unwanted_tags(linkwrapper(getcontents($basedir.$file),""),$replacements);
 122       $str[$file]['headline'] = getheader_from_content($str[$file]['content']);
 123     }
 124
 125     /* Include file status, for debugging, not used in script yet */
 126     $str[$file]['stat']   = stat($basedir.$file);
 127   }
 128
 129   /* Sort to  right order */
 130   asort($str);
 131
 132   /* Endtime for Benchmark*/
 133   $end = (time()+microtime());
 134   $str['global']['cmptime'] = $end-$start;
 135
 136   /* Number of pages readed */
 137   $str['global']['numpages']= $cnt;
 138   closedir($dir);
 139   return($str);
 140 }
 141
 142
 143 /* Read filecontent */
 144 function getcontents($file)
 145 {
 146   $str = "" ;   // Temporary variable for file contents
 147   $tmp = "" ;   // Temporary varibale for partitial file contents
 148
 149   /* open file and read*/
 150   $fp = fopen($file,"r");
 151   if($fp) {
 152     while($tmp = fread($fp,512))
 153     {
 154       $str.=  $tmp;
 155     }
 156   }else{
 157     return(false);
 158   }
 159   return($str);
 160 }
 161
 162
 163 /*Remove tags */
 164 function remove_unwanted_tags($str,$replacements)
 165 {
 166   $str=preg_replace($replacements['from'],$replacements['to'],$str);
 167   return($str);
 168 }
 169
 170
 171 /*Converts the all links to specified path, is needed to get simple navigation */
 172 function linkwrapper($str,$link)
 173 {
 174   $str = preg_replace("/HREF=\"http/i","target=\"_blank\" href=\"http",$str);
 175   $str = preg_replace("/HREF=\"/","href=\"".$link."?pg=",$str);
 176 //  $str=str_replace("HREF=\"","href=\"".$link."?pg=",$str);
 177   return($str);
 178 }
 179
 180
 181 /* Search content */
 182 function search($arr,$word)
 183 {
 184   global $minwordlength,$allowed_chars_in_searchword;
 185   /* Prepare Vars */
 186   $result                     =array(); // Search result, filename, + hits + hits per word + matches
 187   $words                      =array(); // Temporary searchword handling
 188   $useablewords               =array(); // Temporary searchword handling
 189   $tryword                    = "";     // Temporary searchword handling
 190   $result['global']['maxhit'] = 0;
 191   unset($_SESSION['lastresults']);
 192   unset($_SESSION['parsed_search_keyword']);
 193   $_SESSION['parsed_search_keyword']="";
 194
 195   /* prepare searchwords */
 196   $word   = trim($word);
 197
 198   /* Filter all unusable chars */
 199   $word   = preg_replace($allowed_chars_in_searchword,"",$word);
 200   $words  = split(" ",str_replace("+"," ",$word));
 201
 202   /* Check all wordlengths */
 203   foreach($words as $tryword){
 204     $tryword = trim($tryword);
 205
 206     /* Filter words smaler than 3 chars */
 207     if(strlen($tryword)>=$minwordlength) {
 208       $_SESSION['parsed_search_keyword'].=$tryword." ";
 209       $useablewords[]=$tryword;
 210     }
 211   }
 212
 213   /* Use words to search the content */
 214   foreach($arr as $key=>$val)
 215   {
 216     /* overallhits counts hits per page */
 217     $overallhits=0;
 218
 219     /* Search all words */
 220     foreach($useablewords as $word)
 221     {
 222       /* Skip key global, it contains no file data - it is a summary info*/
 223       if($key!="global")
 224       {
 225         /* Get all hits for the word in $matches*/
 226         preg_match_all("/".$word."/i",$arr[$key]['content'], $matches,PREG_OFFSET_CAPTURE);
 227
 228         /* Filter in Tag results*/
 229         if(count($matches[0])){
 230           foreach($matches[0] as $num=>$hit){
 231             if(is_in_tag($arr[$key]['content'],$hit[1]))  {
 232               unset($matches[0][$num]);
 233             }
 234           }
 235         }
 236
 237         /* Count matches */
 238         $overallhits=$overallhits + count($matches[0]);
 239
 240         /* Save collected data */
 241         $result[$key]['hits'][$word]    = count($matches[0]);
 242         $result[$key]['hits']['overall']= $overallhits;
 243
 244         /* Save max hits for page */
 245         if($overallhits > $result['global']['maxhit']){
 246           $result['global']['maxhit']=$overallhits;
 247         }
 248
 249         /* Add results for word to return value*/
 250         $result[$key]['match'][$word]=array();
 251         $result[$key]['match'][$word]=$matches[0];
 252       }
 253     }
 254   }
 255
 256   /* Save result in Session, so we can mark words later, or go back to search, without searching again*/
 257   $_SESSION['lastresults'] = $result;
 258   return($result);
 259 }
 260
 261
 262 /* Detect 10 Best result entries, sort and call createResultEntry to create HTML output for  complete list */
 263 function searchlist($arr,$res,$maxresults)
 264 {
 265   $global = $res['global'];
 266   $topten = array();        // To detect 10 best solutions
 267   $ret    = "";             // return value
 268   unset($res['global']);
 269
 270   /* Detect 10 best Sites */
 271   foreach($res as $key=>$val){
 272
 273     /* Skip results with no hits */
 274     if($val['hits']['overall']>0){
 275       $topten[$key] = $val['hits']['overall'];
 276     }
 277   }
 278
 279   /* Sort by hit position in content, to easier mark words */
 280   asort($topten);
 281   $topten = array_reverse($topten);
 282   $topten = (array_slice($topten,0,$maxresults));
 283
 284   /* We have a result, an array with all content, an array with hits and position and we have the 10 best hits */
 285   /* Foreach */
 286   foreach($topten as $name => $hits)  {
 287     $ret.= createResultEntry($arr[$name],$res[$name],$name,$global['maxhit']);
 288   }
 289
 290   /* appending footer message for resultlist */
 291   $ret.= "<br>
 292               ".sprintf(_("%s results for your search with the keyword %s interpreted as %s"),
 293                             "<b>".count($topten)."</b>",
 294                             "<b>".($_SESSION['search_string'])."</b>",
 295                             "<b>".$_SESSION['parsed_search_keyword']."</b>");
 296   $ret.="<br>
 297         <br>";
 298   return($ret);
 299 }
 300
 301
 302 /* This function marks a string with the given search result for this string*/
 303 function markup_page($arr,$res)
 304 {
 305   global $pre_mark,$suf_mark;
 306
 307   $ret    = "";             // return value
 308   $repl   = array();
 309   $posadd = 0;
 310
 311   foreach($res['match'] as $word => $matches)   {
 312     foreach($matches as $matchnr=>$match)   {
 313       $repl[$match[1]]=$match[0];
 314     }
 315   }
 316
 317   ksort($repl);
 318
 319   foreach($repl as $position=>$word)  {
 320     $pos1 = strlen($arr);
 321     $arr= markword($arr,($position+$posadd),$word,$pre_mark,$suf_mark);
 322     $pos2 = strlen($arr);
 323     $posadd =$posadd + ($pos2 - $pos1);
 324   }
 325   return($arr);
 326 }
 327
 328
 329 /* This function marks a single word with the specified prefix and suffix */
 330 function markword($string,$position,$word,$prefix,$suffix)
 331 {
 332   $wordlength   = strlen($word);
 333   $wholelength  = strlen($string);
 334
 335   $first = substr($string,0,$position);
 336   $last  = substr($string,($position+$wordlength),$wholelength);
 337
 338   return($first.$prefix.$word.$suffix.$last);
 339 }
 340
 341 /* Creates HTML output for a single search result entry */
 342 function createResultEntry($entry,$res,$name,$max)
 343 {
 344   $percentage = (int)(($res['hits']['overall'] / $max) * 100) ;
 345   $color  = dechex($percentage+150);
 346   $color2 = dechex(150 - $percentage);
 347
 348   $entry['content'] = preg_replace("\"".$entry['headline']."\"","",$entry['content'],1);
 349
 350   if(strlen($color)==1) $color = "0".$color;
 351
 352
 353
 354   /* the object tag is needed for W3c */
 355   $str =  "<a href=\"?pg=".$name."&amp;mark=1\" title=\"".$percentage."% ".$entry['headline']."\">
 356           <object>
 357           <table summary=\"\"  width=\"98%\" align=\"center\">
 358             <tr>
 359               <td height=15>
 360                 <b>".$entry['headline']."</b> -".htmlentities( substr(strip_tags($entry['content']),0,120))."...
 361               </td>
 362               <td width=50 valign=\"top\">".progressbar($percentage,50,8)."</td>
 363              </tr>
 364              <tr>
 365               <td colspan=2>
 366                 <b>
 367                   ".htmlentities(sprintf(_("%s%% hit rate in file %s"),$percentage,$name))."
 368                 </b>
 369               </td>
 370             </tr>
 371           </table>
 372           </object></a>
 373           ";
 374   $str.=  "<hr size=\"1\">";
 375
 376   return($str);
 377 }
 378
 379
 380 /*Simple function to detect if we prepare to change a tag or visible text */
 381 function is_in_tag($string,$pos)
 382 {
 383   $pos1 = strpos($string,"<",$pos);
 384   $pos2 = strpos($string,">",$pos);
 385
 386   if ($pos1 > $pos2)  {
 387     return(true);
 388   }else{
 389     return(false);
 390   }
 391 }
 392
 393 /*Returns frist line of readable text, it should be the headline */
 394 function getheader_from_content($str)
 395 {
 396   $str = strip_tags($str);
 397   $pos = 0;
 398   $arr = split("\n",$str);
 399   foreach($arr as $possibleheadline){
 400     if(strlen($possibleheadline)>=3){
 401       return $possibleheadline;
 402     }
 403   }
 404 }
 405
 406 // vim:tabstop=2:expandtab:shiftwidth=2:filetype=php:syntax:ruler:
 407 ?>