|   1:  | <?php
 | 
|   2:  |  | 
|   3:  |  | 
|   4:  |  | 
|   5:  |  | 
|   6:  |  | 
|   7:  |  | 
|   8:  |  | 
|   9:  |  | 
|  10:  | 
 | 
|  11:  | 
 | 
|  12:  | namespace Xmf;
 | 
|  13:  | 
 | 
|  14:  |  | 
|  15:  |  | 
|  16:  |  | 
|  17:  |  | 
|  18:  |  | 
|  19:  |  | 
|  20:  |  | 
|  21:  |  | 
|  22:  |  | 
|  23:  |  | 
|  24:  | 
 | 
/**
 * Metagen builds page meta information (title, keywords, description,
 * SEO slugs and search excerpts) from raw, possibly HTML, text and hands the
 * results to the XOOPS theme / template objects.
 *
 * All text is treated as UTF-8 (see ENCODING). Multibyte functions are used
 * when the mbstring extension is loaded; otherwise the byte-wise equivalents
 * are used as a fallback.
 */
class Metagen
{
    /**
     * Character encoding used for every multibyte string operation.
     */
    const ENCODING = 'UTF-8';

    /**
     * Marker added where text has been cut short.
     */
    const ELLIPSIS = "...";

    /**
     * Assign the page title (template variable 'xoops_pagetitle').
     *
     * @param string $title page title, may contain markup
     *
     * @return void
     */
    public static function assignTitle($title)
    {
        $title = static::asPlainText(trim($title));
        static::assignTemplateVar('xoops_pagetitle', $title);
    }

    /**
     * Assign the 'keywords' meta tag from a list of keywords.
     *
     * Nothing is assigned when $keywords is empty or is not an array.
     *
     * @param string[] $keywords keywords for the page
     *
     * @return void
     */
    public static function assignKeywords($keywords)
    {
        if (!empty($keywords) && \is_array($keywords)) {
            static::assignThemeMeta('keywords', implode(', ', $keywords));
        }
    }

    /**
     * Assign the 'description' meta tag.
     *
     * Nothing is assigned when the trimmed description is an empty string.
     *
     * @param string $description page description
     *
     * @return void
     */
    public static function assignDescription($description)
    {
        $description = trim($description);
        // compare against '' rather than empty() so that "0" is still assigned
        if ('' !== $description) {
            static::assignThemeMeta('description', $description);
        }
    }

    /**
     * Add a meta tag to the active theme.
     *
     * Uses \Xoops::getInstance()->theme() when a class named 'Xoops' is
     * already loaded, otherwise the global $xoTheme object.
     *
     * @param string $name  meta name
     * @param string $value meta content
     *
     * @return void
     */
    protected static function assignThemeMeta($name, $value)
    {
        if (class_exists('Xoops', false)) {
            \Xoops::getInstance()->theme()->addMeta('meta', $name, $value);
        } else {
            global $xoTheme;
            $xoTheme->addMeta('meta', $name, $value);
        }
    }

    /**
     * Assign a variable to the active template engine.
     *
     * Uses \Xoops::getInstance()->tpl() when a class named 'Xoops' is
     * already loaded, otherwise the global $xoopsTpl object.
     *
     * @param string $name  template variable name
     * @param mixed  $value template variable value
     *
     * @return void
     */
    protected static function assignTemplateVar($name, $value)
    {
        if (class_exists('Xoops', false)) {
            \Xoops::getInstance()->tpl()->assign($name, $value);
        } else {
            global $xoopsTpl;
            $xoopsTpl->assign($name, $value);
        }
    }

    /**
     * Extract the most frequent keywords from a body of text.
     *
     * The text is reduced to plain text, lower-cased and split on anything
     * that is not a word character or an apostrophe. Words rejected by the
     * stop word checker, and words shorter than $minLength characters, are
     * ignored. Forced keywords are given a very high count so they sort ahead
     * of everything found in the text.
     *
     * @param string        $body      text to extract keywords from
     * @param int           $count     maximum number of keywords to return
     * @param int           $minLength minimum length, in characters, of a keyword
     * @param string[]|null $forceKeys keywords that must be included
     *
     * @return array keywords, most frequent first
     */
    public static function generateKeywords(
        $body,
        $count = 20,
        $minLength = 4,
        $forceKeys = null
    ) {
        $keyCount = array();
        if (!is_array($forceKeys)) {
            $forceKeys = array();
        }

        $text = self::lowerCase(static::asPlainText($body));

        $originalKeywords = preg_split('/[^\w\']+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if (false === $originalKeywords) {
            // preg_split() fails on malformed UTF-8; treat as "no words found"
            $originalKeywords = array();
        }

        $stopWords = static::stopWordsObject();
        foreach ($originalKeywords as $originalKeyword) {
            if (!$stopWords->check($originalKeyword)) {
                continue;
            }
            // second pass: split contractions/possessives on the apostrophe
            foreach (explode("'", $originalKeyword) as $word) {
                // length is measured in characters, not bytes
                if ($stopWords->check($word) && self::textLength($word) >= $minLength) {
                    $keyCount[$word] = isset($keyCount[$word]) ? $keyCount[$word] + 1 : 1;
                }
            }
        }

        // walk from the end so insertion order matches repeated array_pop() calls
        foreach (array_reverse($forceKeys) as $forced) {
            $keyCount[self::lowerCase((string) $forced)] = 999999;
        }

        arsort($keyCount, SORT_NUMERIC);

        return array_slice(array_keys($keyCount), 0, $count);
    }

    /**
     * Build a short description from the start of a body of text.
     *
     * At most ($wordCount - 1) space separated words are kept. When the result
     * contains no period at all, ELLIPSIS is appended. When the result is
     * longer than 100 characters and its last period lies within the final
     * 30 characters, the text is cut just after that period.
     *
     * @param string $body      text to summarize
     * @param int    $wordCount word limit
     *
     * @return string the description, or '' when $body has no text content
     */
    public static function generateDescription($body, $wordCount = 100)
    {
        $text = static::asPlainText($body);
        if ('' === $text) {
            // nothing to describe; do not emit a lone ellipsis
            return '';
        }

        $words = explode(' ', $text);
        $ret = implode(' ', array_slice($words, 0, max((int) $wordCount - 1, 0)));

        $len = self::textLength($ret);
        $lastPeriod = self::textLastPos($ret, '.');
        if (false === $lastPeriod) {
            return $ret . static::ELLIPSIS;
        }

        if ($len > 100 && ($len - $lastPeriod) < 30) {
            // end on the last complete sentence
            $ret = self::textSlice($ret, 0, $lastPeriod + 1);
        }

        return $ret;
    }

    /**
     * Generate and assign title, keywords and description in one call.
     *
     * Keywords found in the title (minimum length 3) are forced into the
     * keyword list generated from the body.
     *
     * @param string        $title     page title
     * @param string        $body      page body text
     * @param int           $count     maximum number of keywords
     * @param int           $minLength minimum keyword length for body keywords
     * @param int           $wordCount word limit for the description
     * @param string[]|null $forceKeys keywords that must be included
     *
     * @return void
     */
    public static function generateMetaTags(
        $title,
        $body,
        $count = 20,
        $minLength = 4,
        $wordCount = 100,
        $forceKeys = null
    ) {
        $title_keywords = static::generateKeywords($title, $count, 3, $forceKeys);
        $keywords = static::generateKeywords($body, $count, $minLength, $title_keywords);
        $description = static::generateDescription($body, $wordCount);
        static::assignTitle($title);
        static::assignKeywords($keywords);
        static::assignDescription($description);
    }

    /**
     * Filter callback: true when $var has at least one byte.
     *
     * @param string $var value to test
     *
     * @return bool
     */
    protected static function nonEmptyString($var)
    {
        return strlen($var) > 0;
    }

    /**
     * Build a URL friendly slug from a title.
     *
     * Every character that is not a Unicode letter or number becomes a
     * hyphen; empty segments and segments rejected by the stop word checker
     * are dropped and the remainder is joined with hyphens.
     *
     * @param string $title     title to convert
     * @param string $extension suffix appended to a non-empty result
     *
     * @return string the slug with $extension appended, or '' when nothing remains
     */
    public static function generateSeoTitle($title = '', $extension = '')
    {
        $title = preg_replace("/[^\p{N}\p{L}]/u", "-", $title);

        $tableau = explode("-", (string) $title);
        // array callable: the 'static::method' string form is deprecated in PHP 8.2
        $tableau = array_filter($tableau, array(get_called_class(), 'nonEmptyString'));
        $tableau = array_filter($tableau, array(static::stopWordsObject(), 'check'));
        $title = implode("-", $tableau);

        return empty($title) ? '' : $title . $extension;
    }

    /**
     * Extract an excerpt of a text around the earliest matching search term.
     *
     * The excerpt starts up to half of $length before the earliest needle
     * match (moved forward to the next space), is limited to $length
     * characters, and is cut back to the last space when it had to be
     * shortened. ELLIPSIS marks each side where text was removed.
     *
     * @param string               $haystack text to summarize
     * @param string|string[]|null $needles  search term(s) to center on
     * @param int                  $length   maximum excerpt length
     *
     * @return string the excerpt
     */
    public static function getSearchSummary($haystack, $needles = null, $length = 120)
    {
        $haystack = static::asPlainText($haystack);
        $pos = static::getNeedlePositions($haystack, $needles);

        $start = empty($pos) ? 0 : min($pos);
        $start = max($start - (int) ($length / 2), 0);

        $pre = ($start > 0); // true when leading text is dropped
        if ($pre) {
            // begin at a word boundary when one exists after $start
            $space = self::textPos($haystack, ' ', $start);
            if (false !== $space) {
                $start = $space;
            }
            $haystack = self::textSlice($haystack, $start);
        }

        $post = (self::textLength($haystack) >= $length); // true when trailing text is dropped
        if ($post) {
            $haystack = self::textSlice($haystack, 0, $length);
            $end = self::textLastPos($haystack, ' ');
            if ($end) {
                $haystack = self::textSlice($haystack, 0, $end);
            }
        }

        return ($pre ? static::ELLIPSIS : '') . trim($haystack) . ($post ? static::ELLIPSIS : '');
    }

    /**
     * Reduce raw (HTML) text to a single line of plain text.
     *
     * Runs html2text() and purifyText(), turns line breaks into spaces,
     * collapses runs of spaces and trims the result.
     *
     * @param string $rawText text that may contain markup
     *
     * @return string plain text
     */
    protected static function asPlainText($rawText)
    {
        $text = static::purifyText(static::html2text($rawText));

        $text = str_replace(array("\n", "\r"), ' ', $text);
        $text = preg_replace('/ +/', ' ', $text);

        return trim($text);
    }

    /**
     * Find the first (case-insensitive) position of each needle in a text.
     *
     * Needles that do not occur, and empty or non-scalar needles, contribute
     * no entry.
     *
     * @param string               $haystack text to search
     * @param string|string[]|null $needles  search term(s)
     *
     * @return int[] positions of the needles that were found
     */
    protected static function getNeedlePositions($haystack, $needles)
    {
        $pos = array();
        $needles = empty($needles) ? array() : (array) $needles;
        $multibyte = function_exists('mb_stripos');
        foreach ($needles as $needle) {
            // an empty needle warns (PHP 7) or matches at offset 0 (PHP 8); skip it
            if (!is_scalar($needle) || '' === (string) $needle) {
                continue;
            }
            $i = $multibyte
                ? mb_stripos($haystack, $needle, 0, static::ENCODING)
                : stripos($haystack, $needle, 0);
            if (false !== $i) {
                $pos[] = $i;
            }
        }

        return $pos;
    }

    /**
     * Strip remaining markup, entities and punctuation from text.
     *
     * @param string $text    text to clean
     * @param bool   $keyword when true, also replace periods, commas and
     *                        apostrophes with spaces
     *
     * @return string cleaned text
     */
    protected static function purifyText($text, $keyword = false)
    {
        // any <br> variant becomes a space so adjacent words stay separated
        $text = preg_replace('#<br\b[^>]*>#i', ' ', $text);
        $text = strip_tags($text);

        // explicit flags and charset: the defaults differ between PHP versions
        $text = html_entity_decode($text, ENT_QUOTES, static::ENCODING);
        $text = htmlspecialchars_decode($text, ENT_QUOTES);

        // non-breaking space: leftover entity form and the decoded U+00A0
        $text = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $text);

        $text = str_replace(array(')', '(', ':'), ' ', $text);

        // entity names still present after decoding (e.g. missing semicolon)
        $text = str_replace('&euro', ' euro ', $text);
        $text = str_replace('&hellip', '...', $text);
        $text = str_replace('&rsquo', ' ', $text);

        // '\n' is the literal two character sequence, "\n" a real line feed,
        // "\xE2\x80\x95" is U+2015 HORIZONTAL BAR
        $text = str_replace(array('!', '?', '"', '-', '\n', "\n", "\xE2\x80\x95"), ' ', $text);

        if ($keyword) {
            $text = str_replace(array('.', ',', '\''), ' ', $text);
        }

        return str_replace(';', ' ', $text);
    }

    /**
     * Convert an HTML fragment to text.
     *
     * Removes script blocks, self-closed img tags and all other tags,
     * collapses whitespace that follows a line break, and decodes a small set
     * of named entities plus all decimal numeric entities to UTF-8.
     *
     * @param string $document HTML to convert
     *
     * @return string converted text
     */
    protected static function html2text($document)
    {
        $search = array(
            "'<script[^>]*?>.*?</script>'si", // script blocks
            "'<img.*?/>'si",                  // self-closed images
            "'<[\/\!]*?[^<>]*?>'si",          // any remaining tag
            "'([\r\n])[\s]+'",                // whitespace after a line break
            "'&(quot|#34);'i",                // named / numeric entities
            "'&(amp|#38);'i",
            "'&(lt|#60);'i",
            "'&(gt|#62);'i",
            "'&(nbsp|#160);'i",
            "'&(iexcl|#161);'i",
            "'&(cent|#162);'i",
            "'&(pound|#163);'i",
            "'&(copy|#169);'i",
        );

        $replace = array(
            '',
            '',
            '',
            "\\1",
            '"',
            '&',
            '<',
            '>',
            ' ',
            "\xC2\xA1", // U+00A1 as UTF-8 (chr(161) would be an invalid lone byte)
            "\xC2\xA2", // U+00A2
            "\xC2\xA3", // U+00A3
            "\xC2\xA9", // U+00A9
        );

        $text = preg_replace($search, $replace, $document);
        if (null === $text) {
            // PCRE failure: continue with the unmodified input rather than nothing
            $text = (string) $document;
        }

        // decode remaining decimal entities to UTF-8, keeping the result
        $encoding = static::ENCODING;
        $decoded = preg_replace_callback(
            '/&#(\d+);/',
            function ($matches) use ($encoding) {
                return html_entity_decode($matches[0], ENT_QUOTES, $encoding);
            },
            $text
        );

        return (null === $decoded) ? $text : $decoded;
    }

    /**
     * Check a word against the stop word list.
     *
     * @param string $key word to check
     *
     * @return mixed whatever StopWords::check() returns for $key
     */
    public static function checkStopWords($key)
    {
        return static::stopWordsObject()->check($key);
    }

    /**
     * Get the shared StopWords instance, creating it on first use.
     *
     * @return StopWords
     */
    protected static function stopWordsObject()
    {
        static $object;
        if (null === $object) {
            $object = new StopWords();
        }

        return $object;
    }

    /**
     * Length of $text in characters (bytes when mbstring is unavailable).
     *
     * @param string $text text to measure
     *
     * @return int
     */
    private static function textLength($text)
    {
        return function_exists('mb_strlen')
            ? mb_strlen($text, static::ENCODING)
            : strlen($text);
    }

    /**
     * Lower-case $text, multibyte aware when mbstring is available.
     *
     * @param string $text text to convert
     *
     * @return string
     */
    private static function lowerCase($text)
    {
        return function_exists('mb_strtolower')
            ? mb_strtolower($text, static::ENCODING)
            : strtolower($text);
    }

    /**
     * Position of the first $needle in $haystack at or after $offset.
     *
     * @param string $haystack text to search
     * @param string $needle   text to find
     * @param int    $offset   position to start searching from
     *
     * @return int|false
     */
    private static function textPos($haystack, $needle, $offset = 0)
    {
        return function_exists('mb_strpos')
            ? mb_strpos($haystack, $needle, $offset, static::ENCODING)
            : strpos($haystack, $needle, $offset);
    }

    /**
     * Position of the last $needle in $haystack.
     *
     * @param string $haystack text to search
     * @param string $needle   text to find
     *
     * @return int|false
     */
    private static function textLastPos($haystack, $needle)
    {
        return function_exists('mb_strrpos')
            ? mb_strrpos($haystack, $needle, 0, static::ENCODING)
            : strrpos($haystack, $needle);
    }

    /**
     * Portion of $text starting at $start, $length long (to the end when null).
     *
     * @param string   $text   source text
     * @param int      $start  starting position
     * @param int|null $length maximum length, null for "rest of the text"
     *
     * @return string
     */
    private static function textSlice($text, $start, $length = null)
    {
        if (null === $length) {
            // an explicit length keeps older mb_substr() versions from reading null as 0
            $length = self::textLength($text);
        }

        return function_exists('mb_substr')
            ? mb_substr($text, $start, $length, static::ENCODING)
            : (string) substr($text, $start, $length);
    }
}
 | 
| 515:  |  |