| 1: | <?php | 
| 2: |  | 
| 3: |  | 
| 4: |  | 
| 5: |  | 
| 6: |  | 
| 7: |  | 
| 8: |  | 
| 9: |  | 
| 10: |  | 
| 11: |  | 
| 12: | namespace Xmf; | 
| 13: |  | 
| 14: |  | 
| 15: |  | 
| 16: |  | 
| 17: |  | 
| 18: |  | 
| 19: |  | 
| 20: |  | 
| 21: |  | 
| 22: |  | 
| 23: |  | 
| 24: |  | 
/**
 * Metagen - helpers for generating and assigning page metadata.
 *
 * Provides static utilities to derive a page title, meta keywords, a meta
 * description, an SEO-friendly title slug and a search-result summary from
 * raw (possibly HTML) text, and to hand the results to the active theme or
 * template engine.
 *
 * All text is treated as UTF-8 (see ENCODING). The mbstring functions are
 * used when they are available, with byte-oriented fallbacks otherwise.
 */
class Metagen
{
    /**
     * Character encoding assumed for all text handled by this class.
     */
    const ENCODING = 'UTF-8';

    /**
     * Marker added to text that has been cut short.
     */
    const ELLIPSIS = "...";

    /**
     * Assign the page title.
     *
     * The title is reduced to plain text and stored in the template
     * variable 'xoops_pagetitle'.
     *
     * @param string $title page title, may contain markup
     *
     * @return void
     */
    public static function assignTitle($title)
    {
        $title = trim((string) $title);
        $title = static::asPlainText($title);
        static::assignTemplateVar('xoops_pagetitle', $title);
    }

    /**
     * Assign the "keywords" meta tag.
     *
     * Nothing is assigned unless $keywords is a non-empty array.
     *
     * @param string[] $keywords list of keywords, joined with ', '
     *
     * @return void
     */
    public static function assignKeywords($keywords)
    {
        if (!empty($keywords) && \is_array($keywords)) {
            $keyword_tag = implode(', ', $keywords);
            static::assignThemeMeta('keywords', $keyword_tag);
        }
    }

    /**
     * Assign the "description" meta tag.
     *
     * The description is trimmed; an empty result is not assigned.
     *
     * @param string $description page description
     *
     * @return void
     */
    public static function assignDescription($description)
    {
        $description = trim((string) $description);
        // Compare against '' rather than using empty(), so that a
        // description of "0" is not silently discarded.
        if ('' !== $description) {
            static::assignThemeMeta('description', $description);
        }
    }

    /**
     * Add a meta tag to the active theme.
     *
     * When a class named Xoops is already loaded (autoloading is not
     * triggered) the theme is obtained from \Xoops::getInstance(); otherwise
     * the global $xoTheme is used. If that global is not an object the call
     * is skipped instead of raising a fatal error.
     *
     * @param string $name  meta name, e.g. 'keywords'
     * @param string $value meta content
     *
     * @return void
     */
    protected static function assignThemeMeta($name, $value)
    {
        if (class_exists('Xoops', false)) {
            \Xoops::getInstance()->theme()->addMeta('meta', $name, $value);

            return;
        }

        global $xoTheme;
        if (is_object($xoTheme)) {
            $xoTheme->addMeta('meta', $name, $value);
        }
    }

    /**
     * Assign a template variable.
     *
     * When a class named Xoops is already loaded (autoloading is not
     * triggered) the template engine is obtained from \Xoops::getInstance();
     * otherwise the global $xoopsTpl is used. If that global is not an
     * object the call is skipped instead of raising a fatal error.
     *
     * @param string $name  template variable name
     * @param mixed  $value value to assign
     *
     * @return void
     */
    protected static function assignTemplateVar($name, $value)
    {
        if (class_exists('Xoops', false)) {
            \Xoops::getInstance()->tpl()->assign($name, $value);

            return;
        }

        global $xoopsTpl;
        if (is_object($xoopsTpl)) {
            $xoopsTpl->assign($name, $value);
        }
    }

    /**
     * Generate a list of keywords from a body of text.
     *
     * The text is converted to plain text, lower-cased and split on anything
     * that is not a word character or an apostrophe. A word accepted by the
     * stop word object is then split on apostrophes, and every part that is
     * also accepted and is at least $minLength characters long is counted.
     * Keywords are returned most frequent first. Forced keywords are given a
     * weight of 999999 so that they sort ahead of counted words.
     *
     * @param string        $body      text to extract keywords from
     * @param int           $count     maximum number of keywords to return
     * @param int           $minLength minimum keyword length, in characters
     * @param string[]|null $forceKeys keywords to always include
     *
     * @return array keywords, at most $count entries
     */
    public static function generateKeywords(
        $body,
        $count = 20,
        $minLength = 4,
        $forceKeys = null
    ) {
        $keyCount = array();
        if (!is_array($forceKeys)) {
            $forceKeys = array();
        }
        $useMb = function_exists('mb_strtolower');

        $text = static::asPlainText($body);
        $text = $useMb ? mb_strtolower($text, static::ENCODING) : strtolower($text);

        $originalKeywords = preg_split(
            '/[^\w\']+/u',
            $text,
            -1,
            PREG_SPLIT_NO_EMPTY
        );
        // With the /u modifier preg_split() returns false when the subject
        // is not valid UTF-8; treat that as "no words found".
        if (false === $originalKeywords) {
            $originalKeywords = array();
        }

        $stopWords = static::stopWordsObject();
        foreach ($originalKeywords as $originalKeyword) {
            if (!$stopWords->check($originalKeyword)) {
                continue;
            }
            foreach (explode("'", $originalKeyword) as $secondRoundKeyword) {
                if (!$stopWords->check($secondRoundKeyword)) {
                    continue;
                }
                // Measure characters rather than bytes, so multibyte words
                // are held to the same minimum length as ASCII words.
                $keywordLength = $useMb
                    ? mb_strlen($secondRoundKeyword, static::ENCODING)
                    : strlen($secondRoundKeyword);
                if ($keywordLength >= $minLength) {
                    $keyCount[$secondRoundKeyword] = isset($keyCount[$secondRoundKeyword])
                        ? $keyCount[$secondRoundKeyword] + 1
                        : 1;
                }
            }
        }

        // Forced keys are taken last-to-first and lower-cased the same way
        // as the body text.
        foreach (array_reverse($forceKeys) as $forcedKey) {
            if (!is_scalar($forcedKey)) {
                continue;
            }
            $forcedKey = (string) $forcedKey;
            $tempKey = $useMb ? mb_strtolower($forcedKey, static::ENCODING) : strtolower($forcedKey);
            $keyCount[$tempKey] = 999999;
        }

        arsort($keyCount, SORT_NUMERIC);

        return array_slice(array_keys($keyCount), 0, $count);
    }

    /**
     * Generate a description from a body of text.
     *
     * The text is converted to plain text and cut to its first
     * ($wordCount - 1) space separated words. If the result contains no
     * period at all, an ellipsis is appended. If it is longer than 100
     * characters and its last period lies within the final 30 characters,
     * it is cut back to end at that period.
     *
     * @param string $body      text to describe, may contain markup
     * @param int    $wordCount word limit; the result holds at most
     *                          ($wordCount - 1) words
     *
     * @return string the description, or '' when the body has no text
     */
    public static function generateDescription($body, $wordCount = 100)
    {
        $text = static::asPlainText($body);
        if ('' === $text) {
            // Without this guard an empty body would produce a bare "..."
            // which would then be published as the page description.
            return '';
        }

        $words = explode(" ", $text);
        $keep = max((int) $wordCount - 1, 0);
        $ret = implode(' ', array_slice($words, 0, $keep));

        $useMb = function_exists('mb_strlen');
        if ($useMb) {
            $len = mb_strlen($ret, static::ENCODING);
            $lastPeriod = mb_strrpos($ret, '.', 0, static::ENCODING);
        } else {
            $len = strlen($ret);
            $lastPeriod = strrpos($ret, '.');
        }

        if (false === $lastPeriod) {
            return $ret . static::ELLIPSIS;
        }

        if ($len > 100 && ($len - $lastPeriod) < 30) {
            $ret = $useMb
                ? mb_substr($ret, 0, $lastPeriod + 1, static::ENCODING)
                : substr($ret, 0, $lastPeriod + 1);
        }

        return $ret;
    }

    /**
     * Generate and assign title, keywords and description in one call.
     *
     * Keywords found in the title (minimum length 3) are forced into the
     * keyword list generated from the body.
     *
     * @param string        $title     page title
     * @param string        $body      page body text
     * @param int           $count     maximum number of keywords
     * @param int           $minLength minimum length of body keywords
     * @param int           $wordCount word limit for the description
     * @param string[]|null $forceKeys keywords to always include
     *
     * @return void
     */
    public static function generateMetaTags(
        $title,
        $body,
        $count = 20,
        $minLength = 4,
        $wordCount = 100,
        $forceKeys = null
    ) {
        $title_keywords = static::generateKeywords($title, $count, 3, $forceKeys);
        $keywords = static::generateKeywords($body, $count, $minLength, $title_keywords);
        $description = static::generateDescription($body, $wordCount);
        static::assignTitle($title);
        static::assignKeywords($keywords);
        static::assignDescription($description);
    }

    /**
     * Filter callback: true when the value is a string of non-zero length.
     *
     * @param string $var value to test
     *
     * @return bool
     */
    protected static function nonEmptyString($var)
    {
        return (strlen((string) $var) > 0);
    }

    /**
     * Build an SEO-friendly title for use in a URL.
     *
     * Every character that is not a Unicode letter or number becomes a
     * hyphen; empty segments and segments rejected by the stop word object
     * are dropped, and the rest are joined with single hyphens.
     *
     * @param string $title     title to convert
     * @param string $extension suffix appended to a non-empty result
     *
     * @return string the converted title, or '' when nothing remains
     */
    public static function generateSeoTitle($title = '', $extension = '')
    {
        $title = preg_replace("/[^\p{N}\p{L}]/u", "-", (string) $title);
        if (null === $title) {
            // preg_replace() failed (for example on invalid UTF-8).
            return '';
        }

        $tableau = explode("-", $title);
        // An array callable is used because the 'static::method' string
        // form is deprecated as of PHP 8.2.
        $tableau = array_filter($tableau, array(get_called_class(), 'nonEmptyString'));
        $tableau = array_filter($tableau, array(static::stopWordsObject(), 'check'));
        $title = implode("-", $tableau);

        // Compare against '' so that a title of "0" is kept.
        return ('' === $title) ? '' : $title . $extension;
    }

    /**
     * Build a short summary of a text, positioned around search terms.
     *
     * The text is converted to plain text. The summary begins about half of
     * $length before the earliest occurrence of any needle (moved forward to
     * the next space) and is cut back to the last space within $length
     * characters. An ellipsis marks each side where text was left out.
     *
     * @param string               $haystack text to summarize
     * @param string|string[]|null $needles  search term(s) to center on
     * @param int                  $length   maximum summary length,
     *                                       excluding any ellipsis
     *
     * @return string the summary
     */
    public static function getSearchSummary($haystack, $needles = null, $length = 120)
    {
        $haystack = static::asPlainText($haystack);
        $pos = static::getNeedlePositions($haystack, $needles);

        $start = empty($pos) ? 0 : min($pos);
        $start = max($start - (int) ($length / 2), 0);

        $pre = ($start > 0);
        $useMb = function_exists('mb_strlen');

        if ($pre) {
            // Advance to the next space so the summary does not begin in
            // the middle of a word.
            $temp = $useMb
                ? mb_strpos($haystack, ' ', $start, static::ENCODING)
                : strpos($haystack, ' ', $start);
            if (false !== $temp) {
                $start = $temp;
            }
            $haystack = $useMb
                ? mb_substr($haystack, $start, mb_strlen($haystack, static::ENCODING), static::ENCODING)
                : substr($haystack, $start);
        }

        $remaining = $useMb ? mb_strlen($haystack, static::ENCODING) : strlen($haystack);
        // Truncate only when the text is actually longer than $length; text
        // that fits exactly must not lose its last word or gain an ellipsis.
        $post = ($remaining > $length);
        if ($post) {
            if ($useMb) {
                $haystack = mb_substr($haystack, 0, $length, static::ENCODING);
                $end = mb_strrpos($haystack, ' ', 0, static::ENCODING);
                if ($end) {
                    $haystack = mb_substr($haystack, 0, $end, static::ENCODING);
                }
            } else {
                $haystack = substr($haystack, 0, $length);
                $end = strrpos($haystack, ' ', 0);
                if ($end) {
                    $haystack = substr($haystack, 0, $end);
                }
            }
        }

        return ($pre ? static::ELLIPSIS : '') . trim($haystack) . ($post ? static::ELLIPSIS : '');
    }

    /**
     * Reduce raw text to a single line of plain text.
     *
     * Markup is removed by html2text() and purifyText(), line breaks become
     * spaces, runs of spaces are collapsed and the result is trimmed.
     *
     * @param string $rawText text that may contain HTML
     *
     * @return string plain text
     */
    protected static function asPlainText($rawText)
    {
        $text = static::html2text($rawText);
        $text = static::purifyText($text);

        $text = str_replace(array("\n", "\r"), ' ', $text);
        $text = preg_replace('/[ ]* [ ]*/', ' ', $text);

        return trim($text);
    }

    /**
     * Find the first position of each needle in the haystack.
     *
     * Matching is case-insensitive. Needles that are not scalar, or that
     * are empty strings, are skipped.
     *
     * @param string               $haystack text to search
     * @param string|string[]|null $needles  search term(s)
     *
     * @return int[] offsets of the needles that were found
     */
    protected static function getNeedlePositions($haystack, $needles)
    {
        $pos = array();
        if (null === $needles || '' === $needles) {
            return $pos;
        }

        $useMb = function_exists('mb_stripos');
        foreach ((array) $needles as $needle) {
            if (!is_scalar($needle)) {
                continue;
            }
            $needle = (string) $needle;
            if ('' === $needle) {
                // An empty needle is not a meaningful search term and is
                // rejected with a warning by older PHP versions.
                continue;
            }
            $i = $useMb
                ? mb_stripos($haystack, $needle, 0, static::ENCODING)
                : stripos($haystack, $needle, 0);
            if (false !== $i) {
                $pos[] = $i;
            }
        }

        return $pos;
    }

    /**
     * Strip tags, decode entities and blank out punctuation.
     *
     * @param string $text    text to clean
     * @param bool   $keyword when true, periods, commas and apostrophes are
     *                        also replaced with spaces
     *
     * @return string cleaned text
     */
    protected static function purifyText($text, $keyword = false)
    {
        // Turn non-breaking spaces, both the entity and the UTF-8 character,
        // into ordinary spaces so later trimming and collapsing apply.
        $text = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $text);
        $text = str_replace(array('<br />', '<br/>', '<br'), ' ', $text);
        $text = strip_tags($text);
        // Name the charset explicitly instead of depending on the
        // default_charset ini setting.
        $text = html_entity_decode($text, ENT_QUOTES, static::ENCODING);
        $text = htmlspecialchars_decode($text, ENT_QUOTES);
        $text = str_replace(array(')', '(', ':'), ' ', $text);
        // Entity names still present here are those written without a
        // trailing semicolon.
        $text = str_replace('&euro', ' euro ', $text);
        $text = str_replace('&hellip', '...', $text);
        $text = str_replace('&rsquo', ' ', $text);
        // '\n' (single quoted) is a literal backslash followed by "n";
        // real newline characters are replaced as well.
        $text = str_replace(array('!', '?', '"', '-', '\n', "\n", '―'), ' ', $text);

        if ($keyword) {
            $text = str_replace(array('.', ',', '\''), ' ', $text);
        }
        $text = str_replace(';', ' ', $text);

        return $text;
    }

    /**
     * Convert an HTML document to text.
     *
     * Script blocks, img tags and all other tags are removed, white space
     * following a line break is dropped, and a fixed set of named entities
     * plus all decimal numeric entities are decoded to UTF-8.
     *
     * @param string $document HTML to convert
     *
     * @return string text with tags removed and entities decoded
     */
    protected static function html2text($document)
    {
        $search = array(
            "'<script[^>]*?>.*?</script>'si", // script blocks
            "'<img.*?/>'si",                  // self-closed img tags
            "'<[\/\!]*?[^<>]*?>'si",          // any remaining tag
            "'([\r\n])[\s]+'",                // white space after a line break
            "'&(quot|#34);'i",
            "'&(amp|#38);'i",
            "'&(lt|#60);'i",
            "'&(gt|#62);'i",
            "'&(nbsp|#160);'i",
            "'&(iexcl|#161);'i",
            "'&(cent|#162);'i",
            "'&(pound|#163);'i",
            "'&(copy|#169);'i"
        );

        // Replacements above code point 127 are written as UTF-8 byte
        // sequences; a single chr() byte is not valid UTF-8.
        $replace = array(
            "",
            "",
            "",
            "\\1",
            "\"",
            "&",
            "<",
            ">",
            " ",
            "\xC2\xA1",
            "\xC2\xA2",
            "\xC2\xA3",
            "\xC2\xA9"
        );

        $text = preg_replace($search, $replace, $document);
        if (null === $text) {
            // preg_replace() failed; continue with the unmodified input.
            $text = (string) $document;
        }

        // Decode the remaining decimal entities. The result has to be
        // assigned back to $text, otherwise this step has no effect.
        $encoding = static::ENCODING;
        $decoded = preg_replace_callback(
            '/&#(\d+);/',
            function ($matches) use ($encoding) {
                return html_entity_decode($matches[0], ENT_QUOTES, $encoding);
            },
            $text
        );

        return (null === $decoded) ? $text : $decoded;
    }

    /**
     * Check a single word against the stop word object.
     *
     * @param string $key word to check
     *
     * @return mixed whatever the stop word object's check() returns; the
     *               rest of this class keeps a word when it is truthy
     */
    public static function checkStopWords($key)
    {
        return static::stopWordsObject()->check($key);
    }

    /**
     * Get the shared StopWords instance, creating it on first use.
     *
     * @return StopWords
     */
    protected static function stopWordsObject()
    {
        static $object;
        if (null === $object) {
            $object = new StopWords();
        }

        return $object;
    }
}
| 515: |  |