| 1: | <?php
|
| 2: | |
| 3: | |
| 4: | |
| 5: | |
| 6: | |
| 7: | |
| 8: | |
| 9: | |
| 10: |
|
| 11: |
|
| 12: | namespace Xmf;
|
| 13: |
|
| 14: | |
| 15: | |
| 16: | |
| 17: | |
| 18: | |
| 19: | |
| 20: | |
| 21: | |
| 22: | |
| 23: | |
| 24: |
|
| 25: | class Metagen
|
| 26: | {
|
| 27: |
|
| 28: | |
| 29: | |
| 30: |
|
| 31: | const ENCODING = 'UTF-8';
|
| 32: |
|
| 33: | |
| 34: | |
| 35: | |
| 36: |
|
| 37: | const ELLIPSIS = "...";
|
| 38: |
|
| 39: | |
| 40: | |
| 41: | |
| 42: | |
| 43: | |
| 44: | |
| 45: |
|
/**
 * Assign the page title.
 *
 * The title is trimmed, reduced to plain text with asPlainText() and then
 * assigned to the 'xoops_pagetitle' template variable.
 *
 * @param string $title page title
 *
 * @return void
 */
public static function assignTitle($title)
{
    $plainTitle = static::asPlainText(trim($title));

    static::assignTemplateVar('xoops_pagetitle', $plainTitle);
}
|
| 52: |
|
| 53: | |
| 54: | |
| 55: | |
| 56: | |
| 57: | |
| 58: | |
| 59: |
|
/**
 * Assign the 'keywords' meta tag from a list of keywords.
 *
 * The keywords are joined with ', '. Nothing is assigned when $keywords is
 * empty or is not an array.
 *
 * @param array $keywords keywords to publish
 *
 * @return void
 */
public static function assignKeywords($keywords)
{
    if (empty($keywords) || !\is_array($keywords)) {
        return;
    }

    static::assignThemeMeta('keywords', implode(', ', $keywords));
}
|
| 67: |
|
| 68: | |
| 69: | |
| 70: | |
| 71: | |
| 72: | |
| 73: | |
| 74: |
|
/**
 * Assign the 'description' meta tag.
 *
 * The description is trimmed first; nothing is assigned when the trimmed
 * value is empty as judged by empty() (which also covers the string '0').
 *
 * @param string $description page description
 *
 * @return void
 */
public static function assignDescription($description)
{
    $trimmed = trim($description);
    if (empty($trimmed)) {
        return;
    }

    static::assignThemeMeta('description', $trimmed);
}
|
| 82: |
|
| 83: | |
| 84: | |
| 85: | |
| 86: | |
| 87: | |
| 88: |
|
/**
 * Add a 'meta' entry to the active theme.
 *
 * When a class named Xoops is already loaded (autoloading is not triggered)
 * the theme is taken from \Xoops::getInstance()->theme(); otherwise the
 * global $xoTheme is used.
 *
 * @param string $name  meta name
 * @param string $value meta content
 *
 * @return void
 */
protected static function assignThemeMeta($name, $value)
{
    if (!class_exists('Xoops', false)) {
        global $xoTheme;
        $xoTheme->addMeta('meta', $name, $value);

        return;
    }

    \Xoops::getInstance()->theme()->addMeta('meta', $name, $value);
}
|
| 98: |
|
| 99: | |
| 100: | |
| 101: | |
| 102: | |
| 103: | |
| 104: |
|
/**
 * Assign a variable to the active template engine.
 *
 * When a class named Xoops is already loaded (autoloading is not triggered)
 * the template is taken from \Xoops::getInstance()->tpl(); otherwise the
 * global $xoopsTpl is used.
 *
 * @param string $name  template variable name
 * @param mixed  $value value to assign
 *
 * @return void
 */
protected static function assignTemplateVar($name, $value)
{
    if (!class_exists('Xoops', false)) {
        global $xoopsTpl;
        $xoopsTpl->assign($name, $value);

        return;
    }

    \Xoops::getInstance()->tpl()->assign($name, $value);
}
|
| 114: |
|
| 115: | |
| 116: | |
| 117: | |
| 118: | |
| 119: | |
| 120: | |
| 121: | |
| 122: | |
| 123: | |
| 124: |
|
/**
 * Generate a list of keywords from a body of text, most frequent first.
 *
 * The text is reduced to plain text, lower-cased and split on anything that
 * is not a word character or an apostrophe. Each word for which the stop
 * word object's check() returns true is split again on apostrophes, and
 * every part that also passes check() and is at least $minLength characters
 * long is counted.
 *
 * Forced keys are lower-cased and given a count of 999999 so that they sort
 * ahead of the counted words.
 *
 * @param string     $body      text to extract keywords from
 * @param int        $count     maximum number of keywords to return
 * @param int        $minLength minimum length, in characters, of a keyword
 * @param array|null $forceKeys keywords to always include; any non-array
 *                              value is treated as an empty list
 *
 * @return array keywords ordered by descending count
 */
public static function generateKeywords(
    $body,
    $count = 20,
    $minLength = 4,
    $forceKeys = null
) {
    $keyCount = array();
    $forced = is_array($forceKeys) ? $forceKeys : array();
    $multibyte = function_exists('mb_strtolower');

    $text = static::asPlainText($body);
    $text = $multibyte ? mb_strtolower($text, static::ENCODING) : strtolower($text);

    $candidates = preg_split(
        '/[^\w\']+/u',
        $text,
        -1,
        PREG_SPLIT_NO_EMPTY
    );
    if ($candidates === false) {
        // preg_split() returns false when the /u pattern meets invalid UTF-8;
        // treat that as "no words" instead of iterating over false.
        $candidates = array();
    }

    $stopWords = static::stopWordsObject();
    foreach ($candidates as $candidate) {
        if (!$stopWords->check($candidate)) {
            continue;
        }
        foreach (explode("'", $candidate) as $part) {
            if (!$stopWords->check($part)) {
                continue;
            }
            // Measure in characters, not bytes, so multi-byte words are not
            // accepted merely because their UTF-8 encoding is long enough.
            $partLength = $multibyte ? mb_strlen($part, static::ENCODING) : strlen($part);
            if ($partLength >= $minLength) {
                $keyCount[$part] = isset($keyCount[$part]) ? $keyCount[$part] + 1 : 1;
            }
        }
    }

    // Walk the forced keys last-to-first, matching the order in which the
    // previous array_pop() loop inserted them.
    foreach (array_reverse($forced) as $forcedKey) {
        // Lower-case the same way as the body text so a forced key merges
        // with an identical counted word instead of duplicating it.
        $forcedKey = $multibyte
            ? mb_strtolower($forcedKey, static::ENCODING)
            : strtolower($forcedKey);
        $keyCount[$forcedKey] = 999999;
    }

    arsort($keyCount, SORT_NUMERIC);

    return array_slice(array_keys($keyCount), 0, $count);
}
|
| 175: |
|
| 176: | |
| 177: | |
| 178: | |
| 179: | |
| 180: | |
| 181: | |
| 182: | |
| 183: |
|
/**
 * Generate a description from the start of a body of text.
 *
 * The body is reduced to plain text and split on single spaces; at most
 * $wordCount - 1 of the leading pieces are kept. When the kept text holds
 * no period an ellipsis is appended. When it is longer than 100 characters
 * and its last period lies within the final 30 characters, the text is cut
 * just after that period.
 *
 * @param string $body      text to summarise
 * @param int    $wordCount word limit (the loop keeps $wordCount - 1 words)
 *
 * @return string the generated description
 */
public static function generateDescription($body, $wordCount = 100)
{
    $allWords = explode(" ", static::asPlainText($body));
    $available = count($allWords);
    $limit = $wordCount - 1;

    $kept = array();
    for ($n = 0; $n < $limit && $n < $available; ++$n) {
        $kept[] = $allWords[$n];
    }
    $summary = implode(' ', $kept);

    $multibyte = function_exists('mb_strlen');
    if ($multibyte) {
        $length = mb_strlen($summary, static::ENCODING);
        $periodAt = mb_strrpos($summary, '.', 0, static::ENCODING);
    } else {
        $length = strlen($summary);
        $periodAt = strrpos($summary, '.');
    }

    // No period anywhere: mark the text as cut off.
    if ($periodAt === false) {
        return $summary . static::ELLIPSIS;
    }

    // Long text whose last sentence ends near the end: stop at that period.
    if ($length > 100 && ($length - $periodAt) < 30) {
        return $multibyte
            ? mb_substr($summary, 0, $periodAt + 1, static::ENCODING)
            : substr($summary, 0, $periodAt + 1);
    }

    return $summary;
}
|
| 216: |
|
| 217: | |
| 218: | |
| 219: | |
| 220: | |
| 221: | |
| 222: | |
| 223: | |
| 224: | |
| 225: | |
| 226: | |
| 227: | |
| 228: |
|
/**
 * Generate and assign the title, keywords and description for a page.
 *
 * Keywords are first taken from the title (with a fixed minimum length of
 * 3) and then passed as forced keys when extracting keywords from the body.
 *
 * @param string     $title     page title
 * @param string     $body      page body text
 * @param int        $count     maximum number of keywords
 * @param int        $minLength minimum keyword length for body keywords
 * @param int        $wordCount word limit passed to generateDescription()
 * @param array|null $forceKeys keywords to force into the title keywords
 *
 * @return void
 */
public static function generateMetaTags(
    $title,
    $body,
    $count = 20,
    $minLength = 4,
    $wordCount = 100,
    $forceKeys = null
) {
    $fromTitle = static::generateKeywords($title, $count, 3, $forceKeys);
    $fromBody = static::generateKeywords($body, $count, $minLength, $fromTitle);
    $summary = static::generateDescription($body, $wordCount);

    static::assignTitle($title);
    static::assignKeywords($fromBody);
    static::assignDescription($summary);
}
|
| 244: |
|
| 245: | |
| 246: | |
| 247: | |
| 248: | |
| 249: | |
| 250: | |
| 251: | |
| 252: | |
| 253: |
|
/**
 * Tell whether a value has a string length greater than zero.
 *
 * @param string $var value to test
 *
 * @return bool true when strlen($var) is not zero
 */
protected static function nonEmptyString($var)
{
    $length = strlen($var);

    return 0 !== $length;
}
|
| 258: |
|
| 259: | |
| 260: | |
| 261: | |
| 262: | |
| 263: | |
| 264: | |
| 265: | |
| 266: | |
| 267: | |
| 268: |
|
/**
 * Build a URL-friendly title.
 *
 * Every character that is not a Unicode letter or number is replaced by a
 * hyphen. The hyphen-separated pieces are then filtered: empty pieces are
 * dropped, as are pieces for which the stop word object's check() returns
 * false. The survivors are re-joined with hyphens.
 *
 * @param string $title     title to convert
 * @param string $extension suffix appended to a non-empty result
 *
 * @return string the converted title with $extension, or '' when nothing
 *                survives the filtering
 */
public static function generateSeoTitle($title = '', $extension = '')
{
    $title = preg_replace("/[^\p{N}\p{L}]/u", "-", $title);

    // preg_replace() yields null on failure (e.g. invalid UTF-8 under /u);
    // the cast keeps explode() from receiving null.
    $tableau = explode("-", (string) $title);
    // Array callable instead of the 'static::nonEmptyString' string form,
    // which is deprecated as of PHP 8.2; get_called_class() keeps the late
    // static binding the original string relied on.
    $tableau = array_filter($tableau, array(get_called_class(), 'nonEmptyString'));
    $tableau = array_filter($tableau, array(static::stopWordsObject(), 'check'));
    $title = implode("-", $tableau);

    return empty($title) ? '' : $title . $extension;
}
|
| 281: |
|
| 282: | |
| 283: | |
| 284: | |
| 285: | |
| 286: | |
| 287: | |
| 288: | |
| 289: | |
| 290: | |
| 291: | |
| 292: | |
| 293: | |
| 294: | |
| 295: | |
| 296: |
|
/**
 * Build a short plain-text summary of $haystack around the search terms.
 *
 * The summary window starts about half of $length before the earliest
 * needle position (or at the beginning when no needle is found), is moved
 * forward to the next space, and is limited to $length characters, cut back
 * to the last space. An ellipsis is added on each side that was cut.
 *
 * @param string $haystack text to summarise
 * @param mixed  $needles  search term(s), passed to getNeedlePositions()
 * @param int    $length   maximum length of the summary text, not counting
 *                         any ellipsis
 *
 * @return string the summary
 */
public static function getSearchSummary($haystack, $needles = null, $length = 120)
{
    $haystack = static::asPlainText($haystack);
    $pos = static::getNeedlePositions($haystack, $needles);

    $start = empty($pos) ? 0 : min($pos);
    $start = max($start - (int) ($length / 2), 0);

    $pre = ($start > 0);
    if (function_exists('mb_strlen')) {
        if ($pre) {
            // Begin at the next space so the summary does not open mid-word.
            $temp = mb_strpos($haystack, ' ', $start, static::ENCODING);
            $start = ($temp === false) ? $start : $temp;
            $haystack = mb_substr(
                $haystack,
                $start,
                mb_strlen($haystack, static::ENCODING),
                static::ENCODING
            );
        }

        // Only text strictly longer than $length needs cutting; text that
        // fits exactly must keep its last word and gets no ellipsis.
        $post = mb_strlen($haystack, static::ENCODING) > $length;
        if ($post) {
            $haystack = mb_substr($haystack, 0, $length, static::ENCODING);
            $end = mb_strrpos($haystack, ' ', 0, static::ENCODING);
            if ($end) {
                $haystack = mb_substr($haystack, 0, $end, static::ENCODING);
            }
        }
    } else {
        if ($pre) {
            // Begin at the next space so the summary does not open mid-word.
            $temp = strpos($haystack, ' ', $start);
            $start = ($temp === false) ? $start : $temp;
            $haystack = substr($haystack, $start);
        }

        // Same boundary rule as the multibyte branch.
        $post = strlen($haystack) > $length;
        if ($post) {
            $haystack = substr($haystack, 0, $length);
            $end = strrpos($haystack, ' ', 0);
            if ($end) {
                $haystack = substr($haystack, 0, $end);
            }
        }
    }

    return ($pre ? static::ELLIPSIS : '') . trim($haystack) . ($post ? static::ELLIPSIS : '');
}
|
| 343: |
|
| 344: | |
| 345: | |
| 346: | |
| 347: | |
| 348: | |
| 349: | |
| 350: | |
| 351: |
|
/**
 * Reduce raw (possibly HTML) text to a single line of plain text.
 *
 * The text is passed through html2text() and purifyText(), line breaks are
 * turned into spaces, runs of spaces are collapsed to one, and the result
 * is trimmed.
 *
 * @param string $rawText text to clean
 *
 * @return string cleaned, trimmed text
 */
protected static function asPlainText($rawText)
{
    $plain = static::purifyText(static::html2text($rawText));

    $plain = str_replace(array("\n", "\r"), ' ', $plain);
    $collapsed = preg_replace('/[ ]* [ ]*/', ' ', $plain);

    return trim($collapsed);
}
|
| 363: |
|
| 364: | |
| 365: | |
| 366: | |
| 367: | |
| 368: | |
| 369: | |
| 370: | |
| 371: | |
| 372: | |
| 373: |
|
/**
 * Find the first case-insensitive position of each needle in $haystack.
 *
 * Needles that are not scalar, or that are empty once cast to a string, are
 * skipped: an empty needle raises a warning on PHP 7 and matches at offset
 * 0 on PHP 8, which would wrongly report a hit at the start of the text.
 *
 * @param string $haystack text to search
 * @param mixed  $needles  a single search term or an array of terms
 *
 * @return array positions of the needles that were found; empty when
 *               nothing matched
 */
protected static function getNeedlePositions($haystack, $needles)
{
    $positions = array();
    $multibyte = function_exists('mb_stripos');

    foreach ((array) $needles as $needle) {
        if (!is_scalar($needle)) {
            continue;
        }
        $needle = (string) $needle;
        if ($needle === '') {
            continue;
        }

        $found = $multibyte
            ? mb_stripos($haystack, $needle, 0, static::ENCODING)
            : stripos($haystack, $needle, 0);
        if ($found !== false) {
            $positions[] = $found;
        }
    }

    return $positions;
}
|
| 390: |
|
| 391: | |
| 392: | |
| 393: | |
| 394: | |
| 395: | |
| 396: | |
| 397: | |
| 398: |
|
/**
 * Strip markup, entities and most punctuation from a piece of text.
 *
 * Line-break tags and non-breaking spaces become spaces, remaining tags are
 * stripped, HTML entities are decoded as UTF-8 (the value of the class
 * ENCODING constant), and a fixed set of punctuation characters is replaced
 * by spaces.
 *
 * @param string $text    text to clean
 * @param bool   $keyword when true, periods, commas and apostrophes are
 *                        replaced by spaces as well
 *
 * @return string cleaned text (not trimmed, spaces not collapsed)
 */
protected static function purifyText($text, $keyword = false)
{
    // Non-breaking spaces, as the entity and as the UTF-8 character U+00A0.
    $text = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $text);

    // Whole <br> tags in any case and with any attributes become a space, so
    // no stray '>' is left behind and adjacent words stay separated.
    $text = preg_replace('/<br\b[^<>]*>/i', ' ', $text);
    // An unterminated '<br' is blanked too, before strip_tags() sees it.
    $text = preg_replace('/<br\b/i', ' ', $text);

    $text = strip_tags($text);
    // Explicit flags and charset: the defaults of html_entity_decode() vary
    // with the PHP version and the default_charset ini setting.
    $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
    $text = htmlspecialchars_decode($text, ENT_QUOTES);

    $text = str_replace(array(')', '(', ':'), ' ', $text);
    // Entity names still present after decoding (e.g. written without ';').
    $text = str_replace(
        array('&euro', '&hellip', '&rsquo'),
        array(' euro ', '...', ' '),
        $text
    );
    // '\n' is single-quoted: it matches a literal backslash followed by 'n',
    // not a line feed. "\xE2\x80\x95" is U+2015 (horizontal bar).
    $text = str_replace(array('!', '?', '"', '-', '\n', "\xE2\x80\x95"), ' ', $text);

    if ($keyword) {
        $text = str_replace(array('.', ',', '\''), ' ', $text);
    }

    return str_replace(';', ' ', $text);
}
|
| 430: |
|
| 431: | |
| 432: | |
| 433: | |
| 434: | |
| 435: | |
| 436: | |
| 437: | |
| 438: | |
| 439: |
|
/**
 * Convert an HTML fragment to text.
 *
 * Script elements and self-closed img tags are removed, all remaining tags
 * are stripped, whitespace following a line break is collapsed to the line
 * break character, and a small set of named entities plus all decimal
 * numeric entities are decoded to UTF-8.
 *
 * @param string $document HTML to convert
 *
 * @return string the converted text
 */
protected static function html2text($document)
{
    $search = array(
        "'<script[^>]*?>.*?</script>'si", // script blocks
        "'<img.*?/>'si",                  // self-closed image tags
        "'<[\/\!]*?[^<>]*?>'si",          // any other tag
        "'([\r\n])[\s]+'",                // whitespace after a line break
        "'&(quot|#34);'i",
        "'&(amp|#38);'i",
        "'&(lt|#60);'i",
        "'&(gt|#62);'i",
        "'&(nbsp|#160);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i"
    );

    // U+00A1..U+00A9 are written as UTF-8 byte sequences: chr(161) and
    // friends produce lone bytes that are not valid UTF-8 and make later
    // /u regular expressions and mb_* calls fail.
    $replace = array(
        "",
        "",
        "",
        "\\1",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\xC2\xA1", // inverted exclamation mark
        "\xC2\xA2", // cent sign
        "\xC2\xA3", // pound sign
        "\xC2\xA9"  // copyright sign
    );

    $text = preg_replace($search, $replace, $document);
    if (!is_string($text)) {
        // preg_replace() failed (null) or was given a non-string subject;
        // hand the result back unchanged.
        return $text;
    }

    // Decode the remaining decimal numeric entities. The result must be
    // taken from the already-converted $text and kept; decoding through
    // html_entity_decode() yields proper UTF-8 for any code point and leaves
    // invalid references untouched.
    $decoded = preg_replace_callback(
        '/&#(\d+);/',
        function ($matches) {
            return html_entity_decode($matches[0], ENT_QUOTES, 'UTF-8');
        },
        $text
    );

    return ($decoded === null) ? $text : $decoded;
}
|
| 486: |
|
| 487: | |
| 488: | |
| 489: | |
| 490: | |
| 491: | |
| 492: | |
| 493: | |
| 494: | |
| 495: |
|
/**
 * Run a word through the shared stop word object.
 *
 * @param string $key word to check
 *
 * @return mixed whatever the stop word object's check() returns for $key
 */
public static function checkStopWords($key)
{
    $checker = static::stopWordsObject();

    return $checker->check($key);
}
|
| 500: |
|
| 501: | |
| 502: | |
| 503: | |
| 504: | |
| 505: |
|
/**
 * Return the shared StopWords instance, creating it on first use.
 *
 * The instance is held in a function-static variable and reused by later
 * calls.
 *
 * @return StopWords
 */
protected static function stopWordsObject()
{
    static $checker;

    if (null !== $checker) {
        return $checker;
    }

    $checker = new StopWords();

    return $checker;
}
|
| 514: | }
|
| 515: | |