1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10:
11:
12: namespace Xmf;
13:
14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24:
25: class Metagen
26: {
27:
28: 29: 30:
/**
 * Character encoding passed to the multibyte (mb_*) string functions
 * used throughout this class.
 */
const ENCODING = 'UTF-8';
32:
33: 34: 35: 36:
/**
 * Marker text that generateDescription() appends, and that
 * getSearchSummary() prepends and/or appends, to the text they return.
 */
const ELLIPSIS = "...";
38:
39: 40: 41: 42: 43: 44: 45:
/**
 * Assign the page title.
 *
 * The title is trimmed, converted to plain text, and stored in the
 * 'xoops_pagetitle' template variable.
 *
 * @param string $title page title
 *
 * @return void
 */
public static function assignTitle($title)
{
    $plainTitle = static::asPlainText(trim($title));
    static::assignTemplateVar('xoops_pagetitle', $plainTitle);
}
52:
53: 54: 55: 56: 57: 58: 59:
/**
 * Assign the 'keywords' meta tag.
 *
 * Nothing is assigned unless $keywords is a non-empty array. The keywords
 * are joined with ', ' to build the tag content.
 *
 * @param string[] $keywords keywords for the page
 *
 * @return void
 */
public static function assignKeywords($keywords)
{
    if (empty($keywords) || !is_array($keywords)) {
        return;
    }

    static::assignThemeMeta('keywords', implode(', ', $keywords));
}
67:
68: 69: 70: 71: 72: 73: 74:
/**
 * Assign the 'description' meta tag.
 *
 * The description is trimmed first; nothing is assigned when the trimmed
 * value is empty() (which includes the string '0').
 *
 * @param string $description page description
 *
 * @return void
 */
public static function assignDescription($description)
{
    $trimmed = trim($description);
    if (empty($trimmed)) {
        return;
    }

    static::assignThemeMeta('description', $trimmed);
}
82:
83: 84: 85: 86: 87: 88:
/**
 * Add a 'meta' entry to the active theme.
 *
 * If the Xoops class is already loaded (no autoloading is triggered), the
 * theme is obtained from \Xoops::getInstance(). Otherwise the global
 * $xoTheme object is used.
 *
 * @param string $name  meta name, e.g. 'keywords' or 'description'
 * @param string $value meta content
 *
 * @return void
 */
protected static function assignThemeMeta($name, $value)
{
    if (!class_exists('Xoops', false)) {
        // fall back to the theme object exposed as a global
        global $xoTheme;
        $xoTheme->addMeta('meta', $name, $value);

        return;
    }

    \Xoops::getInstance()->theme()->addMeta('meta', $name, $value);
}
98:
99: 100: 101: 102: 103: 104:
/**
 * Assign a variable to the page template.
 *
 * If the Xoops class is already loaded (no autoloading is triggered), the
 * template engine is obtained from \Xoops::getInstance(). Otherwise the
 * global $xoopsTpl object is used.
 *
 * @param string $name  template variable name
 * @param mixed  $value value to assign
 *
 * @return void
 */
protected static function assignTemplateVar($name, $value)
{
    if (!class_exists('Xoops', false)) {
        // fall back to the template object exposed as a global
        global $xoopsTpl;
        $xoopsTpl->assign($name, $value);

        return;
    }

    \Xoops::getInstance()->tpl()->assign($name, $value);
}
114:
115: 116: 117: 118: 119: 120: 121: 122: 123: 124:
/**
 * Generate a ranked list of keywords from a body of text.
 *
 * The body is converted to plain text, lowercased, and split on every run
 * of characters that is neither a word character nor an apostrophe. A word
 * is kept only if the stop word object's check() accepts it. Kept words are
 * then split on apostrophes, and each part must pass check() again and be
 * at least $minLength characters long. Parts are ranked by how often they
 * occur. Forced keys are lowercased and given a fixed weight of 999999 so
 * they rank ahead of counted words.
 *
 * Length is measured in characters (not bytes) when mbstring is available,
 * so multibyte words are not over-counted against $minLength.
 *
 * @param string        $body      text to extract keywords from
 * @param int           $count     maximum number of keywords to return
 * @param int           $minLength minimum length, in characters, of a keyword
 * @param string[]|null $forceKeys keywords that must be included; anything
 *                                 that is not an array is ignored
 *
 * @return array keywords, highest ranked first (numeric keywords come back
 *               as integers because they were used as array keys)
 */
public static function generateKeywords(
    $body,
    $count = 20,
    $minLength = 4,
    $forceKeys = null
) {
    $keyCount = array();
    if (!is_array($forceKeys)) {
        $forceKeys = array();
    }

    $multibyte = function_exists('mb_strtolower') && function_exists('mb_strlen');

    $text = static::asPlainText($body);
    $text = $multibyte ? mb_strtolower($text, static::ENCODING) : strtolower($text);

    $originalKeywords = preg_split(
        '/[^\w\']+/u',
        $text,
        -1,
        PREG_SPLIT_NO_EMPTY
    );
    if (false === $originalKeywords) {
        // preg_split() fails (e.g. on malformed UTF-8 with the /u modifier);
        // treat that as "no words found" rather than iterating over false
        $originalKeywords = array();
    }

    foreach ($originalKeywords as $originalKeyword) {
        if (!static::stopWordsObject()->check($originalKeyword)) {
            continue;
        }
        // words such as "l'avion" are split so each part is judged on its own
        foreach (explode("'", $originalKeyword) as $secondRoundKeyword) {
            if (!static::stopWordsObject()->check($secondRoundKeyword)) {
                continue;
            }
            $keywordLength = $multibyte
                ? mb_strlen($secondRoundKeyword, static::ENCODING)
                : strlen($secondRoundKeyword);
            if ($keywordLength >= $minLength) {
                $keyCount[$secondRoundKeyword] =
                    empty($keyCount[$secondRoundKeyword]) ? 1 : $keyCount[$secondRoundKeyword] + 1;
            }
        }
    }

    // forced keys are applied last-to-first, matching the original pop order
    foreach (array_reverse($forceKeys) as $forcedKey) {
        $forcedKey = $multibyte
            ? mb_strtolower($forcedKey, static::ENCODING)
            : strtolower($forcedKey);
        $keyCount[$forcedKey] = 999999;
    }

    arsort($keyCount, SORT_NUMERIC);

    return array_slice(array_keys($keyCount), 0, $count);
}
175:
176: 177: 178: 179: 180: 181: 182: 183:
/**
 * Generate a description from a body of text.
 *
 * The body is converted to plain text and split on single spaces. At most
 * ($wordCount - 1) words are kept and re-joined. If the result contains no
 * period, the ellipsis marker is appended. If the result is longer than 100
 * characters and its last period lies within the final 30 characters, the
 * text is cut just after that period.
 *
 * Lengths and positions are measured in characters when mbstring is
 * available, and in bytes otherwise.
 *
 * @param string $body      text to summarize
 * @param int    $wordCount word limit; ($wordCount - 1) words are kept
 *
 * @return string the generated description
 */
public static function generateDescription($body, $wordCount = 100)
{
    $words = explode(" ", static::asPlainText($body));

    // keep the leading words, never more than are available
    $limit = min($wordCount - 1, count($words));
    $kept = array();
    for ($index = 0; $index < $limit; ++$index) {
        $kept[] = $words[$index];
    }
    $ret = implode(' ', $kept);

    $useMultibyte = function_exists('mb_strlen');
    if ($useMultibyte) {
        $len = mb_strlen($ret, static::ENCODING);
        $lastPeriod = mb_strrpos($ret, '.', 0, static::ENCODING);
    } else {
        $len = strlen($ret);
        $lastPeriod = strrpos($ret, '.');
    }

    if ($lastPeriod === false) {
        $ret .= static::ELLIPSIS;
    }

    // end on a sentence boundary when one lies close to the end of the text
    if ($len > 100 && ($len - $lastPeriod) < 30) {
        $ret = $useMultibyte
            ? mb_substr($ret, 0, $lastPeriod + 1, static::ENCODING)
            : substr($ret, 0, $lastPeriod + 1);
    }

    return $ret;
}
216:
217: 218: 219: 220: 221: 222: 223: 224: 225: 226: 227: 228:
/**
 * Generate and assign the title, keywords and description for a page.
 *
 * Keywords are first extracted from the title with a minimum length of 3.
 * Those are then passed as forced keys when extracting keywords from the
 * body, so title words always appear in the final list.
 *
 * @param string        $title     page title
 * @param string        $body      page body text
 * @param int           $count     maximum number of keywords
 * @param int           $minLength minimum length of keywords taken from the body
 * @param int           $wordCount word limit passed to generateDescription()
 * @param string[]|null $forceKeys keywords forced into the title keyword list
 *
 * @return void
 */
public static function generateMetaTags(
    $title,
    $body,
    $count = 20,
    $minLength = 4,
    $wordCount = 100,
    $forceKeys = null
) {
    $fromTitle = static::generateKeywords($title, $count, 3, $forceKeys);
    $pageKeywords = static::generateKeywords($body, $count, $minLength, $fromTitle);
    $pageDescription = static::generateDescription($body, $wordCount);

    static::assignTitle($title);
    static::assignKeywords($pageKeywords);
    static::assignDescription($pageDescription);
}
244:
245: 246: 247: 248: 249: 250: 251: 252: 253:
/**
 * Report whether a value has a non-zero string length.
 *
 * Used by generateSeoTitle() as an array_filter() callback.
 *
 * @param string $var value to test
 *
 * @return bool true when strlen($var) is greater than zero
 */
protected static function nonEmptyString($var)
{
    $length = strlen($var);

    return $length > 0;
}
258:
259: 260: 261: 262: 263: 264: 265: 266: 267: 268:
/**
 * Build a URL-friendly title.
 *
 * Every character that is not a Unicode letter or number is replaced with
 * '-'. The result is split on '-', empty pieces and pieces rejected by the
 * stop word object's check() are dropped, and the rest are re-joined with
 * '-'. The extension is appended only when something remains.
 *
 * @param string $title     title to convert
 * @param string $extension suffix to append, e.g. '.html'
 *
 * @return string the converted title, or '' when nothing usable remains
 */
public static function generateSeoTitle($title = '', $extension = '')
{
    $title = preg_replace("/[^\p{N}\p{L}]/u", "-", $title);

    $tableau = explode("-", $title);
    // An array callable is used because the string form 'static::method' is
    // deprecated as of PHP 8.2; get_called_class() keeps late static binding.
    $tableau = array_filter($tableau, array(get_called_class(), 'nonEmptyString'));
    $tableau = array_filter($tableau, array(static::stopWordsObject(), 'check'));
    $title = implode("-", $tableau);

    return empty($title) ? '' : $title . $extension;
}
281:
282: 283: 284: 285: 286: 287: 288: 289: 290: 291: 292: 293: 294: 295: 296:
/**
 * Extract a summary of about $length characters from a text, positioned
 * around the earliest occurrence of any of the given search terms.
 *
 * The text is converted to plain text first. The window starts half of
 * $length before the earliest needle position (or at 0 when no needle is
 * found). If the window does not start at the beginning, it is moved
 * forward to the next space and the ellipsis marker is prepended. If the
 * remaining text is at least $length long, it is cut to $length, trimmed
 * back to the last space, and the ellipsis marker is appended.
 *
 * Lengths and positions are measured in characters when mbstring is
 * available, and in bytes otherwise.
 *
 * @param string          $haystack text to summarize
 * @param string|string[] $needles  search term(s) to centre the summary on
 * @param int             $length   target summary length
 *
 * @return string the summary, with ellipsis markers where text was cut
 */
public static function getSearchSummary($haystack, $needles = null, $length = 120)
{
    $haystack = static::asPlainText($haystack);
    $pos = static::getNeedlePositions($haystack, $needles);

    $start = empty($pos) ? 0 : min($pos);

    $start = max($start - (int) ($length / 2), 0);

    $pre = ($start > 0); // need an ellipsis in front?
    if (function_exists('mb_strlen')) {
        if ($pre) {
            // start on a word boundary if a space follows the window start
            $temp = mb_strpos($haystack, ' ', $start, static::ENCODING);
            $start = ($temp === false) ? $start : $temp;
            // The length is measured in the same encoding used for slicing.
            // Measuring in the internal encoding could give a value that is
            // too small and silently truncate the text.
            $haystack = mb_substr(
                $haystack,
                $start,
                mb_strlen($haystack, static::ENCODING),
                static::ENCODING
            );
        }

        $post = !(mb_strlen($haystack, static::ENCODING) < $length); // need an ellipsis at the end?
        if ($post) {
            $haystack = mb_substr($haystack, 0, $length, static::ENCODING);
            // cut back to the last complete word, if there is a space to cut at
            $end = mb_strrpos($haystack, ' ', 0, static::ENCODING);
            if ($end) {
                $haystack = mb_substr($haystack, 0, $end, static::ENCODING);
            }
        }
    } else {
        if ($pre) {
            // start on a word boundary if a space follows the window start
            $temp = strpos($haystack, ' ', $start);
            $start = ($temp === false) ? $start : $temp;
            $haystack = substr($haystack, $start);
        }

        $post = !(strlen($haystack) < $length); // need an ellipsis at the end?
        if ($post) {
            $haystack = substr($haystack, 0, $length);
            // cut back to the last complete word, if there is a space to cut at
            $end = strrpos($haystack, ' ', 0);
            if ($end) {
                $haystack = substr($haystack, 0, $end);
            }
        }
    }
    $haystack = ($pre ? static::ELLIPSIS : '') . trim($haystack) . ($post ? static::ELLIPSIS : '');
    return $haystack;
}
343:
344: 345: 346: 347: 348: 349: 350: 351:
/**
 * Reduce raw (possibly HTML) text to a single line of plain text.
 *
 * The text is passed through html2text() and purifyText(). Line breaks are
 * then replaced with spaces, runs of spaces are collapsed to one space, and
 * the result is trimmed.
 *
 * @param string $rawText text to convert
 *
 * @return string the plain text
 */
protected static function asPlainText($rawText)
{
    $plain = static::purifyText(static::html2text($rawText));

    $plain = str_replace(array("\n", "\r"), ' ', $plain);
    $plain = preg_replace('/[ ]* [ ]*/', ' ', $plain);

    return trim($plain);
}
363:
364: 365: 366: 367: 368: 369: 370: 371: 372: 373:
/**
 * Find the first position of each search term in a text, ignoring case.
 *
 * Terms that do not occur are left out. Empty-string and null terms are
 * skipped: on PHP 8 an empty needle matches at offset 0 (older versions
 * emit a warning), which would hide the positions of the real terms.
 *
 * Positions are character offsets when mbstring is available, and byte
 * offsets otherwise.
 *
 * @param string          $haystack text to search
 * @param string|string[] $needles  search term(s); an empty value means none
 *
 * @return int[] positions of the terms that were found
 */
protected static function getNeedlePositions($haystack, $needles)
{
    $pos = array();
    $needles = empty($needles) ? array() : (array) $needles;
    $multibyte = function_exists('mb_stripos');

    foreach ($needles as $needle) {
        if ('' === $needle || null === $needle) {
            continue;
        }
        $i = $multibyte
            ? mb_stripos($haystack, $needle, 0, static::ENCODING)
            : stripos($haystack, $needle, 0);
        if ($i !== false) {
            $pos[] = $i;
        }
    }

    return $pos;
}
390:
391: 392: 393: 394: 395: 396: 397: 398:
/**
 * Strip markup and most punctuation from a text.
 *
 * Steps, in order:
 *  - '&nbsp;' entities and '<br' tag openers are replaced with spaces
 *  - remaining tags are stripped and HTML entities decoded
 *  - non-breaking space characters (UTF-8 bytes C2 A0) become spaces
 *  - ( ) : ! ? " - and the horizontal bar become spaces, as do newlines
 *    and the literal two-character sequence backslash-n
 *  - semicolon-less '&euro', '&hellip' and '&rsquo' are replaced
 *  - when $keyword is true, periods, commas and apostrophes become spaces
 *  - semicolons become spaces
 *
 * @param string $text    text to clean
 * @param bool   $keyword true to also remove periods, commas and apostrophes
 *
 * @return string the cleaned text
 */
protected static function purifyText($text, $keyword = false)
{
    $text = str_replace('&nbsp;', ' ', $text);
    // '<br' is last so that the complete forms are matched first
    $text = str_replace(array('<br />', '<br/>', '<br'), ' ', $text);
    $text = strip_tags($text);
    $text = html_entity_decode($text);
    $text = htmlspecialchars_decode($text, ENT_QUOTES);
    // non-breaking spaces, including any produced by the entity decoding above
    $text = str_replace("\xC2\xA0", ' ', $text);
    $text = str_replace(array(')', '(', ':'), ' ', $text);
    $text = str_replace('&euro', ' euro ', $text);
    $text = str_replace('&hellip', '...', $text);
    $text = str_replace('&rsquo', ' ', $text);
    // '\n' (single-quoted) is the literal backslash-n pair; "\n" is a real newline
    $text = str_replace(array('!', '?', '"', '-', '\n', "\n", '―'), ' ', $text);

    if ($keyword) {
        $text = str_replace(array('.', ',', '\''), ' ', $text);
    }
    $text = str_replace(';', ' ', $text);

    return $text;
}
430:
431: 432: 433: 434: 435: 436: 437: 438: 439:
/**
 * Convert an HTML document to text.
 *
 * Script blocks, self-closed img tags and all remaining tags are removed,
 * and whitespace following a line break is dropped. A fixed set of named
 * and numeric entities is then decoded, followed by any remaining decimal
 * numeric entities (&#NNN;).
 *
 * Decoded characters are emitted as UTF-8. Single bytes from chr() would be
 * invalid UTF-8 for code points above 127 and would make later /u regular
 * expressions fail.
 *
 * @param string $document HTML to convert
 *
 * @return string the converted text
 */
protected static function html2text($document)
{
    $search = array(
        "'<script[^>]*?>.*?</script>'si", // inline scripts
        "'<img.*?/>'si",                  // self-closed image tags
        "'<[\/\!]*?[^<>]*?>'si",          // any other tag
        "'([\r\n])[\s]+'",                // whitespace after a line break
        "'&(quot|#34);'i",
        "'&(amp|#38);'i",
        "'&(lt|#60);'i",
        "'&(gt|#62);'i",
        "'&(nbsp|#160);'i",
        "'&(iexcl|#161);'i",
        "'&(cent|#162);'i",
        "'&(pound|#163);'i",
        "'&(copy|#169);'i"
    );

    $replace = array(
        "",
        "",
        "",
        "\\1",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\xC2\xA1", // U+00A1 inverted exclamation mark
        "\xC2\xA2", // U+00A2 cent sign
        "\xC2\xA3", // U+00A3 pound sign
        "\xC2\xA9"  // U+00A9 copyright sign
    );

    $text = preg_replace($search, $replace, $document);

    // Decode the remaining decimal entities. The original ran this on
    // $document and discarded the result, so it had no effect.
    $text = preg_replace_callback(
        '/&#(\d+);/',
        function ($matches) {
            // entities that do not name a valid character are returned unchanged
            return html_entity_decode($matches[0], ENT_QUOTES, 'UTF-8');
        },
        $text
    );

    return $text;
}
486:
487: 488: 489: 490: 491: 492: 493: 494: 495:
/**
 * Run a word through the shared stop word object's check() method.
 *
 * @param string $key word to check
 *
 * @return mixed whatever the stop word object's check() returns for $key
 */
public static function checkStopWords($key)
{
    $stopWords = static::stopWordsObject();

    return $stopWords->check($key);
}
500:
501: 502: 503: 504: 505:
/**
 * Get the shared StopWords instance, creating it on first use.
 *
 * The instance is held in a function-level static variable, so later calls
 * reuse the same object.
 *
 * @return StopWords
 */
protected static function stopWordsObject()
{
    static $stopWords;

    if (!isset($stopWords)) {
        $stopWords = new StopWords();
    }

    return $stopWords;
}
514: }
515: