1: | <?php
|
2: | |
3: | |
4: | |
5: | |
6: | |
7: | |
8: | |
9: | |
10: |
|
11: |
|
12: | namespace Xmf;
|
13: |
|
14: | |
15: | |
16: | |
17: | |
18: | |
19: | |
20: | |
21: | |
22: | |
23: | |
24: |
|
/**
 * Metagen - static helpers that derive page metadata from content.
 *
 * Provides generation of keyword lists, descriptions, SEO friendly title
 * slugs and search result summaries, plus helpers that push the title,
 * keywords and description into the active theme/template.
 *
 * All multibyte string handling assumes text encoded as self::ENCODING.
 */
class Metagen
{
    /**
     * Character encoding handed to the mb_* string functions.
     */
    const ENCODING = 'UTF-8';

    /**
     * Marker appended or prepended where text has been cut short.
     */
    const ELLIPSIS = "...";

    /**
     * Assign the page title to the template as 'xoops_pagetitle'.
     *
     * The title is trimmed and reduced to plain text before it is assigned.
     *
     * @param string $title page title, may contain markup
     *
     * @return void
     */
    public static function assignTitle($title)
    {
        $title = trim($title);
        $title = static::asPlainText($title);
        static::assignTemplateVar('xoops_pagetitle', $title);
    }

    /**
     * Assign a list of keywords to the theme as the 'keywords' meta tag.
     *
     * Nothing is assigned unless $keywords is a non-empty array.
     *
     * @param string[] $keywords keywords, joined with ', '
     *
     * @return void
     */
    public static function assignKeywords($keywords)
    {
        if (!empty($keywords) && \is_array($keywords)) {
            $keyword_tag = implode(', ', $keywords);
            static::assignThemeMeta('keywords', $keyword_tag);
        }
    }

    /**
     * Assign a description to the theme as the 'description' meta tag.
     *
     * The description is trimmed; an empty result is not assigned.
     *
     * @param string $description page description
     *
     * @return void
     */
    public static function assignDescription($description)
    {
        $description = trim($description);
        if (!empty($description)) {
            static::assignThemeMeta('description', $description);
        }
    }

    /**
     * Add a meta tag to the current theme.
     *
     * Uses \Xoops::getInstance()->theme() when a class named 'Xoops' is
     * already loaded, otherwise the global $xoTheme object.
     *
     * @param string $name  meta name
     * @param string $value meta content
     *
     * @return void
     */
    protected static function assignThemeMeta($name, $value)
    {
        if (class_exists('Xoops', false)) {
            \Xoops::getInstance()->theme()->addMeta('meta', $name, $value);
        } else {
            // NOTE(review): assumes $xoTheme has been initialised by the caller's environment
            global $xoTheme;
            $xoTheme->addMeta('meta', $name, $value);
        }
    }

    /**
     * Assign a variable to the current template engine.
     *
     * Uses \Xoops::getInstance()->tpl() when a class named 'Xoops' is
     * already loaded, otherwise the global $xoopsTpl object.
     *
     * @param string $name  template variable name
     * @param mixed  $value value to assign
     *
     * @return void
     */
    protected static function assignTemplateVar($name, $value)
    {
        if (class_exists('Xoops', false)) {
            \Xoops::getInstance()->tpl()->assign($name, $value);
        } else {
            // NOTE(review): assumes $xoopsTpl has been initialised by the caller's environment
            global $xoopsTpl;
            $xoopsTpl->assign($name, $value);
        }
    }

    /**
     * Build a list of keywords from a body of text, most frequent first.
     *
     * The text is converted to lower case plain text and split on anything
     * that is not a word character or an apostrophe. A word is counted when
     * the stop word object accepts it; words containing apostrophes are split
     * again and each part must be accepted and be at least $minLength
     * characters long. Forced keys are given a very large count so that they
     * sort ahead of everything found in the body.
     *
     * @param string        $body      text to extract keywords from
     * @param int           $count     maximum number of keywords to return
     * @param int           $minLength minimum length, in characters, of a keyword
     * @param string[]|null $forceKeys keywords to always include
     *
     * @return array keywords, highest count first
     */
    public static function generateKeywords(
        $body,
        $count = 20,
        $minLength = 4,
        $forceKeys = null
    ) {
        $keyCount = array();
        if (!is_array($forceKeys)) {
            $forceKeys = array();
        }

        $text = self::lowerCase(static::asPlainText($body));

        $originalKeywords = preg_split(
            '/[^\w\']+/u',
            $text,
            -1,
            PREG_SPLIT_NO_EMPTY
        );
        // preg_split() returns false when the subject is not valid UTF-8
        if (false === $originalKeywords) {
            $originalKeywords = array();
        }

        // presumably check() is true for words that are NOT stop words - confirm against StopWords
        $stopWords = static::stopWordsObject();
        foreach ($originalKeywords as $originalKeyword) {
            if (!$stopWords->check($originalKeyword)) {
                continue;
            }
            foreach (explode("'", $originalKeyword) as $candidate) {
                // length is measured in characters so multibyte words are not over-counted
                if ($stopWords->check($candidate) && self::textLength($candidate) >= $minLength) {
                    $keyCount[$candidate] = isset($keyCount[$candidate]) ? $keyCount[$candidate] + 1 : 1;
                }
            }
        }

        // last forced key first, matching the order produced by popping the array
        foreach (array_reverse($forceKeys) as $forceKey) {
            $keyCount[self::lowerCase($forceKey)] = 999999;
        }

        arsort($keyCount, SORT_NUMERIC);

        return array_slice(array_keys($keyCount), 0, $count);
    }

    /**
     * Build a description from the leading words of a body of text.
     *
     * Takes the first ($wordCount - 1) space separated words of the plain
     * text. When the result contains no period an ellipsis is appended.
     * When it is longer than 100 characters and its last period lies within
     * the final 30 characters, it is cut just after that period.
     *
     * @param string $body      text to summarise
     * @param int    $wordCount upper bound on the number of words used
     *
     * @return string the description
     */
    public static function generateDescription($body, $wordCount = 100)
    {
        $text = static::asPlainText($body);
        $words = explode(" ", $text);

        $ret = implode(' ', array_slice($words, 0, max((int) $wordCount - 1, 0)));

        $useMb = function_exists('mb_strlen');
        $len = $useMb ? mb_strlen($ret, static::ENCODING) : strlen($ret);
        $lastPeriod = $useMb ? mb_strrpos($ret, '.', 0, static::ENCODING) : strrpos($ret, '.');

        if (false === $lastPeriod) {
            // no sentence end to cut at, so only flag the text as unfinished
            return $ret . static::ELLIPSIS;
        }

        if ($len > 100 && ($len - $lastPeriod) < 30) {
            $ret = $useMb
                ? mb_substr($ret, 0, $lastPeriod + 1, static::ENCODING)
                : substr($ret, 0, $lastPeriod + 1);
        }

        return $ret;
    }

    /**
     * Generate keywords and a description, then assign title, keywords and
     * description in one call.
     *
     * Keywords found in the title (minimum length 3) are forced into the
     * keyword list generated from the body.
     *
     * @param string        $title     page title
     * @param string        $body      page body
     * @param int           $count     maximum number of keywords
     * @param int           $minLength minimum length of keywords taken from the body
     * @param int           $wordCount word limit passed to generateDescription()
     * @param string[]|null $forceKeys keywords to always include
     *
     * @return void
     */
    public static function generateMetaTags(
        $title,
        $body,
        $count = 20,
        $minLength = 4,
        $wordCount = 100,
        $forceKeys = null
    ) {
        $title_keywords = static::generateKeywords($title, $count, 3, $forceKeys);
        $keywords = static::generateKeywords($body, $count, $minLength, $title_keywords);
        $description = static::generateDescription($body, $wordCount);
        static::assignTitle($title);
        static::assignKeywords($keywords);
        static::assignDescription($description);
    }

    /**
     * Test whether a value has a non-zero string length.
     *
     * @param string $var value to test
     *
     * @return bool true when strlen($var) is greater than zero
     */
    protected static function nonEmptyString($var)
    {
        return (strlen($var) > 0);
    }

    /**
     * Build a hyphen separated title suitable for use in a URL.
     *
     * Every character that is not a Unicode letter or number becomes a
     * hyphen, empty segments and segments rejected by the stop word object
     * are dropped, and the remaining segments are joined with hyphens.
     *
     * @param string $title     title to convert
     * @param string $extension text appended to a non-empty result
     *
     * @return string the generated title, or '' when nothing remains
     */
    public static function generateSeoTitle($title = '', $extension = '')
    {
        // cast: preg_replace() yields null if $title is not valid UTF-8
        $title = (string) preg_replace("/[^\p{N}\p{L}]/u", "-", $title);

        $stopWords = static::stopWordsObject();
        $segments = array();
        foreach (explode("-", $title) as $segment) {
            if (static::nonEmptyString($segment) && $stopWords->check($segment)) {
                $segments[] = $segment;
            }
        }
        $title = implode("-", $segments);

        // compare against '' rather than empty() so that a slug of "0" is kept
        return ('' === $title) ? '' : $title . $extension;
    }

    /**
     * Extract a short summary of a text, positioned around the first match
     * of any of the supplied needles.
     *
     * The summary begins roughly $length / 2 characters before the earliest
     * needle position (moved forward to the next space) and is cut at the
     * last space within $length characters. An ellipsis marks each cut end.
     *
     * @param string               $haystack text to summarise, may contain markup
     * @param string|string[]|null $needles  search term(s) to centre the summary on
     * @param int                  $length   approximate maximum length of the summary
     *
     * @return string the summary
     */
    public static function getSearchSummary($haystack, $needles = null, $length = 120)
    {
        $haystack = static::asPlainText($haystack);
        $pos = static::getNeedlePositions($haystack, $needles);

        $start = empty($pos) ? 0 : min($pos);

        $start = max($start - (int) ($length / 2), 0);

        $pre = ($start > 0);
        if (function_exists('mb_strlen')) {
            if ($pre) {
                // move forward to the next space so the summary starts on a word boundary
                $temp = mb_strpos($haystack, ' ', $start, static::ENCODING);
                $start = ($temp === false) ? $start : $temp;
                $haystack = mb_substr(
                    $haystack,
                    $start,
                    mb_strlen($haystack, static::ENCODING),
                    static::ENCODING
                );
            }

            $post = !(mb_strlen($haystack, static::ENCODING) < $length);
            if ($post) {
                $haystack = mb_substr($haystack, 0, $length, static::ENCODING);
                // cut back to the last space so the summary ends on a word boundary
                $end = mb_strrpos($haystack, ' ', 0, static::ENCODING);
                if ($end) {
                    $haystack = mb_substr($haystack, 0, $end, static::ENCODING);
                }
            }
        } else {
            if ($pre) {
                $temp = strpos($haystack, ' ', $start);
                $start = ($temp === false) ? $start : $temp;
                $haystack = substr($haystack, $start);
            }

            $post = !(strlen($haystack) < $length);
            if ($post) {
                $haystack = substr($haystack, 0, $length);
                $end = strrpos($haystack, ' ', 0);
                if ($end) {
                    $haystack = substr($haystack, 0, $end);
                }
            }
        }
        $haystack = ($pre ? static::ELLIPSIS : '') . trim($haystack) . ($post ? static::ELLIPSIS : '');

        return $haystack;
    }

    /**
     * Reduce text that may contain markup to a single line of plain text.
     *
     * Runs html2text() and purifyText(), turns line breaks into spaces,
     * collapses runs of spaces and trims the result.
     *
     * @param string $rawText text to convert
     *
     * @return string plain text
     */
    protected static function asPlainText($rawText)
    {
        $text = $rawText;
        $text = static::html2text($text);
        $text = static::purifyText($text);

        $text = str_replace(array("\n", "\r"), ' ', $text);
        $text = preg_replace('/[ ]* [ ]*/', ' ', $text);

        return trim($text);
    }

    /**
     * Find the first position of each needle in the haystack.
     *
     * The search is case insensitive. Needles that do not occur, and empty
     * needles, contribute no position.
     *
     * @param string               $haystack text to search
     * @param string|string[]|null $needles  term(s) to look for
     *
     * @return int[] positions of the needles that were found
     */
    protected static function getNeedlePositions($haystack, $needles)
    {
        $pos = array();
        $needles = empty($needles) ? array() : (array) $needles;
        $useMb = function_exists('mb_stripos');
        foreach ($needles as $needle) {
            // an empty needle warns on older PHP and "matches" at offset 0 on PHP 8
            if (!is_scalar($needle) || '' === (string) $needle) {
                continue;
            }
            $i = $useMb
                ? mb_stripos($haystack, $needle, 0, static::ENCODING)
                : stripos($haystack, $needle, 0);
            if ($i !== false) {
                $pos[] = $i;
            }
        }

        return $pos;
    }

    /**
     * Strip markup and punctuation that is unwanted in generated metadata.
     *
     * @param string $text    text to clean
     * @param bool   $keyword when true also replace periods, commas and
     *                        apostrophes with spaces
     *
     * @return string cleaned text
     */
    protected static function purifyText($text, $keyword = false)
    {
        // normalise non-breaking spaces, as an entity and as the UTF-8 character
        $text = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $text);
        $text = str_replace('<br />', ' ', $text);
        $text = str_replace('<br/>', ' ', $text);
        $text = str_replace('<br', ' ', $text);
        $text = strip_tags($text);
        $text = html_entity_decode($text);
        $text = htmlspecialchars_decode($text, ENT_QUOTES);
        $text = str_replace(')', ' ', $text);
        $text = str_replace('(', ' ', $text);
        $text = str_replace(':', ' ', $text);
        $text = str_replace('&euro', ' euro ', $text);
        $text = str_replace('&hellip', '...', $text);
        $text = str_replace('&rsquo', ' ', $text);
        $text = str_replace('!', ' ', $text);
        $text = str_replace('?', ' ', $text);
        $text = str_replace('"', ' ', $text);
        $text = str_replace('-', ' ', $text);
        // single quoted: this matches a literal backslash followed by "n", not a line feed
        $text = str_replace('\n', ' ', $text);
        $text = str_replace('―', ' ', $text);

        if ($keyword) {
            $text = str_replace('.', ' ', $text);
            $text = str_replace(',', ' ', $text);
            $text = str_replace('\'', ' ', $text);
        }
        $text = str_replace(';', ' ', $text);

        return $text;
    }

    /**
     * Convert an HTML fragment to text.
     *
     * Removes script blocks, img tags and all other tags, collapses
     * whitespace that follows a line break, and decodes a small set of
     * named entities as well as decimal numeric entities. Decoded
     * characters are emitted as UTF-8.
     *
     * @param string $document HTML to convert
     *
     * @return string text with markup removed
     */
    protected static function html2text($document)
    {
        $search = array(
            "'<script[^>]*?>.*?</script>'si",
            "'<img.*?/>'si",
            "'<[\/\!]*?[^<>]*?>'si",
            "'([\r\n])[\s]+'",
            "'&(quot|#34);'i",
            "'&(amp|#38);'i",
            "'&(lt|#60);'i",
            "'&(gt|#62);'i",
            "'&(nbsp|#160);'i",
            "'&(iexcl|#161);'i",
            "'&(cent|#162);'i",
            "'&(pound|#163);'i",
            "'&(copy|#169);'i"
        );

        $replace = array(
            "",
            "",
            "",
            "\\1",
            "\"",
            "&",
            "<",
            ">",
            " ",
            // UTF-8 byte sequences; a bare chr(161) would be an invalid lone byte
            "\xC2\xA1",
            "\xC2\xA2",
            "\xC2\xA3",
            "\xC2\xA9"
        );

        $text = preg_replace($search, $replace, $document);
        if (null === $text) {
            // PCRE failure (for example the backtrack limit): keep the input rather than lose it
            $text = (string) $document;
        }

        // decode the remaining decimal entities in the processed text, as UTF-8
        $encoding = static::ENCODING;
        $decoded = preg_replace_callback(
            '/&#(\d+);/',
            function ($matches) use ($encoding) {
                return html_entity_decode($matches[0], ENT_QUOTES, $encoding);
            },
            $text
        );

        return (null === $decoded) ? $text : $decoded;
    }

    /**
     * Check a word against the stop word object.
     *
     * @param string $key word to check
     *
     * @return mixed whatever StopWords::check() returns for the word
     */
    public static function checkStopWords($key)
    {
        return static::stopWordsObject()->check($key);
    }

    /**
     * Get the StopWords object, creating it on first use.
     *
     * @return StopWords
     */
    protected static function stopWordsObject()
    {
        static $object;
        if (null === $object) {
            $object = new StopWords();
        }

        return $object;
    }

    /**
     * Lower case a value, using mb_strtolower() when it is available.
     *
     * @param mixed $text value to convert, cast to string
     *
     * @return string lower cased text
     */
    private static function lowerCase($text)
    {
        $text = (string) $text;

        return function_exists('mb_strtolower')
            ? mb_strtolower($text, self::ENCODING)
            : strtolower($text);
    }

    /**
     * Length of a string in characters when mb_strlen() is available,
     * otherwise in bytes.
     *
     * @param string $text text to measure
     *
     * @return int length
     */
    private static function textLength($text)
    {
        return function_exists('mb_strlen')
            ? mb_strlen($text, self::ENCODING)
            : strlen($text);
    }
}
|
515: | |