1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10:
11:
12: namespace Xmf;
13:
14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26:
/**
 * Metagen - helpers that derive page meta data (title, keywords,
 * description, SEO slug, search summary) from arbitrary text and hand the
 * results to the global theme / template objects.
 */
class Metagen
{
    /**
     * Character encoding used for every multibyte string operation.
     */
    const ENCODING = 'UTF-8';

    /**
     * Marker appended/prepended where text has been cut.
     */
    const ELLIPSIS = "…";

    /**
     * Assign the page title.
     *
     * The title is reduced to plain text. When it is not empty it is added
     * as a 'title' meta entry on the global $xoTheme (if that is an object)
     * and assigned to the 'xoops_pagetitle' variable of the global
     * $xoopsTpl (if that is an object).
     *
     * @param string $title page title
     *
     * @return void
     */
    public static function assignTitle($title)
    {
        global $xoopsTpl, $xoTheme;

        $title = self::asPlainText(trim((string) $title));
        // Compare against '' rather than empty() so that a title of "0" survives.
        if ('' === $title) {
            return;
        }

        if (is_object($xoTheme)) {
            $xoTheme->addMeta('meta', 'title', $title);
        }
        // The template object may be missing (e.g. outside of page rendering).
        if (is_object($xoopsTpl)) {
            $xoopsTpl->assign('xoops_pagetitle', $title);
        }
    }

    /**
     * Assign the meta keywords.
     *
     * The keywords are joined with ', '. The result goes to the global
     * $xoTheme as a 'keywords' meta entry when the theme is an object,
     * otherwise to the 'xoops_meta_keywords' template variable.
     *
     * @param string[] $keywords keywords to assign; anything but a non-empty
     *                           array is ignored
     *
     * @return void
     */
    public static function assignKeywords($keywords)
    {
        global $xoopsTpl, $xoTheme;

        if (empty($keywords) || !is_array($keywords)) {
            return;
        }

        $keyword_tag = implode(', ', $keywords);
        if ('' === $keyword_tag) {
            return;
        }

        if (is_object($xoTheme)) {
            $xoTheme->addMeta('meta', 'keywords', $keyword_tag);
        } elseif (is_object($xoopsTpl)) {
            $xoopsTpl->assign('xoops_meta_keywords', $keyword_tag);
        }
    }

    /**
     * Assign the meta description.
     *
     * The trimmed description goes to the global $xoTheme as a
     * 'description' meta entry when the theme is an object, otherwise to
     * the 'xoops_meta_description' template variable.
     *
     * @param string $description page description
     *
     * @return void
     */
    public static function assignDescription($description)
    {
        global $xoopsTpl, $xoTheme;

        $description = trim((string) $description);
        if ('' === $description) {
            return;
        }

        if (is_object($xoTheme)) {
            $xoTheme->addMeta('meta', 'description', $description);
        } elseif (is_object($xoopsTpl)) {
            $xoopsTpl->assign('xoops_meta_description', $description);
        }
    }

    /**
     * Extract the most frequent words of a text as keywords.
     *
     * The text is converted to plain text and lower-cased, split into words
     * (letters of any script, with embedded apostrophes, double quotes and
     * hyphens), filtered through the stop word list and counted. Forced
     * keys are given a very high count so they sort to the front.
     *
     * @param string        $body      text to analyse
     * @param int           $count     maximum number of keywords returned
     * @param int           $minLength minimum length (in characters) of a keyword
     * @param string[]|null $forceKeys keywords that must always be included
     *
     * @return array keywords ordered by descending weight
     */
    public static function generateKeywords(
        $body,
        $count = 20,
        $minLength = 4,
        $forceKeys = null
    ) {
        $keyCount = array();
        if (!is_array($forceKeys)) {
            $forceKeys = array();
        }

        $text = mb_strtolower(self::asPlainText($body), self::ENCODING);

        // \p{L} with the u modifier keeps accented / non-latin words intact;
        // an ASCII-only class would cut them apart at every such letter.
        $tokens = preg_split('/[^\p{L}\'"-]+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if (!is_array($tokens)) {
            // The u modifier fails on malformed UTF-8: fall back to ASCII words.
            $tokens = preg_split('/[^a-zA-Z\'"-]+/', $text, -1, PREG_SPLIT_NO_EMPTY);
        }
        if (!is_array($tokens)) {
            $tokens = array();
        }

        foreach ($tokens as $token) {
            if (!self::checkStopWords($token)) {
                continue;
            }
            // Second round: split contractions / possessives on the apostrophe.
            foreach (explode("'", $token) as $word) {
                // Drop quote and hyphen residue so punctuation-only tokens vanish.
                $word = trim($word, '"-');
                if ('' === $word
                    || mb_strlen($word, self::ENCODING) < $minLength
                    || !self::checkStopWords($word)
                ) {
                    continue;
                }
                $keyCount[$word] = isset($keyCount[$word]) ? $keyCount[$word] + 1 : 1;
            }
        }

        // Forced keys are processed last-to-first, as the original array_pop loop did.
        foreach (array_reverse($forceKeys) as $forced) {
            if (!is_scalar($forced)) {
                continue;
            }
            $forced = mb_strtolower((string) $forced, self::ENCODING);
            if ('' !== $forced) {
                $keyCount[$forced] = 999999;
            }
        }

        arsort($keyCount, SORT_NUMERIC);

        return array_slice(array_keys($keyCount), 0, $count);
    }

    /**
     * Check a word against the stop word list.
     *
     * The list is read once from the _XMF_STOPWORDS constant (loading the
     * 'stopwords' language file of 'xmf' first if the constant is not yet
     * defined) and cached for the rest of the request. When the constant is
     * still undefined after loading, no word is treated as a stop word.
     *
     * @param string $key word to check
     *
     * @return bool true if the word is NOT a stop word, false if it is
     */
    protected static function checkStopWords($key)
    {
        static $stopwords = null;

        if (null === $stopwords) {
            if (!defined('_XMF_STOPWORDS')) {
                \Xmf\Language::load('stopwords', 'xmf');
            }
            $stopwords = array();
            if (defined('_XMF_STOPWORDS')) {
                // Lower-case the list because lookups are lower-cased too, and
                // split on any whitespace run so stray blanks create no '' entry.
                $list = preg_split(
                    '/[ \t\r\n]+/',
                    mb_strtolower((string) _XMF_STOPWORDS, self::ENCODING),
                    -1,
                    PREG_SPLIT_NO_EMPTY
                );
                if (is_array($list) && !empty($list)) {
                    $stopwords = array_fill_keys($list, true);
                }
            }
        }

        return !isset($stopwords[mb_strtolower((string) $key, self::ENCODING)]);
    }

    /**
     * Build a description from the leading words of a text.
     *
     * At most ($wordCount - 1) space separated words of the plain text are
     * kept. If the result is longer than 100 characters and its last period
     * lies within the final 30 characters, the result is cut right after
     * that period. Otherwise an ellipsis is appended, but only when words
     * were actually dropped and the result does not already end in a period.
     *
     * @param string $body      text to summarise
     * @param int    $wordCount word limit (see above)
     *
     * @return string description, '' when the text is empty
     */
    public static function generateDescription($body, $wordCount = 100)
    {
        $text = self::asPlainText($body);
        if ('' === $text) {
            return '';
        }

        $words = explode(' ', $text);
        $limit = max(0, (int) $wordCount - 1);
        $truncated = count($words) > $limit;
        $ret = implode(' ', array_slice($words, 0, $limit));

        $len = mb_strlen($ret, self::ENCODING);
        $lastPeriod = mb_strrpos($ret, '.', 0, self::ENCODING);

        // Prefer ending on a complete sentence when one finishes near the end.
        if (false !== $lastPeriod && $len > 100 && ($len - $lastPeriod) < 30) {
            return mb_substr($ret, 0, $lastPeriod + 1, self::ENCODING);
        }

        if ($truncated && '' !== $ret && '.' !== mb_substr($ret, -1, 1, self::ENCODING)) {
            $ret .= self::ELLIPSIS;
        }

        return $ret;
    }

    /**
     * Generate keywords and a description from a title and body, then
     * assign title, keywords and description in one call.
     *
     * Keywords extracted from the title (minimum length 3) are passed as
     * forced keys when extracting keywords from the body.
     *
     * @param string        $title     page title
     * @param string        $body      page body
     * @param int           $count     maximum number of keywords
     * @param int           $minLength minimum keyword length for body words
     * @param int           $wordCount word limit passed to generateDescription()
     * @param string[]|null $forceKeys keywords forced into the title keywords
     *
     * @return void
     */
    public static function generateMetaTags(
        $title,
        $body,
        $count = 20,
        $minLength = 4,
        $wordCount = 100,
        $forceKeys = null
    ) {
        $title_keywords = self::generateKeywords($title, $count, 3, $forceKeys);
        $keywords = self::generateKeywords($body, $count, $minLength, $title_keywords);
        $description = self::generateDescription($body, $wordCount);
        self::assignTitle($title);
        self::assignKeywords($keywords);
        self::assignDescription($description);
    }

    /**
     * Filter helper: true for any value whose string form is not empty.
     *
     * Unlike empty(), this keeps the string "0".
     *
     * @param mixed $var value to test
     *
     * @return bool
     */
    protected static function nonEmptyString($var)
    {
        return '' !== (string) $var;
    }

    /**
     * Turn a title into a hyphen separated slug.
     *
     * Every character that is not a letter or a number (of any script) acts
     * as a separator; empty parts and stop words are removed and the
     * remaining parts are joined with '-'. The extension is appended only
     * when the slug is not empty.
     *
     * @param string $title     title to convert
     * @param string $extension suffix to append, e.g. '.html'
     *
     * @return string slug, or '' when nothing usable remains
     */
    public static function generateSeoTitle($title = '', $extension = '')
    {
        $title = (string) $title;

        // Compose characters BEFORE stripping, so that a base letter and its
        // combining mark are not torn apart. intl is optional: skip if absent.
        if (class_exists('Normalizer')) {
            $normalized = \Normalizer::normalize($title, \Normalizer::FORM_C);
            if (is_string($normalized)) {
                $title = $normalized;
            }
        }

        $slug = preg_replace('/[^\p{L}\p{N}]/u', '-', $title);
        if (null === $slug) {
            // Malformed UTF-8 makes the u modifier fail: fall back to ASCII only.
            $slug = preg_replace('/[^a-zA-Z0-9]/', '-', $title);
        }

        $parts = array();
        foreach (explode('-', (string) $slug) as $part) {
            if (self::nonEmptyString($part) && self::checkStopWords($part)) {
                $parts[] = $part;
            }
        }
        $slug = implode('-', $parts);

        // Strict comparison so that a slug of "0" is kept.
        return ('' === $slug) ? '' : $slug . $extension;
    }

    /**
     * Produce a short excerpt of a text, positioned around the first
     * occurrence of any of the given needles.
     *
     * The excerpt starts about $length / 2 characters before the earliest
     * needle match (or at the beginning when nothing matches), is moved to
     * the next space, and is cut to at most $length characters at a space
     * where possible. An ellipsis marks each side on which text was removed.
     *
     * @param string               $haystack text to summarise
     * @param string|string[]|null $needles  search term(s) to centre on
     * @param int                  $length   maximum excerpt length in characters
     *
     * @return string excerpt
     */
    public static function getSearchSummary($haystack, $needles = null, $length = 120)
    {
        $haystack = self::asPlainText($haystack);
        $pos = self::getNeedlePositions($haystack, $needles);

        $start = empty($pos) ? 0 : min($pos);
        $start = max($start - (int) ($length / 2), 0);

        $pre = ($start > 0);
        if ($pre) {
            // Advance to the next space so the excerpt does not begin mid-word.
            $temp = mb_strpos($haystack, ' ', $start, self::ENCODING);
            $start = (false === $temp) ? $start : $temp;
            $haystack = mb_substr($haystack, $start, null, self::ENCODING);
        }

        // Only text strictly longer than $length needs to be cut at the back.
        $post = mb_strlen($haystack, self::ENCODING) > $length;
        if ($post) {
            $haystack = mb_substr($haystack, 0, $length, self::ENCODING);
            $end = mb_strrpos($haystack, ' ', 0, self::ENCODING);
            // false (no space) and 0 (only the leading space) both mean: keep as is.
            if ($end) {
                $haystack = mb_substr($haystack, 0, $end, self::ENCODING);
            }
        }

        return ($pre ? self::ELLIPSIS : '') . trim($haystack) . ($post ? self::ELLIPSIS : '');
    }

    /**
     * Reduce raw (possibly HTML) text to a single line of plain text.
     *
     * The text is passed through Utilities::html2text() and
     * Utilities::purifyText(); afterwards every run of spaces, tabs and
     * line breaks is collapsed into one space and the result is trimmed.
     *
     * @param string $rawText text to clean
     *
     * @return string plain text
     */
    protected static function asPlainText($rawText)
    {
        $utilities = new Utilities();
        $text = $utilities->html2text($rawText);
        $text = $utilities->purifyText($text);

        // Explicit ASCII whitespace list: safe on UTF-8 bytes in any locale.
        $text = preg_replace('/[ \t\r\n]+/', ' ', (string) $text);

        return trim((string) $text);
    }

    /**
     * Find the first (case-insensitive) position of each needle in a text.
     *
     * @param string               $haystack text to search
     * @param string|string[]|null $needles  search term(s)
     *
     * @return int[] character offsets of the needles that were found
     */
    private static function getNeedlePositions($haystack, $needles)
    {
        $pos = array();
        $needles = empty($needles) ? array() : (array) $needles;
        foreach ($needles as $needle) {
            // Empty needles warn on PHP < 8 and match at 0 on PHP 8: skip them.
            if (!is_scalar($needle) || '' === (string) $needle) {
                continue;
            }
            $i = mb_stripos($haystack, (string) $needle, 0, self::ENCODING);
            if (false !== $i) {
                $pos[] = $i;
            }
        }

        return $pos;
    }
}
377: