<?php

/**
 * Event-driven (SAX style) XML parser built on PHP's ext/xml.
 *
 * The parser keeps a stack of currently open element names and dispatches
 * element / character-data events to handler objects registered per tag name
 * through {@see SaxParser::addTagHandler()}. Events for tags without a usable
 * handler are routed to the overridable handle*Default() methods.
 */
class SaxParser
{
    /**
     * Nesting depth of the element currently being processed (0 outside the root).
     *
     * @var int
     */
    public $level;

    /**
     * Underlying ext/xml parser handle returned by xml_parser_create().
     */
    public $parser;

    /**
     * Last value applied to XML_OPTION_CASE_FOLDING.
     *
     * @var bool
     */
    public $isCaseFolding;

    /**
     * Last value applied to XML_OPTION_TARGET_ENCODING.
     *
     * @var string
     */
    public $targetEncoding;

    /**
     * Registered tag handlers, keyed by tag name.
     *
     * @var array
     */
    public $tagHandlers = array();

    /**
     * Stack of the names of the currently open elements (outermost first).
     *
     * @var array
     */
    public $tags = array();

    /**
     * Not read or written by this class; kept so existing code that touches
     * the property keeps working.
     */
    public $xmlInput;

    /**
     * The XML source handed to the constructor: either a string holding the
     * document or an open stream resource to read it from.
     *
     * Declared explicitly because the constructor assigns it; an undeclared
     * (dynamic) property is deprecated as of PHP 8.2.
     *
     * @var string|resource
     */
    public $input;

    /**
     * Error messages collected by setErrors().
     *
     * @var array
     */
    public $errors = array();

    /**
     * Create the underlying parser and wire all of its callbacks to this object.
     *
     * Case folding is switched off and the target encoding is set to UTF-8;
     * both can be changed afterwards through the corresponding setters.
     *
     * @param string|resource $input XML document as a string, or an open stream
     */
    public function __construct($input)
    {
        $this->level  = 0;
        $this->input  = $input;
        $this->parser = xml_parser_create('UTF-8');

        // xml_set_object() is deprecated as of PHP 8.4. The handlers below are
        // registered as array($this, 'method') callables and do not need it;
        // it is still called on older versions so that subclasses registering
        // additional handlers by bare method name keep working there.
        if (PHP_VERSION_ID < 80400) {
            xml_set_object($this->parser, $this);
        }

        $this->setCaseFolding(false);
        $this->useUtfEncoding();

        xml_set_element_handler(
            $this->parser,
            array($this, 'handleBeginElement'),
            array($this, 'handleEndElement')
        );
        xml_set_character_data_handler($this->parser, array($this, 'handleCharacterData'));
        xml_set_processing_instruction_handler($this->parser, array($this, 'handleProcessingInstruction'));
        xml_set_default_handler($this->parser, array($this, 'handleDefault'));
        xml_set_unparsed_entity_decl_handler($this->parser, array($this, 'handleUnparsedEntityDecl'));
        xml_set_notation_decl_handler($this->parser, array($this, 'handleNotationDecl'));
        xml_set_external_entity_ref_handler($this->parser, array($this, 'handleExternalEntityRef'));
    }

    /**
     * Current nesting depth.
     *
     * The depth is incremented before a begin-element handler runs and
     * decremented after the matching end-element handler has returned.
     *
     * @return int
     */
    public function getCurrentLevel()
    {
        return $this->level;
    }

    /**
     * Enable or disable case folding of element names.
     *
     * @param bool $isCaseFolding
     */
    public function setCaseFolding($isCaseFolding)
    {
        assert(is_bool($isCaseFolding));

        $this->isCaseFolding = $isCaseFolding;
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, $this->isCaseFolding);
    }

    /**
     * Deliver parsed data to the handlers as ISO-8859-1.
     */
    public function useIsoEncoding()
    {
        $this->applyTargetEncoding('ISO-8859-1');
    }

    /**
     * Deliver parsed data to the handlers as US-ASCII.
     */
    public function useAsciiEncoding()
    {
        $this->applyTargetEncoding('US-ASCII');
    }

    /**
     * Deliver parsed data to the handlers as UTF-8.
     */
    public function useUtfEncoding()
    {
        $this->applyTargetEncoding('UTF-8');
    }

    /**
     * Name of the innermost open element.
     *
     * @return string|false the tag name, or false when no element is open
     *                      (same convention as getParentTag())
     */
    public function getCurrentTag()
    {
        $depth = count($this->tags);
        if ($depth < 1) {
            // Nothing on the stack: avoid reading the undefined offset -1.
            return false;
        }

        return $this->tags[$depth - 1];
    }

    /**
     * Name of the element enclosing the innermost open element.
     *
     * @return string|false the tag name, or false when there is no such element
     */
    public function getParentTag()
    {
        $index = count($this->tags) - 2;
        if (isset($this->tags[$index])) {
            return $this->tags[$index];
        }

        return false;
    }

    /**
     * Parse the input given to the constructor.
     *
     * A string input is parsed in one go. A stream input is read in 4096-byte
     * chunks and is closed before this method returns, whether parsing
     * succeeded or not. In both cases the parser is told when the last piece
     * of data has been sent, so an incomplete document is reported as an error.
     *
     * @return bool true on success; false on a parse error, in which case a
     *              message has been recorded and is available via getErrors()
     */
    public function parse()
    {
        if (!is_resource($this->input)) {
            // The whole document is supplied at once, so it is also the final chunk.
            if (!xml_parse($this->parser, $this->input, true)) {
                $this->setErrors($this->getXmlError());

                return false;
            }

            return true;
        }

        $isFinal = false;
        while (!$isFinal) {
            $data = fread($this->input, 4096);
            if (false === $data) {
                $data = '';
            }

            // An empty read means there is nothing left. feof() alone is not
            // enough: it may only turn true on the read *after* the last byte,
            // in which case the final flag would otherwise never be sent.
            $isFinal = ('' === $data) || feof($this->input);

            // NOTE(review): every apostrophe in the raw stream data is turned
            // into the &apos; entity before parsing. This is applied to stream
            // input only (not to string input) and also affects apostrophes
            // used as attribute delimiters - confirm this is intended.
            if (!xml_parse($this->parser, str_replace("'", '&apos;', $data), $isFinal)) {
                $this->setErrors($this->getXmlError());
                fclose($this->input);

                return false;
            }
        }
        fclose($this->input);

        return true;
    }

    /**
     * Release the underlying parser.
     */
    public function free()
    {
        xml_parser_free($this->parser);
    }

    /**
     * Describe the parser's current error.
     *
     * @return string message containing the error text and the current line number
     */
    public function getXmlError()
    {
        $message = xml_error_string(xml_get_error_code($this->parser));
        $line    = xml_get_current_line_number($this->parser);

        return sprintf('XmlParse error: %s at line %d', $message, $line);
    }

    /**
     * Register a handler object for one or more tag names.
     *
     * The handler's getName() may return a single name or an array of names;
     * the handler is stored under each of them, replacing any handler
     * previously registered for the same name.
     *
     * @param object $tagHandler object exposing getName(); it is only invoked
     *                           for events if it is a subclass of XmlTagHandler
     */
    public function addTagHandler($tagHandler)
    {
        $names = $tagHandler->getName();
        if (!is_array($names)) {
            $names = array($names);
        }

        foreach ($names as $name) {
            $this->tagHandlers[$name] = $tagHandler;
        }
    }

    /**
     * Parser callback: an element has been opened.
     *
     * Pushes the tag onto the stack and increases the level before dispatching.
     *
     * @param mixed  $parser          parser handle passed in by ext/xml
     * @param string $tagName         name of the opened element
     * @param array  $attributesArray attributes of the element
     */
    public function handleBeginElement($parser, $tagName, $attributesArray)
    {
        $this->tags[] = $tagName;
        $this->level++;

        $handler = $this->findTagHandler($tagName);
        if (null !== $handler) {
            $handler->handleBeginElement($this, $attributesArray);
        } else {
            $this->handleBeginElementDefault($parser, $tagName, $attributesArray);
        }
    }

    /**
     * Parser callback: an element has been closed.
     *
     * The tag is popped from the stack *before* dispatching, so during the
     * handler call getCurrentTag() already reports the enclosing element; the
     * level is decreased only after the handler has returned.
     *
     * @param mixed  $parser  parser handle passed in by ext/xml
     * @param string $tagName name of the closed element
     */
    public function handleEndElement($parser, $tagName)
    {
        array_pop($this->tags);

        $handler = $this->findTagHandler($tagName);
        if (null !== $handler) {
            $handler->handleEndElement($this);
        } else {
            $this->handleEndElementDefault($parser, $tagName);
        }

        $this->level--;
    }

    /**
     * Parser callback: character data inside the innermost open element.
     *
     * @param mixed  $parser parser handle passed in by ext/xml
     * @param string $data   the character data
     */
    public function handleCharacterData($parser, $data)
    {
        // Looked up by value: binding a reference to a missing array element
        // would silently add a null entry to $this->tagHandlers for the tag.
        $handler = $this->findTagHandler($this->getCurrentTag());
        if (null !== $handler) {
            $handler->handleCharacterData($this, $data);
        } else {
            $this->handleCharacterDataDefault($parser, $data);
        }
    }

    /**
     * Parser callback: processing instruction. Does nothing by default.
     *
     * NOTE(review): the by-reference parameters are kept only so that existing
     * subclasses overriding this method stay signature-compatible.
     *
     * @param mixed  $parser parser handle passed in by ext/xml
     * @param string $target processing instruction target
     * @param string $data   processing instruction data
     */
    public function handleProcessingInstruction($parser, &$target, &$data)
    {
    }

    /**
     * Parser callback: data not covered by another handler. Does nothing by default.
     *
     * @param mixed  $parser parser handle passed in by ext/xml
     * @param string $data
     */
    public function handleDefault($parser, $data)
    {
    }

    /**
     * Parser callback: unparsed entity declaration. Does nothing by default.
     *
     * @param mixed  $parser       parser handle passed in by ext/xml
     * @param string $entityName
     * @param string $base
     * @param string $systemId
     * @param string $publicId
     * @param string $notationName
     */
    public function handleUnparsedEntityDecl($parser, $entityName, $base, $systemId, $publicId, $notationName)
    {
    }

    /**
     * Parser callback: notation declaration. Does nothing by default.
     *
     * @param mixed  $parser       parser handle passed in by ext/xml
     * @param string $notationName
     * @param string $base
     * @param string $systemId
     * @param string $publicId
     */
    public function handleNotationDecl($parser, $notationName, $base, $systemId, $publicId)
    {
    }

    /**
     * Parser callback: external entity reference. Does nothing by default.
     *
     * @param mixed  $parser          parser handle passed in by ext/xml
     * @param string $openEntityNames
     * @param string $base
     * @param string $systemId
     * @param string $publicId
     */
    public function handleExternalEntityRef($parser, $openEntityNames, $base, $systemId, $publicId)
    {
    }

    /**
     * Fallback for opened elements without a usable tag handler.
     * Does nothing by default; meant to be overridden.
     *
     * @param mixed  $parser          parser handle passed in by ext/xml
     * @param string $tagName
     * @param array  $attributesArray
     */
    public function handleBeginElementDefault($parser, $tagName, $attributesArray)
    {
    }

    /**
     * Fallback for closed elements without a usable tag handler.
     * Does nothing by default; meant to be overridden.
     *
     * @param mixed  $parser  parser handle passed in by ext/xml
     * @param string $tagName
     */
    public function handleEndElementDefault($parser, $tagName)
    {
    }

    /**
     * Fallback for character data whose element has no usable tag handler.
     * Does nothing by default; meant to be overridden.
     *
     * @param mixed  $parser parser handle passed in by ext/xml
     * @param string $data
     */
    public function handleCharacterDataDefault($parser, $data)
    {
    }

    /**
     * Record an error message (surrounding whitespace is trimmed).
     *
     * @param string $error
     */
    public function setErrors($error)
    {
        $this->errors[] = trim($error);
    }

    /**
     * Return the recorded errors.
     *
     * @param bool $ashtml true for a single string with '<br>' appended to each
     *                     message, false for the raw array of messages
     *
     * @return array|string
     */
    public function &getErrors($ashtml = true)
    {
        if (!$ashtml) {
            return $this->errors;
        }

        $ret = '';
        foreach ($this->errors as $error) {
            $ret .= $error . '<br>';
        }

        return $ret;
    }

    /**
     * Store the given encoding and apply it as the parser's target encoding.
     *
     * @param string $encoding
     */
    private function applyTargetEncoding($encoding)
    {
        $this->targetEncoding = $encoding;
        xml_parser_set_option($this->parser, XML_OPTION_TARGET_ENCODING, $this->targetEncoding);
    }

    /**
     * Look up the handler to use for a tag.
     *
     * @param mixed $tagName tag name; anything that is not a string or an
     *                       integer (e.g. false for "no open element") yields null
     *
     * @return object|null the registered handler if it is a subclass of
     *                     XmlTagHandler, otherwise null
     */
    private function findTagHandler($tagName)
    {
        if (!is_string($tagName) && !is_int($tagName)) {
            return null;
        }
        if (!isset($this->tagHandlers[$tagName])) {
            return null;
        }

        $handler = $this->tagHandlers[$tagName];

        return is_subclass_of($handler, 'XmlTagHandler') ? $handler : null;
    }
}