1: <?php
2: // $Id$
3: /*******************************************************************************
4: Location: <strong>xml/SaxParser.class</strong><br>
5: <br>
6: Provides basic functionality to read and parse XML documents. Subclasses
7: must implement all of their custom handlers by using the add* methods.
8: They may also use the handle*() methods to parse specific XML begin and end
9: tags, but this is not recommended as it is more difficult.<br>
10: <br>
11: Copyright © 2001 eXtremePHP. All rights reserved.<br>
12: <br>
13: @author Ken Egervari<br>
14: *******************************************************************************/
15:
/**
 * Event-driven (SAX style) XML parser built on PHP's xml extension.
 *
 * Elements are dispatched to handler objects registered with addTagHandler();
 * tags without a registered handler fall through to the overridable
 * handle*Default() methods. Errors raised while parsing are collected and can
 * be read back with getErrors().
 */
class SaxParser
{
    /** @var int Current element nesting depth (0 when no element is open). */
    public $level;

    /** @var mixed Parser handle returned by xml_parser_create(); null after free(). */
    public $parser;

    /** @var bool Value last passed to setCaseFolding(). */
    public $isCaseFolding;

    /** @var string Target encoding last selected through use*Encoding(). */
    public $targetEncoding;

    /** @var array Custom handlers, keyed by tag name. */
    public $tagHandlers = array();

    /** @var array Stack of the names of the currently open tags. */
    public $tags = array();

    /** @var mixed Declared for backward compatibility; not used by this class. */
    public $xmlInput;

    /** @var mixed XML source handed to the constructor: a string or a stream resource. */
    public $input;

    /** @var array Error messages collected by setErrors(). */
    public $errors = array();

    /**
     * Creates the underlying parser, stores the input and registers this
     * object's handle*() methods as the parser callbacks.
     *
     * Case folding is switched off and UTF-8 is selected as target encoding.
     *
     * @param resource|string $input Either the XML document as a string or an
     *                               open stream resource to read it from
     */
    public function __construct(&$input)
    {
        $this->level  = 0;
        $this->parser = xml_parser_create('UTF-8');
        xml_set_object($this->parser, $this);
        $this->input = $input;
        $this->setCaseFolding(false);
        $this->useUtfEncoding();
        xml_set_element_handler($this->parser, 'handleBeginElement', 'handleEndElement');
        xml_set_character_data_handler($this->parser, 'handleCharacterData');
        xml_set_processing_instruction_handler($this->parser, 'handleProcessingInstruction');
        xml_set_default_handler($this->parser, 'handleDefault');
        xml_set_unparsed_entity_decl_handler($this->parser, 'handleUnparsedEntityDecl');
        xml_set_notation_decl_handler($this->parser, 'handleNotationDecl');
        xml_set_external_entity_ref_handler($this->parser, 'handleExternalEntityRef');
    }

    /*---------------------------------------------------------------------------
        Property Methods
    ---------------------------------------------------------------------------*/

    /**
     * Returns the current element nesting depth.
     *
     * @return int
     */
    public function getCurrentLevel()
    {
        return $this->level;
    }

    /**
     * Enables or disables case folding of tag names on the parser.
     *
     * @param bool $isCaseFolding
     * @return void
     */
    public function setCaseFolding($isCaseFolding)
    {
        assert(is_bool($isCaseFolding));

        $this->isCaseFolding = $isCaseFolding;
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, $this->isCaseFolding);
    }

    /**
     * Selects ISO-8859-1 as the parser's target encoding.
     *
     * @return void
     */
    public function useIsoEncoding()
    {
        $this->targetEncoding = 'ISO-8859-1';
        xml_parser_set_option($this->parser, XML_OPTION_TARGET_ENCODING, $this->targetEncoding);
    }

    /**
     * Selects US-ASCII as the parser's target encoding.
     *
     * @return void
     */
    public function useAsciiEncoding()
    {
        $this->targetEncoding = 'US-ASCII';
        xml_parser_set_option($this->parser, XML_OPTION_TARGET_ENCODING, $this->targetEncoding);
    }

    /**
     * Selects UTF-8 as the parser's target encoding.
     *
     * @return void
     */
    public function useUtfEncoding()
    {
        $this->targetEncoding = 'UTF-8';
        xml_parser_set_option($this->parser, XML_OPTION_TARGET_ENCODING, $this->targetEncoding);
    }

    /**
     * Returns the name of the tag on top of the tag stack.
     *
     * @return string|false The tag name, or false when no tag is open
     */
    public function getCurrentTag()
    {
        $depth = count($this->tags);
        if ($depth < 1) {
            // Nothing is open: avoid reading offset -1 and mirror getParentTag().
            return false;
        }

        return $this->tags[$depth - 1];
    }

    /**
     * Returns the name of the tag directly below the top of the tag stack.
     *
     * @return string|false The tag name, or false when there is no such tag
     */
    public function getParentTag()
    {
        $index = count($this->tags) - 2;
        if (isset($this->tags[$index])) {
            return $this->tags[$index];
        }

        return false;
    }

    /*---------------------------------------------------------------------------
        Parser methods
    ---------------------------------------------------------------------------*/

    /**
     * Parses the input given to the constructor.
     *
     * A string input is parsed in a single call. A stream input is read in
     * 4096 byte chunks and closed afterwards, whether parsing succeeded or not.
     * In both cases the parser is told when the data is complete, so an
     * unfinished (or empty) document is reported as an error.
     *
     * @return bool true on success; false on failure, in which case a message
     *              has been recorded and is available through getErrors()
     */
    public function parse()
    {
        if (!is_resource($this->input)) {
            // The whole document is available at once, so this chunk is final.
            return $this->feedParser($this->input, true);
        }

        // NOTE(review): the original passed every chunk through
        // str_replace("'", "'", ...), which replaces a string with itself.
        // It has been dropped as a no-op; it may have been a mangled '&apos;'
        // replacement - confirm against the upstream source.
        $stream  = $this->input;
        $success = true;
        while (true) {
            $chunk = fread($stream, 4096);
            if ($chunk === false || $chunk === '') {
                // No more data, but the parser has not been finalised yet
                // (empty stream, or stream length a multiple of the chunk size).
                $success = $this->feedParser('', true);
                break;
            }

            $isFinal = feof($stream);
            if (!$this->feedParser($chunk, $isFinal)) {
                $success = false;
                break;
            }
            if ($isFinal) {
                break;
            }
        }
        fclose($stream);

        return $success;
    }

    /**
     * Hands one piece of data to the parser and records the error on failure.
     *
     * @param string $data    Chunk of XML data
     * @param bool   $isFinal Whether this is the last chunk of the document
     * @return bool true when the parser accepted the data
     */
    private function feedParser($data, $isFinal)
    {
        if (xml_parse($this->parser, $data, $isFinal)) {
            return true;
        }
        $this->setErrors($this->getXmlError());

        return false;
    }

    /**
     * Releases the parser.
     *
     * If the object defines __destruct() it is invoked explicitly, as before.
     * The former unset($this) fallback has been removed: it is rejected at
     * compile time by PHP 7.1 and later.
     *
     * @return void
     */
    public function free()
    {
        // Only a resource handle needs (and accepts) an explicit free.
        if (is_resource($this->parser)) {
            xml_parser_free($this->parser);
        }

        if (method_exists($this, '__destruct')) {
            $this->__destruct();
        }

        // Drop the reference so the parser, which points back at this object
        // through xml_set_object(), can be collected.
        $this->parser = null;
    }

    /**
     * Builds a message from the parser's current error code and line number.
     *
     * @private
     * @return string
     */
    public function getXmlError()
    {
        return sprintf("XmlParse error: %s at line %d", xml_error_string(xml_get_error_code($this->parser)), xml_get_current_line_number($this->parser));
    }

    /*---------------------------------------------------------------------------
        Custom Handler Methods
    ---------------------------------------------------------------------------*/

    /**
     * Registers a handler for the tag name(s) reported by its getName().
     *
     * getName() may return a single name or an array of names; a handler
     * registered later for the same name replaces the earlier one.
     *
     * @param XmlTagHandler $tagHandler
     * @return void
     */
    public function addTagHandler(XmlTagHandler $tagHandler)
    {
        $name = $tagHandler->getName();
        if (is_array($name)) {
            foreach ($name as $n) {
                $this->tagHandlers[$n] = $tagHandler;
            }
        } else {
            $this->tagHandlers[$name] = $tagHandler;
        }
    }

    /*---------------------------------------------------------------------------
        Private Handler Methods
    ---------------------------------------------------------------------------*/

    /**
     * Looks up the custom handler to use for a tag.
     *
     * @param string|false $tagName
     * @return object|null The registered handler if it is a subclass of
     *                     XmlTagHandler, otherwise null
     */
    private function findTagHandler($tagName)
    {
        if ($tagName === false || !isset($this->tagHandlers[$tagName])) {
            return null;
        }
        $candidate = $this->tagHandlers[$tagName];

        return is_subclass_of($candidate, 'xmltaghandler') ? $candidate : null;
    }

    /**
     * Parser callback invoked at the start of an element.
     *
     * Pushes the tag onto the tag stack and increases the level before the
     * handler runs.
     *
     * @param mixed  $parser          The handle to the parser
     * @param string $tagName         The name of the tag being opened
     * @param array  $attributesArray The attributes associated with the tag
     * @private
     * @return void
     */
    public function handleBeginElement($parser, $tagName, $attributesArray)
    {
        array_push($this->tags, $tagName);
        $this->level++;

        $tagHandler = $this->findTagHandler($tagName);
        if (null !== $tagHandler) {
            $tagHandler->handleBeginElement($this, $attributesArray);
        } else {
            $this->handleBeginElementDefault($parser, $tagName, $attributesArray);
        }
    }

    /**
     * Parser callback invoked at the end of an element.
     *
     * The tag is popped from the stack before the handler runs, so inside the
     * handler getCurrentTag() yields the enclosing tag; the level is only
     * decreased after the handler has returned.
     *
     * @param mixed  $parser  The handle to the parser
     * @param string $tagName The name of the tag being closed
     * @private
     * @return void
     */
    public function handleEndElement($parser, $tagName)
    {
        array_pop($this->tags);

        $tagHandler = $this->findTagHandler($tagName);
        if (null !== $tagHandler) {
            $tagHandler->handleEndElement($this);
        } else {
            $this->handleEndElementDefault($parser, $tagName);
        }
        $this->level--;
    }

    /**
     * Parser callback invoked for character data; dispatches on the tag that
     * is currently on top of the tag stack.
     *
     * @param mixed  $parser The handle to the parser
     * @param string $data   Character data inside the tag
     * @return void
     */
    public function handleCharacterData($parser, $data)
    {
        $tagHandler = $this->findTagHandler($this->getCurrentTag());
        if (null !== $tagHandler) {
            $tagHandler->handleCharacterData($this, $data);
        } else {
            $this->handleCharacterDataDefault($parser, $data);
        }
    }

    /**
     * Parser callback for processing instructions; intentionally does nothing.
     *
     * NOTE(review): the by-reference parameters are kept so existing overrides
     * stay signature-compatible, but the xml extension passes values, which
     * presumably triggers a "must be passed by reference" warning - confirm.
     *
     * @param mixed  $parser The handle to the parser
     * @param string $target
     * @param string $data
     * @return void
     */
    public function handleProcessingInstruction($parser, &$target, &$data)
    {
    }

    /**
     * Parser callback for data no other handler receives; does nothing.
     *
     * @param mixed  $parser
     * @param string $data
     * @return void
     */
    public function handleDefault($parser, $data)
    {
    }

    /**
     * Parser callback for unparsed entity declarations; does nothing.
     *
     * @param mixed $parser
     * @param $entityName
     * @param $base
     * @param $systemId
     * @param $publicId
     * @param $notationName
     * @return void
     */
    public function handleUnparsedEntityDecl($parser, $entityName, $base, $systemId, $publicId, $notationName)
    {
    }

    /**
     * Parser callback for notation declarations; does nothing.
     *
     * @param mixed $parser
     * @param $notationName
     * @param $base
     * @param $systemId
     * @param $publicId
     * @return void
     */
    public function handleNotationDecl($parser, $notationName, $base, $systemId, $publicId)
    {
    }

    /**
     * Parser callback for external entity references; does nothing.
     *
     * NOTE(review): no value is returned; per the PHP manual a falsy return
     * from this callback stops parsing, so documents that reference external
     * entities presumably fail to parse - confirm this is intended.
     *
     * @param mixed $parser
     * @param $openEntityNames
     * @param $base
     * @param $systemId
     * @param $publicId
     * @return void
     */
    public function handleExternalEntityRef($parser, $openEntityNames, $base, $systemId, $publicId)
    {
    }

    /**
     * Called at the start of a tag that has no usable custom handler.
     * Empty by default; subclasses may override it.
     *
     * @param mixed  $parser
     * @param string $tagName
     * @param array  $attributesArray
     * @return void
     */
    public function handleBeginElementDefault($parser, $tagName, $attributesArray)
    {
    }

    /**
     * Called at the end of a tag that has no usable custom handler.
     * Empty by default; subclasses may override it.
     *
     * @param mixed  $parser
     * @param string $tagName
     * @return void
     */
    public function handleEndElementDefault($parser, $tagName)
    {
    }

    /**
     * Called for character data inside a tag that has no usable custom
     * handler. Empty by default; subclasses may override it.
     *
     * @param mixed  $parser
     * @param string $data
     * @return void
     */
    public function handleCharacterDataDefault($parser, $data)
    {
    }

    /**
     * Appends one (trimmed) message to the list of errors.
     *
     * @param string $error an error message
     * @return void
     */
    public function setErrors($error)
    {
        $this->errors[] = trim($error);
    }

    /**
     * Returns all recorded error messages.
     *
     * @param bool $ashtml When true (default) the messages are concatenated
     *                     into one string, each followed by '<br />'; when
     *                     false the raw array is returned
     * @return array|string
     */
    public function getErrors($ashtml = true)
    {
        if (!$ashtml) {
            return $this->errors;
        }

        $ret = '';
        foreach ($this->errors as $error) {
            $ret .= $error . '<br />';
        }

        return $ret;
    }
}
396: