// blob: f982712182031ba43578a474568f3e10e11336aa
// last change: Meki Cherkaoui, 97e7a59, 2012-04-14 02:50:06 -0700
// wrapper for non-node envs
2;(function (sax) {
3
// Public API, attached to the `sax` object handed to the enclosing wrapper.
// sax.parser() is a plain factory for SAXParser; the constructors and the
// stream factory are exposed as-is.
sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
sax.SAXParser = SAXParser
sax.SAXStream = SAXStream
sax.createStream = createStream
8
// When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
// When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
// since that's the earliest that a buffer overrun could occur.  This way, checks are
// as rare as required, but as often as necessary to ensure never crossing this bound.
// Furthermore, buffers are only tested at most once per write(), so passing a very
// large string into write() might have undesirable effects, but this is manageable by
// the caller, so it is assumed to be safe.  Thus, a call to write() may, in the extreme
// edge case, result in creating at most one complete copy of the string passed in.
// Set to Infinity to have unlimited buffers.
sax.MAX_BUFFER_LENGTH = 64 * 1024
19
// Names of the string-valued parser properties that accumulate input.
// clearBuffers() resets each of them and checkBufferLength() measures them.
var buffers = [
  "comment", "sgmlDecl", "textNode", "tagName", "doctype",
  "procInstName", "procInstBody", "entity", "attribName",
  "attribValue", "cdata", "script"
]
25
// Event names, exported for discoverability.  A handler for event "x" is
// looked up on the parser as the property "onx" (see emit()).
sax.EVENTS =
  [ "text"
  , "processinginstruction"
  , "sgmldeclaration"
  , "doctype"
  , "comment"
  , "attribute"
  , "opentag"
  , "closetag"
  , "opencdata"
  , "cdata"
  , "closecdata"
  , "error"
  , "end"
  , "ready"
  , "script"
  , "opennamespace"
  , "closenamespace"
  ]
45
// Parser constructor.  Also used by end() to reset an existing parser in
// place, so every piece of per-document state must be (re)initialised here.
//
//   strict - truthy for strict mode (also forces noscript)
//   opt    - optional options object; the options read in this function are
//            lowercasetags, noscript and xmlns
//
// Works with or without `new`.  Emits "onready" once initialised.
function SAXParser (strict, opt) {
  if (!(this instanceof SAXParser)) return new SAXParser(strict, opt)

  var parser = this
  clearBuffers(parser)
  parser.q = parser.c = ""
  parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
  parser.opt = opt || {}
  // name of the String method applied to tag names in non-strict mode
  parser.tagCase = parser.opt.lowercasetags ? "toLowerCase" : "toUpperCase"
  parser.tags = []
  parser.closed = parser.closedRoot = parser.sawRoot = false
  parser.tag = parser.error = null
  parser.strict = !!strict
  parser.noscript = !!(strict || parser.opt.noscript)
  parser.state = S.BEGIN
  // per-parser entity table that inherits the shared defaults
  parser.ENTITIES = Object.create(sax.ENTITIES)
  parser.attribList = []

  // namespaces form a prototype chain.
  // it always points at the current tag,
  // which protos to its parent tag.
  if (parser.opt.xmlns) parser.ns = Object.create(rootNS)

  // mostly just for error reporting
  parser.position = parser.line = parser.column = 0
  emit(parser, "onready")
}
73
// Fallback for engines without Object.create.
// Besides setting the prototype, the constructor mirrors it into a
// `__proto__` property, which the Object.getPrototypeOf fallback reads.
// The optional second argument is honoured in a reduced form: only each
// descriptor's `value` is copied (as a plain property), which is what the
// `constructor` descriptor passed for SAXStream.prototype needs.
if (!Object.create) Object.create = function (o, props) {
  function F () { this.__proto__ = o }
  F.prototype = o
  var obj = new F()
  if (props) {
    for (var key in props) {
      if (Object.prototype.hasOwnProperty.call(props, key)) {
        obj[key] = props[key].value
      }
    }
  }
  return obj
}
79
// Fallback for engines without Object.getPrototypeOf: read `__proto__`.
if (!Object.getPrototypeOf) Object.getPrototypeOf = function (o) {
  return o.__proto__
}
83
// Fallback for engines without Object.keys: own enumerable property names.
// hasOwnProperty is borrowed from Object.prototype so that objects which
// shadow it, or do not inherit it, are still handled.
if (!Object.keys) Object.keys = function (o) {
  var names = []
  for (var name in o) {
    if (Object.prototype.hasOwnProperty.call(o, name)) names.push(name)
  }
  return names
}
89
// Inspect every buffer named in `buffers` and deal with any that has grown
// past the limit, then schedule the next inspection.
//
// textNode, cdata and script are flushed to their handlers and emptied;
// any other oversized buffer is reported through error() and left as is.
// The limit is sax.MAX_BUFFER_LENGTH, but never less than 10.
function checkBufferLength (parser) {
  var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
    , maxActual = 0
  for (var i = 0, l = buffers.length; i < l; i ++) {
    var name = buffers[i]
    if (parser[name].length > maxAllowed) {
      // Text/cdata nodes can get big, and since they're buffered,
      // we can get here under normal conditions.
      // Avoid issues by emitting the text node now,
      // so at least it won't get any bigger.
      switch (name) {
        case "textNode":
          closeText(parser)
          break

        case "cdata":
          emitNode(parser, "oncdata", parser.cdata)
          parser.cdata = ""
          break

        case "script":
          emitNode(parser, "onscript", parser.script)
          parser.script = ""
          break

        default:
          error(parser, "Max buffer length exceeded: "+name)
      }
    }
    // measure after any flush, so an emptied buffer does not pull the
    // next check earlier than necessary
    maxActual = Math.max(maxActual, parser[name].length)
  }
  // schedule the next check for the earliest possible buffer overrun,
  // using the same limit the comparison above uses.
  parser.bufferCheckPosition = (maxAllowed - maxActual) + parser.position
}
125
// Reset every buffer named in `buffers` to the empty string.
function clearBuffers (parser) {
  for (var i = 0, l = buffers.length; i < l; i ++) {
    parser[buffers[i]] = ""
  }
}
131
// Instance methods.
//   end()    - finish the document; returns the parser, as close() and
//              write() do, so calls can be chained
//   write    - feed a chunk (see write())
//   resume() - clear a stored error so that write() stops rethrowing it
//   close()  - same as write(null)
SAXParser.prototype =
  { end: function () { return end(this) }
  , write: write
  , resume: function () { this.error = null; return this }
  , close: function () { return this.write(null) }
  }
138
// Base class for SAXStream.  Use the "stream" module's Stream when
// require() can provide it; otherwise fall back to an empty constructor so
// that the rest of this file still loads.
var Stream
try {
  Stream = require("stream").Stream
} catch (ex) {
  Stream = function () {}
}
144
145
// Parser events that SAXStream forwards as stream events of the same name.
// "error" and "end" are excluded because the SAXStream constructor wires
// those two itself.
var streamWraps = sax.EVENTS.filter(function (ev) {
  return ev !== "error" && ev !== "end"
})
149
// Factory for SAXStream; takes the same arguments as the constructor.
function createStream (strict, opt) {
  return new SAXStream(strict, opt)
}
153
// Stream wrapper around a SAXParser.
//
//   strict, opt - passed straight to the SAXParser constructor
//
// Works with or without `new`.  The wrapped parser is kept in `_parser`.
// For every event in streamWraps an "on<event>" accessor is defined on the
// stream: reading it returns the parser's handler, assigning a function
// registers it through on(), and assigning a falsy value removes all
// listeners for that event and stores the value on the parser.
function SAXStream (strict, opt) {
  if (!(this instanceof SAXStream)) return new SAXStream(strict, opt)

  // captured first: the Stream constructor below must run against this
  // instance, and the handlers further down close over it.
  var me = this

  Stream.apply(me)

  me._parser = new SAXParser(strict, opt)
  me.writable = true
  me.readable = true

  me._parser.onend = function () {
    me.emit("end")
  }

  me._parser.onerror = function (er) {
    me.emit("error", er)

    // if didn't throw, then means error was handled.
    // go ahead and clear error, so we can write again.
    me._parser.error = null
  }

  streamWraps.forEach(function (ev) {
    Object.defineProperty(me, "on" + ev, {
      get: function () { return me._parser["on" + ev] },
      set: function (h) {
        if (!h) {
          me.removeAllListeners(ev)
          return me._parser["on"+ev] = h
        }
        me.on(ev, h)
      },
      enumerable: true,
      configurable: false
    })
  })
}
193
// SAXStream inherits from Stream; `constructor` is restored as a
// non-enumerable property pointing back at SAXStream.
SAXStream.prototype = Object.create(Stream.prototype,
  { constructor: { value: SAXStream } })
196
// Feed `data` to the parser as a string, then re-emit the original value
// as a "data" event.  Always returns true.
SAXStream.prototype.write = function (data) {
  this._parser.write(data.toString())
  this.emit("data", data)
  return true
}
202
// Finish the stream.  A non-empty final chunk goes through this.write(),
// so it is parsed and re-emitted as "data" exactly like any other chunk,
// and then the parser is ended.  Always returns true.
SAXStream.prototype.end = function (chunk) {
  if (chunk && chunk.length) this.write(chunk)
  this._parser.end()
  return true
}
208
// Register a listener.  The first time a listener is added for one of the
// streamWraps events (and the parser has no handler for it yet), a handler
// is installed on the parser that re-emits the event on the stream with
// the same arguments.  Registration itself is delegated to Stream's on().
SAXStream.prototype.on = function (ev, handler) {
  var me = this
  if (!me._parser["on"+ev] && streamWraps.indexOf(ev) !== -1) {
    me._parser["on"+ev] = function () {
      // slice() copies the arguments as-is, including a lone numeric one
      var args = Array.prototype.slice.call(arguments)
      args.unshift(ev)
      me.emit.apply(me, args)
    }
  }

  return Stream.prototype.on.call(me, ev, handler)
}
222
223
224
// character classes and tokens
//
// number/letter really need to be replaced with proper character classes:
// XML allows all manner of ridiculous numbers and digits.
// nameStart is (Letter | "_" | ":").
var whitespace = "\r\n\t "
  , number = "0123456789"
  , letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
  , nameStart = letter+"_:"
  , nameBody = nameStart+number+"-."
  , quote = "'\""
  , entity = number+letter+"#"
  , attribEnd = whitespace + ">"
  , CDATA = "[CDATA["
  , DOCTYPE = "DOCTYPE"
  , XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
  , XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
  , rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }

// turn all the string character sets into character class objects.
whitespace = charClass(whitespace)
number = charClass(number)
letter = charClass(letter)
nameStart = charClass(nameStart)
nameBody = charClass(nameBody)
quote = charClass(quote)
entity = charClass(entity)
attribEnd = charClass(attribEnd)
252
// Build a lookup object with a `true` entry for every character of `str`.
function charClass (str) {
  var set = {}
  for (var i = 0, l = str.length; i < l; i ++) {
    set[str.charAt(i)] = true
  }
  return set
}
259
// Membership test against a charClass() object.
// Returns the stored entry: true for a member, undefined otherwise.
function is (charclass, c) {
  return charclass[c]
}
263
// Negated membership test against a charClass() object; always a boolean.
function not (charclass, c) {
  return !charclass[c]
}
267
// Tokenizer states.  S starts out as a counter that numbers the states in
// declaration order; further down it is re-pointed at sax.STATE itself.
var S = 0
sax.STATE =
{ BEGIN                     : S++
, TEXT                      : S++ // general stuff
, TEXT_ENTITY               : S++ // &amp and such.
, OPEN_WAKA                 : S++ // <
, SGML_DECL                 : S++ // <!BLARG
, SGML_DECL_QUOTED          : S++ // <!BLARG foo "bar
, DOCTYPE                   : S++ // <!DOCTYPE
, DOCTYPE_QUOTED            : S++ // <!DOCTYPE "//blah
, DOCTYPE_DTD               : S++ // <!DOCTYPE "//blah" [ ...
, DOCTYPE_DTD_QUOTED        : S++ // <!DOCTYPE "//blah" [ "foo
, COMMENT_STARTING          : S++ // <!-
, COMMENT                   : S++ // <!--
, COMMENT_ENDING            : S++ // <!-- blah -
, COMMENT_ENDED             : S++ // <!-- blah --
, CDATA                     : S++ // <![CDATA[ something
, CDATA_ENDING              : S++ // ]
, CDATA_ENDING_2            : S++ // ]]
, PROC_INST                 : S++ // <?hi
, PROC_INST_BODY            : S++ // <?hi there
, PROC_INST_QUOTED          : S++ // <?hi "there
, PROC_INST_ENDING          : S++ // <?hi "there" ?
, OPEN_TAG                  : S++ // <strong
, OPEN_TAG_SLASH            : S++ // <strong /
, ATTRIB                    : S++ // <a
, ATTRIB_NAME               : S++ // <a foo
, ATTRIB_NAME_SAW_WHITE     : S++ // <a foo _
, ATTRIB_VALUE              : S++ // <a foo=
, ATTRIB_VALUE_QUOTED       : S++ // <a foo="bar
, ATTRIB_VALUE_UNQUOTED     : S++ // <a foo=bar
, ATTRIB_VALUE_ENTITY_Q     : S++ // <foo bar="&quot;"
, ATTRIB_VALUE_ENTITY_U     : S++ // <foo bar=&quot;
, CLOSE_TAG                 : S++ // </a
, CLOSE_TAG_SAW_WHITE       : S++ // </a   >
, SCRIPT                    : S++ // <script> ...
, SCRIPT_ENDING             : S++ // <script> ... <
}
306
// Default named entities.  Each parser gets its own ENTITIES object that
// inherits from this one (see the SAXParser constructor).
sax.ENTITIES =
{ "apos" : "'"
, "quot" : "\""
, "amp"  : "&"
, "gt"   : ">"
, "lt"   : "<"
}
314
// Add the reverse mapping (state number -> state name) to sax.STATE.
// The names are snapshotted first, so the numeric keys added here are never
// themselves visited by the loop.
Object.keys(sax.STATE).forEach(function (name) {
  sax.STATE[sax.STATE[name]] = name
})

// shorthand
S = sax.STATE
319
// Call the handler stored on the parser under `event` (e.g. "ontext")
// with `data`, if there is one.
function emit (parser, event, data) {
  parser[event] && parser[event](data)
}
323
// Emit a node event, first flushing any pending text so that the text
// event is delivered before the node that follows it.
function emitNode (parser, nodeType, data) {
  if (parser.textNode) closeText(parser)
  emit(parser, nodeType, data)
}
328
// Flush the pending text node: apply the text options, emit "ontext" if
// anything is left, and reset the buffer.
function closeText (parser) {
  parser.textNode = textopts(parser.opt, parser.textNode)
  if (parser.textNode) emit(parser, "ontext", parser.textNode)
  parser.textNode = ""
}
334
// Apply the text-related options to `text` and return the result:
//   opt.trim      - strip leading/trailing whitespace
//   opt.normalize - collapse every whitespace run to a single space
function textopts (opt, text) {
  if (opt.trim) text = text.trim()
  if (opt.normalize) text = text.replace(/\s+/g, " ")
  return text
}
340
// Report a parse error.  Pending text is flushed first; the message is
// extended with the parser's line, column and current character, wrapped
// in an Error, stored on parser.error and emitted as "onerror".
// Returns the parser.
function error (parser, er) {
  closeText(parser)
  er += "\nLine: "+parser.line+
        "\nColumn: "+parser.column+
        "\nChar: "+parser.c
  er = new Error(er)
  parser.error = er
  emit(parser, "onerror", er)
  return parser
}
351
// Finish the current document.  Ending in any state other than TEXT is
// reported as "Unexpected end".  Pending text is flushed, "onend" is
// emitted, and the parser is then re-initialised in place with its
// original strict flag and options.  Returns the parser.
function end (parser) {
  if (parser.state !== S.TEXT) error(parser, "Unexpected end")
  closeText(parser)
  parser.c = ""
  parser.closed = true
  emit(parser, "onend")
  SAXParser.call(parser, parser.strict, parser.opt)
  return parser
}
361
// Report `message` through error(), but only when the parser is strict.
// The parser must be the first argument.
function strictFail (parser, message) {
  if (parser.strict) error(parser, message)
}
365
// Start a new tag object from parser.tagName and store it in parser.tag.
// In non-strict mode the name is first case-folded with parser.tagCase.
// With the xmlns option the tag starts out sharing its parent's namespace
// object (the parser's own when there is no open tag).
function newTag (parser) {
  if (!parser.strict) parser.tagName = parser.tagName[parser.tagCase]()
  var parent = parser.tags[parser.tags.length - 1] || parser
    , tag = parser.tag = { name : parser.tagName, attributes : {} }

  // will be overridden if tag contains an xmlns="foo" or xmlns:foo="bar"
  if (parser.opt.xmlns) tag.ns = parent.ns
  parser.attribList.length = 0
}
375
// Split a qualified name into { prefix, local } at the colon.
// A name without a colon has an empty prefix.  The bare name "xmlns" is
// treated as the prefix "xmlns" with an empty local part.
function qname (name) {
  var i = name.indexOf(":")
    , qualName = i < 0 ? [ "", name ] : name.split(":")
    , prefix = qualName[0]
    , local = qualName[1]

  // <x "xmlns"="http://foo">
  if (name === "xmlns") {
    prefix = "xmlns"
    local = ""
  }

  return { prefix: prefix, local: local }
}
390
// Record the attribute currently held in parser.attribName/attribValue,
// then clear both buffers.
//
// Without the xmlns option the attribute is stored on the tag and emitted
// immediately.  With it, the attribute is queued on parser.attribList (it
// is emitted later by openTag) and an xmlns / xmlns:* attribute also adds
// a binding to the tag's namespace object.
function attrib (parser) {
  if (parser.opt.xmlns) {
    var qn = qname(parser.attribName)
      , prefix = qn.prefix
      , local = qn.local

    if (prefix === "xmlns") {
      // namespace binding attribute; push the binding into scope
      if (local === "xml" && parser.attribValue !== XML_NAMESPACE) {
        strictFail( parser
                  , "xml: prefix must be bound to " + XML_NAMESPACE + "\n"
                  + "Actual: " + parser.attribValue )
      } else if (local === "xmlns" && parser.attribValue !== XMLNS_NAMESPACE) {
        strictFail( parser
                  , "xmlns: prefix must be bound to " + XMLNS_NAMESPACE + "\n"
                  + "Actual: " + parser.attribValue )
      } else {
        var tag = parser.tag
          , parent = parser.tags[parser.tags.length - 1] || parser
        // first binding on this tag: give it its own namespace object,
        // inheriting from the parent's, so the parent is not modified
        if (tag.ns === parent.ns) {
          tag.ns = Object.create(parent.ns)
        }
        tag.ns[local] = parser.attribValue
      }
    }

    // defer onattribute events until all attributes have been seen
    // so any new bindings can take effect; preserve attribute order
    // so deferred events can be emitted in document order
    parser.attribList.push([parser.attribName, parser.attribValue])
  } else {
    // in non-xmlns mode, we can emit the event right away
    parser.tag.attributes[parser.attribName] = parser.attribValue
    emitNode( parser
            , "onattribute"
            , { name: parser.attribName
              , value: parser.attribValue } )
  }

  parser.attribName = parser.attribValue = ""
}
432
// Finish an opening tag: resolve namespaces (xmlns mode), emit the deferred
// attribute events, push the tag on the stack and emit "onopentag".
//
//   selfClosing - truthy for <foo/>; the tag and name are then left in
//                 place and the state untouched, for the closeTag() call
//                 the caller makes next
//
// For a normal tag the state becomes TEXT, or SCRIPT for a <script> tag
// unless parser.noscript is set.
function openTag (parser, selfClosing) {
  if (parser.opt.xmlns) {
    // emit namespace binding events
    var tag = parser.tag

    // add namespace info to tag
    var qn = qname(parser.tagName)
    tag.prefix = qn.prefix
    tag.local = qn.local
    tag.uri = tag.ns[qn.prefix] || ""

    if (tag.prefix && !tag.uri) {
      strictFail(parser, "Unbound namespace prefix: "
                       + JSON.stringify(parser.tagName))
      // in non-strict mode carry on, using the prefix as the uri
      tag.uri = qn.prefix
    }

    var parent = parser.tags[parser.tags.length - 1] || parser
    // a namespace object of its own means this tag declared bindings
    if (tag.ns && parent.ns !== tag.ns) {
      Object.keys(tag.ns).forEach(function (p) {
        emitNode( parser
                , "onopennamespace"
                , { prefix: p , uri: tag.ns[p] } )
      })
    }

    // handle deferred onattribute events
    for (var i = 0, l = parser.attribList.length; i < l; i ++) {
      var nv = parser.attribList[i]
      var name = nv[0]
        , value = nv[1]
        , qualName = qname(name)
        , prefix = qualName.prefix
        , local = qualName.local
        , uri = tag.ns[prefix] || ""
        , a = { name: name
              , value: value
              , prefix: prefix
              , local: local
              , uri: uri
              }

      // if there's any attributes with an undefined namespace,
      // then fail on them now.
      if (prefix && prefix !== "xmlns" && !uri) {
        strictFail(parser, "Unbound namespace prefix: "
                         + JSON.stringify(prefix))
        a.uri = prefix
      }
      parser.tag.attributes[name] = a
      emitNode(parser, "onattribute", a)
    }
    parser.attribList.length = 0
  }

  // process the tag
  parser.sawRoot = true
  parser.tags.push(parser.tag)
  emitNode(parser, "onopentag", parser.tag)
  if (!selfClosing) {
    // special case for <script> in non-strict mode.
    if (!parser.noscript && parser.tagName.toLowerCase() === "script") {
      parser.state = S.SCRIPT
    } else {
      parser.state = S.TEXT
    }
    parser.tag = null
    parser.tagName = ""
  }
  parser.attribName = parser.attribValue = ""
  parser.attribList.length = 0
}
504
// Handle a closing tag whose name is in parser.tagName.
//
// The open-tag stack is searched from the top for a tag of that name (the
// name is case-folded first in non-strict mode).  If one is found, it and
// every tag opened after it are popped, each emitting "onclosetag" and,
// in xmlns mode, "onclosenamespace" for the bindings it declared.  If none
// is found, or the name is empty, the literal close tag is appended to the
// text node instead.  The state is TEXT afterwards in every case.
function closeTag (parser) {
  if (!parser.tagName) {
    strictFail(parser, "Weird empty close tag.")
    parser.textNode += "</>"
    parser.state = S.TEXT
    return
  }
  // first make sure that the closing tag actually exists.
  // <a><b></c></b></a> will close everything, otherwise.
  var t = parser.tags.length
  var tagName = parser.tagName
  if (!parser.strict) tagName = tagName[parser.tagCase]()
  var closeTo = tagName
  while (t --) {
    var close = parser.tags[t]
    if (close.name !== closeTo) {
      // reported for every open tag that has to be skipped
      strictFail(parser, "Unexpected close tag")
    } else break
  }

  // didn't find it.  we already failed for strict, so just abort.
  if (t < 0) {
    strictFail(parser, "Unmatched closing tag: "+parser.tagName)
    parser.textNode += "</" + parser.tagName + ">"
    parser.state = S.TEXT
    return
  }
  parser.tagName = tagName
  var s = parser.tags.length
  // pop down to, and including, the matching tag at index t
  while (s --> t) {
    var tag = parser.tag = parser.tags.pop()
    parser.tagName = parser.tag.name
    emitNode(parser, "onclosetag", parser.tagName)

    var parent = parser.tags[parser.tags.length - 1] || parser
    if (parser.opt.xmlns && tag.ns !== parent.ns) {
      // remove namespace bindings introduced by tag
      Object.keys(tag.ns).forEach(function (p) {
        var n = tag.ns[p]
        emitNode(parser, "onclosenamespace", { prefix: p, uri: n })
      })
    }
  }
  if (t === 0) parser.closedRoot = true
  parser.tagName = parser.attribValue = parser.attribName = ""
  parser.attribList.length = 0
  parser.state = S.TEXT
}
557
// Resolve the entity whose name (without "&" and ";") is in parser.entity
// and return its replacement text.
//
// Named entities are looked up, lower-cased, in parser.ENTITIES; only
// non-empty string entries count, so an inherited Object.prototype member
// such as "constructor" is not mistaken for an entity.  Numeric references
// (#NNN decimal, #xHHH hex) are decoded to the character they name, using a
// surrogate pair above U+FFFF.  Anything else, including an empty name, a
// reference that is not a clean number, or a code point above U+10FFFF, is
// reported via strictFail and returned verbatim as "&name;".
function parseEntity (parser) {
  var entity = parser.entity.toLowerCase()
    , named = parser.ENTITIES[entity]
    , num
    , numStr = ""
  if (named && typeof named === "string") return named
  if (entity.charAt(0) === "#") {
    if (entity.charAt(1) === "x") {
      entity = entity.slice(2)
      num = parseInt(entity, 16)
      numStr = num.toString(16)
    } else {
      entity = entity.slice(1)
      num = parseInt(entity, 10)
      numStr = num.toString(10)
    }
  }
  // leading zeros are allowed; drop them before comparing with the
  // re-serialised number, which has none
  entity = entity.replace(/^0+/, "")
  // isNaN also covers the non-numeric case, where num is still undefined
  if (isNaN(num) || numStr.toLowerCase() !== entity || num > 0x10FFFF) {
    strictFail(parser, "Invalid character entity")
    return "&"+parser.entity + ";"
  }
  if (num > 0xFFFF) {
    // String.fromCharCode works on 16-bit units, so split into surrogates
    num -= 0x10000
    return String.fromCharCode(0xD800 + (num >> 10), 0xDC00 + (num & 0x3FF))
  }
  return String.fromCharCode(num)
}
581
// Feed a chunk of input to the parser (called with the parser as `this`).
//
//   chunk - a string; null means "end of input" and is handed to end();
//           any other object (e.g. a Buffer) is converted with toString()
//
// Throws the stored parser.error if an earlier error has not been cleared.
// Writing to a closed parser is reported through error().  Returns the
// parser.
//
// The body is a character-at-a-time state machine over parser.state.
// Every case ends in `continue`, which fetches the next character.
function write (chunk) {
  var parser = this
  if (parser.error) throw parser.error
  if (parser.closed) return error(parser,
    "Cannot write after close. Assign an onready handler.")
  if (chunk === null) return end(parser)
  // charAt()/substring() below need a real string
  if (typeof chunk === "object") chunk = chunk.toString()
  var i = 0, c = ""
    , starti, returnState, buffer
  // charAt() past the end yields "", which ends the loop
  while (parser.c = c = chunk.charAt(i++)) {
    parser.position ++
    if (c === "\n") {
      parser.line ++
      parser.column = 0
    } else parser.column ++
    switch (parser.state) {

      case S.BEGIN:
        if (c === "<") parser.state = S.OPEN_WAKA
        else if (not(whitespace,c)) {
          // have to process this as a text node.
          // weird, but happens.
          strictFail(parser, "Non-whitespace before first tag.")
          parser.textNode = c
          parser.state = S.TEXT
        }
        continue

      case S.TEXT:
        if (parser.sawRoot && !parser.closedRoot) {
          // fast path inside the root element: consume everything up to
          // the next "<" or "&" (or the end of the chunk) in one go,
          // keeping the position counters in step.
          starti = i-1
          while (c && c!=="<" && c!=="&") {
            c = chunk.charAt(i++)
            if (c) {
              parser.position ++
              if (c === "\n") {
                parser.line ++
                parser.column = 0
              } else parser.column ++
            }
          }
          parser.textNode += chunk.substring(starti, i-1)
        }
        if (c === "<") parser.state = S.OPEN_WAKA
        else {
          if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot))
            strictFail(parser, "Text data outside of root node.")
          if (c === "&") parser.state = S.TEXT_ENTITY
          else parser.textNode += c
        }
        continue

      case S.SCRIPT:
        // only non-strict
        if (c === "<") {
          parser.state = S.SCRIPT_ENDING
        } else parser.script += c
        continue

      case S.SCRIPT_ENDING:
        if (c === "/") {
          emitNode(parser, "onscript", parser.script)
          parser.state = S.CLOSE_TAG
          parser.script = ""
          parser.tagName = ""
        } else {
          parser.script += "<" + c
          parser.state = S.SCRIPT
        }
        continue

      case S.OPEN_WAKA:
        // either a /, ?, !, or text is coming next.
        if (c === "!") {
          parser.state = S.SGML_DECL
          parser.sgmlDecl = ""
        } else if (is(whitespace, c)) {
          // wait for it...
        } else if (is(nameStart,c)) {
          parser.startTagPosition = parser.position - 1
          parser.state = S.OPEN_TAG
          parser.tagName = c
        } else if (c === "/") {
          parser.startTagPosition = parser.position - 1
          parser.state = S.CLOSE_TAG
          parser.tagName = ""
        } else if (c === "?") {
          parser.state = S.PROC_INST
          parser.procInstName = parser.procInstBody = ""
        } else {
          strictFail(parser, "Unencoded <")
          parser.textNode += "<" + c
          parser.state = S.TEXT
        }
        continue

      case S.SGML_DECL:
        if ((parser.sgmlDecl+c).toUpperCase() === CDATA) {
          emitNode(parser, "onopencdata")
          parser.state = S.CDATA
          parser.sgmlDecl = ""
          parser.cdata = ""
        } else if (parser.sgmlDecl+c === "--") {
          parser.state = S.COMMENT
          parser.comment = ""
          parser.sgmlDecl = ""
        } else if ((parser.sgmlDecl+c).toUpperCase() === DOCTYPE) {
          parser.state = S.DOCTYPE
          if (parser.doctype || parser.sawRoot) strictFail(parser,
            "Inappropriately located doctype declaration")
          parser.doctype = ""
          parser.sgmlDecl = ""
        } else if (c === ">") {
          emitNode(parser, "onsgmldeclaration", parser.sgmlDecl)
          parser.sgmlDecl = ""
          parser.state = S.TEXT
        } else if (is(quote, c)) {
          parser.state = S.SGML_DECL_QUOTED
          // remember which quote opened the string, so that
          // SGML_DECL_QUOTED can recognise the matching one
          parser.q = c
          parser.sgmlDecl += c
        } else parser.sgmlDecl += c
        continue

      case S.SGML_DECL_QUOTED:
        if (c === parser.q) {
          parser.state = S.SGML_DECL
          parser.q = ""
        }
        parser.sgmlDecl += c
        continue

      case S.DOCTYPE:
        if (c === ">") {
          parser.state = S.TEXT
          emitNode(parser, "ondoctype", parser.doctype)
          parser.doctype = true // just remember that we saw it.
        } else {
          parser.doctype += c
          if (c === "[") parser.state = S.DOCTYPE_DTD
          else if (is(quote, c)) {
            parser.state = S.DOCTYPE_QUOTED
            parser.q = c
          }
        }
        continue

      case S.DOCTYPE_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.q = ""
          parser.state = S.DOCTYPE
        }
        continue

      case S.DOCTYPE_DTD:
        parser.doctype += c
        if (c === "]") parser.state = S.DOCTYPE
        else if (is(quote,c)) {
          parser.state = S.DOCTYPE_DTD_QUOTED
          parser.q = c
        }
        continue

      case S.DOCTYPE_DTD_QUOTED:
        parser.doctype += c
        if (c === parser.q) {
          parser.state = S.DOCTYPE_DTD
          parser.q = ""
        }
        continue

      case S.COMMENT:
        if (c === "-") parser.state = S.COMMENT_ENDING
        else parser.comment += c
        continue

      case S.COMMENT_ENDING:
        if (c === "-") {
          parser.state = S.COMMENT_ENDED
          parser.comment = textopts(parser.opt, parser.comment)
          if (parser.comment) emitNode(parser, "oncomment", parser.comment)
          parser.comment = ""
        } else {
          parser.comment += "-" + c
          parser.state = S.COMMENT
        }
        continue

      case S.COMMENT_ENDED:
        if (c !== ">") {
          strictFail(parser, "Malformed comment")
          // allow <!-- blah -- bloo --> in non-strict mode,
          // which is a comment of " blah -- bloo "
          parser.comment += "--" + c
          parser.state = S.COMMENT
        } else parser.state = S.TEXT
        continue

      case S.CDATA:
        if (c === "]") parser.state = S.CDATA_ENDING
        else parser.cdata += c
        continue

      case S.CDATA_ENDING:
        if (c === "]") parser.state = S.CDATA_ENDING_2
        else {
          parser.cdata += "]" + c
          parser.state = S.CDATA
        }
        continue

      case S.CDATA_ENDING_2:
        if (c === ">") {
          if (parser.cdata) emitNode(parser, "oncdata", parser.cdata)
          emitNode(parser, "onclosecdata")
          parser.cdata = ""
          parser.state = S.TEXT
        } else if (c === "]") {
          // "]]]": the first bracket was data, the last two may still
          // start the terminator
          parser.cdata += "]"
        } else {
          parser.cdata += "]]" + c
          parser.state = S.CDATA
        }
        continue

      case S.PROC_INST:
        if (c === "?") parser.state = S.PROC_INST_ENDING
        else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY
        else parser.procInstName += c
        continue

      case S.PROC_INST_BODY:
        // skip whitespace between the name and the body
        if (!parser.procInstBody && is(whitespace, c)) continue
        else if (c === "?") parser.state = S.PROC_INST_ENDING
        else if (is(quote, c)) {
          parser.state = S.PROC_INST_QUOTED
          parser.q = c
          parser.procInstBody += c
        } else parser.procInstBody += c
        continue

      case S.PROC_INST_ENDING:
        if (c === ">") {
          emitNode(parser, "onprocessinginstruction", {
            name : parser.procInstName,
            body : parser.procInstBody
          })
          parser.procInstName = parser.procInstBody = ""
          parser.state = S.TEXT
        } else {
          parser.procInstBody += "?" + c
          parser.state = S.PROC_INST_BODY
        }
        continue

      case S.PROC_INST_QUOTED:
        parser.procInstBody += c
        if (c === parser.q) {
          parser.state = S.PROC_INST_BODY
          parser.q = ""
        }
        continue

      case S.OPEN_TAG:
        if (is(nameBody, c)) parser.tagName += c
        else {
          newTag(parser)
          if (c === ">") openTag(parser)
          else if (c === "/") parser.state = S.OPEN_TAG_SLASH
          else {
            if (not(whitespace, c)) strictFail(
              parser, "Invalid character in tag name")
            parser.state = S.ATTRIB
          }
        }
        continue

      case S.OPEN_TAG_SLASH:
        if (c === ">") {
          openTag(parser, true)
          closeTag(parser)
        } else {
          strictFail(parser, "Forward-slash in opening tag not followed by >")
          parser.state = S.ATTRIB
        }
        continue

      case S.ATTRIB:
        // haven't read the attribute name yet.
        if (is(whitespace, c)) continue
        else if (c === ">") openTag(parser)
        else if (c === "/") parser.state = S.OPEN_TAG_SLASH
        else if (is(nameStart, c)) {
          parser.attribName = c
          parser.attribValue = ""
          parser.state = S.ATTRIB_NAME
        } else strictFail(parser, "Invalid attribute name")
        continue

      case S.ATTRIB_NAME:
        if (c === "=") parser.state = S.ATTRIB_VALUE
        else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE
        else if (is(nameBody, c)) parser.attribName += c
        else strictFail(parser, "Invalid attribute name")
        continue

      case S.ATTRIB_NAME_SAW_WHITE:
        if (c === "=") parser.state = S.ATTRIB_VALUE
        else if (is(whitespace, c)) continue
        else {
          // the name was followed by something other than "=":
          // record it with an empty value and carry on from `c`
          strictFail(parser, "Attribute without value")
          parser.tag.attributes[parser.attribName] = ""
          parser.attribValue = ""
          emitNode(parser, "onattribute",
                   { name : parser.attribName, value : "" })
          parser.attribName = ""
          if (c === ">") openTag(parser)
          else if (is(nameStart, c)) {
            parser.attribName = c
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, "Invalid attribute name")
            parser.state = S.ATTRIB
          }
        }
        continue

      case S.ATTRIB_VALUE:
        if (is(whitespace, c)) continue
        else if (is(quote, c)) {
          parser.q = c
          parser.state = S.ATTRIB_VALUE_QUOTED
        } else {
          strictFail(parser, "Unquoted attribute value")
          parser.state = S.ATTRIB_VALUE_UNQUOTED
          parser.attribValue = c
        }
        continue

      case S.ATTRIB_VALUE_QUOTED:
        if (c !== parser.q) {
          if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q
          else parser.attribValue += c
          continue
        }
        attrib(parser)
        parser.q = ""
        parser.state = S.ATTRIB
        continue

      case S.ATTRIB_VALUE_UNQUOTED:
        if (not(attribEnd,c)) {
          if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U
          else parser.attribValue += c
          continue
        }
        attrib(parser)
        if (c === ">") openTag(parser)
        else parser.state = S.ATTRIB
        continue

      case S.CLOSE_TAG:
        if (!parser.tagName) {
          if (is(whitespace, c)) continue
          else if (not(nameStart, c)) strictFail(parser,
            "Invalid tagname in closing tag.")
          else parser.tagName = c
        }
        else if (c === ">") closeTag(parser)
        else if (is(nameBody, c)) parser.tagName += c
        else {
          if (not(whitespace, c)) strictFail(parser,
            "Invalid tagname in closing tag")
          parser.state = S.CLOSE_TAG_SAW_WHITE
        }
        continue

      case S.CLOSE_TAG_SAW_WHITE:
        if (is(whitespace, c)) continue
        if (c === ">") closeTag(parser)
        else strictFail(parser, "Invalid characters in closing tag")
        continue

      case S.TEXT_ENTITY:
      case S.ATTRIB_VALUE_ENTITY_Q:
      case S.ATTRIB_VALUE_ENTITY_U:
        // the three entity states differ only in the buffer the result is
        // appended to and the state to go back to afterwards
        if (parser.state === S.TEXT_ENTITY) {
          returnState = S.TEXT
          buffer = "textNode"
        } else if (parser.state === S.ATTRIB_VALUE_ENTITY_Q) {
          returnState = S.ATTRIB_VALUE_QUOTED
          buffer = "attribValue"
        } else {
          returnState = S.ATTRIB_VALUE_UNQUOTED
          buffer = "attribValue"
        }
        if (c === ";") {
          parser[buffer] += parseEntity(parser)
          parser.entity = ""
          parser.state = returnState
        }
        else if (is(entity, c)) parser.entity += c
        else {
          // not an entity after all: keep the raw text
          strictFail(parser, "Invalid character entity")
          parser[buffer] += "&" + parser.entity + c
          parser.entity = ""
          parser.state = returnState
        }
        continue

      default:
        throw new Error("Unknown state: " + parser.state)
    }
  } // while
  // buffers are checked at most once per write(), and only once the
  // scheduled position has been reached (see checkBufferLength).
  if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser)
  return parser
}
1004
1005})(typeof exports === "undefined" ? sax = {} : exports)