1:
53:
54: package ;
55:
56: import ;
57: import ;
58: import ;
59: import ;
60: import ;
61: import ;
62: import ;
63: import ;
64: import ;
65: import ;
66: import ;
67: import ;
68: import ;
69: import ;
70: import ;
71: import ;
72: import ;
73: import ;
74: import ;
75: import ;
76: import ;
77: import ;
78:
79: import ;
80: import ;
81: import ;
82: import ;
83: import ;
84: import ;
85: import ;
86: import ;
87: import ;
88: import ;
89:
90: import ;
91: import ;
92: import ;
93:
94:
117: public class XMLParser
118: implements XMLStreamReader, NamespaceContext
119: {
120:
121:
// Parser states; next() switches on the "state" field holding one of these.
// INIT: nothing parsed yet. next() reads an optional XML declaration,
// reports START_DOCUMENT and moves to PROLOG.
122: private static final int INIT = 0;
// PROLOG: before the root element; a doctype, comments, PIs or the root
// start tag are accepted.
123: private static final int PROLOG = 1;
// CONTENT: inside the root element.
124: private static final int CONTENT = 2;
// EMPTY_ELEMENT: the next call to next() synthesizes the END_ELEMENT for the
// element name on top of the element stack without reading input.
125: private static final int EMPTY_ELEMENT = 3;
// MISC: after the root element has been closed; only comments, PIs and
// whitespace are accepted until end of input.
126: private static final int MISC = 4;
127:
128:
// Bit flags (each a distinct power of two, so they can be OR-ed together).
// NOTE(review): none of these is referenced in the visible part of the file;
// presumably they are options for the literal-reading code further down --
// confirm the exact meaning of each flag against its users.
129: private final static int LIT_ENTITY_REF = 2;
130: private final static int LIT_NORMALIZE = 4;
131: private final static int LIT_ATTRIBUTE = 8;
132: private final static int LIT_DISABLE_PE = 16;
133: private final static int LIT_DISABLE_CREF = 32;
134: private final static int LIT_DISABLE_EREF = 64;
135: private final static int LIT_PUBID = 256;
136:
137:
// Package-visible codes, one per kind of attribute default.
// NOTE(review): not referenced in the visible part of the file; presumably
// they classify how an attribute default was declared in the DTD
// (undeclared / specified / #IMPLIED / #REQUIRED / #FIXED) -- confirm.
138: final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
139: final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
140: final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
141: final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
142: final static int ATTRIBUTE_DEFAULT_FIXED = 34;
143:
144:
// Extra event type codes produced by next() in addition to the
// XMLStreamConstants ones. next() only returns them to the caller when
// extendedEventTypes is true; otherwise it moves on to the following event.
// START_ENTITY: an entity name was taken from startEntityStack.
145: final static int START_ENTITY = 50;
// END_ENTITY: an entity name was taken from endEntityStack.
146: final static int END_ENTITY = 51;
147:
148:
// The input source currently being read; replaced by pushInput(Input).
151: private Input input;
152:
153:
// All active inputs; pushInput(Input) appends at the end, so the last entry
// is the current one. Also scanned to detect self-recursive entities.
158: private LinkedList inputStack = new LinkedList();
159:
160:
// Names of entities whose start still has to be reported; filled by
// pushInput(Input) for inputs with report set, drained by next().
163: private LinkedList startEntityStack = new LinkedList();
164:
165:
// Names of entities whose end still has to be reported; drained by next().
// NOTE(review): the code that fills it is outside the visible part.
168: private LinkedList endEntityStack = new LinkedList();
169:
170:
// Current parser state: INIT, PROLOG, CONTENT, EMPTY_ELEMENT or MISC.
173: private int state = INIT;
174:
175:
// Type of the most recently parsed event (what getEventType() returns).
178: private int event;
179:
180:
// True when hasNext() has already parsed the next event; the following
// next() call then just returns it.
183: private boolean lookahead;
184:
185:
// Names of the open elements; the last entry is the innermost element.
189: private LinkedList stack = new LinkedList();
190:
191:
// Namespace scopes, each a LinkedHashMap from prefix to namespace URI.
// The first entry is the scope that next() discards after an END_ELEMENT.
196: private LinkedList namespaces = new LinkedList();
197:
198:
// Base URI entries (String, possibly null) searched first-to-last by
// getXMLBase(); the first entry is discarded after an END_ELEMENT.
203: private LinkedList bases = new LinkedList();
204:
205:
// Attribute objects exposed by the getAttributeXXX accessors.
209: private ArrayList attrs = new ArrayList();
210:
211:
// Main buffer: holds the text or the qualified name of the current event.
214: private StringBuffer buf = new StringBuffer();
215:
216:
// NOTE(review): not used in the visible part; presumably scratch space for
// reading name tokens -- confirm.
219: private StringBuffer nmtokenBuf = new StringBuffer();
220:
221:
// NOTE(review): not used in the visible part; presumably scratch space for
// reading quoted literals -- confirm.
224: private StringBuffer literalBuf = new StringBuffer();
225:
226:
// Scratch array filled by the multi-character reads in require(String)
// and tryRead(String).
229: private int[] tmpBuf = new int[1024];
230:
231:
// NOTE(review): not used in the visible part; presumably the content model
// of the element being validated -- confirm.
234: private ContentModel currentContentModel;
235:
236:
// Allocated by the constructors only when validating is true.
242: private LinkedList validationStack;
243:
244:
// Allocated by the constructors only when validating is true.
// NOTE(review): presumably the ID values seen and the IDREF values to
// check -- confirm against the validation code.
249: private HashSet ids, idrefs;
250:
251:
// Returned by getPITarget() and getPIData().
255: private String piTarget, piData;
256:
257:
// Declared XML version; getVersion() reports "1.0" while this is null.
260: private String xmlVersion;
261:
262:
// Returned by getCharacterEncodingScheme().
265: private String xmlEncoding;
266:
267:
// Standalone flag; null means it was never set (see standaloneSet()).
270: private Boolean xmlStandalone;
271:
272:
// The document type; null until a doctype declaration has been read.
275: Doctype doctype;
276:
277:
// Consulted by readCh(): when expandPE is set a '%' (0x25) triggers
// expandPEReference(); if peIsError is also set an error is raised first.
280: private boolean expandPE, peIsError;
281:
282:
// Constructor argument; when true, validation hooks run in next().
285: private final boolean validating;
286:
287:
// Constructor argument; exposed as property gnu.xml.stream.stringInterning.
290: private final boolean stringInterning;
291:
292:
// Constructor argument; exposed as XMLInputFactory.IS_COALESCING.
296: private final boolean coalescing;
297:
298:
// Constructor argument; when true next() expands parsed entity references
// instead of reporting ENTITY_REFERENCE.
303: private final boolean replaceERefs;
304:
305:
// Constructor argument; when false external entities are not pushed as
// inputs.
308: private final boolean externalEntities;
309:
310:
// Constructor argument; exposed as XMLInputFactory.SUPPORT_DTD.
313: private final boolean supportDTD;
314:
315:
// Constructor argument; enables namespace scope handling.
320: private final boolean namespaceAware;
321:
322:
// Constructor argument; enables use of the "bases" list in getXMLBase().
326: private final boolean baseAware;
327:
328:
// Constructor argument; when true next() returns START_ENTITY/END_ENTITY.
332: private final boolean extendedEventTypes;
333:
334:
// Constructor argument; returned for XMLInputFactory.REPORTER.
337: final XMLReporter reporter;
338:
339:
// Constructor argument; asked first when resolving external entities.
342: final XMLResolver resolver;
343:
344:
// Markup delimiters handed to tryRead(String) to recognise the construct
// that starts at the current input position.
345: private static final String TEST_START_ELEMENT = "<";
346: private static final String TEST_END_ELEMENT = "</";
347: private static final String TEST_COMMENT = "<!--";
348: private static final String TEST_PI = "<?";
349: private static final String TEST_CDATA = "<![CDATA[";
350: private static final String TEST_XML_DECL = "<?xml";
351: private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE";
// Declaration keywords; NOTE(review): not referenced in the visible part,
// presumably used while reading the DTD.
352: private static final String TEST_ELEMENT_DECL = "<!ELEMENT";
353: private static final String TEST_ATTLIST_DECL = "<!ATTLIST";
354: private static final String TEST_ENTITY_DECL = "<!ENTITY";
355: private static final String TEST_NOTATION_DECL = "<!NOTATION";
// Closing delimiters; NOTE(review): not referenced in the visible part,
// presumably passed to require/readUntil by the readXXX methods.
356: private static final String TEST_KET = ">";
357: private static final String TEST_END_COMMENT = "--";
358: private static final String TEST_END_PI = "?>";
359: private static final String TEST_END_CDATA = "]]>";
360:
361:
// Maps the name of each of the five predefined XML entities to its
// replacement text; next() looks references up here before consulting the
// doctype.
364: private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap();
365: static
366: {
367: PREDEFINED_ENTITIES.put("amp", "&");
368: PREDEFINED_ENTITIES.put("lt", "<");
369: PREDEFINED_ENTITIES.put("gt", ">");
370: PREDEFINED_ENTITIES.put("apos", "'");
371: PREDEFINED_ENTITIES.put("quot", "\"");
372: }
373:
374:
397: public XMLParser(InputStream in, String systemId,
398: boolean validating,
399: boolean namespaceAware,
400: boolean coalescing,
401: boolean replaceERefs,
402: boolean externalEntities,
403: boolean supportDTD,
404: boolean baseAware,
405: boolean stringInterning,
406: boolean extendedEventTypes,
407: XMLReporter reporter,
408: XMLResolver resolver)
409: {
410: this.validating = validating;
411: this.namespaceAware = namespaceAware;
412: this.coalescing = coalescing;
413: this.replaceERefs = replaceERefs;
414: this.externalEntities = externalEntities;
415: this.supportDTD = supportDTD;
416: this.baseAware = baseAware;
417: this.stringInterning = stringInterning;
418: this.extendedEventTypes = extendedEventTypes;
419: this.reporter = reporter;
420: this.resolver = resolver;
421: if (validating)
422: {
423: validationStack = new LinkedList();
424: ids = new HashSet();
425: idrefs = new HashSet();
426: }
427: String debug = System.getProperty("gnu.xml.debug.input");
428: if (debug != null)
429: {
430: try
431: {
432: File file = File.createTempFile(debug, ".xml");
433: in = new TeeInputStream(in, new FileOutputStream(file));
434: }
435: catch (IOException e)
436: {
437: RuntimeException e2 = new RuntimeException();
438: e2.initCause(e);
439: throw e2;
440: }
441: }
442: pushInput(new Input(in, null, null, systemId, null, null, false, true));
443: }
444:
445:
471: public XMLParser(Reader reader, String systemId,
472: boolean validating,
473: boolean namespaceAware,
474: boolean coalescing,
475: boolean replaceERefs,
476: boolean externalEntities,
477: boolean supportDTD,
478: boolean baseAware,
479: boolean stringInterning,
480: boolean extendedEventTypes,
481: XMLReporter reporter,
482: XMLResolver resolver)
483: {
484: this.validating = validating;
485: this.namespaceAware = namespaceAware;
486: this.coalescing = coalescing;
487: this.replaceERefs = replaceERefs;
488: this.externalEntities = externalEntities;
489: this.supportDTD = supportDTD;
490: this.baseAware = baseAware;
491: this.stringInterning = stringInterning;
492: this.extendedEventTypes = extendedEventTypes;
493: this.reporter = reporter;
494: this.resolver = resolver;
495: if (validating)
496: {
497: validationStack = new LinkedList();
498: ids = new HashSet();
499: idrefs = new HashSet();
500: }
501: String debug = System.getProperty("gnu.xml.debug.input");
502: if (debug != null)
503: {
504: try
505: {
506: File file = File.createTempFile(debug, ".xml");
507: reader = new TeeReader(reader, new FileWriter(file));
508: }
509: catch (IOException e)
510: {
511: RuntimeException e2 = new RuntimeException();
512: e2.initCause(e);
513: throw e2;
514: }
515: }
516: pushInput(new Input(null, reader, null, systemId, null, null, false, true));
517: }
518:
519:
520:
521: public String getNamespaceURI(String prefix)
522: {
523: if (XMLConstants.XML_NS_PREFIX.equals(prefix))
524: return XMLConstants.XML_NS_URI;
525: if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix))
526: return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
527: for (Iterator i = namespaces.iterator(); i.hasNext(); )
528: {
529: LinkedHashMap ctx = (LinkedHashMap) i.next();
530: String namespaceURI = (String) ctx.get(prefix);
531: if (namespaceURI != null)
532: return namespaceURI;
533: }
534: return null;
535: }
536:
537: public String getPrefix(String namespaceURI)
538: {
539: if (XMLConstants.XML_NS_URI.equals(namespaceURI))
540: return XMLConstants.XML_NS_PREFIX;
541: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
542: return XMLConstants.XMLNS_ATTRIBUTE;
543: for (Iterator i = namespaces.iterator(); i.hasNext(); )
544: {
545: LinkedHashMap ctx = (LinkedHashMap) i.next();
546: if (ctx.containsValue(namespaceURI))
547: {
548: for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
549: {
550: Map.Entry entry = (Map.Entry) i.next();
551: String uri = (String) entry.getValue();
552: if (uri.equals(namespaceURI))
553: return (String) entry.getKey();
554: }
555: }
556: }
557: return null;
558: }
559:
560: public Iterator getPrefixes(String namespaceURI)
561: {
562: if (XMLConstants.XML_NS_URI.equals(namespaceURI))
563: return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator();
564: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
565: return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator();
566: LinkedList acc = new LinkedList();
567: for (Iterator i = namespaces.iterator(); i.hasNext(); )
568: {
569: LinkedHashMap ctx = (LinkedHashMap) i.next();
570: if (ctx.containsValue(namespaceURI))
571: {
572: for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
573: {
574: Map.Entry entry = (Map.Entry) i.next();
575: String uri = (String) entry.getValue();
576: if (uri.equals(namespaceURI))
577: acc.add(entry.getKey());
578: }
579: }
580: }
581: return acc.iterator();
582: }
583:
584:
585:
586: public void close()
587: throws XMLStreamException
588: {
589: stack = null;
590: namespaces = null;
591: bases = null;
592: buf = null;
593: attrs = null;
594: doctype = null;
595:
596: inputStack = null;
597: validationStack = null;
598: ids = null;
599: idrefs = null;
600: }
601:
602: public NamespaceContext getNamespaceContext()
603: {
604: return this;
605: }
606:
607: public int getAttributeCount()
608: {
609: return attrs.size();
610: }
611:
612: public String getAttributeLocalName(int index)
613: {
614: Attribute a = (Attribute) attrs.get(index);
615: return a.localName;
616: }
617:
618: public String getAttributeNamespace(int index)
619: {
620: String prefix = getAttributePrefix(index);
621: return getNamespaceURI(prefix);
622: }
623:
624: public String getAttributePrefix(int index)
625: {
626: Attribute a = (Attribute) attrs.get(index);
627: return a.prefix;
628: }
629:
630: public QName getAttributeName(int index)
631: {
632: Attribute a = (Attribute) attrs.get(index);
633: String namespaceURI = getNamespaceURI(a.prefix);
634: return new QName(namespaceURI, a.localName, a.prefix);
635: }
636:
637: public String getAttributeType(int index)
638: {
639: Attribute a = (Attribute) attrs.get(index);
640: return a.type;
641: }
642:
643: private String getAttributeType(String elementName, String attName)
644: {
645: if (doctype != null)
646: {
647: AttributeDecl att = doctype.getAttributeDecl(elementName, attName);
648: if (att != null)
649: return att.type;
650: }
651: return "CDATA";
652: }
653:
654: public String getAttributeValue(int index)
655: {
656: Attribute a = (Attribute) attrs.get(index);
657: return a.value;
658: }
659:
660: public String getAttributeValue(String namespaceURI, String localName)
661: {
662: for (Iterator i = attrs.iterator(); i.hasNext(); )
663: {
664: Attribute a = (Attribute) i.next();
665: if (a.localName.equals(localName))
666: {
667: String uri = getNamespaceURI(a.prefix);
668: if ((uri == null && namespaceURI == null) ||
669: (uri != null && uri.equals(namespaceURI)))
670: return a.value;
671: }
672: }
673: return null;
674: }
675:
676: boolean isAttributeDeclared(int index)
677: {
678: if (doctype == null)
679: return false;
680: Attribute a = (Attribute) attrs.get(index);
681: String qn = ("".equals(a.prefix)) ? a.localName :
682: a.prefix + ":" + a.localName;
683: String elementName = buf.toString();
684: return doctype.isAttributeDeclared(elementName, qn);
685: }
686:
687: public String getCharacterEncodingScheme()
688: {
689: return xmlEncoding;
690: }
691:
692: public String getElementText()
693: throws XMLStreamException
694: {
695: if (event != XMLStreamConstants.START_ELEMENT)
696: throw new XMLStreamException("current event must be START_ELEMENT");
697: StringBuffer elementText = new StringBuffer();
698: int depth = stack.size();
699: while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth)
700: {
701: switch (next())
702: {
703: case XMLStreamConstants.CHARACTERS:
704: case XMLStreamConstants.SPACE:
705: elementText.append(buf.toString());
706: }
707: }
708: return elementText.toString();
709: }
710:
711: public String getEncoding()
712: {
713: return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding;
714: }
715:
716: public int getEventType()
717: {
718: return event;
719: }
720:
721: public String getLocalName()
722: {
723: switch (event)
724: {
725: case XMLStreamConstants.START_ELEMENT:
726: case XMLStreamConstants.END_ELEMENT:
727: String qName = buf.toString();
728: int ci = qName.indexOf(':');
729: return (ci == -1) ? qName : qName.substring(ci + 1);
730: default:
731: return null;
732: }
733: }
734:
735: public Location getLocation()
736: {
737: return input;
738: }
739:
740: public QName getName()
741: {
742: switch (event)
743: {
744: case XMLStreamConstants.START_ELEMENT:
745: case XMLStreamConstants.END_ELEMENT:
746: String qName = buf.toString();
747: int ci = qName.indexOf(':');
748: String localName = (ci == -1) ? qName : qName.substring(ci + 1);
749: String prefix = (ci == -1) ?
750: (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
751: qName.substring(0, ci);
752: String namespaceURI = getNamespaceURI(prefix);
753: return new QName(namespaceURI, localName, prefix);
754: default:
755: return null;
756: }
757: }
758:
759: public int getNamespaceCount()
760: {
761: if (!namespaceAware || namespaces.isEmpty())
762: return 0;
763: switch (event)
764: {
765: case XMLStreamConstants.START_ELEMENT:
766: case XMLStreamConstants.END_ELEMENT:
767: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
768: return ctx.size();
769: default:
770: return 0;
771: }
772: }
773:
774: public String getNamespacePrefix(int index)
775: {
776: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
777: int count = 0;
778: for (Iterator i = ctx.keySet().iterator(); i.hasNext(); )
779: {
780: String prefix = (String) i.next();
781: if (count++ == index)
782: return prefix;
783: }
784: return null;
785: }
786:
787: public String getNamespaceURI()
788: {
789: switch (event)
790: {
791: case XMLStreamConstants.START_ELEMENT:
792: case XMLStreamConstants.END_ELEMENT:
793: String qName = buf.toString();
794: int ci = qName.indexOf(':');
795: if (ci == -1)
796: return null;
797: String prefix = qName.substring(0, ci);
798: return getNamespaceURI(prefix);
799: default:
800: return null;
801: }
802: }
803:
804: public String getNamespaceURI(int index)
805: {
806: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
807: int count = 0;
808: for (Iterator i = ctx.values().iterator(); i.hasNext(); )
809: {
810: String uri = (String) i.next();
811: if (count++ == index)
812: return uri;
813: }
814: return null;
815: }
816:
817: public String getPIData()
818: {
819: return piData;
820: }
821:
822: public String getPITarget()
823: {
824: return piTarget;
825: }
826:
827: public String getPrefix()
828: {
829: switch (event)
830: {
831: case XMLStreamConstants.START_ELEMENT:
832: case XMLStreamConstants.END_ELEMENT:
833: String qName = buf.toString();
834: int ci = qName.indexOf(':');
835: return (ci == -1) ?
836: (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
837: qName.substring(0, ci);
838: default:
839: return null;
840: }
841: }
842:
843: public Object getProperty(String name)
844: throws IllegalArgumentException
845: {
846: if (name == null)
847: throw new IllegalArgumentException("name is null");
848: if (XMLInputFactory.ALLOCATOR.equals(name))
849: return null;
850: if (XMLInputFactory.IS_COALESCING.equals(name))
851: return coalescing ? Boolean.TRUE : Boolean.FALSE;
852: if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name))
853: return namespaceAware ? Boolean.TRUE : Boolean.FALSE;
854: if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name))
855: return replaceERefs ? Boolean.TRUE : Boolean.FALSE;
856: if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name))
857: return externalEntities ? Boolean.TRUE : Boolean.FALSE;
858: if (XMLInputFactory.IS_VALIDATING.equals(name))
859: return Boolean.FALSE;
860: if (XMLInputFactory.REPORTER.equals(name))
861: return reporter;
862: if (XMLInputFactory.RESOLVER.equals(name))
863: return resolver;
864: if (XMLInputFactory.SUPPORT_DTD.equals(name))
865: return supportDTD ? Boolean.TRUE : Boolean.FALSE;
866: if ("gnu.xml.stream.stringInterning".equals(name))
867: return stringInterning ? Boolean.TRUE : Boolean.FALSE;
868: if ("gnu.xml.stream.xmlBase".equals(name))
869: return baseAware ? Boolean.TRUE : Boolean.FALSE;
870: if ("gnu.xml.stream.baseURI".equals(name))
871: return getXMLBase();
872: return null;
873: }
874:
875: public String getText()
876: {
877: return buf.toString();
878: }
879:
880: public char[] getTextCharacters()
881: {
882: return buf.toString().toCharArray();
883: }
884:
885: public int getTextCharacters(int sourceStart, char[] target,
886: int targetStart, int length)
887: throws XMLStreamException
888: {
889: length = Math.min(sourceStart + buf.length(), length);
890: int sourceEnd = sourceStart + length;
891: buf.getChars(sourceStart, sourceEnd, target, targetStart);
892: return length;
893: }
894:
895: public int getTextLength()
896: {
897: return buf.length();
898: }
899:
900: public int getTextStart()
901: {
902: return 0;
903: }
904:
905: public String getVersion()
906: {
907: return (xmlVersion == null) ? "1.0" : xmlVersion;
908: }
909:
910: public boolean hasName()
911: {
912: switch (event)
913: {
914: case XMLStreamConstants.START_ELEMENT:
915: case XMLStreamConstants.END_ELEMENT:
916: return true;
917: default:
918: return false;
919: }
920: }
921:
922: public boolean hasText()
923: {
924: switch (event)
925: {
926: case XMLStreamConstants.CHARACTERS:
927: case XMLStreamConstants.SPACE:
928: return true;
929: default:
930: return false;
931: }
932: }
933:
934: public boolean isAttributeSpecified(int index)
935: {
936: Attribute a = (Attribute) attrs.get(index);
937: return a.specified;
938: }
939:
940: public boolean isCharacters()
941: {
942: return (event == XMLStreamConstants.CHARACTERS);
943: }
944:
945: public boolean isEndElement()
946: {
947: return (event == XMLStreamConstants.END_ELEMENT);
948: }
949:
950: public boolean isStandalone()
951: {
952: return Boolean.TRUE.equals(xmlStandalone);
953: }
954:
955: public boolean isStartElement()
956: {
957: return (event == XMLStreamConstants.START_ELEMENT);
958: }
959:
960: public boolean isWhiteSpace()
961: {
962: return (event == XMLStreamConstants.SPACE);
963: }
964:
965: public int nextTag()
966: throws XMLStreamException
967: {
968: do
969: {
970: switch (next())
971: {
972: case XMLStreamConstants.START_ELEMENT:
973: case XMLStreamConstants.END_ELEMENT:
974: case XMLStreamConstants.CHARACTERS:
975: case XMLStreamConstants.SPACE:
976: case XMLStreamConstants.COMMENT:
977: case XMLStreamConstants.PROCESSING_INSTRUCTION:
978: break;
979: default:
980: throw new XMLStreamException("Unexpected event type: " + event);
981: }
982: }
983: while (event != XMLStreamConstants.START_ELEMENT &&
984: event != XMLStreamConstants.END_ELEMENT);
985: return event;
986: }
987:
988: public void require(int type, String namespaceURI, String localName)
989: throws XMLStreamException
990: {
991: if (event != type)
992: throw new XMLStreamException("Current event type is " + event);
993: if (event == XMLStreamConstants.START_ELEMENT ||
994: event == XMLStreamConstants.END_ELEMENT)
995: {
996: String ln = getLocalName();
997: if (!ln.equals(localName))
998: throw new XMLStreamException("Current local-name is " + ln);
999: String uri = getNamespaceURI();
1000: if ((uri == null && namespaceURI != null) ||
1001: (uri != null && !uri.equals(namespaceURI)))
1002: throw new XMLStreamException("Current namespace URI is " + uri);
1003: }
1004: }
1005:
1006: public boolean standaloneSet()
1007: {
1008: return (xmlStandalone != null);
1009: }
1010:
1011: public boolean hasNext()
1012: throws XMLStreamException
1013: {
1014: if (event == XMLStreamConstants.END_DOCUMENT)
1015: return false;
1016: if (!lookahead)
1017: {
1018: next();
1019: lookahead = true;
1020: }
1021: return event != -1;
1022: }
1023:
/*
 * Parses the next event from the input and returns its type.
 *
 * The returned value is also stored in the "event" field. What is read
 * depends on the "state" field (INIT, PROLOG, CONTENT, EMPTY_ELEMENT or
 * MISC). An IOException from the input is rethrown as the cause of an
 * XMLStreamException. A NoSuchElementException is thrown when called again
 * in the MISC state after END_DOCUMENT has been reported.
 */
1024: public int next()
1025: throws XMLStreamException
1026: {
// hasNext() has already parsed this event; just hand it out.
1027: if (lookahead)
1028: {
1029: lookahead = false;
1030: return event;
1031: }
// Leaving an element: drop the first namespace scope and base URI entry.
1032: if (event == XMLStreamConstants.END_ELEMENT)
1033: {
1034:
1035: if (namespaceAware && !namespaces.isEmpty())
1036: namespaces.removeFirst();
1037:
1038: if (baseAware && !bases.isEmpty())
1039: bases.removeFirst();
1040: }
// Pending entity boundaries come before any further parsing. The entity
// name is placed in buf; without extendedEventTypes the event is not
// surfaced and the following event is returned instead.
1041: if (!startEntityStack.isEmpty())
1042: {
1043: String entityName = (String) startEntityStack.removeFirst();
1044: buf.setLength(0);
1045: buf.append(entityName);
1046: event = START_ENTITY;
1047: return extendedEventTypes ? event : next();
1048: }
1049: else if (!endEntityStack.isEmpty())
1050: {
1051: String entityName = (String) endEntityStack.removeFirst();
1052: buf.setLength(0);
1053: buf.append(entityName);
1054: event = END_ENTITY;
1055: return extendedEventTypes ? event : next();
1056: }
1057: try
1058: {
1059: if (!input.initialized)
1060: input.init();
1061: switch (state)
1062: {
// Inside the root element. The delimiters are tested in this order so
// that "</", "<!--", "<?" and "<![CDATA[" are tried before the plain "<".
1063: case CONTENT:
1064: if (tryRead(TEST_END_ELEMENT))
1065: {
1066: readEndElement();
// Root element closed: only comments/PIs may follow.
1067: if (stack.isEmpty())
1068: state = MISC;
1069: event = XMLStreamConstants.END_ELEMENT;
1070: }
1071: else if (tryRead(TEST_COMMENT))
1072: {
1073: readComment(false);
1074: event = XMLStreamConstants.COMMENT;
1075: }
1076: else if (tryRead(TEST_PI))
1077: {
1078: readPI(false);
1079: event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1080: }
1081: else if (tryRead(TEST_CDATA))
1082: {
1083: readCDSect();
1084: event = XMLStreamConstants.CDATA;
1085: }
1086: else if (tryRead(TEST_START_ELEMENT))
1087: {
// readStartElement() returns the state to continue in.
1088: state = readStartElement();
1089: event = XMLStreamConstants.START_ELEMENT;
1090: }
1091: else
1092: {
1093:
// Not markup: peek at the next characters to tell a reference from text.
1094: mark(8);
1095: int c = readCh();
// 0x26 is '&'.
1096: if (c == 0x26)
1097: {
1098: c = readCh();
// 0x23 is '#': a character reference, handled by readCharData from the
// marked position.
1099: if (c == 0x23)
1100: {
1101: reset();
1102: event = readCharData(null);
1103: }
1104: else
1105: {
1106:
// Entity reference: go back, consume the '&' again and read the
// reference; its name is then taken from buf.
1107: reset();
1108: readCh();
1109: readReference();
1110: String ref = buf.toString();
1111: String text = (String) PREDEFINED_ENTITIES.get(ref);
1112: if (text != null)
1113: {
// Predefined entity: continue as character data starting with its
// replacement text.
1114: event = readCharData(text);
1115: }
1116: else if (replaceERefs && !isUnparsedEntity(ref))
1117: {
1118:
// Expand the entity and report whatever event comes next.
1119: boolean external = false;
1120: if (doctype != null)
1121: {
1122: Object entity = doctype.getEntity(ref);
1123: if (entity instanceof ExternalIds)
1124: external = true;
1125: }
1126: expandEntity(ref, false, external);
1127: event = next();
1128: }
1129: else
1130: {
// Not expanded: report the reference itself (buf holds its name).
1131: event = XMLStreamConstants.ENTITY_REFERENCE;
1132: }
1133: }
1134: }
1135: else
1136: {
// Plain character data.
1137: reset();
1138: event = readCharData(null);
1139: if (validating && doctype != null)
1140: validatePCData(buf.toString());
1141: }
1142: }
1143: break;
// Synthesize the END_ELEMENT of an empty element: its name is taken from
// the element stack and placed in buf; no input is read.
1144: case EMPTY_ELEMENT:
1145: String elementName = (String) stack.removeLast();
1146: buf.setLength(0);
1147: buf.append(elementName);
1148: state = stack.isEmpty() ? MISC : CONTENT;
1149: event = XMLStreamConstants.END_ELEMENT;
1150: if (validating && doctype != null)
1151: endElementValidationHook();
1152: break;
// Start of document: optional XML declaration, then fix the encoding.
1153: case INIT:
1154: if (tryRead(TEST_XML_DECL))
1155: readXMLDecl();
1156: input.finalizeEncoding();
1157: event = XMLStreamConstants.START_DOCUMENT;
1158: state = PROLOG;
1159: break;
// Before the root element. A doctype declaration is only accepted while
// no doctype has been read yet.
1160: case PROLOG:
1161: skipWhitespace();
1162: if (doctype == null && tryRead(TEST_DOCTYPE_DECL))
1163: {
1164: readDoctypeDecl();
1165: event = XMLStreamConstants.DTD;
1166: }
1167: else if (tryRead(TEST_COMMENT))
1168: {
1169: readComment(false);
1170: event = XMLStreamConstants.COMMENT;
1171: }
1172: else if (tryRead(TEST_PI))
1173: {
1174: readPI(false);
1175: event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1176: }
1177: else if (tryRead(TEST_START_ELEMENT))
1178: {
1179: state = readStartElement();
1180: event = XMLStreamConstants.START_ELEMENT;
1181: }
1182: else
1183: {
1184: int c = readCh();
1185: error("no root element: U+" + Integer.toHexString(c));
1186: }
1187: break;
// After the root element.
1188: case MISC:
1189: skipWhitespace();
1190: if (tryRead(TEST_COMMENT))
1191: {
1192: readComment(false);
1193: event = XMLStreamConstants.COMMENT;
1194: }
1195: else if (tryRead(TEST_PI))
1196: {
1197: readPI(false);
1198: event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1199: }
1200: else
1201: {
// END_DOCUMENT was already reported by an earlier call.
1202: if (event == XMLStreamConstants.END_DOCUMENT)
1203: throw new NoSuchElementException();
// Anything but end of input (-1) is an error here.
1204: int c = readCh();
1205: if (c != -1)
1206: error("Only comments and PIs may appear after " +
1207: "the root element");
1208: event = XMLStreamConstants.END_DOCUMENT;
1209: }
1210: break;
// Unknown state: report -1, which makes hasNext() return false.
1211: default:
1212: event = -1;
1213: }
1214: return event;
1215: }
1216: catch (IOException e)
1217: {
// Surface I/O failures as stream exceptions, keeping the cause.
1218: XMLStreamException e2 = new XMLStreamException();
1219: e2.initCause(e);
1220: throw e2;
1221: }
1222: }
1223:
1224:
1225:
1226:
1229: String getCurrentElement()
1230: {
1231: return (String) stack.getLast();
1232: }
1233:
1234:
1235:
1236: private void mark(int limit)
1237: throws IOException
1238: {
1239: input.mark(limit);
1240: }
1241:
1242: private void reset()
1243: throws IOException
1244: {
1245: input.reset();
1246: }
1247:
1248: private int read()
1249: throws IOException
1250: {
1251: return input.read();
1252: }
1253:
1254: private int read(int[] b, int off, int len)
1255: throws IOException
1256: {
1257: return input.read(b, off, len);
1258: }
1259:
1260:
1263: private int readCh()
1264: throws IOException, XMLStreamException
1265: {
1266: int c = read();
1267: if (expandPE && c == 0x25)
1268: {
1269: if (peIsError)
1270: error("PE reference within decl in internal subset.");
1271: expandPEReference();
1272: return readCh();
1273: }
1274: return c;
1275: }
1276:
1277:
1283: private void require(char delim)
1284: throws IOException, XMLStreamException
1285: {
1286: mark(1);
1287: int c = readCh();
1288: if (delim != c)
1289: {
1290: reset();
1291: error("required character (got U+" + Integer.toHexString(c) + ")",
1292: new Character(delim));
1293: }
1294: }
1295:
1296:
1302: private void require(String delim)
1303: throws IOException, XMLStreamException
1304: {
1305: char[] chars = delim.toCharArray();
1306: int len = chars.length;
1307: mark(len);
1308: int off = 0;
1309: do
1310: {
1311: int l2 = read(tmpBuf, off, len - off);
1312: if (l2 == -1)
1313: {
1314: reset();
1315: error("EOF before required string", delim);
1316: }
1317: off += l2;
1318: }
1319: while (off < len);
1320: for (int i = 0; i < chars.length; i++)
1321: {
1322: if (chars[i] != tmpBuf[i])
1323: {
1324: reset();
1325: error("required string", delim);
1326: }
1327: }
1328: }
1329:
1330:
1335: private boolean tryRead(char delim)
1336: throws IOException, XMLStreamException
1337: {
1338: mark(1);
1339: int c = readCh();
1340: if (delim != c)
1341: {
1342: reset();
1343: return false;
1344: }
1345: return true;
1346: }
1347:
1348:
1355: private boolean tryRead(String test)
1356: throws IOException
1357: {
1358: char[] chars = test.toCharArray();
1359: int len = chars.length;
1360: mark(len);
1361: int count = 0;
1362: int l2 = read(tmpBuf, 0, len);
1363: if (l2 == -1)
1364: {
1365: reset();
1366: return false;
1367: }
1368: count += l2;
1369:
1370: for (int i = 0; i < count; i++)
1371: {
1372: if (chars[i] != tmpBuf[i])
1373: {
1374: reset();
1375: return false;
1376: }
1377: }
1378: while (count < len)
1379: {
1380:
1381: int c = read();
1382: if (c == -1)
1383: {
1384: reset();
1385: return false;
1386: }
1387: tmpBuf[count] = (char) c;
1388:
1389: if (chars[count] != tmpBuf[count])
1390: {
1391: reset();
1392: return false;
1393: }
1394: count++;
1395: }
1396: return true;
1397: }
1398:
1399:
1403: private void readUntil(String delim)
1404: throws IOException, XMLStreamException
1405: {
1406: int startLine = input.line;
1407: try
1408: {
1409: while (!tryRead(delim))
1410: {
1411: int c = readCh();
1412: if (c == -1)
1413: throw new EOFException();
1414: else if (input.xml11)
1415: {
1416: if (!isXML11Char(c) || isXML11RestrictedChar(c))
1417: error("illegal XML 1.1 character",
1418: "U+" + Integer.toHexString(c));
1419: }
1420: else if (!isChar(c))
1421: error("illegal XML character",
1422: "U+" + Integer.toHexString(c));
1423: buf.append(Character.toChars(c));
1424: }
1425: }
1426: catch (EOFException e)
1427: {
1428: error("end of input while looking for delimiter "+
1429: "(started on line " + startLine + ')', delim);
1430: }
1431: }
1432:
1433:
1437: private boolean tryWhitespace()
1438: throws IOException, XMLStreamException
1439: {
1440: boolean white;
1441: boolean ret = false;
1442: do
1443: {
1444: mark(1);
1445: int c = readCh();
1446: while (c == -1 && inputStack.size() > 1)
1447: {
1448: popInput();
1449: c = readCh();
1450: }
1451: white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1452: if (white)
1453: ret = true;
1454: }
1455: while (white);
1456: reset();
1457: return ret;
1458: }
1459:
1460:
1463: private void skipWhitespace()
1464: throws IOException, XMLStreamException
1465: {
1466: boolean white;
1467: do
1468: {
1469: mark(1);
1470: int c = readCh();
1471: while (c == -1 && inputStack.size() > 1)
1472: {
1473: popInput();
1474: c = readCh();
1475: }
1476: white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1477: }
1478: while (white);
1479: reset();
1480: }
1481:
1482:
1486: private void requireWhitespace()
1487: throws IOException, XMLStreamException
1488: {
1489: if (!tryWhitespace())
1490: error("whitespace required");
1491: }
1492:
1493:
1496: String getXMLBase()
1497: {
1498: if (baseAware)
1499: {
1500: for (Iterator i = bases.iterator(); i.hasNext(); )
1501: {
1502: String base = (String) i.next();
1503: if (base != null)
1504: return base;
1505: }
1506: }
1507: return input.systemId;
1508: }
1509:
1510:
1513: private void pushInput(String name, String text, boolean report,
1514: boolean normalize)
1515: throws IOException, XMLStreamException
1516: {
1517:
1518: if (name != null && !"".equals(name))
1519: {
1520: for (Iterator i = inputStack.iterator(); i.hasNext(); )
1521: {
1522: Input ctx = (Input) i.next();
1523: if (name.equals(ctx.name))
1524: error("entities may not be self-recursive", name);
1525: }
1526: }
1527: else
1528: report = false;
1529: pushInput(new Input(null, new StringReader(text), input.publicId,
1530: input.systemId, name, input.inputEncoding, report,
1531: normalize));
1532: }
1533:
1534:
1537: private void pushInput(String name, ExternalIds ids, boolean report,
1538: boolean normalize)
1539: throws IOException, XMLStreamException
1540: {
1541: if (!externalEntities)
1542: return;
1543: String url = absolutize(input.systemId, ids.systemId);
1544:
1545: for (Iterator i = inputStack.iterator(); i.hasNext(); )
1546: {
1547: Input ctx = (Input) i.next();
1548: if (url.equals(ctx.systemId))
1549: error("entities may not be self-recursive", url);
1550: if (name != null && !"".equals(name) && name.equals(ctx.name))
1551: error("entities may not be self-recursive", name);
1552: }
1553: if (name == null || "".equals(name))
1554: report = false;
1555: InputStream in = null;
1556: if (resolver != null)
1557: {
1558: Object obj = resolver.resolveEntity(ids.publicId, url, getXMLBase(),
1559: null);
1560: if (obj instanceof InputStream)
1561: in = (InputStream) obj;
1562: }
1563: if (in == null)
1564: in = resolve(url);
1565: if (in == null)
1566: error("unable to resolve external entity",
1567: (ids.systemId != null) ? ids.systemId : ids.publicId);
1568: pushInput(new Input(in, null, ids.publicId, url, name, null, report,
1569: normalize));
1570: input.init();
1571: if (tryRead(TEST_XML_DECL))
1572: readTextDecl();
1573: input.finalizeEncoding();
1574: }
1575:
1576:
1579: private void pushInput(Input input)
1580: {
1581: if (input.report)
1582: startEntityStack.addFirst(input.name);
1583: inputStack.addLast(input);
1584: if (this.input != null)
1585: input.xml11 = this.input.xml11;
1586: this.input = input;
1587: }
1588:
1589:
1594: public static String absolutize(String base, String href)
1595: throws MalformedURLException
1596: {
1597: if (href == null)
1598: return null;
1599: int ci = href.indexOf(':');
1600: if (ci > 1 && isURLScheme(href.substring(0, ci)))
1601: {
1602:
1603: return href;
1604: }
1605: if (base == null)
1606: base = "";
1607: else
1608: {
1609: int i = base.lastIndexOf('/');
1610: if (i != -1)
1611: base = base.substring(0, i + 1);
1612: else
1613: base = "";
1614: }
1615: if ("".equals(base))
1616: {
1617:
1618: base = System.getProperty("user.dir");
1619: if (base.charAt(0) == '/')
1620: base = base.substring(1);
1621: base = "file:///" + base.replace(File.separatorChar, '/');
1622: if (!base.endsWith("/"))
1623: base += "/";
1624: }
1625: return new URL(new URL(base), href).toString();
1626: }
1627:
1628:
1633: private static boolean isURLScheme(String text)
1634: {
1635: int len = text.length();
1636: for (int i = 0; i < len; i++)
1637: {
1638: char c = text.charAt(i);
1639: if (c == '+' || c == '.' || c == '-')
1640: continue;
1641: if (c < 65 || (c > 90 && c < 97) || c > 122)
1642: return false;
1643: }
1644: return true;
1645: }
1646:
1647:
1650: static InputStream resolve(String url)
1651: throws IOException
1652: {
1653: try
1654: {
1655: return new URL(url).openStream();
1656: }
1657: catch (MalformedURLException e)
1658: {
1659: return null;
1660: }
1661: catch (IOException e)
1662: {
1663: IOException e2 = new IOException("error resolving " + url);
1664: e2.initCause(e);
1665: throw e2;
1666: }
1667: }
1668:
1669:
1672: private void popInput()
1673: {
1674: Input old = (Input) inputStack.removeLast();
1675: if (old.report)
1676: endEntityStack.addFirst(old.name);
1677: input = (Input) inputStack.getLast();
1678: }
1679:
1680:
1683: private void readTextDecl()
1684: throws IOException, XMLStreamException
1685: {
1686: final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1687: requireWhitespace();
1688: if (tryRead("version"))
1689: {
1690: readEq();
1691: String v = readLiteral(flags, false);
1692: if ("1.0".equals(v))
1693: input.xml11 = false;
1694: else if ("1.1".equals(v))
1695: {
1696: Input i1 = (Input) inputStack.getFirst();
1697: if (!i1.xml11)
1698: error("external entity specifies later version number");
1699: input.xml11 = true;
1700: }
1701: else
1702: throw new XMLStreamException("illegal XML version: " + v);
1703: requireWhitespace();
1704: }
1705: require("encoding");
1706: readEq();
1707: String enc = readLiteral(flags, false);
1708: skipWhitespace();
1709: require("?>");
1710: input.setInputEncoding(enc);
1711: }
1712:
1713:
1716: private void readXMLDecl()
1717: throws IOException, XMLStreamException
1718: {
1719: final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1720:
1721: requireWhitespace();
1722: require("version");
1723: readEq();
1724: xmlVersion = readLiteral(flags, false);
1725: if ("1.0".equals(xmlVersion))
1726: input.xml11 = false;
1727: else if ("1.1".equals(xmlVersion))
1728: input.xml11 = true;
1729: else
1730: throw new XMLStreamException("illegal XML version: " + xmlVersion);
1731:
1732: boolean white = tryWhitespace();
1733:
1734: if (tryRead("encoding"))
1735: {
1736: if (!white)
1737: error("whitespace required before 'encoding='");
1738: readEq();
1739: xmlEncoding = readLiteral(flags, false);
1740: white = tryWhitespace();
1741: }
1742:
1743: if (tryRead("standalone"))
1744: {
1745: if (!white)
1746: error("whitespace required before 'standalone='");
1747: readEq();
1748: String standalone = readLiteral(flags, false);
1749: if ("yes".equals(standalone))
1750: xmlStandalone = Boolean.TRUE;
1751: else if ("no".equals(standalone))
1752: xmlStandalone = Boolean.FALSE;
1753: else
1754: error("standalone flag must be 'yes' or 'no'", standalone);
1755: }
1756:
1757: skipWhitespace();
1758: require("?>");
1759: if (xmlEncoding != null)
1760: input.setInputEncoding(xmlEncoding);
1761: }
1762:
1763:
/**
 * Parses a document type declaration: the root element name, the optional
 * external identifiers, the optional internal subset between '[' and ']',
 * and the closing '>'. If a system ID was given and external entities are
 * enabled, the external subset is then read as well. On return, buf holds
 * the root element name.
 */
private void readDoctypeDecl()
  throws IOException, XMLStreamException
{
  if (!supportDTD)
    error("parser was configured not to support DTDs");
  requireWhitespace();
  String rootName = readNmtoken(true);
  skipWhitespace();
  ExternalIds ids = readExternalIds(false, true);
  doctype =
    this.new Doctype(rootName, ids.publicId, ids.systemId);

  // Parse the internal subset, if any
  skipWhitespace();
  if (tryRead('['))
    {
      while (true)
        {
          // expandPE is enabled only while skipping whitespace between
          // declarations.
          expandPE = true;
          skipWhitespace();
          expandPE = false;
          if (tryRead(']'))
            break;
          else
            readMarkupdecl(false);
        }
    }
  skipWhitespace();
  require('>');

  // Parse the external subset, if any
  if (ids.systemId != null && externalEntities)
    {
      // A one-character ">" input is pushed beneath the DTD input; reading
      // that '>' is what ends the loop below.
      pushInput("", ">", false, false);
      pushInput("[dtd]", ids, true, true);
      // loop until we get back to ">"
      while (true)
        {
          expandPE = true;
          skipWhitespace();
          expandPE = false;
          mark(1);
          int c = readCh();
          if (c == 0x3e) // '>'
            break;
          else if (c == -1)
            // current input is exhausted: drop back to the one beneath it
            popInput();
          else
            {
              reset();
              expandPE = true;
              readMarkupdecl(true);
              expandPE = true;
            }
        }
      // Only the input that was current on entry and the ">" input
      // should remain.
      if (inputStack.size() != 2)
        error("external subset has unmatched '>'");
      popInput();
    }
  checkDoctype();
  if (validating)
    validateDoctype();

  // Leave the root element name in buf
  buf.setLength(0);
  buf.append(rootName);
}
1833:
1834:
/**
 * Hook called by readDoctypeDecl after the document type declaration
 * (including any external subset) has been read. The body is empty, so no
 * checks are performed here.
 */
private void checkDoctype()
  throws XMLStreamException
{
  // No checks implemented.
}
1842:
1843:
/**
 * Parses a single markup declaration: an element, attribute-list, entity
 * or notation declaration, a processing instruction, a comment, or a
 * conditional section ("<![" ... "]]>").
 *
 * The value of expandPE on entry is saved. It is switched off while the
 * declaration keyword is being matched and restored before (or, for PIs
 * and comments, after) the specific reader runs.
 *
 * @param inExternalSubset passed on to the entity and notation declaration
 * readers and to nested declarations inside INCLUDE sections
 */
private void readMarkupdecl(boolean inExternalSubset)
  throws IOException, XMLStreamException
{
  boolean saved = expandPE;
  // Verify that the next character is '<' without consuming it.
  mark(1);
  require('<');
  reset();
  expandPE = false;
  if (tryRead(TEST_ELEMENT_DECL))
    {
      expandPE = saved;
      readElementDecl();
    }
  else if (tryRead(TEST_ATTLIST_DECL))
    {
      expandPE = saved;
      readAttlistDecl();
    }
  else if (tryRead(TEST_ENTITY_DECL))
    {
      expandPE = saved;
      readEntityDecl(inExternalSubset);
    }
  else if (tryRead(TEST_NOTATION_DECL))
    {
      expandPE = saved;
      readNotationDecl(inExternalSubset);
    }
  else if (tryRead(TEST_PI))
    {
      // PIs and comments are read with expandPE still off.
      readPI(true);
      expandPE = saved;
    }
  else if (tryRead(TEST_COMMENT))
    {
      readComment(true);
      expandPE = saved;
    }
  else if (tryRead("<!["))
    {
      // conditional section
      expandPE = saved;
      if (inputStack.size() < 2)
        error("conditional sections illegal in internal subset");
      skipWhitespace();
      if (tryRead("INCLUDE"))
        {
          skipWhitespace();
          require('[');
          skipWhitespace();
          // Contents of an INCLUDE section are ordinary declarations.
          while (!tryRead("]]>"))
            {
              readMarkupdecl(inExternalSubset);
              skipWhitespace();
            }
        }
      else if (tryRead("IGNORE"))
        {
          skipWhitespace();
          require('[');
          expandPE = false;
          // Skip to the matching "]]>", counting nested "<![" openers.
          for (int nesting = 1; nesting > 0; )
            {
              int c = readCh();
              switch (c)
                {
                case 0x3c: // '<'
                  if (tryRead("!["))
                    nesting++;
                  break;
                case 0x5d: // ']'
                  if (tryRead("]>"))
                    nesting--;
                  break;
                case -1:
                  throw new EOFException();
                }
            }
          expandPE = saved;
        }
      else
        error("conditional section must begin with INCLUDE or IGNORE");
    }
  else
    error("expected markup declaration");
}
1932:
1933:
1936: private void readElementDecl()
1937: throws IOException, XMLStreamException
1938: {
1939: requireWhitespace();
1940: boolean saved = expandPE;
1941: expandPE = (inputStack.size() > 1);
1942: String name = readNmtoken(true);
1943: expandPE = saved;
1944: requireWhitespace();
1945: readContentspec(name);
1946: skipWhitespace();
1947: require('>');
1948: }
1949:
1950:
1953: private void readContentspec(String elementName)
1954: throws IOException, XMLStreamException
1955: {
1956: if (tryRead("EMPTY"))
1957: doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel());
1958: else if (tryRead("ANY"))
1959: doctype.addElementDecl(elementName, "ANY", new AnyContentModel());
1960: else
1961: {
1962: ContentModel model;
1963: StringBuffer acc = new StringBuffer();
1964: require('(');
1965: acc.append('(');
1966: skipWhitespace();
1967: if (tryRead("#PCDATA"))
1968: {
1969:
1970: acc.append("#PCDATA");
1971: MixedContentModel mm = new MixedContentModel();
1972: model = mm;
1973: skipWhitespace();
1974: if (tryRead(')'))
1975: {
1976: acc.append(")");
1977: if (tryRead('*'))
1978: {
1979: mm.min = 0;
1980: mm.max = -1;
1981: }
1982: }
1983: else
1984: {
1985: while (!tryRead(")"))
1986: {
1987: require('|');
1988: acc.append('|');
1989: skipWhitespace();
1990: String name = readNmtoken(true);
1991: acc.append(name);
1992: mm.addName(name);
1993: skipWhitespace();
1994: }
1995: require('*');
1996: acc.append(")*");
1997: mm.min = 0;
1998: mm.max = -1;
1999: }
2000: }
2001: else
2002: model = readElements(acc);
2003: doctype.addElementDecl(elementName, acc.toString(), model);
2004: }
2005: }
2006:
2007:
2010: private ElementContentModel readElements(StringBuffer acc)
2011: throws IOException, XMLStreamException
2012: {
2013: int separator;
2014: ElementContentModel model = new ElementContentModel();
2015:
2016:
2017: skipWhitespace();
2018: model.addContentParticle(readContentParticle(acc));
2019:
2020: skipWhitespace();
2021: int c = readCh();
2022: switch (c)
2023: {
2024: case 0x29:
2025: acc.append(')');
2026: mark(1);
2027: c = readCh();
2028: switch (c)
2029: {
2030: case 0x3f:
2031: acc.append('?');
2032: model.min = 0;
2033: model.max = 1;
2034: break;
2035: case 0x2a:
2036: acc.append('*');
2037: model.min = 0;
2038: model.max = -1;
2039: break;
2040: case 0x2b:
2041: acc.append('+');
2042: model.min = 1;
2043: model.max = -1;
2044: break;
2045: default:
2046: reset();
2047: }
2048: return model;
2049: case 0x7c:
2050: model.or = true;
2051:
2052: case 0x2c:
2053: separator = c;
2054: acc.append(Character.toChars(c));
2055: break;
2056: default:
2057: error("bad separator in content model",
2058: "U+" + Integer.toHexString(c));
2059: return model;
2060: }
2061:
2062: while (true)
2063: {
2064: skipWhitespace();
2065: model.addContentParticle(readContentParticle(acc));
2066: skipWhitespace();
2067: c = readCh();
2068: if (c == 0x29)
2069: {
2070: acc.append(')');
2071: break;
2072: }
2073: else if (c != separator)
2074: {
2075: error("bad separator in content model",
2076: "U+" + Integer.toHexString(c));
2077: return model;
2078: }
2079: else
2080: acc.append(c);
2081: }
2082:
2083: mark(1);
2084: c = readCh();
2085: switch (c)
2086: {
2087: case 0x3f:
2088: acc.append('?');
2089: model.min = 0;
2090: model.max = 1;
2091: break;
2092: case 0x2a:
2093: acc.append('*');
2094: model.min = 0;
2095: model.max = -1;
2096: break;
2097: case 0x2b:
2098: acc.append('+');
2099: model.min = 1;
2100: model.max = -1;
2101: break;
2102: default:
2103: reset();
2104: }
2105: return model;
2106: }
2107:
2108:
2111: private ContentParticle readContentParticle(StringBuffer acc)
2112: throws IOException, XMLStreamException
2113: {
2114: ContentParticle cp = new ContentParticle();
2115: if (tryRead('('))
2116: {
2117: acc.append('(');
2118: cp.content = readElements(acc);
2119: }
2120: else
2121: {
2122: String name = readNmtoken(true);
2123: acc.append(name);
2124: cp.content = name;
2125: mark(1);
2126: int c = readCh();
2127: switch (c)
2128: {
2129: case 0x3f:
2130: acc.append('?');
2131: cp.min = 0;
2132: cp.max = 1;
2133: break;
2134: case 0x2a:
2135: acc.append('*');
2136: cp.min = 0;
2137: cp.max = -1;
2138: break;
2139: case 0x2b:
2140: acc.append('+');
2141: cp.min = 1;
2142: cp.max = -1;
2143: break;
2144: default:
2145: reset();
2146: }
2147: }
2148: return cp;
2149: }
2150:
2151:
2154: private void readAttlistDecl()
2155: throws IOException, XMLStreamException
2156: {
2157: requireWhitespace();
2158: boolean saved = expandPE;
2159: expandPE = (inputStack.size() > 1);
2160: String elementName = readNmtoken(true);
2161: expandPE = saved;
2162: boolean white = tryWhitespace();
2163: while (!tryRead('>'))
2164: {
2165: if (!white)
2166: error("whitespace required before attribute definition");
2167: readAttDef(elementName);
2168: white = tryWhitespace();
2169: }
2170: }
2171:
2172:
2175: private void readAttDef(String elementName)
2176: throws IOException, XMLStreamException
2177: {
2178: String name = readNmtoken(true);
2179: requireWhitespace();
2180: StringBuffer acc = new StringBuffer();
2181: HashSet values = new HashSet();
2182: String type = readAttType(acc, values);
2183: if (validating)
2184: {
2185: if ("ID".equals(type))
2186: {
2187:
2188: for (Iterator i = doctype.attlistIterator(elementName);
2189: i.hasNext(); )
2190: {
2191: Map.Entry entry = (Map.Entry) i.next();
2192: AttributeDecl decl = (AttributeDecl) entry.getValue();
2193: if ("ID".equals(decl.type))
2194: error("element types must not have more than one ID " +
2195: "attribute");
2196: }
2197: }
2198: else if ("NOTATION".equals(type))
2199: {
2200:
2201: for (Iterator i = doctype.attlistIterator(elementName);
2202: i.hasNext(); )
2203: {
2204: Map.Entry entry = (Map.Entry) i.next();
2205: AttributeDecl decl = (AttributeDecl) entry.getValue();
2206: if ("NOTATION".equals(decl.type))
2207: error("element types must not have more than one NOTATION " +
2208: "attribute");
2209: }
2210:
2211: ContentModel model = doctype.getElementModel(elementName);
2212: if (model != null && model.type == ContentModel.EMPTY)
2213: error("attributes of type NOTATION must not be declared on an " +
2214: "element declared EMPTY");
2215: }
2216: }
2217: String enumer = null;
2218: if ("ENUMERATION".equals(type) || "NOTATION".equals(type))
2219: enumer = acc.toString();
2220: else
2221: values = null;
2222: requireWhitespace();
2223: readDefault(elementName, name, type, enumer, values);
2224: }
2225:
2226:
2229: private String readAttType(StringBuffer acc, HashSet values)
2230: throws IOException, XMLStreamException
2231: {
2232: if (tryRead('('))
2233: {
2234: readEnumeration(false, acc, values);
2235: return "ENUMERATION";
2236: }
2237: else
2238: {
2239: String typeString = readNmtoken(true);
2240: if ("NOTATION".equals(typeString))
2241: {
2242: readNotationType(acc, values);
2243: return typeString;
2244: }
2245: else if ("CDATA".equals(typeString) ||
2246: "ID".equals(typeString) ||
2247: "IDREF".equals(typeString) ||
2248: "IDREFS".equals(typeString) ||
2249: "ENTITY".equals(typeString) ||
2250: "ENTITIES".equals(typeString) ||
2251: "NMTOKEN".equals(typeString) ||
2252: "NMTOKENS".equals(typeString))
2253: return typeString;
2254: else
2255: {
2256: error("illegal attribute type", typeString);
2257: return null;
2258: }
2259: }
2260: }
2261:
2262:
2265: private void readEnumeration(boolean isNames, StringBuffer acc,
2266: HashSet values)
2267: throws IOException, XMLStreamException
2268: {
2269: acc.append('(');
2270:
2271: skipWhitespace();
2272: String token = readNmtoken(isNames);
2273: acc.append(token);
2274: values.add(token);
2275:
2276: skipWhitespace();
2277: while (!tryRead(')'))
2278: {
2279: require('|');
2280: acc.append('|');
2281: skipWhitespace();
2282: token = readNmtoken(isNames);
2283:
2284: if (validating && values.contains(token))
2285: error("duplicate token", token);
2286: acc.append(token);
2287: values.add(token);
2288: skipWhitespace();
2289: }
2290: acc.append(')');
2291: }
2292:
2293:
2296: private void readNotationType(StringBuffer acc, HashSet values)
2297: throws IOException, XMLStreamException
2298: {
2299: requireWhitespace();
2300: require('(');
2301: readEnumeration(true, acc, values);
2302: }
2303:
2304:
2307: private void readDefault(String elementName, String name,
2308: String type, String enumeration, HashSet values)
2309: throws IOException, XMLStreamException
2310: {
2311: int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
2312: int flags = LIT_ATTRIBUTE;
2313: String value = null, defaultType = null;
2314: boolean saved = expandPE;
2315:
2316: if (!"CDATA".equals(type))
2317: flags |= LIT_NORMALIZE;
2318:
2319: expandPE = false;
2320: if (tryRead('#'))
2321: {
2322: if (tryRead("FIXED"))
2323: {
2324: defaultType = "#FIXED";
2325: valueType = ATTRIBUTE_DEFAULT_FIXED;
2326: requireWhitespace();
2327: value = readLiteral(flags, false);
2328: }
2329: else if (tryRead("REQUIRED"))
2330: {
2331: defaultType = "#REQUIRED";
2332: valueType = ATTRIBUTE_DEFAULT_REQUIRED;
2333: }
2334: else if (tryRead("IMPLIED"))
2335: {
2336: defaultType = "#IMPLIED";
2337: valueType = ATTRIBUTE_DEFAULT_IMPLIED;
2338: }
2339: else
2340: error("illegal keyword for attribute default value");
2341: }
2342: else
2343: value = readLiteral(flags, false);
2344: expandPE = saved;
2345: if (validating)
2346: {
2347: if ("ID".equals(type))
2348: {
2349:
2350: if (value != null && !isNmtoken(value, true))
2351: error("default value must match Name production", value);
2352:
2353: if (valueType != ATTRIBUTE_DEFAULT_REQUIRED &&
2354: valueType != ATTRIBUTE_DEFAULT_IMPLIED)
2355: error("ID attributes must have a declared default of " +
2356: "#IMPLIED or #REQUIRED");
2357: }
2358: else if (value != null)
2359: {
2360:
2361: if ("IDREF".equals(type) || "ENTITY".equals(type))
2362: {
2363: if (!isNmtoken(value, true))
2364: error("default value must match Name production", value);
2365: }
2366: else if ("IDREFS".equals(type) || "ENTITIES".equals(type))
2367: {
2368: StringTokenizer st = new StringTokenizer(value);
2369: while (st.hasMoreTokens())
2370: {
2371: String token = st.nextToken();
2372: if (!isNmtoken(token, true))
2373: error("default value must match Name production", token);
2374: }
2375: }
2376: else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type))
2377: {
2378: if (!isNmtoken(value, false))
2379: error("default value must match Nmtoken production", value);
2380: }
2381: else if ("NMTOKENS".equals(type))
2382: {
2383: StringTokenizer st = new StringTokenizer(value);
2384: while (st.hasMoreTokens())
2385: {
2386: String token = st.nextToken();
2387: if (!isNmtoken(token, false))
2388: error("default value must match Nmtoken production",
2389: token);
2390: }
2391: }
2392: }
2393: }
2394:
2395: AttributeDecl attribute =
2396: new AttributeDecl(type, value, valueType, enumeration, values,
2397: inputStack.size() != 1);
2398: doctype.addAttributeDecl(elementName, name, attribute);
2399: }
2400:
2401:
/**
 * Parses the remainder of an entity declaration, up to and including the
 * closing '>', and registers the entity with the doctype. Parameter
 * entities are registered under a name prefixed with '%'.
 *
 * An entity whose definition starts with a quote is internal: its literal
 * value is read and every '&'...';' reference in it is checked for
 * well-formedness. Otherwise external IDs are read, optionally followed
 * (for general entities only) by an NDATA notation name.
 *
 * NOTE(review): expandPE is left set to true on return rather than being
 * restored to its value on entry -- confirm callers expect this.
 *
 * @param inExternalSubset passed on to doctype.addEntityDecl
 */
private void readEntityDecl(boolean inExternalSubset)
  throws IOException, XMLStreamException
{
  int flags = 0;
  // Check if parameter entity
  boolean peFlag = false;
  expandPE = false;
  requireWhitespace();
  if (tryRead('%'))
    {
      peFlag = true;
      requireWhitespace();
    }
  expandPE = true;
  // Read entity name
  String name = readNmtoken(true);
  if (name.indexOf(':') != -1)
    error("illegal character ':' in entity name", name);
  if (peFlag)
    name = "%" + name;
  requireWhitespace();
  // Peek at the next character without consuming it.
  mark(1);
  int c = readCh();
  reset();
  if (c == 0x22 || c == 0x27) // " | '
    {
      // Internal entity replacement text
      String value = readLiteral(flags | LIT_DISABLE_EREF, true);
      // Check every reference in the value for well-formedness.
      int ai = value.indexOf('&');
      while (ai != -1)
        {
          int sci = value.indexOf(';', ai);
          if (sci == -1)
            error("malformed reference in entity value", value);
          String ref = value.substring(ai + 1, sci);
          int[] cp = UnicodeReader.toCodePointArray(ref);
          if (cp.length == 0)
            error("malformed reference in entity value", value);
          if (cp[0] == 0x23) // '#': character reference
            {
              if (cp.length == 1)
                error("malformed reference in entity value", value);
              if (cp[1] == 0x78) // 'x': hexadecimal
                {
                  if (cp.length == 2)
                    error("malformed reference in entity value", value);
                  for (int i = 2; i < cp.length; i++)
                    {
                      int x = cp[i];
                      // must be one of 0-9, A-F, a-f
                      if (x < 0x30 ||
                          (x > 0x39 && x < 0x41) ||
                          (x > 0x46 && x < 0x61) ||
                          x > 0x66)
                        error("malformed character reference in entity value",
                              value);
                    }
                }
              else
                {
                  for (int i = 1; i < cp.length; i++)
                    {
                      int x = cp[i];
                      // must be a decimal digit 0-9
                      if (x < 0x30 || x > 0x39)
                        error("malformed character reference in entity value",
                              value);
                    }
                }
            }
          else
            {
              // Named reference: must look like a name.
              if (!isNameStartCharacter(cp[0], input.xml11))
                error("malformed reference in entity value", value);
              for (int i = 1; i < cp.length; i++)
                {
                  if (!isNameCharacter(cp[i], input.xml11))
                    error("malformed reference in entity value", value);
                }
            }
          // Continue with the next '&' after this reference's ';'.
          ai = value.indexOf('&', sci);
        }
      doctype.addEntityDecl(name, value, inExternalSubset);
    }
  else
    {
      ExternalIds ids = readExternalIds(false, false);
      // Check for NDATA
      boolean white = tryWhitespace();
      if (!peFlag && tryRead("NDATA"))
        {
          if (!white)
            error("whitespace required before NDATA");
          requireWhitespace();
          ids.notationName = readNmtoken(true);
        }
      doctype.addEntityDecl(name, ids, inExternalSubset);
    }
  // finish
  skipWhitespace();
  require('>');
}
2504:
2505:
2508: private void readNotationDecl(boolean inExternalSubset)
2509: throws IOException, XMLStreamException
2510: {
2511: requireWhitespace();
2512: String notationName = readNmtoken(true);
2513: if (notationName.indexOf(':') != -1)
2514: error("illegal character ':' in notation name", notationName);
2515: if (validating)
2516: {
2517:
2518: ExternalIds notation = doctype.getNotation(notationName);
2519: if (notation != null)
2520: error("duplicate notation name", notationName);
2521: }
2522: requireWhitespace();
2523: ExternalIds ids = readExternalIds(true, false);
2524: ids.notationName = notationName;
2525: doctype.addNotationDecl(notationName, ids, inExternalSubset);
2526: skipWhitespace();
2527: require('>');
2528: }
2529:
2530:
2533: private ExternalIds readExternalIds(boolean inNotation, boolean isSubset)
2534: throws IOException, XMLStreamException
2535: {
2536: int c;
2537: int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
2538: ExternalIds ids = new ExternalIds();
2539:
2540: if (tryRead("PUBLIC"))
2541: {
2542: requireWhitespace();
2543: ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false);
2544: if (inNotation)
2545: {
2546: skipWhitespace();
2547: mark(1);
2548: c = readCh();
2549: reset();
2550: if (c == 0x22 || c == 0x27)
2551: {
2552: String href = readLiteral(flags, false);
2553: ids.systemId = absolutize(input.systemId, href);
2554: }
2555: }
2556: else
2557: {
2558: requireWhitespace();
2559: String href = readLiteral(flags, false);
2560: ids.systemId = absolutize(input.systemId, href);
2561: }
2562:
2563: for (int i = 0; i < ids.publicId.length(); i++)
2564: {
2565: char d = ids.publicId.charAt(i);
2566: if (d >= 'a' && d <= 'z')
2567: continue;
2568: if (d >= 'A' && d <= 'Z')
2569: continue;
2570: if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1)
2571: continue;
2572: error("illegal PUBLIC id character",
2573: "U+" + Integer.toHexString(d));
2574: }
2575: }
2576: else if (tryRead("SYSTEM"))
2577: {
2578: requireWhitespace();
2579: String href = readLiteral(flags, false);
2580: ids.systemId = absolutize(input.systemId, href);
2581: }
2582: else if (!isSubset)
2583: {
2584: error("missing SYSTEM or PUBLIC keyword");
2585: }
2586: if (ids.systemId != null && !inNotation)
2587: {
2588: if (ids.systemId.indexOf('#') != -1)
2589: error("SYSTEM id has a URI fragment", ids.systemId);
2590: }
2591: return ids;
2592: }
2593:
2594:
/**
 * Parses a start-element or empty-element tag: the element name, its
 * attributes, and the closing ">" or "/>". Attributes declared with a
 * default value in the doctype but not present in the tag are added.
 * On return, buf holds the element name and the name has been pushed on
 * the element stack.
 *
 * @return CONTENT if the tag ended with '>', EMPTY_ELEMENT if it ended
 * with "/>"
 */
private int readStartElement()
  throws IOException, XMLStreamException
{
  // Read element name
  String elementName = readNmtoken(true);
  attrs.clear();
  // Push namespace context
  if (namespaceAware)
    {
      if (elementName.charAt(0) == ':' ||
          elementName.charAt(elementName.length() - 1) == ':')
        error("not a QName", elementName);
      namespaces.addFirst(new LinkedHashMap());
    }
  // Read element content
  boolean white = tryWhitespace();
  mark(1);
  int c = readCh();
  while (c != 0x2f && c != 0x3e) // not '/' | '>'
    {
      // Read attribute
      reset();
      if (!white)
        error("need whitespace between attributes");
      readAttribute(elementName);
      white = tryWhitespace();
      mark(1);
      c = readCh();
    }
  // supply defaulted attributes
  if (doctype != null)
    {
      for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); )
        {
          Map.Entry entry = (Map.Entry) i.next();
          String attName = (String) entry.getKey();
          AttributeDecl decl = (AttributeDecl) entry.getValue();
          if (validating)
            {
              switch (decl.valueType)
                {
                case ATTRIBUTE_DEFAULT_REQUIRED:
                  // Required attribute
                  if (decl.value == null && !attributeSpecified(attName))
                    error("value for " + attName + " attribute is required");
                  break;
                case ATTRIBUTE_DEFAULT_FIXED:
                  // Fixed attribute default: a specified value must equal
                  // the declared one
                  for (Iterator j = attrs.iterator(); j.hasNext(); )
                    {
                      Attribute a = (Attribute) j.next();
                      if (attName.equals(a.name) &&
                          !decl.value.equals(a.value))
                        error("value for " + attName + " attribute must be " +
                              decl.value);
                    }
                  break;
                }
            }
          // Skip declarations the tag already supplied. Namespace
          // declarations are looked up in the current namespace context
          // rather than in attrs.
          if (namespaceAware && attName.equals("xmlns"))
            {
              LinkedHashMap ctx =
                (LinkedHashMap) namespaces.getFirst();
              if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
                continue; // namespace was specified
            }
          else if (namespaceAware && attName.startsWith("xmlns:"))
            {
              LinkedHashMap ctx =
                (LinkedHashMap) namespaces.getFirst();
              if (ctx.containsKey(attName.substring(6)))
                continue; // namespace was specified
            }
          else if (attributeSpecified(attName))
            continue;
          if (decl.value == null)
            continue;
          // VC: Standalone Document Declaration
          if (validating && decl.external && xmlStandalone == Boolean.TRUE)
            error("standalone must be 'no' if attributes inherit values " +
                  "from externally declared markup declarations");
          // Third constructor argument is false here, whereas readAttribute
          // passes true for attributes written in the tag.
          Attribute attr =
            new Attribute(attName, decl.type, false, decl.value);
          if (namespaceAware)
            {
              // Namespace declarations go into the namespace context
              // instead of the attribute list.
              if (!addNamespace(attr))
                attrs.add(attr);
            }
          else
            attrs.add(attr);
        }
    }
  if (baseAware)
    {
      // Record the base URI in effect for this element.
      String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base");
      String base = getXMLBase();
      bases.addFirst(absolutize(base, uri));
    }
  if (namespaceAware)
    {
      // check prefix bindings
      int ci = elementName.indexOf(':');
      if (ci != -1)
        {
          String prefix = elementName.substring(0, ci);
          String uri = getNamespaceURI(prefix);
          if (uri == null)
            error("unbound element prefix", prefix);
          else if (input.xml11 && "".equals(uri))
            error("XML 1.1 unbound element prefix", prefix);
        }
      for (Iterator i = attrs.iterator(); i.hasNext(); )
        {
          Attribute attr = (Attribute) i.next();
          if (attr.prefix != null &&
              !XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix))
            {
              String uri = getNamespaceURI(attr.prefix);
              if (uri == null)
                error("unbound attribute prefix", attr.prefix);
              else if (input.xml11 && "".equals(uri))
                error("XML 1.1 unbound attribute prefix", attr.prefix);
            }
        }
    }
  if (validating && doctype != null)
    {
      validateStartElement(elementName);
      currentContentModel = doctype.getElementModel(elementName);
      if (currentContentModel == null)
        error("no element declaration", elementName);
      validationStack.add(new LinkedList());
    }
  // Leave the element name in buf
  buf.setLength(0);
  buf.append(elementName);
  // push element onto stack
  stack.addLast(elementName);
  switch (c)
    {
    case 0x3e: // '>'
      return CONTENT;
    case 0x2f: // '/'
      require('>');
      return EMPTY_ELEMENT;
    }
  // Not reached when the attribute loop above exits normally, since it
  // only ends on '/' or '>'.
  return -1;
}
2746:
2747:
2751: private boolean attributeSpecified(String attName)
2752: {
2753: for (Iterator j = attrs.iterator(); j.hasNext(); )
2754: {
2755: Attribute a = (Attribute) j.next();
2756: if (attName.equals(a.name))
2757: return true;
2758: }
2759: return false;
2760: }
2761:
2762:
/**
 * Parses one attribute (name, '=', quoted value) of a start tag and adds
 * it either to the attribute list or, for namespace declarations when
 * namespace-aware, to the current namespace context.
 *
 * Duplicate attributes and duplicate namespace declarations are reported.
 * When validating against a doctype, the value is checked against the
 * declared attribute type.
 *
 * @param elementName the name of the element the attribute appears on
 */
private void readAttribute(String elementName)
  throws IOException, XMLStreamException
{
  // Read attribute name
  String attributeName = readNmtoken(true);
  String type = getAttributeType(elementName, attributeName);
  readEq();
  // Read literal; LIT_NORMALIZE is added for any known type other than
  // CDATA
  final int flags = LIT_ATTRIBUTE |  LIT_ENTITY_REF;
  String value = (type == null || "CDATA".equals(type)) ?
    readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false);
  // add attribute event
  Attribute attr = this.new Attribute(attributeName, type, true, value);
  if (namespaceAware)
    {
      if (attributeName.charAt(0) == ':' ||
          attributeName.charAt(attributeName.length() - 1) == ':')
        error("not a QName", attributeName);
      else if (attributeName.equals("xmlns"))
        {
          LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
          if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
            error("duplicate default namespace");
        }
      else if (attributeName.startsWith("xmlns:"))
        {
          LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
          if (ctx.containsKey(attributeName.substring(6)))
            error("duplicate namespace", attributeName.substring(6));
        }
      else if (attrs.contains(attr))
        error("duplicate attribute", attributeName);
    }
  else if (attrs.contains(attr))
    error("duplicate attribute", attributeName);
  if (validating && doctype != null)
    {
      // VC: Attribute Value Type
      AttributeDecl decl =
        doctype.getAttributeDecl(elementName, attributeName);
      // NOTE(review): the code below dereferences decl, so it relies on
      // error() not returning normally -- confirm.
      if (decl == null)
        error("attribute must be declared", attributeName);
      if ("ENUMERATION".equals(decl.type))
        {
          // VC: Enumeration
          if (!decl.values.contains(value))
            error("value does not match enumeration " + decl.enumeration,
                  value);
        }
      else if ("ID".equals(decl.type))
        {
          // VC: ID
          if (!isNmtoken(value, true))
            error("ID values must match the Name production");
          if (ids.contains(value))
            error("Duplicate ID", value);
          ids.add(value);
        }
      else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type))
        {
          // Each whitespace-separated token is checked and recorded in
          // idrefs.
          StringTokenizer st = new StringTokenizer(value);
          while (st.hasMoreTokens())
            {
              String token = st.nextToken();
              // VC: IDREF
              if (!isNmtoken(token, true))
                error("IDREF values must match the Name production");
              idrefs.add(token);
            }
        }
      else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type))
        {
          StringTokenizer st = new StringTokenizer(value);
          while (st.hasMoreTokens())
            {
              String token = st.nextToken();
              // VC: Name Token
              if (!isNmtoken(token, false))
                error("NMTOKEN values must match the Nmtoken production");
            }
        }
      else if ("ENTITY".equals(decl.type))
        {
          // VC: Entity Name
          if (!isNmtoken(value, true))
            error("ENTITY values must match the Name production");
          Object entity = doctype.getEntity(value);
          // The entity must be an ExternalIds with a notation name set.
          if (entity == null || !(entity instanceof ExternalIds) ||
              ((ExternalIds) entity).notationName == null)
            error("ENTITY values must match the name of an unparsed " +
                  "entity declared in the DTD");
        }
      else if ("NOTATION".equals(decl.type))
        {
          if (!decl.values.contains(value))
            error("NOTATION values must match a declared notation name",
                  value);
          // VC: Notation Attributes
          ExternalIds notation = doctype.getNotation(value);
          if (notation == null)
            error("NOTATION values must match the name of a notation " +
                  "declared in the DTD", value);
        }
    }
  if (namespaceAware)
    {
      // Namespace declarations go into the namespace context instead of
      // the attribute list.
      if (!addNamespace(attr))
        attrs.add(attr);
    }
  else
    attrs.add(attr);
}
2877:
2878:
2883: private boolean addNamespace(Attribute attr)
2884: throws XMLStreamException
2885: {
2886: if ("xmlns".equals(attr.name))
2887: {
2888: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2889: if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null)
2890: error("Duplicate default namespace declaration");
2891: if (XMLConstants.XML_NS_URI.equals(attr.value))
2892: error("can't bind XML namespace");
2893: ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value);
2894: return true;
2895: }
2896: else if ("xmlns".equals(attr.prefix))
2897: {
2898: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2899: if (ctx.get(attr.localName) != null)
2900: error("Duplicate namespace declaration for prefix",
2901: attr.localName);
2902: if (XMLConstants.XML_NS_PREFIX.equals(attr.localName))
2903: {
2904: if (!XMLConstants.XML_NS_URI.equals(attr.value))
2905: error("can't redeclare xml prefix");
2906: else
2907: return false;
2908: }
2909: if (XMLConstants.XML_NS_URI.equals(attr.value))
2910: error("can't bind non-xml prefix to XML namespace");
2911: if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName))
2912: error("can't redeclare xmlns prefix");
2913: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value))
2914: error("can't bind non-xmlns prefix to XML Namespace namespace");
2915: if ("".equals(attr.value) && !input.xml11)
2916: error("illegal use of 1.1-style prefix unbinding in 1.0 document");
2917: ctx.put(attr.localName, attr.value);
2918: return true;
2919: }
2920: return false;
2921: }
2922:
2923:
2926: private void readEndElement()
2927: throws IOException, XMLStreamException
2928: {
2929:
2930: String expected = (String) stack.removeLast();
2931: require(expected);
2932: skipWhitespace();
2933: require('>');
2934:
2935: buf.setLength(0);
2936: buf.append(expected);
2937: if (validating && doctype != null)
2938: endElementValidationHook();
2939: }
2940:
2941:
2945: private void endElementValidationHook()
2946: throws XMLStreamException
2947: {
2948: validateEndElement();
2949: validationStack.removeLast();
2950: if (stack.isEmpty())
2951: currentContentModel = null;
2952: else
2953: {
2954: String parent = (String) stack.getLast();
2955: currentContentModel = doctype.getElementModel(parent);
2956: }
2957: }
2958:
2959:
2962: private void readComment(boolean inDTD)
2963: throws IOException, XMLStreamException
2964: {
2965: boolean saved = expandPE;
2966: expandPE = false;
2967: buf.setLength(0);
2968: readUntil(TEST_END_COMMENT);
2969: require('>');
2970: expandPE = saved;
2971: if (inDTD)
2972: doctype.addComment(buf.toString());
2973: }
2974:
2975:
2978: private void readPI(boolean inDTD)
2979: throws IOException, XMLStreamException
2980: {
2981: boolean saved = expandPE;
2982: expandPE = false;
2983: piTarget = readNmtoken(true);
2984: if (piTarget.indexOf(':') != -1)
2985: error("illegal character in PI target", new Character(':'));
2986: if ("xml".equalsIgnoreCase(piTarget))
2987: error("illegal PI target", piTarget);
2988: if (tryRead(TEST_END_PI))
2989: piData = null;
2990: else
2991: {
2992: if (!tryWhitespace())
2993: error("whitespace required between PI target and data");
2994: buf.setLength(0);
2995: readUntil(TEST_END_PI);
2996: piData = buf.toString();
2997: }
2998: expandPE = saved;
2999: if (inDTD)
3000: doctype.addPI(piTarget, piData);
3001: }
3002:
3003:
3006: private void readReference()
3007: throws IOException, XMLStreamException
3008: {
3009: buf.setLength(0);
3010: String entityName = readNmtoken(true);
3011: require(';');
3012: buf.setLength(0);
3013: buf.append(entityName);
3014: }
3015:
3016:
3019: private void readCDSect()
3020: throws IOException, XMLStreamException
3021: {
3022: buf.setLength(0);
3023: readUntil(TEST_END_CDATA);
3024: }
3025:
3026:
3030: private int readCharData(String prefix)
3031: throws IOException, XMLStreamException
3032: {
3033: boolean white = true;
3034: buf.setLength(0);
3035: if (prefix != null)
3036: buf.append(prefix);
3037: boolean done = false;
3038: boolean entities = false;
3039: while (!done)
3040: {
3041:
3042: mark(tmpBuf.length);
3043: int len = read(tmpBuf, 0, tmpBuf.length);
3044: if (len == -1)
3045: {
3046: if (inputStack.size() > 1)
3047: {
3048: popInput();
3049:
3050: done = true;
3051: }
3052: else
3053: throw new EOFException();
3054: }
3055: for (int i = 0; i < len && !done; i++)
3056: {
3057: int c = tmpBuf[i];
3058: switch (c)
3059: {
3060: case 0x20:
3061: case 0x09:
3062: case 0x0a:
3063: case 0x0d:
3064: buf.append(Character.toChars(c));
3065: break;
3066: case 0x26:
3067: reset();
3068: read(tmpBuf, 0, i);
3069:
3070: mark(3);
3071: c = readCh();
3072: c = readCh();
3073: if (c == 0x23)
3074: {
3075: mark(1);
3076: c = readCh();
3077: boolean hex = (c == 0x78);
3078: if (!hex)
3079: reset();
3080: char[] ch = readCharacterRef(hex ? 16 : 10);
3081: buf.append(ch, 0, ch.length);
3082: for (int j = 0; j < ch.length; j++)
3083: {
3084: switch (ch[j])
3085: {
3086: case 0x20:
3087: case 0x09:
3088: case 0x0a:
3089: case 0x0d:
3090: break;
3091: default:
3092: white = false;
3093: }
3094: }
3095: }
3096: else
3097: {
3098:
3099: reset();
3100: c = readCh();
3101: String entityName = readNmtoken(true);
3102: require(';');
3103: String text =
3104: (String) PREDEFINED_ENTITIES.get(entityName);
3105: if (text != null)
3106: buf.append(text);
3107: else
3108: {
3109: pushInput("", "&" + entityName + ";", false, false);
3110: done = true;
3111: break;
3112: }
3113: }
3114:
3115: i = -1;
3116: mark(tmpBuf.length);
3117: len = read(tmpBuf, 0, tmpBuf.length);
3118: if (len == -1)
3119: {
3120: if (inputStack.size() > 1)
3121: {
3122: popInput();
3123: done = true;
3124: }
3125: else
3126: throw new EOFException();
3127: }
3128: entities = true;
3129: break;
3130: case 0x3e:
3131: int l = buf.length();
3132: if (l > 1 &&
3133: buf.charAt(l - 1) == ']' &&
3134: buf.charAt(l - 2) == ']')
3135: error("Character data may not contain unescaped ']]>'");
3136: buf.append(Character.toChars(c));
3137: break;
3138: case 0x3c:
3139: reset();
3140:
3141: int count = 0, remaining = i;
3142: do
3143: {
3144: int r = read(tmpBuf, 0, remaining);
3145: count += r;
3146: remaining -= r;
3147: }
3148: while (count < i);
3149: i = len;
3150: if (coalescing && tryRead(TEST_CDATA))
3151: readUntil(TEST_END_CDATA);
3152: else
3153: done = true;
3154: break;
3155: default:
3156: if (input.xml11)
3157: {
3158: if (!isXML11Char(c) || isXML11RestrictedChar(c))
3159: error("illegal XML 1.1 character",
3160: "U+" + Integer.toHexString(c));
3161: }
3162: else if (!isChar(c))
3163: error("illegal XML character",
3164: "U+" + Integer.toHexString(c));
3165: white = false;
3166: buf.append(Character.toChars(c));
3167: }
3168: }
3169:
3170:
3171: if (buf.length() >= 2097152)
3172: done = true;
3173: }
3174: if (entities)
3175: normalizeCRLF(buf);
3176: return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS;
3177: }
3178:
3179:
3182: private void expandEntity(String name, boolean inAttr, boolean normalize)
3183: throws IOException, XMLStreamException
3184: {
3185: if (doctype != null)
3186: {
3187: Object value = doctype.getEntity(name);
3188: if (value != null)
3189: {
3190: if (xmlStandalone == Boolean.TRUE)
3191: {
3192:
3193: if (doctype.isEntityExternal(name))
3194: error("reference to external entity in standalone document");
3195: else if (value instanceof ExternalIds)
3196: {
3197: ExternalIds ids = (ExternalIds) value;
3198: if (ids.notationName != null &&
3199: doctype.isNotationExternal(ids.notationName))
3200: error("reference to external notation in " +
3201: "standalone document");
3202: }
3203: }
3204: if (value instanceof String)
3205: {
3206: String text = (String) value;
3207: if (inAttr && text.indexOf('<') != -1)
3208: error("< in attribute value");
3209: pushInput(name, text, !inAttr, normalize);
3210: }
3211: else if (inAttr)
3212: error("reference to external entity in attribute value", name);
3213: else
3214: pushInput(name, (ExternalIds) value, !inAttr, normalize);
3215: return;
3216: }
3217: }
3218: error("reference to undeclared entity", name);
3219: }
3220:
3221:
3224: private boolean isUnparsedEntity(String name)
3225: {
3226: if (doctype != null)
3227: {
3228: Object value = doctype.getEntity(name);
3229: if (value != null && value instanceof ExternalIds)
3230: return ((ExternalIds) value).notationName != null;
3231: }
3232: return false;
3233: }
3234:
3235:
3238: private void readEq()
3239: throws IOException, XMLStreamException
3240: {
3241: skipWhitespace();
3242: require('=');
3243: skipWhitespace();
3244: }
3245:
3246:
3250: private int literalReadCh(boolean recognizePEs)
3251: throws IOException, XMLStreamException
3252: {
3253: int c = recognizePEs ? readCh() : read();
3254: while (c == -1)
3255: {
3256: if (inputStack.size() > 1)
3257: {
3258: inputStack.removeLast();
3259: input = (Input) inputStack.getLast();
3260:
3261: c = recognizePEs ? readCh() : read();
3262: }
3263: else
3264: throw new EOFException();
3265: }
3266: return c;
3267: }
3268:
3269:
3272: private String readLiteral(int flags, boolean recognizePEs)
3273: throws IOException, XMLStreamException
3274: {
3275: boolean saved = expandPE;
3276: int delim = readCh();
3277: if (delim != 0x27 && delim != 0x22)
3278: error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim));
3279: literalBuf.setLength(0);
3280: if ((flags & LIT_DISABLE_PE) != 0)
3281: expandPE = false;
3282: boolean entities = false;
3283: int inputStackSize = inputStack.size();
3284: do
3285: {
3286: int c = literalReadCh(recognizePEs);
3287: if (c == delim && inputStackSize == inputStack.size())
3288: break;
3289: switch (c)
3290: {
3291: case 0x0a:
3292: case 0x0d:
3293: if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
3294: c = 0x20;
3295: break;
3296: case 0x09:
3297: if ((flags & LIT_ATTRIBUTE) != 0)
3298: c = 0x20;
3299: break;
3300: case 0x26:
3301: mark(2);
3302: c = readCh();
3303: if (c == 0x23)
3304: {
3305: if ((flags & LIT_DISABLE_CREF) != 0)
3306: {
3307: reset();
3308: c = 0x26;
3309: }
3310: else
3311: {
3312: mark(1);
3313: c = readCh();
3314: boolean hex = (c == 0x78);
3315: if (!hex)
3316: reset();
3317: char[] ref = readCharacterRef(hex ? 16 : 10);
3318: for (int i = 0; i < ref.length; i++)
3319: literalBuf.append(ref[i]);
3320: entities = true;
3321: continue;
3322: }
3323: }
3324: else
3325: {
3326: if ((flags & LIT_DISABLE_EREF) != 0)
3327: {
3328: reset();
3329: c = 0x26;
3330: }
3331: else
3332: {
3333: reset();
3334: String entityName = readNmtoken(true);
3335: require(';');
3336: String text =
3337: (String) PREDEFINED_ENTITIES.get(entityName);
3338: if (text != null)
3339: literalBuf.append(text);
3340: else
3341: expandEntity(entityName,
3342: (flags & LIT_ATTRIBUTE) != 0,
3343: true);
3344: entities = true;
3345: continue;
3346: }
3347: }
3348: break;
3349: case 0x3c:
3350: if ((flags & LIT_ATTRIBUTE) != 0)
3351: error("attribute values may not contain '<'");
3352: break;
3353: case -1:
3354: if (inputStack.size() > 1)
3355: {
3356: popInput();
3357: continue;
3358: }
3359: throw new EOFException();
3360: default:
3361: if ((c < 0x0020 || c > 0xfffd) ||
3362: (c >= 0xd800 && c < 0xdc00) ||
3363: (input.xml11 && (c >= 0x007f) &&
3364: (c <= 0x009f) && (c != 0x0085)))
3365: error("illegal character", "U+" + Integer.toHexString(c));
3366: }
3367: literalBuf.append(Character.toChars(c));
3368: }
3369: while (true);
3370: expandPE = saved;
3371: if (entities)
3372: normalizeCRLF(literalBuf);
3373: if ((flags & LIT_NORMALIZE) > 0)
3374: literalBuf = normalize(literalBuf);
3375: return literalBuf.toString();
3376: }
3377:
3378:
3383: private StringBuffer normalize(StringBuffer buf)
3384: {
3385: StringBuffer acc = new StringBuffer();
3386: int len = buf.length();
3387: int avState = 0;
3388: for (int i = 0; i < len; i++)
3389: {
3390: char c = buf.charAt(i);
3391: if (c == ' ')
3392: avState = (avState == 0) ? 0 : 1;
3393: else
3394: {
3395: if (avState == 1)
3396: acc.append(' ');
3397: acc.append(c);
3398: avState = 2;
3399: }
3400: }
3401: return acc;
3402: }
3403:
3404:
3409: private void normalizeCRLF(StringBuffer buf)
3410: {
3411: int len = buf.length() - 1;
3412: for (int i = 0; i < len; i++)
3413: {
3414: char c = buf.charAt(i);
3415: if (c == '\r' && buf.charAt(i + 1) == '\n')
3416: {
3417: buf.deleteCharAt(i--);
3418: len--;
3419: }
3420: }
3421: }
3422:
3423:
3426: private void expandPEReference()
3427: throws IOException, XMLStreamException
3428: {
3429: String name = readNmtoken(true, new StringBuffer());
3430: require(';');
3431: mark(1);
3432: if (doctype != null)
3433: {
3434: String entityName = "%" + name;
3435: Object entity = doctype.getEntity(entityName);
3436: if (entity != null)
3437: {
3438: if (xmlStandalone == Boolean.TRUE)
3439: {
3440: if (doctype.isEntityExternal(entityName))
3441: error("reference to external parameter entity in " +
3442: "standalone document");
3443: }
3444: if (entity instanceof String)
3445: {
3446: pushInput(name, (String) entity, false, input.normalize);
3447:
3448: }
3449: else
3450: {
3451:
3452: pushInput(name, (ExternalIds) entity, false, input.normalize);
3453:
3454: }
3455: }
3456: else
3457: error("reference to undeclared parameter entity", name);
3458: }
3459: else
3460: error("reference to parameter entity without doctype", name);
3461: }
3462:
3463:
3467: private char[] readCharacterRef(int base)
3468: throws IOException, XMLStreamException
3469: {
3470: StringBuffer b = new StringBuffer();
3471: for (int c = readCh(); c != 0x3b && c != -1; c = readCh())
3472: b.append(Character.toChars(c));
3473: try
3474: {
3475: int ord = Integer.parseInt(b.toString(), base);
3476: if (input.xml11)
3477: {
3478: if (!isXML11Char(ord))
3479: error("illegal XML 1.1 character reference " +
3480: "U+" + Integer.toHexString(ord));
3481: }
3482: else
3483: {
3484: if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d))
3485: || (ord >= 0xd800 && ord <= 0xdfff)
3486: || ord == 0xfffe || ord == 0xffff
3487: || ord > 0x0010ffff)
3488: error("illegal XML character reference " +
3489: "U+" + Integer.toHexString(ord));
3490: }
3491: return Character.toChars(ord);
3492: }
3493: catch (NumberFormatException e)
3494: {
3495: error("illegal characters in character reference", b.toString());
3496: return null;
3497: }
3498: }
3499:
3500:
3504: private String readNmtoken(boolean isName)
3505: throws IOException, XMLStreamException
3506: {
3507: return readNmtoken(isName, nmtokenBuf);
3508: }
3509:
3510:
3515: private String readNmtoken(boolean isName, StringBuffer buf)
3516: throws IOException, XMLStreamException
3517: {
3518: buf.setLength(0);
3519: int c = readCh();
3520: if (isName)
3521: {
3522: if (!isNameStartCharacter(c, input.xml11))
3523: error("not a name start character",
3524: "U+" + Integer.toHexString(c));
3525: }
3526: else
3527: {
3528: if (!isNameCharacter(c, input.xml11))
3529: error("not a name character",
3530: "U+" + Integer.toHexString(c));
3531: }
3532: buf.append(Character.toChars(c));
3533: do
3534: {
3535: mark(1);
3536: c = readCh();
3537: switch (c)
3538: {
3539: case 0x25:
3540: case 0x3c:
3541: case 0x3e:
3542: case 0x26:
3543: case 0x2c:
3544: case 0x7c:
3545: case 0x2a:
3546: case 0x2b:
3547: case 0x3f:
3548: case 0x29:
3549: case 0x3d:
3550: case 0x27:
3551: case 0x22:
3552: case 0x5b:
3553: case 0x20:
3554: case 0x09:
3555: case 0x0a:
3556: case 0x0d:
3557: case 0x3b:
3558: case 0x2f:
3559: case -1:
3560: reset();
3561: return intern(buf.toString());
3562: default:
3563: if (!isNameCharacter(c, input.xml11))
3564: error("not a name character",
3565: "U+" + Integer.toHexString(c));
3566: else
3567: buf.append(Character.toChars(c));
3568: }
3569: }
3570: while (true);
3571: }
3572:
3573:
3576: public static boolean isXML11Char(int c)
3577: {
3578: return ((c >= 0x0001 && c <= 0xD7FF) ||
3579: (c >= 0xE000 && c < 0xFFFE) ||
3580: (c >= 0x10000 && c <= 0x10FFFF));
3581: }
3582:
3583:
3587: public static boolean isXML11RestrictedChar(int c)
3588: {
3589: return ((c >= 0x0001 && c <= 0x0008) ||
3590: (c >= 0x000B && c <= 0x000C) ||
3591: (c >= 0x000E && c <= 0x001F) ||
3592: (c >= 0x007F && c <= 0x0084) ||
3593: (c >= 0x0086 && c <= 0x009F));
3594: }
3595:
3596:
3600: private boolean isNmtoken(String text, boolean isName)
3601: {
3602: try
3603: {
3604: int[] cp = UnicodeReader.toCodePointArray(text);
3605: if (cp.length == 0)
3606: return false;
3607: if (isName)
3608: {
3609: if (!isNameStartCharacter(cp[0], input.xml11))
3610: return false;
3611: }
3612: else
3613: {
3614: if (!isNameCharacter(cp[0], input.xml11))
3615: return false;
3616: }
3617: for (int i = 1; i < cp.length; i++)
3618: {
3619: if (!isNameCharacter(cp[i], input.xml11))
3620: return false;
3621: }
3622: return true;
3623: }
3624: catch (IOException e)
3625: {
3626: return false;
3627: }
3628: }
3629:
3630:
3634: public static boolean isNameStartCharacter(int c, boolean xml11)
3635: {
3636: if (xml11)
3637: return ((c >= 0x0041 && c <= 0x005a) ||
3638: (c >= 0x0061 && c <= 0x007a) ||
3639: c == 0x3a |
3640: c == 0x5f |
3641: (c >= 0xC0 && c <= 0xD6) ||
3642: (c >= 0xD8 && c <= 0xF6) ||
3643: (c >= 0xF8 && c <= 0x2FF) ||
3644: (c >= 0x370 && c <= 0x37D) ||
3645: (c >= 0x37F && c <= 0x1FFF) ||
3646: (c >= 0x200C && c <= 0x200D) ||
3647: (c >= 0x2070 && c <= 0x218F) ||
3648: (c >= 0x2C00 && c <= 0x2FEF) ||
3649: (c >= 0x3001 && c <= 0xD7FF) ||
3650: (c >= 0xF900 && c <= 0xFDCF) ||
3651: (c >= 0xFDF0 && c <= 0xFFFD) ||
3652: (c >= 0x10000 && c <= 0xEFFFF));
3653: else
3654: return (c == 0x5f || c == 0x3a || isLetter(c));
3655: }
3656:
3657:
3661: public static boolean isNameCharacter(int c, boolean xml11)
3662: {
3663: if (xml11)
3664: return ((c >= 0x0041 && c <= 0x005a) ||
3665: (c >= 0x0061 && c <= 0x007a) ||
3666: (c >= 0x0030 && c <= 0x0039) ||
3667: c == 0x3a |
3668: c == 0x5f |
3669: c == 0x2d |
3670: c == 0x2e |
3671: c == 0xB7 |
3672: (c >= 0xC0 && c <= 0xD6) ||
3673: (c >= 0xD8 && c <= 0xF6) ||
3674: (c >= 0xF8 && c <= 0x2FF) ||
3675: (c >= 0x300 && c <= 0x37D) ||
3676: (c >= 0x37F && c <= 0x1FFF) ||
3677: (c >= 0x200C && c <= 0x200D) ||
3678: (c >= 0x203F && c <= 0x2040) ||
3679: (c >= 0x2070 && c <= 0x218F) ||
3680: (c >= 0x2C00 && c <= 0x2FEF) ||
3681: (c >= 0x3001 && c <= 0xD7FF) ||
3682: (c >= 0xF900 && c <= 0xFDCF) ||
3683: (c >= 0xFDF0 && c <= 0xFFFD) ||
3684: (c >= 0x10000 && c <= 0xEFFFF));
3685: else
3686: return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a ||
3687: isLetter(c) || isDigit(c) ||
3688: isCombiningChar(c) || isExtender(c));
3689: }
3690:
3691:
3695: public static boolean isLetter(int c)
3696: {
3697: if ((c >= 0x0041 && c <= 0x005A) ||
3698: (c >= 0x0061 && c <= 0x007A) ||
3699: (c >= 0x00C0 && c <= 0x00D6) ||
3700: (c >= 0x00D8 && c <= 0x00F6) ||
3701: (c >= 0x00F8 && c <= 0x00FF) ||
3702: (c >= 0x0100 && c <= 0x0131) ||
3703: (c >= 0x0134 && c <= 0x013E) ||
3704: (c >= 0x0141 && c <= 0x0148) ||
3705: (c >= 0x014A && c <= 0x017E) ||
3706: (c >= 0x0180 && c <= 0x01C3) ||
3707: (c >= 0x01CD && c <= 0x01F0) ||
3708: (c >= 0x01F4 && c <= 0x01F5) ||
3709: (c >= 0x01FA && c <= 0x0217) ||
3710: (c >= 0x0250 && c <= 0x02A8) ||
3711: (c >= 0x02BB && c <= 0x02C1) ||
3712: c == 0x0386 ||
3713: (c >= 0x0388 && c <= 0x038A) ||
3714: c == 0x038C ||
3715: (c >= 0x038E && c <= 0x03A1) ||
3716: (c >= 0x03A3 && c <= 0x03CE) ||
3717: (c >= 0x03D0 && c <= 0x03D6) ||
3718: c == 0x03DA ||
3719: c == 0x03DC ||
3720: c == 0x03DE ||
3721: c == 0x03E0 ||
3722: (c >= 0x03E2 && c <= 0x03F3) ||
3723: (c >= 0x0401 && c <= 0x040C) ||
3724: (c >= 0x040E && c <= 0x044F) ||
3725: (c >= 0x0451 && c <= 0x045C) ||
3726: (c >= 0x045E && c <= 0x0481) ||
3727: (c >= 0x0490 && c <= 0x04C4) ||
3728: (c >= 0x04C7 && c <= 0x04C8) ||
3729: (c >= 0x04CB && c <= 0x04CC) ||
3730: (c >= 0x04D0 && c <= 0x04EB) ||
3731: (c >= 0x04EE && c <= 0x04F5) ||
3732: (c >= 0x04F8 && c <= 0x04F9) ||
3733: (c >= 0x0531 && c <= 0x0556) ||
3734: c == 0x0559 ||
3735: (c >= 0x0561 && c <= 0x0586) ||
3736: (c >= 0x05D0 && c <= 0x05EA) ||
3737: (c >= 0x05F0 && c <= 0x05F2) ||
3738: (c >= 0x0621 && c <= 0x063A) ||
3739: (c >= 0x0641 && c <= 0x064A) ||
3740: (c >= 0x0671 && c <= 0x06B7) ||
3741: (c >= 0x06BA && c <= 0x06BE) ||
3742: (c >= 0x06C0 && c <= 0x06CE) ||
3743: (c >= 0x06D0 && c <= 0x06D3) ||
3744: c == 0x06D5 ||
3745: (c >= 0x06E5 && c <= 0x06E6) ||
3746: (c >= 0x0905 && c <= 0x0939) ||
3747: c == 0x093D ||
3748: (c >= 0x0958 && c <= 0x0961) ||
3749: (c >= 0x0985 && c <= 0x098C) ||
3750: (c >= 0x098F && c <= 0x0990) ||
3751: (c >= 0x0993 && c <= 0x09A8) ||
3752: (c >= 0x09AA && c <= 0x09B0) ||
3753: c == 0x09B2 ||
3754: (c >= 0x09B6 && c <= 0x09B9) ||
3755: (c >= 0x09DC && c <= 0x09DD) ||
3756: (c >= 0x09DF && c <= 0x09E1) ||
3757: (c >= 0x09F0 && c <= 0x09F1) ||
3758: (c >= 0x0A05 && c <= 0x0A0A) ||
3759: (c >= 0x0A0F && c <= 0x0A10) ||
3760: (c >= 0x0A13 && c <= 0x0A28) ||
3761: (c >= 0x0A2A && c <= 0x0A30) ||
3762: (c >= 0x0A32 && c <= 0x0A33) ||
3763: (c >= 0x0A35 && c <= 0x0A36) ||
3764: (c >= 0x0A38 && c <= 0x0A39) ||
3765: (c >= 0x0A59 && c <= 0x0A5C) ||
3766: c == 0x0A5E ||
3767: (c >= 0x0A72 && c <= 0x0A74) ||
3768: (c >= 0x0A85 && c <= 0x0A8B) ||
3769: c == 0x0A8D ||
3770: (c >= 0x0A8F && c <= 0x0A91) ||
3771: (c >= 0x0A93 && c <= 0x0AA8) ||
3772: (c >= 0x0AAA && c <= 0x0AB0) ||
3773: (c >= 0x0AB2 && c <= 0x0AB3) ||
3774: (c >= 0x0AB5 && c <= 0x0AB9) ||
3775: c == 0x0ABD ||
3776: c == 0x0AE0 ||
3777: (c >= 0x0B05 && c <= 0x0B0C) ||
3778: (c >= 0x0B0F && c <= 0x0B10) ||
3779: (c >= 0x0B13 && c <= 0x0B28) ||
3780: (c >= 0x0B2A && c <= 0x0B30) ||
3781: (c >= 0x0B32 && c <= 0x0B33) ||
3782: (c >= 0x0B36 && c <= 0x0B39) ||
3783: c == 0x0B3D ||
3784: (c >= 0x0B5C && c <= 0x0B5D) ||
3785: (c >= 0x0B5F && c <= 0x0B61) ||
3786: (c >= 0x0B85 && c <= 0x0B8A) ||
3787: (c >= 0x0B8E && c <= 0x0B90) ||
3788: (c >= 0x0B92 && c <= 0x0B95) ||
3789: (c >= 0x0B99 && c <= 0x0B9A) ||
3790: c == 0x0B9C ||
3791: (c >= 0x0B9E && c <= 0x0B9F) ||
3792: (c >= 0x0BA3 && c <= 0x0BA4) ||
3793: (c >= 0x0BA8 && c <= 0x0BAA) ||
3794: (c >= 0x0BAE && c <= 0x0BB5) ||
3795: (c >= 0x0BB7 && c <= 0x0BB9) ||
3796: (c >= 0x0C05 && c <= 0x0C0C) ||
3797: (c >= 0x0C0E && c <= 0x0C10) ||
3798: (c >= 0x0C12 && c <= 0x0C28) ||
3799: (c >= 0x0C2A && c <= 0x0C33) ||
3800: (c >= 0x0C35 && c <= 0x0C39) ||
3801: (c >= 0x0C60 && c <= 0x0C61) ||
3802: (c >= 0x0C85 && c <= 0x0C8C) ||
3803: (c >= 0x0C8E && c <= 0x0C90) ||
3804: (c >= 0x0C92 && c <= 0x0CA8) ||
3805: (c >= 0x0CAA && c <= 0x0CB3) ||
3806: (c >= 0x0CB5 && c <= 0x0CB9) ||
3807: c == 0x0CDE ||
3808: (c >= 0x0CE0 && c <= 0x0CE1) ||
3809: (c >= 0x0D05 && c <= 0x0D0C) ||
3810: (c >= 0x0D0E && c <= 0x0D10) ||
3811: (c >= 0x0D12 && c <= 0x0D28) ||
3812: (c >= 0x0D2A && c <= 0x0D39) ||
3813: (c >= 0x0D60 && c <= 0x0D61) ||
3814: (c >= 0x0E01 && c <= 0x0E2E) ||
3815: c == 0x0E30 ||
3816: (c >= 0x0E32 && c <= 0x0E33) ||
3817: (c >= 0x0E40 && c <= 0x0E45) ||
3818: (c >= 0x0E81 && c <= 0x0E82) ||
3819: c == 0x0E84 ||
3820: (c >= 0x0E87 && c <= 0x0E88) ||
3821: c == 0x0E8A ||
3822: c == 0x0E8D ||
3823: (c >= 0x0E94 && c <= 0x0E97) ||
3824: (c >= 0x0E99 && c <= 0x0E9F) ||
3825: (c >= 0x0EA1 && c <= 0x0EA3) ||
3826: c == 0x0EA5 ||
3827: c == 0x0EA7 ||
3828: (c >= 0x0EAA && c <= 0x0EAB) ||
3829: (c >= 0x0EAD && c <= 0x0EAE) ||
3830: c == 0x0EB0 ||
3831: (c >= 0x0EB2 && c <= 0x0EB3) ||
3832: c == 0x0EBD ||
3833: (c >= 0x0EC0 && c <= 0x0EC4) ||
3834: (c >= 0x0F40 && c <= 0x0F47) ||
3835: (c >= 0x0F49 && c <= 0x0F69) ||
3836: (c >= 0x10A0 && c <= 0x10C5) ||
3837: (c >= 0x10D0 && c <= 0x10F6) ||
3838: c == 0x1100 ||
3839: (c >= 0x1102 && c <= 0x1103) ||
3840: (c >= 0x1105 && c <= 0x1107) ||
3841: c == 0x1109 ||
3842: (c >= 0x110B && c <= 0x110C) ||
3843: (c >= 0x110E && c <= 0x1112) ||
3844: c == 0x113C ||
3845: c == 0x113E ||
3846: c == 0x1140 ||
3847: c == 0x114C ||
3848: c == 0x114E ||
3849: c == 0x1150 ||
3850: (c >= 0x1154 && c <= 0x1155) ||
3851: c == 0x1159 ||
3852: (c >= 0x115F && c <= 0x1161) ||
3853: c == 0x1163 ||
3854: c == 0x1165 ||
3855: c == 0x1167 ||
3856: c == 0x1169 ||
3857: (c >= 0x116D && c <= 0x116E) ||
3858: (c >= 0x1172 && c <= 0x1173) ||
3859: c == 0x1175 ||
3860: c == 0x119E ||
3861: c == 0x11A8 ||
3862: c == 0x11AB ||
3863: (c >= 0x11AE && c <= 0x11AF) ||
3864: (c >= 0x11B7 && c <= 0x11B8) ||
3865: c == 0x11BA ||
3866: (c >= 0x11BC && c <= 0x11C2) ||
3867: c == 0x11EB ||
3868: c == 0x11F0 ||
3869: c == 0x11F9 ||
3870: (c >= 0x1E00 && c <= 0x1E9B) ||
3871: (c >= 0x1EA0 && c <= 0x1EF9) ||
3872: (c >= 0x1F00 && c <= 0x1F15) ||
3873: (c >= 0x1F18 && c <= 0x1F1D) ||
3874: (c >= 0x1F20 && c <= 0x1F45) ||
3875: (c >= 0x1F48 && c <= 0x1F4D) ||
3876: (c >= 0x1F50 && c <= 0x1F57) ||
3877: c == 0x1F59 ||
3878: c == 0x1F5B ||
3879: c == 0x1F5D ||
3880: (c >= 0x1F5F && c <= 0x1F7D) ||
3881: (c >= 0x1F80 && c <= 0x1FB4) ||
3882: (c >= 0x1FB6 && c <= 0x1FBC) ||
3883: c == 0x1FBE ||
3884: (c >= 0x1FC2 && c <= 0x1FC4) ||
3885: (c >= 0x1FC6 && c <= 0x1FCC) ||
3886: (c >= 0x1FD0 && c <= 0x1FD3) ||
3887: (c >= 0x1FD6 && c <= 0x1FDB) ||
3888: (c >= 0x1FE0 && c <= 0x1FEC) ||
3889: (c >= 0x1FF2 && c <= 0x1FF4) ||
3890: (c >= 0x1FF6 && c <= 0x1FFC) ||
3891: c == 0x2126 ||
3892: (c >= 0x212A && c <= 0x212B) ||
3893: c == 0x212E ||
3894: (c >= 0x2180 && c <= 0x2182) ||
3895: (c >= 0x3041 && c <= 0x3094) ||
3896: (c >= 0x30A1 && c <= 0x30FA) ||
3897: (c >= 0x3105 && c <= 0x312C) ||
3898: (c >= 0xAC00 && c <= 0xD7A3))
3899: return true;
3900: if ((c >= 0x4e00 && c <= 0x9fa5) ||
3901: c == 0x3007 ||
3902: (c >= 0x3021 && c <= 0x3029))
3903: return true;
3904: return false;
3905: }
3906:
3907:
3911: public static boolean isDigit(int c)
3912: {
3913: return ((c >= 0x0030 && c <= 0x0039) ||
3914: (c >= 0x0660 && c <= 0x0669) ||
3915: (c >= 0x06F0 && c <= 0x06F9) ||
3916: (c >= 0x0966 && c <= 0x096F) ||
3917: (c >= 0x09E6 && c <= 0x09EF) ||
3918: (c >= 0x0A66 && c <= 0x0A6F) ||
3919: (c >= 0x0AE6 && c <= 0x0AEF) ||
3920: (c >= 0x0B66 && c <= 0x0B6F) ||
3921: (c >= 0x0BE7 && c <= 0x0BEF) ||
3922: (c >= 0x0C66 && c <= 0x0C6F) ||
3923: (c >= 0x0CE6 && c <= 0x0CEF) ||
3924: (c >= 0x0D66 && c <= 0x0D6F) ||
3925: (c >= 0x0E50 && c <= 0x0E59) ||
3926: (c >= 0x0ED0 && c <= 0x0ED9) ||
3927: (c >= 0x0F20 && c <= 0x0F29));
3928: }
3929:
3930:
3934: public static boolean isCombiningChar(int c)
3935: {
3936: return ((c >= 0x0300 && c <= 0x0345) ||
3937: (c >= 0x0360 && c <= 0x0361) ||
3938: (c >= 0x0483 && c <= 0x0486) ||
3939: (c >= 0x0591 && c <= 0x05A1) ||
3940: (c >= 0x05A3 && c <= 0x05B9) ||
3941: (c >= 0x05BB && c <= 0x05BD) ||
3942: c == 0x05BF ||
3943: (c >= 0x05C1 && c <= 0x05C2) ||
3944: c == 0x05C4 ||
3945: (c >= 0x064B && c <= 0x0652) ||
3946: c == 0x0670 ||
3947: (c >= 0x06D6 && c <= 0x06DC) ||
3948: (c >= 0x06DD && c <= 0x06DF) ||
3949: (c >= 0x06E0 && c <= 0x06E4) ||
3950: (c >= 0x06E7 && c <= 0x06E8) ||
3951: (c >= 0x06EA && c <= 0x06ED) ||
3952: (c >= 0x0901 && c <= 0x0903) ||
3953: c == 0x093C ||
3954: (c >= 0x093E && c <= 0x094C) ||
3955: c == 0x094D ||
3956: (c >= 0x0951 && c <= 0x0954) ||
3957: (c >= 0x0962 && c <= 0x0963) ||
3958: (c >= 0x0981 && c <= 0x0983) ||
3959: c == 0x09BC ||
3960: c == 0x09BE ||
3961: c == 0x09BF ||
3962: (c >= 0x09C0 && c <= 0x09C4) ||
3963: (c >= 0x09C7 && c <= 0x09C8) ||
3964: (c >= 0x09CB && c <= 0x09CD) ||
3965: c == 0x09D7 ||
3966: (c >= 0x09E2 && c <= 0x09E3) ||
3967: c == 0x0A02 ||
3968: c == 0x0A3C ||
3969: c == 0x0A3E ||
3970: c == 0x0A3F ||
3971: (c >= 0x0A40 && c <= 0x0A42) ||
3972: (c >= 0x0A47 && c <= 0x0A48) ||
3973: (c >= 0x0A4B && c <= 0x0A4D) ||
3974: (c >= 0x0A70 && c <= 0x0A71) ||
3975: (c >= 0x0A81 && c <= 0x0A83) ||
3976: c == 0x0ABC ||
3977: (c >= 0x0ABE && c <= 0x0AC5) ||
3978: (c >= 0x0AC7 && c <= 0x0AC9) ||
3979: (c >= 0x0ACB && c <= 0x0ACD) ||
3980: (c >= 0x0B01 && c <= 0x0B03) ||
3981: c == 0x0B3C ||
3982: (c >= 0x0B3E && c <= 0x0B43) ||
3983: (c >= 0x0B47 && c <= 0x0B48) ||
3984: (c >= 0x0B4B && c <= 0x0B4D) ||
3985: (c >= 0x0B56 && c <= 0x0B57) ||
3986: (c >= 0x0B82 && c <= 0x0B83) ||
3987: (c >= 0x0BBE && c <= 0x0BC2) ||
3988: (c >= 0x0BC6 && c <= 0x0BC8) ||
3989: (c >= 0x0BCA && c <= 0x0BCD) ||
3990: c == 0x0BD7 ||
3991: (c >= 0x0C01 && c <= 0x0C03) ||
3992: (c >= 0x0C3E && c <= 0x0C44) ||
3993: (c >= 0x0C46 && c <= 0x0C48) ||
3994: (c >= 0x0C4A && c <= 0x0C4D) ||
3995: (c >= 0x0C55 && c <= 0x0C56) ||
3996: (c >= 0x0C82 && c <= 0x0C83) ||
3997: (c >= 0x0CBE && c <= 0x0CC4) ||
3998: (c >= 0x0CC6 && c <= 0x0CC8) ||
3999: (c >= 0x0CCA && c <= 0x0CCD) ||
4000: (c >= 0x0CD5 && c <= 0x0CD6) ||
4001: (c >= 0x0D02 && c <= 0x0D03) ||
4002: (c >= 0x0D3E && c <= 0x0D43) ||
4003: (c >= 0x0D46 && c <= 0x0D48) ||
4004: (c >= 0x0D4A && c <= 0x0D4D) ||
4005: c == 0x0D57 ||
4006: c == 0x0E31 ||
4007: (c >= 0x0E34 && c <= 0x0E3A) ||
4008: (c >= 0x0E47 && c <= 0x0E4E) ||
4009: c == 0x0EB1 ||
4010: (c >= 0x0EB4 && c <= 0x0EB9) ||
4011: (c >= 0x0EBB && c <= 0x0EBC) ||
4012: (c >= 0x0EC8 && c <= 0x0ECD) ||
4013: (c >= 0x0F18 && c <= 0x0F19) ||
4014: c == 0x0F35 ||
4015: c == 0x0F37 ||
4016: c == 0x0F39 ||
4017: c == 0x0F3E ||
4018: c == 0x0F3F ||
4019: (c >= 0x0F71 && c <= 0x0F84) ||
4020: (c >= 0x0F86 && c <= 0x0F8B) ||
4021: (c >= 0x0F90 && c <= 0x0F95) ||
4022: c == 0x0F97 ||
4023: (c >= 0x0F99 && c <= 0x0FAD) ||
4024: (c >= 0x0FB1 && c <= 0x0FB7) ||
4025: c == 0x0FB9 ||
4026: (c >= 0x20D0 && c <= 0x20DC) ||
4027: c == 0x20E1 ||
4028: (c >= 0x302A && c <= 0x302F) ||
4029: c == 0x3099 ||
4030: c == 0x309A);
4031: }
4032:
4033:
4037: public static boolean isExtender(int c)
4038: {
4039: return (c == 0x00B7 ||
4040: c == 0x02D0 ||
4041: c == 0x02D1 ||
4042: c == 0x0387 ||
4043: c == 0x0640 ||
4044: c == 0x0E46 ||
4045: c == 0x0EC6 ||
4046: c == 0x3005 ||
4047: (c >= 0x3031 && c <= 0x3035) ||
4048: (c >= 0x309D && c <= 0x309E) ||
4049: (c >= 0x30FC && c <= 0x30FE));
4050: }
4051:
4052:
4056: public static boolean isChar(int c)
4057: {
4058: return (c >= 0x20 && c < 0xd800) ||
4059: (c >= 0xe00 && c < 0xfffe) ||
4060: (c >= 0x10000 && c < 0x110000) ||
4061: c == 0xa || c == 0x9 || c == 0xd;
4062: }
4063:
4064:
4068: private String intern(String text)
4069: {
4070: return stringInterning ? text.intern() : text;
4071: }
4072:
4073:
4076: private void error(String message)
4077: throws XMLStreamException
4078: {
4079: error(message, null);
4080: }
4081:
4082:
4085: private void error(String message, Object info)
4086: throws XMLStreamException
4087: {
4088: if (info != null)
4089: {
4090: if (info instanceof String)
4091: message += ": \"" + ((String) info) + "\"";
4092: else if (info instanceof Character)
4093: message += ": '" + ((Character) info) + "'";
4094: }
4095: throw new XMLStreamException(message);
4096: }
4097:
4098:
4101: private void validateStartElement(String elementName)
4102: throws XMLStreamException
4103: {
4104: if (currentContentModel == null)
4105: {
4106:
4107:
4108: if (!elementName.equals(doctype.rootName))
4109: error("root element name must match name in DTD");
4110: return;
4111: }
4112:
4113: switch (currentContentModel.type)
4114: {
4115: case ContentModel.EMPTY:
4116: error("child element found in empty element", elementName);
4117: break;
4118: case ContentModel.ELEMENT:
4119: LinkedList ctx = (LinkedList) validationStack.getLast();
4120: ctx.add(elementName);
4121: break;
4122: case ContentModel.MIXED:
4123: MixedContentModel mm = (MixedContentModel) currentContentModel;
4124: if (!mm.containsName(elementName))
4125: error("illegal element for content model", elementName);
4126: break;
4127: }
4128: }
4129:
4130:
4133: private void validateEndElement()
4134: throws XMLStreamException
4135: {
4136: if (currentContentModel == null)
4137: {
4138:
4139:
4140: if (!idrefs.containsAll(ids))
4141: error("IDREF values must match the value of some ID attribute");
4142: return;
4143: }
4144:
4145: switch (currentContentModel.type)
4146: {
4147: case ContentModel.ELEMENT:
4148: LinkedList ctx = (LinkedList) validationStack.getLast();
4149: ElementContentModel ecm = (ElementContentModel) currentContentModel;
4150: validateElementContent(ecm, ctx);
4151: break;
4152: }
4153: }
4154:
4155:
4158: private void validatePCData(String text)
4159: throws XMLStreamException
4160: {
4161:
4162: switch (currentContentModel.type)
4163: {
4164: case ContentModel.EMPTY:
4165: error("character data found in empty element", text);
4166: break;
4167: case ContentModel.ELEMENT:
4168: boolean white = true;
4169: int len = text.length();
4170: for (int i = 0; i < len; i++)
4171: {
4172: char c = text.charAt(i);
4173: if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
4174: {
4175: white = false;
4176: break;
4177: }
4178: }
4179: if (!white)
4180: error("character data found in element with element content", text);
4181: else if (xmlStandalone == Boolean.TRUE && currentContentModel.external)
4182:
4183: error("whitespace in element content of externally declared " +
4184: "element in standalone document");
4185: break;
4186: }
4187: }
4188:
4189:
4193: private void validateElementContent(ElementContentModel model,
4194: LinkedList children)
4195: throws XMLStreamException
4196: {
4197:
4198: StringBuffer buf = new StringBuffer();
4199: for (Iterator i = children.iterator(); i.hasNext(); )
4200: {
4201: buf.append((String) i.next());
4202: buf.append(' ');
4203: }
4204: String c = buf.toString();
4205: String regex = createRegularExpression(model);
4206: if (!c.matches(regex))
4207: error("element content "+model.text+" does not match expression "+regex, c);
4208: }
4209:
4210:
4214: private String createRegularExpression(ElementContentModel model)
4215: {
4216: if (model.regex == null)
4217: {
4218: StringBuffer buf = new StringBuffer();
4219: buf.append('(');
4220: for (Iterator i = model.contentParticles.iterator(); i.hasNext(); )
4221: {
4222: ContentParticle cp = (ContentParticle) i.next();
4223: if (cp.content instanceof String)
4224: {
4225: buf.append('(');
4226: buf.append((String) cp.content);
4227: buf.append(' ');
4228: buf.append(')');
4229: if (cp.max == -1)
4230: {
4231: if (cp.min == 0)
4232: buf.append('*');
4233: else
4234: buf.append('+');
4235: }
4236: else if (cp.min == 0)
4237: buf.append('?');
4238: }
4239: else
4240: {
4241: ElementContentModel ecm = (ElementContentModel) cp.content;
4242: buf.append(createRegularExpression(ecm));
4243: }
4244: if (model.or && i.hasNext())
4245: buf.append('|');
4246: }
4247: buf.append(')');
4248: if (model.max == -1)
4249: {
4250: if (model.min == 0)
4251: buf.append('*');
4252: else
4253: buf.append('+');
4254: }
4255: else if (model.min == 0)
4256: buf.append('?');
4257: model.regex = buf.toString();
4258: }
4259: return model.regex;
4260: }
4261:
4262:
4265: void validateDoctype()
4266: throws XMLStreamException
4267: {
4268: for (Iterator i = doctype.entityIterator(); i.hasNext(); )
4269: {
4270: Map.Entry entry = (Map.Entry) i.next();
4271: Object entity = entry.getValue();
4272: if (entity instanceof ExternalIds)
4273: {
4274: ExternalIds ids = (ExternalIds) entity;
4275: if (ids.notationName != null)
4276: {
4277:
4278: ExternalIds notation = doctype.getNotation(ids.notationName);
4279: if (notation == null)
4280: error("Notation name must match the declared name of a " +
4281: "notation", ids.notationName);
4282: }
4283: }
4284: }
4285: }
4286:
4287:
4292: public static void main(String[] args)
4293: throws Exception
4294: {
4295: boolean validating = false;
4296: boolean namespaceAware = false;
4297: boolean xIncludeAware = false;
4298: int pos = 0;
4299: while (pos < args.length && args[pos].startsWith("-"))
4300: {
4301: if ("-x".equals(args[pos]))
4302: xIncludeAware = true;
4303: else if ("-v".equals(args[pos]))
4304: validating = true;
4305: else if ("-n".equals(args[pos]))
4306: namespaceAware = true;
4307: pos++;
4308: }
4309: if (pos >= args.length)
4310: {
4311: System.out.println("Syntax: XMLParser [-n] [-v] [-x] <file> [<file2> [...]]");
4312: System.out.println("\t-n: use namespace aware mode");
4313: System.out.println("\t-v: use validating parser");
4314: System.out.println("\t-x: use XInclude aware mode");
4315: System.exit(2);
4316: }
4317: while (pos < args.length)
4318: {
4319: XMLParser p = new XMLParser(new java.io.FileInputStream(args[pos]),
4320: absolutize(null, args[pos]),
4321: validating,
4322: namespaceAware,
4323: true,
4324: true,
4325: true,
4326: true,
4327: true,
4328: true,
4329: true,
4330: null,
4331: null);
4332: XMLStreamReader reader = p;
4333: if (xIncludeAware)
4334: reader = new XIncludeFilter(p, args[pos], true, true, true);
4335: try
4336: {
4337: int event;
4338:
4339: while (reader.hasNext())
4340: {
4341: event = reader.next();
4342: Location loc = reader.getLocation();
4343: System.out.print(loc.getLineNumber() + ":" +
4344: loc.getColumnNumber() + " ");
4345: switch (event)
4346: {
4347: case XMLStreamConstants.START_DOCUMENT:
4348: System.out.println("START_DOCUMENT version=" +
4349: reader.getVersion() +
4350: " encoding=" +
4351: reader.getEncoding());
4352: break;
4353: case XMLStreamConstants.END_DOCUMENT:
4354: System.out.println("END_DOCUMENT");
4355: break;
4356: case XMLStreamConstants.START_ELEMENT:
4357: System.out.println("START_ELEMENT " +
4358: reader.getName());
4359: int l = reader.getNamespaceCount();
4360: for (int i = 0; i < l; i++)
4361: System.out.println("\tnamespace " +
4362: reader.getNamespacePrefix(i) + "='" +
4363: reader.getNamespaceURI(i)+"'");
4364: l = reader.getAttributeCount();
4365: for (int i = 0; i < l; i++)
4366: System.out.println("\tattribute " +
4367: reader.getAttributeName(i) + "='" +
4368: reader.getAttributeValue(i) + "'");
4369: break;
4370: case XMLStreamConstants.END_ELEMENT:
4371: System.out.println("END_ELEMENT " + reader.getName());
4372: break;
4373: case XMLStreamConstants.CHARACTERS:
4374: System.out.println("CHARACTERS '" +
4375: encodeText(reader.getText()) + "'");
4376: break;
4377: case XMLStreamConstants.CDATA:
4378: System.out.println("CDATA '" +
4379: encodeText(reader.getText()) + "'");
4380: break;
4381: case XMLStreamConstants.SPACE:
4382: System.out.println("SPACE '" +
4383: encodeText(reader.getText()) + "'");
4384: break;
4385: case XMLStreamConstants.DTD:
4386: System.out.println("DTD " + reader.getText());
4387: break;
4388: case XMLStreamConstants.ENTITY_REFERENCE:
4389: System.out.println("ENTITY_REFERENCE " + reader.getText());
4390: break;
4391: case XMLStreamConstants.COMMENT:
4392: System.out.println("COMMENT '" +
4393: encodeText(reader.getText()) + "'");
4394: break;
4395: case XMLStreamConstants.PROCESSING_INSTRUCTION:
4396: System.out.println("PROCESSING_INSTRUCTION " +
4397: reader.getPITarget() + " " +
4398: reader.getPIData());
4399: break;
4400: case START_ENTITY:
4401: System.out.println("START_ENTITY " + reader.getText());
4402: break;
4403: case END_ENTITY:
4404: System.out.println("END_ENTITY " + reader.getText());
4405: break;
4406: default:
4407: System.out.println("Unknown event: " + event);
4408: }
4409: }
4410: }
4411: catch (XMLStreamException e)
4412: {
4413: Location l = reader.getLocation();
4414: System.out.println("At line "+l.getLineNumber()+
4415: ", column "+l.getColumnNumber()+
4416: " of "+l.getSystemId());
4417: throw e;
4418: }
4419: pos++;
4420: }
4421: }
4422:
4423:
4426: private static String encodeText(String text)
4427: {
4428: StringBuffer b = new StringBuffer();
4429: int len = text.length();
4430: for (int i = 0; i < len; i++)
4431: {
4432: char c = text.charAt(i);
4433: switch (c)
4434: {
4435: case '\t':
4436: b.append("\\t");
4437: break;
4438: case '\n':
4439: b.append("\\n");
4440: break;
4441: case '\r':
4442: b.append("\\r");
4443: break;
4444: default:
4445: b.append(c);
4446: }
4447: }
4448: return b.toString();
4449: }
4450:
4451:
4454: class Attribute
4455: {
4456:
4457:
4460: final String name;
4461:
4462:
4465: final String type;
4466:
4467:
4470: final boolean specified;
4471:
4472:
4475: final String value;
4476:
4477:
4480: final String prefix;
4481:
4482:
4485: final String localName;
4486:
4487: Attribute(String name, String type, boolean specified, String value)
4488: {
4489: this.name = name;
4490: this.type = type;
4491: this.specified = specified;
4492: this.value = value;
4493: int ci = name.indexOf(':');
4494: if (ci == -1)
4495: {
4496: prefix = null;
4497: localName = intern(name);
4498: }
4499: else
4500: {
4501: prefix = intern(name.substring(0, ci));
4502: localName = intern(name.substring(ci + 1));
4503: }
4504: }
4505:
4506: public boolean equals(Object other)
4507: {
4508: if (other instanceof Attribute)
4509: {
4510: Attribute a = (Attribute) other;
4511: if (namespaceAware)
4512: {
4513: if (!a.localName.equals(localName))
4514: return false;
4515: String auri = getNamespaceURI(a.prefix);
4516: String uri = getNamespaceURI(prefix);
4517: if (uri == null && (auri == null ||
4518: (input.xml11 && "".equals(auri))))
4519: return true;
4520: if (uri != null)
4521: {
4522: if ("".equals(uri) && input.xml11 && "".equals(auri))
4523: return true;
4524: return uri.equals(auri);
4525: }
4526: return false;
4527: }
4528: else
4529: return a.name.equals(name);
4530: }
4531: return false;
4532: }
4533:
4534: }
4535:
4536:
4539: class Doctype
4540: {
4541:
4542:
4545: final String rootName;
4546:
4547:
4550: final String publicId;
4551:
4552:
4555: final String systemId;
4556:
4557:
4560: private final LinkedHashMap elements = new LinkedHashMap();
4561:
4562:
4565: private final LinkedHashMap attlists = new LinkedHashMap();
4566:
4567:
4570: private final LinkedHashMap entities = new LinkedHashMap();
4571:
4572:
4575: private final LinkedHashMap notations = new LinkedHashMap();
4576:
4577:
4580: private final LinkedHashMap comments = new LinkedHashMap();
4581:
4582:
4586: private final LinkedHashMap pis = new LinkedHashMap();
4587:
4588:
4591: private final LinkedList entries = new LinkedList();
4592:
4593:
4596: private final HashSet externalEntities = new HashSet();
4597:
4598:
4601: private final HashSet externalNotations = new HashSet();
4602:
4603:
4606: private int anon = 1;
4607:
4608:
4611: Doctype(String rootName, String publicId, String systemId)
4612: {
4613: this.rootName = rootName;
4614: this.publicId = publicId;
4615: this.systemId = systemId;
4616: }
4617:
4618:
4624: void addElementDecl(String name, String text, ContentModel model)
4625: {
4626: if (elements.containsKey(name))
4627: return;
4628: model.text = text;
4629: model.external = (inputStack.size() != 1);
4630: elements.put(name, model);
4631: entries.add("E" + name);
4632: }
4633:
4634:
4640: void addAttributeDecl(String ename, String aname, AttributeDecl decl)
4641: {
4642: LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4643: if (attlist == null)
4644: {
4645: attlist = new LinkedHashMap();
4646: attlists.put(ename, attlist);
4647: }
4648: else if (attlist.containsKey(aname))
4649: return;
4650: attlist.put(aname, decl);
4651: String key = "A" + ename;
4652: if (!entries.contains(key))
4653: entries.add(key);
4654: }
4655:
4656:
4662: void addEntityDecl(String name, String text, boolean inExternalSubset)
4663: {
4664: if (entities.containsKey(name))
4665: return;
4666: entities.put(name, text);
4667: entries.add("e" + name);
4668: if (inExternalSubset)
4669: externalEntities.add(name);
4670: }
4671:
4672:
4678: void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset)
4679: {
4680: if (entities.containsKey(name))
4681: return;
4682: entities.put(name, ids);
4683: entries.add("e" + name);
4684: if (inExternalSubset)
4685: externalEntities.add(name);
4686: }
4687:
4688:
4694: void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset)
4695: {
4696: if (notations.containsKey(name))
4697: return;
4698: notations.put(name, ids);
4699: entries.add("n" + name);
4700: if (inExternalSubset)
4701: externalNotations.add(name);
4702: }
4703:
4704:
4707: void addComment(String text)
4708: {
4709: String key = Integer.toString(anon++);
4710: comments.put(key, text);
4711: entries.add("c" + key);
4712: }
4713:
4714:
4717: void addPI(String target, String data)
4718: {
4719: String key = Integer.toString(anon++);
4720: pis.put(key, new String[] {target, data});
4721: entries.add("p" + key);
4722: }
4723:
4724:
4728: ContentModel getElementModel(String name)
4729: {
4730: return (ContentModel) elements.get(name);
4731: }
4732:
4733:
4738: AttributeDecl getAttributeDecl(String ename, String aname)
4739: {
4740: LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4741: return (attlist == null) ? null : (AttributeDecl) attlist.get(aname);
4742: }
4743:
4744:
4749: boolean isAttributeDeclared(String ename, String aname)
4750: {
4751: LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4752: return (attlist == null) ? false : attlist.containsKey(aname);
4753: }
4754:
4755:
4760: Iterator attlistIterator(String ename)
4761: {
4762: LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4763: return (attlist == null) ? Collections.EMPTY_LIST.iterator() :
4764: attlist.entrySet().iterator();
4765: }
4766:
4767:
4770: Object getEntity(String name)
4771: {
4772: return entities.get(name);
4773: }
4774:
4775:
4779: boolean isEntityExternal(String name)
4780: {
4781: return externalEntities.contains(name);
4782: }
4783:
4784:
4787: Iterator entityIterator()
4788: {
4789: return entities.entrySet().iterator();
4790: }
4791:
4792:
4795: ExternalIds getNotation(String name)
4796: {
4797: return (ExternalIds) notations.get(name);
4798: }
4799:
4800:
4804: boolean isNotationExternal(String name)
4805: {
4806: return externalNotations.contains(name);
4807: }
4808:
4809:
4812: String getComment(String key)
4813: {
4814: return (String) comments.get(key);
4815: }
4816:
4817:
4821: String[] getPI(String key)
4822: {
4823: return (String[]) pis.get(key);
4824: }
4825:
4826:
4830: Iterator entryIterator()
4831: {
4832: return entries.iterator();
4833: }
4834:
4835: }
4836:
4837:
/**
 * Plain holder for a set of external identifiers. All fields start out as
 * null and are filled in by whoever creates the object.
 */
class ExternalIds
{

  /**
   * The public identifier, if any.
   */
  String publicId;

  /**
   * The system identifier, if any.
   */
  String systemId;

  /**
   * The name of the associated notation, if any.
   */
  String notationName;

}
4858:
4859:
/**
 * Base class of the content models. A model has a fixed type (one of the
 * constants below) and an occurrence range that defaults to exactly once;
 * subclasses adjust the range as needed.
 */
abstract class ContentModel
{
  /** Type constant: empty content. */
  static final int EMPTY = 0;
  /** Type constant: any content. */
  static final int ANY = 1;
  /** Type constant: element content. */
  static final int ELEMENT = 2;
  /** Type constant: mixed content. */
  static final int MIXED = 3;

  /** Minimum number of occurrences; 1 unless changed. */
  int min = 1;
  /** Maximum number of occurrences; 1 unless changed. */
  int max = 1;
  /** The type of this model, one of the constants above. */
  final int type;
  /** Declaration text of the model, set from outside. */
  String text;
  /** External flag of the model, set from outside. */
  boolean external;

  ContentModel(int type)
  {
    this.type = type;
  }

}
4883:
4884:
4887: class EmptyContentModel
4888: extends ContentModel
4889: {
4890:
4891: EmptyContentModel()
4892: {
4893: super(ContentModel.EMPTY);
4894: min = 0;
4895: max = 0;
4896: }
4897:
4898: }
4899:
4900:
4903: class AnyContentModel
4904: extends ContentModel
4905: {
4906:
4907: AnyContentModel()
4908: {
4909: super(ContentModel.ANY);
4910: min = 0;
4911: max = -1;
4912: }
4913:
4914: }
4915:
4916:
4919: class ElementContentModel
4920: extends ContentModel
4921: {
4922:
4923: LinkedList contentParticles;
4924: boolean or;
4925: String regex;
4926:
4927: ElementContentModel()
4928: {
4929: super(ContentModel.ELEMENT);
4930: contentParticles = new LinkedList();
4931: }
4932:
4933: void addContentParticle(ContentParticle cp)
4934: {
4935: contentParticles.add(cp);
4936: }
4937:
4938: }
4939:
/**
 * One particle of an element content model: some content together with an
 * occurrence range that starts out as exactly once.
 */
class ContentParticle
{

  /** Minimum number of occurrences. */
  int min;
  /** Maximum number of occurrences. */
  int max;
  /** The content of the particle; initially null. */
  Object content;

  ContentParticle()
  {
    min = 1;
    max = 1;
  }

}
4948:
4949:
4952: class MixedContentModel
4953: extends ContentModel
4954: {
4955:
4956: private HashSet names;
4957:
4958: MixedContentModel()
4959: {
4960: super(ContentModel.MIXED);
4961: names = new HashSet();
4962: }
4963:
4964: void addName(String name)
4965: {
4966: names.add(name);
4967: }
4968:
4969: boolean containsName(String name)
4970: {
4971: return names.contains(name);
4972: }
4973:
4974: }
4975:
4976:
/**
 * Immutable record of an attribute declaration. All values are taken over
 * from the constructor arguments as they are.
 */
class AttributeDecl
{

  /**
   * The attribute type.
   */
  final String type;

  /**
   * The value given in the declaration.
   */
  final String value;

  /**
   * The kind of value.
   * NOTE(review): presumably one of the ATTRIBUTE_DEFAULT_* constants of
   * the parser - confirm against the callers.
   */
  final int valueType;

  /**
   * The enumeration as text.
   */
  final String enumeration;

  /**
   * The set of values that goes with the enumeration.
   */
  final HashSet values;

  /**
   * The external flag of the declaration.
   */
  final boolean external;

  AttributeDecl(String type, String value,
                int valueType, String enumeration,
                HashSet values, boolean external)
  {
    this.external = external;
    this.values = values;
    this.enumeration = enumeration;
    this.valueType = valueType;
    this.value = value;
    this.type = type;
  }

}
5025:
5026:
5029: static class Input
5030: implements Location
5031: {
5032:
5033: int line = 1, markLine;
5034: int column, markColumn;
5035: int offset, markOffset;
5036: final String publicId, systemId, name;
5037: final boolean report;
5038: final boolean normalize;
5039:
5040: InputStream in;
5041: Reader reader;
5042: UnicodeReader unicodeReader;
5043: boolean initialized;
5044: boolean encodingDetected;
5045: String inputEncoding;
5046: boolean xml11;
5047:
5048: Input(InputStream in, Reader reader, String publicId, String systemId,
5049: String name, String inputEncoding, boolean report,
5050: boolean normalize)
5051: {
5052: if (inputEncoding == null)
5053: inputEncoding = "UTF-8";
5054: this.inputEncoding = inputEncoding;
5055: this.publicId = publicId;
5056: this.systemId = systemId;
5057: this.name = name;
5058: this.report = report;
5059: this.normalize = normalize;
5060: if (in != null)
5061: {
5062: if (reader != null)
5063: throw new IllegalStateException("both byte and char streams "+
5064: "specified");
5065: if (normalize)
5066: in = new CRLFInputStream(in);
5067: in = new BufferedInputStream(in);
5068: this.in = in;
5069: }
5070: else
5071: {
5072: this.reader = normalize ? new CRLFReader(reader) : reader;
5073: unicodeReader = new UnicodeReader(this.reader);
5074: }
5075: initialized = false;
5076: }
5077:
5078:
5079:
5080: public int getCharacterOffset()
5081: {
5082: return offset;
5083: }
5084:
5085: public int getColumnNumber()
5086: {
5087: return column;
5088: }
5089:
5090: public int getLineNumber()
5091: {
5092: return line;
5093: }
5094:
5095: public String getPublicId()
5096: {
5097: return publicId;
5098: }
5099:
5100: public String getSystemId()
5101: {
5102: return systemId;
5103: }
5104:
5105: void init()
5106: throws IOException
5107: {
5108: if (initialized)
5109: return;
5110: if (in != null)
5111: detectEncoding();
5112: initialized = true;
5113: }
5114:
5115: void mark(int len)
5116: throws IOException
5117: {
5118: markOffset = offset;
5119: markLine = line;
5120: markColumn = column;
5121: if (unicodeReader != null)
5122: unicodeReader.mark(len);
5123: else
5124: in.mark(len);
5125: }
5126:
5127:
5130: int read()
5131: throws IOException
5132: {
5133: offset++;
5134: int ret = (unicodeReader != null) ? unicodeReader.read() : in.read();
5135: if (normalize &&
5136: (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))))
5137: {
5138:
5139: ret = 0x0a;
5140: }
5141:
5142: if (ret == 0x0a)
5143: {
5144: line++;
5145: column = 0;
5146: }
5147: else
5148: column++;
5149: return ret;
5150: }
5151:
5152:
5155: int read(int[] b, int off, int len)
5156: throws IOException
5157: {
5158: int ret;
5159: if (unicodeReader != null)
5160: {
5161: ret = unicodeReader.read(b, off, len);
5162: }
5163: else
5164: {
5165: byte[] b2 = new byte[len];
5166: ret = in.read(b2, 0, len);
5167: if (ret != -1)
5168: {
5169: String s = new String(b2, 0, ret, inputEncoding);
5170: int[] c = UnicodeReader.toCodePointArray(s);
5171: ret = c.length;
5172: System.arraycopy(c, 0, b, off, ret);
5173: }
5174: }
5175: if (ret != -1)
5176: {
5177:
5178: for (int i = 0; i < ret; i++)
5179: {
5180: int c = b[off + i];
5181: if (normalize &&
5182: (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))))
5183: {
5184:
5185: c = 0x0a;
5186: b[off + i] = c;
5187: }
5188: if (c == 0x0a)
5189: {
5190: line++;
5191: column = 0;
5192: }
5193: else
5194: column++;
5195: }
5196: }
5197: return ret;
5198: }
5199:
5200: void reset()
5201: throws IOException
5202: {
5203: if (unicodeReader != null)
5204: unicodeReader.reset();
5205: else
5206: in.reset();
5207: offset = markOffset;
5208: line = markLine;
5209: column = markColumn;
5210: }
5211:
5212:
5213:
5214: private static final int[] SIGNATURE_UCS_4_1234 =
5215: new int[] { 0x00, 0x00, 0x00, 0x3c };
5216: private static final int[] SIGNATURE_UCS_4_4321 =
5217: new int[] { 0x3c, 0x00, 0x00, 0x00 };
5218: private static final int[] SIGNATURE_UCS_4_2143 =
5219: new int[] { 0x00, 0x00, 0x3c, 0x00 };
5220: private static final int[] SIGNATURE_UCS_4_3412 =
5221: new int[] { 0x00, 0x3c, 0x00, 0x00 };
5222: private static final int[] SIGNATURE_UCS_2_12 =
5223: new int[] { 0xfe, 0xff };
5224: private static final int[] SIGNATURE_UCS_2_21 =
5225: new int[] { 0xff, 0xfe };
5226: private static final int[] SIGNATURE_UCS_2_12_NOBOM =
5227: new int[] { 0x00, 0x3c, 0x00, 0x3f };
5228: private static final int[] SIGNATURE_UCS_2_21_NOBOM =
5229: new int[] { 0x3c, 0x00, 0x3f, 0x00 };
5230: private static final int[] SIGNATURE_UTF_8 =
5231: new int[] { 0x3c, 0x3f, 0x78, 0x6d };
5232: private static final int[] SIGNATURE_UTF_8_BOM =
5233: new int[] { 0xef, 0xbb, 0xbf };
5234:
5235:
5238: private void detectEncoding()
5239: throws IOException
5240: {
5241: int[] signature = new int[4];
5242: in.mark(4);
5243: for (int i = 0; i < 4; i++)
5244: signature[i] = in.read();
5245: in.reset();
5246:
5247:
5248: if (equals(SIGNATURE_UCS_4_1234, signature))
5249: {
5250: in.read();
5251: in.read();
5252: in.read();
5253: in.read();
5254: setInputEncoding("UTF-32BE");
5255: encodingDetected = true;
5256: }
5257: else if (equals(SIGNATURE_UCS_4_4321, signature))
5258: {
5259: in.read();
5260: in.read();
5261: in.read();
5262: in.read();
5263: setInputEncoding("UTF-32LE");
5264: encodingDetected = true;
5265: }
5266: else if (equals(SIGNATURE_UCS_4_2143, signature) ||
5267: equals(SIGNATURE_UCS_4_3412, signature))
5268: throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
5269:
5270:
5271: else if (equals(SIGNATURE_UCS_2_12, signature))
5272: {
5273: in.read();
5274: in.read();
5275: setInputEncoding("UTF-16BE");
5276: encodingDetected = true;
5277: }
5278: else if (equals(SIGNATURE_UCS_2_21, signature))
5279: {
5280: in.read();
5281: in.read();
5282: setInputEncoding("UTF-16LE");
5283: encodingDetected = true;
5284: }
5285: else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
5286: {
5287:
5288: throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5289: }
5290: else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
5291: {
5292:
5293: throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5294: }
5295:
5296: else if (equals(SIGNATURE_UTF_8, signature))
5297: {
5298:
5299: }
5300: else if (equals(SIGNATURE_UTF_8_BOM, signature))
5301: {
5302: in.read();
5303: in.read();
5304: in.read();
5305: setInputEncoding("UTF-8");
5306: encodingDetected = true;
5307: }
5308: }
5309:
5310: private static boolean equals(int[] b1, int[] b2)
5311: {
5312: for (int i = 0; i < b1.length; i++)
5313: {
5314: if (b1[i] != b2[i])
5315: return false;
5316: }
5317: return true;
5318: }
5319:
5320: void setInputEncoding(String encoding)
5321: throws IOException
5322: {
5323: if (encoding.equals(inputEncoding))
5324: return;
5325: if ("UTF-16".equalsIgnoreCase(encoding) &&
5326: inputEncoding.startsWith("UTF-16"))
5327: return;
5328: if (encodingDetected)
5329: throw new UnsupportedEncodingException("document is not in its " +
5330: "declared encoding " +
5331: inputEncoding +
5332: ": " + encoding);
5333: inputEncoding = encoding;
5334: finalizeEncoding();
5335: }
5336:
5337: void finalizeEncoding()
5338: throws IOException
5339: {
5340: if (reader != null)
5341: return;
5342: reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
5343: unicodeReader = new UnicodeReader(reader);
5344: mark(1);
5345: }
5346:
5347: }
5348:
5349: }