1:
37:
38: package ;
39:
40: import ;
41: import ;
42: import ;
43: import ;
44: import ;
45: import ;
46: import ;
47: import ;
48: import ;
49: import ;
50: import ;
51: import ;
52: import ;
53: import ;
54: import ;
55: import ;
56: import ;
57: import ;
58: import ;
59: import ;
60: import ;
61:
62:
67: public class StreamSerializer
68: {
69:
70: static final int SPACE = 0x20;
71: static final int BANG = 0x21;
72: static final int APOS = 0x27;
73: static final int SLASH = 0x2f;
74: static final int BRA = 0x3c;
75: static final int KET = 0x3e;
76: static final int EQ = 0x3d;
77:
78:
/**
 * Maps an HTML element name (lower case) to the set of attribute names
 * that are boolean attributes on that element. Consulted by
 * isHTMLBoolean.
 */
static final Map HTML_BOOLEAN_ATTRIBUTES = new HashMap();
static
{
  // Each row is { element names, attribute names }. All elements listed in
  // one row share a single set instance.
  final String[][][] rows =
  {
    { { "area" }, { "nohref" } },
    { { "img" }, { "ismap" } },
    { { "object" }, { "declare" } },
    { { "hr" }, { "noshade" } },
    { { "dl", "ol", "ul", "dir", "menu" }, { "compact" } },
    { { "input" }, { "checked", "disabled", "readonly", "ismap" } },
    { { "select" }, { "multiple", "disabled" } },
    { { "optgroup" }, { "disabled" } },
    { { "option" }, { "selected", "disabled" } },
    { { "textarea" }, { "disabled", "readonly" } },
    { { "button" }, { "disabled" } },
    { { "th", "td" }, { "nowrap" } },
    { { "frame" }, { "noresize" } },
    { { "script" }, { "defer" } }
  };
  for (int r = 0; r < rows.length; r++)
    {
      final String[] elementNames = rows[r][0];
      final String[] attributeNames = rows[r][1];
      final HashSet attributes = new HashSet();
      for (int a = 0; a < attributeNames.length; a++)
        attributes.add(attributeNames[a]);
      for (int e = 0; e < elementNames.length; e++)
        HTML_BOOLEAN_ATTRIBUTES.put(elementNames[e], attributes);
    }
}
153:
154:
155: static final HashSet HTML_URIS = new HashSet();
156: static {
157: HTML_URIS.add("http://www.w3.org/1999/xhtml");
158: }
159:
/** Name of the output encoding; interned by the constructor. */
protected final String encoding;
/** Charset looked up from the encoding name. */
final Charset charset;
/** Encoder created from the charset; used by encodeText. */
final CharsetEncoder encoder;
/** Output method, compared against the Stylesheet.OUTPUT_* constants. */
final int mode;
/** Stack of namespace contexts (URI to prefix maps), innermost first. */
final LinkedList namespaces;
/** Line terminator written after prologue-level constructs. */
protected String eol;
/** Names of elements whose text children are written as CDATA sections. */
Collection cdataSectionElements = Collections.EMPTY_SET;

/** When true, attributes that are not explicitly specified are skipped. */
protected boolean discardDefaultContent;
/** When false, no XML declaration is written in XML mode. */
protected boolean xmlDeclaration = true;

/** Set after the HTML Content-Type meta element has been filled in. */
private boolean htmlEncoded;

/**
 * Creates an XML-mode serializer with the default encoding and the
 * platform line separator.
 */
public StreamSerializer()
{
  this(Stylesheet.OUTPUT_XML, null, null);
}

/**
 * Creates an XML-mode serializer for the given encoding, using the
 * platform line separator.
 *
 * @param encoding the charset name, or null for the default
 */
public StreamSerializer(String encoding)
{
  this(Stylesheet.OUTPUT_XML, encoding, null);
}

/**
 * Creates a serializer.
 *
 * @param mode the output method (one of the Stylesheet.OUTPUT_* values)
 * @param encoding the charset name; when null, ISO-8859-1 is used for
 * HTML output and UTF-8 otherwise
 * @param eol the line terminator; when null, the "line.separator" system
 * property is used
 */
public StreamSerializer(int mode, String encoding, String eol)
{
  this.mode = mode;
  final String effectiveEncoding;
  if (encoding != null)
    effectiveEncoding = encoding;
  else if (mode == Stylesheet.OUTPUT_HTML)
    effectiveEncoding = "ISO-8859-1";
  else
    effectiveEncoding = "UTF-8";
  this.encoding = effectiveEncoding.intern();
  this.charset = Charset.forName(this.encoding);
  this.encoder = this.charset.newEncoder();
  if (eol == null)
    eol = System.getProperty("line.separator");
  this.eol = eol;
  this.namespaces = new LinkedList();
}
195:
196: void setCdataSectionElements(Collection c)
197: {
198: cdataSectionElements = c;
199: }
200:
201: public void serialize(final Node node, final OutputStream out)
202: throws IOException
203: {
204: serialize(node, out, false);
205: }
206:
207: void serialize(Node node, final OutputStream out,
208: boolean convertToCdata)
209: throws IOException
210: {
211: while (node != null)
212: {
213: Node next = node.getNextSibling();
214: doSerialize(node, out, convertToCdata);
215: node = next;
216: }
217: }
218:
219: private void doSerialize(final Node node, final OutputStream out,
220: boolean convertToCdata)
221: throws IOException
222: {
223: if (out == null)
224: throw new NullPointerException("no output stream");
225: htmlEncoded = false;
226: String value, prefix;
227: Node children;
228: String uri = node.getNamespaceURI();
229: short nt = node.getNodeType();
230: if (convertToCdata && nt == Node.TEXT_NODE)
231: nt = Node.CDATA_SECTION_NODE;
232: switch (nt)
233: {
234: case Node.ATTRIBUTE_NODE:
235: prefix = node.getPrefix();
236: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri) ||
237: XMLConstants.XMLNS_ATTRIBUTE.equals(prefix) ||
238: (prefix != null && prefix.startsWith("xmlns:")))
239: {
240: String nsuri = node.getNodeValue();
241: if (isDefined(nsuri, prefix))
242: break;
243: String name = node.getLocalName();
244: if (name == null)
245: {
246:
247: name = node.getNodeName();
248: int ci = name.indexOf(':');
249: if (ci != -1)
250: name = name.substring(ci + 1);
251: }
252: define(nsuri, name);
253: }
254: else if (uri != null && !isDefined(uri, prefix))
255: {
256: prefix = define(uri, prefix);
257: String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix;
258: out.write(SPACE);
259: out.write(encodeText(nsname));
260: out.write(EQ);
261: String nsvalue = "\"" + encode(uri, true, true) + "\"";
262: out.write(nsvalue.getBytes(encoding));
263: }
264: out.write(SPACE);
265: String a_nodeName = node.getNodeName();
266: out.write(encodeText(a_nodeName));
267: String a_nodeValue = node.getNodeValue();
268: if (mode == Stylesheet.OUTPUT_HTML &&
269: a_nodeName.equals(a_nodeValue) &&
270: isHTMLBoolean((Attr) node, a_nodeName))
271: break;
272: out.write(EQ);
273: value = "\"" + encode(a_nodeValue, true, true) + "\"";
274: out.write(encodeText(value));
275: break;
276: case Node.ELEMENT_NODE:
277: pushNamespaceContext();
278: value = node.getNodeName();
279: out.write(BRA);
280: out.write(encodeText(value));
281: prefix = node.getPrefix();
282: if (uri != null && !isDefined(uri, prefix))
283: {
284: prefix = define(uri, prefix);
285: String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix;
286: out.write(SPACE);
287: out.write(encodeText(nsname));
288: out.write(EQ);
289: String nsvalue = "\"" + encode(uri, true, true) + "\"";
290: out.write(encodeText(nsvalue));
291: }
292: NamedNodeMap attrs = node.getAttributes();
293: if (attrs != null)
294: {
295: int len = attrs.getLength();
296: for (int i = 0; i < len; i++)
297: {
298: Attr attr = (Attr) attrs.item(i);
299: if (discardDefaultContent && !attr.getSpecified())
300: {
301:
302: }
303: else
304: serialize(attr, out, false);
305: }
306: }
307: convertToCdata = cdataSectionElements.contains(value);
308: children = node.getFirstChild();
309: if (children == null)
310: {
311: out.write(SLASH);
312: out.write(KET);
313: }
314: else
315: {
316: out.write(KET);
317: serialize(children, out, convertToCdata);
318: out.write(BRA);
319: out.write(SLASH);
320: out.write(encodeText(value));
321: out.write(KET);
322: }
323: popNamespaceContext();
324: break;
325: case Node.TEXT_NODE:
326: value = node.getNodeValue();
327: if (!"yes".equals(node.getUserData("disable-output-escaping")))
328: value = encode(value, false, false);
329: out.write(encodeText(value));
330: break;
331: case Node.CDATA_SECTION_NODE:
332: value = node.getNodeValue();
333:
334:
335: int bbk = value.indexOf("]]>");
336: while (bbk != -1)
337: {
338: String head = value.substring(0, bbk + 2);
339: out.write(encodeText("<![CDATA[" + head + "]]>"));
340: value = value.substring(bbk + 2);
341: bbk = value.indexOf("]]>");
342: }
343:
344: out.write(encodeText("<![CDATA[" + value + "]]>"));
345: break;
346: case Node.COMMENT_NODE:
347: value = "<!--" + node.getNodeValue() + "-->";
348: out.write(encodeText(value));
349: Node cp = node.getParentNode();
350: if (cp != null && cp.getNodeType() == Node.DOCUMENT_NODE)
351: out.write(encodeText(eol));
352: break;
353: case Node.DOCUMENT_NODE:
354: case Node.DOCUMENT_FRAGMENT_NODE:
355: if (mode == Stylesheet.OUTPUT_XML)
356: {
357: if ("UTF-16".equalsIgnoreCase(encoding))
358: {
359: out.write(0xfe);
360: out.write(0xff);
361: }
362: if (!"yes".equals(node.getUserData("omit-xml-declaration")) &&
363: xmlDeclaration)
364: {
365: Document doc = (node instanceof Document) ?
366: (Document) node : null;
367: String version = (doc != null) ? doc.getXmlVersion() : null;
368: if (version == null)
369: version = (String) node.getUserData("version");
370: if (version == null)
371: version = "1.0";
372: out.write(BRA);
373: out.write(0x3f);
374: out.write("xml version=\"".getBytes("US-ASCII"));
375: out.write(version.getBytes("US-ASCII"));
376: out.write(0x22);
377: if (!("UTF-8".equalsIgnoreCase(encoding)))
378: {
379: out.write(" encoding=\"".getBytes("US-ASCII"));
380: out.write(encoding.getBytes("US-ASCII"));
381: out.write(0x22);
382: }
383: if ((doc != null && doc.getXmlStandalone()) ||
384: "yes".equals(node.getUserData("standalone")))
385: out.write(" standalone=\"yes\"".getBytes("US-ASCII"));
386: out.write(0x3f);
387: out.write(KET);
388: out.write(encodeText(eol));
389: }
390:
391:
392: }
393: else if (mode == Stylesheet.OUTPUT_HTML)
394: {
395:
396: String mediaType = (String) node.getUserData("media-type");
397: if (mediaType == null)
398: mediaType = "text/html";
399: String contentType = mediaType + "; charset=" +
400: ((encoding.indexOf(' ') != -1) ?
401: "\"" + encoding + "\"" :
402: encoding);
403: Document doc = (node instanceof Document) ? (Document) node :
404: node.getOwnerDocument();
405: Node html = null;
406: for (Node ctx = node.getFirstChild(); ctx != null;
407: ctx = ctx.getNextSibling())
408: {
409: if (ctx.getNodeType() == Node.ELEMENT_NODE &&
410: isHTMLElement(ctx, "html"))
411: {
412: html = ctx;
413: break;
414: }
415: }
416: if (html != null)
417: {
418: Node head = null;
419: for (Node ctx = html.getFirstChild(); ctx != null;
420: ctx = ctx.getNextSibling())
421: {
422: if (isHTMLElement(ctx, "head"))
423: {
424: head = ctx;
425: break;
426: }
427: }
428: if (head != null)
429: {
430: Node meta = null;
431: Node metaContent = null;
432: for (Node ctx = head.getFirstChild(); ctx != null;
433: ctx = ctx.getNextSibling())
434: {
435: if (isHTMLElement(ctx, "meta"))
436: {
437: NamedNodeMap metaAttrs = ctx.getAttributes();
438: int len = metaAttrs.getLength();
439: String httpEquiv = null;
440: Node content = null;
441: for (int i = 0; i < len; i++)
442: {
443: Node attr = metaAttrs.item(i);
444: String attrName = attr.getNodeName();
445: if ("http-equiv".equalsIgnoreCase(attrName))
446: httpEquiv = attr.getNodeValue();
447: else if ("content".equalsIgnoreCase(attrName))
448: content = attr;
449: }
450: if ("Content-Type".equalsIgnoreCase(httpEquiv))
451: {
452: meta = ctx;
453: metaContent = content;
454: break;
455: }
456: }
457: }
458: if (meta == null)
459: {
460: meta = doc.createElement("meta");
461:
462: Node first = head.getFirstChild();
463: if (first == null)
464: head.appendChild(meta);
465: else
466: head.insertBefore(meta, first);
467: Node metaHttpEquiv = doc.createAttribute("http-equiv");
468: meta.getAttributes().setNamedItem(metaHttpEquiv);
469: metaHttpEquiv.setNodeValue("Content-Type");
470: }
471: if (metaContent == null)
472: {
473: metaContent = doc.createAttribute("content");
474: meta.getAttributes().setNamedItem(metaContent);
475: }
476: metaContent.setNodeValue(contentType);
477: htmlEncoded = true;
478: }
479: }
480: }
481: children = node.getFirstChild();
482: if (children != null)
483: serialize(children, out, convertToCdata);
484: break;
485: case Node.DOCUMENT_TYPE_NODE:
486: DocumentType doctype = (DocumentType) node;
487: out.write(BRA);
488: out.write(BANG);
489: out.write(encodeText("DOCTYPE "));
490: value = doctype.getNodeName();
491: out.write(encodeText(value));
492: String publicId = doctype.getPublicId();
493: if (publicId != null)
494: {
495: out.write(encodeText(" PUBLIC "));
496: out.write(APOS);
497: out.write(encodeText(publicId));
498: out.write(APOS);
499: }
500: String systemId = doctype.getSystemId();
501: if (systemId != null)
502: {
503: out.write(encodeText(" SYSTEM "));
504: out.write(APOS);
505: out.write(encodeText(systemId));
506: out.write(APOS);
507: }
508: String internalSubset = doctype.getInternalSubset();
509: if (internalSubset != null)
510: {
511: out.write(encodeText(internalSubset));
512: }
513: out.write(KET);
514: out.write(eol.getBytes(encoding));
515: break;
516: case Node.ENTITY_REFERENCE_NODE:
517: value = "&" + node.getNodeValue() + ";";
518: out.write(encodeText(value));
519: break;
520: case Node.PROCESSING_INSTRUCTION_NODE:
521: value = "<?" + node.getNodeName() + " " + node.getNodeValue() + "?>";
522: out.write(encodeText(value));
523: Node pp = node.getParentNode();
524: if (pp != null && pp.getNodeType() == Node.DOCUMENT_NODE)
525: {
526: out.write(encodeText(eol));
527: }
528: break;
529: default:
530: System.err.println("Unhandled node type: "+nt);
531: }
532: }
533:
534: boolean isHTMLElement(Node node, String name)
535: {
536: if (node.getNodeType() != Node.ELEMENT_NODE)
537: return false;
538: String localName = node.getLocalName();
539: if (localName == null)
540: localName = node.getNodeName();
541: if (!name.equalsIgnoreCase(localName))
542: return false;
543: String uri = node.getNamespaceURI();
544: return (uri == null || HTML_URIS.contains(uri));
545: }
546:
547: boolean isDefined(String uri, String prefix)
548: {
549: if (XMLConstants.XML_NS_URI.equals(uri))
550: return "xml".equals(prefix);
551: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri))
552: return "xmlns".equals(prefix);
553: if (prefix == null)
554: prefix = "";
555: for (Iterator i = namespaces.iterator(); i.hasNext(); )
556: {
557: Map ctx = (Map) i.next();
558: String val = (String) ctx.get(uri);
559: if (val != null && val.equals(prefix))
560: return true;
561: }
562: return false;
563: }
564:
565: void pushNamespaceContext()
566: {
567: namespaces.addFirst(new HashMap());
568: }
569:
570: String define(String uri, String prefix)
571: {
572: if (namespaces.isEmpty())
573: return prefix;
574: HashMap ctx = (HashMap) namespaces.getFirst();
575: while (ctx.containsValue(prefix))
576: {
577:
578: prefix = prefix + "_";
579: }
580: ctx.put(uri, prefix);
581: return prefix;
582: }
583:
584: void popNamespaceContext()
585: {
586: namespaces.removeFirst();
587: }
588:
589: final byte[] encodeText(String text)
590: throws IOException
591: {
592: encoder.reset();
593: boolean htmlNeedingEncoding =
594: (mode == Stylesheet.OUTPUT_HTML && !htmlEncoded);
595: if (!encoder.canEncode(text) || htmlNeedingEncoding)
596: {
597:
598: StringBuffer buf = new StringBuffer();
599: int len = text.length();
600: for (int i = 0; i < len; i++)
601: {
602: char c = text.charAt(i);
603: if (!encoder.canEncode(c))
604: {
605:
606: String hex = Integer.toHexString((int) c);
607: buf.append("&#x");
608: buf.append(hex);
609: buf.append(';');
610: }
611: else if (htmlNeedingEncoding)
612: {
613: String entityName = getHTMLCharacterEntity(c);
614: if (entityName != null)
615: {
616: buf.append('&');
617: buf.append(entityName);
618: buf.append(';');
619: }
620: else
621: buf.append(c);
622: }
623: else
624: buf.append(c);
625: }
626: text = buf.toString();
627: }
628: ByteBuffer encoded = encoder.encode(CharBuffer.wrap(text));
629: int len = encoded.limit() - encoded.position();
630: if (encoded.hasArray())
631: {
632: byte[] ret = encoded.array();
633: if (ret.length > len)
634: {
635:
636: byte[] ret2 = new byte[len];
637: System.arraycopy(ret, 0, ret2, 0, len);
638: ret = ret2;
639: }
640: return ret;
641: }
642: encoded.flip();
643: byte[] ret = new byte[len];
644: encoded.get(ret, 0, len);
645: return ret;
646: }
647:
648: String encode(String text, boolean encodeCtl, boolean inAttr)
649: {
650: int len = text.length();
651: StringBuffer buf = null;
652: for (int i = 0; i < len; i++)
653: {
654: char c = text.charAt(i);
655: if (c == '<')
656: {
657: if (buf == null)
658: buf = new StringBuffer(text.substring(0, i));
659: buf.append("<");
660: }
661: else if (c == '>')
662: {
663: if (buf == null)
664: buf = new StringBuffer(text.substring(0, i));
665: buf.append(">");
666: }
667: else if (c == '&')
668: {
669: if (mode == Stylesheet.OUTPUT_HTML && (i + 1) < len &&
670: text.charAt(i + 1) == '{')
671: {
672: if (buf != null)
673: buf.append(c);
674: }
675: else
676: {
677: if (buf == null)
678: buf = new StringBuffer(text.substring(0, i));
679: buf.append("&");
680: }
681: }
682: else if (c == '\'' && inAttr)
683: {
684: if (buf == null)
685: buf = new StringBuffer(text.substring(0, i));
686: if (mode == Stylesheet.OUTPUT_HTML)
687:
688: buf.append("'");
689: else
690: buf.append("'");
691: }
692: else if (c == '"' && inAttr)
693: {
694: if (buf == null)
695: buf = new StringBuffer(text.substring(0, i));
696: buf.append(""");
697: }
698: else if (encodeCtl)
699: {
700: if (c < 0x20)
701: {
702: if (buf == null)
703: buf = new StringBuffer(text.substring(0, i));
704: buf.append('&');
705: buf.append('#');
706: buf.append((int) c);
707: buf.append(';');
708: }
709: else if (buf != null)
710: buf.append(c);
711: }
712: else if (buf != null)
713: buf.append(c);
714: }
715: return (buf == null) ? text : buf.toString();
716: }
717:
718: String toString(Node node)
719: {
720: ByteArrayOutputStream out = new ByteArrayOutputStream();
721: try
722: {
723: serialize(node, out);
724: return new String(out.toByteArray(), encoding);
725: }
726: catch (IOException e)
727: {
728: throw new RuntimeException(e.getMessage());
729: }
730: }
731:
732: boolean isHTMLBoolean(Attr attr, String attrName)
733: {
734: attrName = attrName.toLowerCase();
735: Node element = attr.getOwnerElement();
736: String elementName = element.getLocalName();
737: if (elementName == null)
738: {
739: elementName = element.getNodeName();
740: }
741: elementName = elementName.toLowerCase();
742: Collection attributes =
743: (Collection) HTML_BOOLEAN_ATTRIBUTES.get(elementName);
744: return (attributes != null && attributes.contains(attrName));
745: }
746:
747: static String getHTMLCharacterEntity(char c)
748: {
749:
750: switch (c)
751: {
752: case 160: return "nbsp";
753: case 161: return "iexcl";
754: case 162: return "cent";
755: case 163: return "pound";
756: case 164: return "curren";
757: case 165: return "yen";
758: case 166: return "brvbar";
759: case 167: return "sect";
760: case 168: return "uml";
761: case 169: return "copy";
762: case 170: return "ordf";
763: case 171: return "laquo";
764: case 172: return "not";
765: case 173: return "shy";
766: case 174: return "reg";
767: case 175: return "macr";
768: case 176: return "deg";
769: case 177: return "plusmn";
770: case 178: return "sup2";
771: case 179: return "sup3";
772: case 180: return "acute";
773: case 181: return "micro";
774: case 182: return "para";
775: case 183: return "middot";
776: case 184: return "cedil";
777: case 185: return "sup1";
778: case 186: return "ordm";
779: case 187: return "raquo";
780: case 188: return "frac14";
781: case 189: return "frac12";
782: case 190: return "frac34";
783: case 191: return "iquest";
784: case 192: return "Agrave";
785: case 193: return "Aacute";
786: case 194: return "Acirc";
787: case 195: return "Atilde";
788: case 196: return "Auml";
789: case 197: return "Aring";
790: case 198: return "AElig";
791: case 199: return "Ccedil";
792: case 200: return "Egrave";
793: case 201: return "Eacute";
794: case 202: return "Ecirc";
795: case 203: return "Euml";
796: case 204: return "Igrave";
797: case 205: return "Iacute";
798: case 206: return "Icirc";
799: case 207: return "Iuml";
800: case 208: return "ETH";
801: case 209: return "Ntilde";
802: case 210: return "Ograve";
803: case 211: return "Oacute";
804: case 212: return "Ocirc";
805: case 213: return "Otilde";
806: case 214: return "Ouml";
807: case 215: return "times";
808: case 216: return "Oslash";
809: case 217: return "Ugrave";
810: case 218: return "Uacute";
811: case 219: return "Ucirc";
812: case 220: return "Uuml";
813: case 221: return "Yacute";
814: case 222: return "THORN";
815: case 223: return "szlig";
816: case 224: return "agrave";
817: case 225: return "aacute";
818: case 226: return "acirc";
819: case 227: return "atilde";
820: case 228: return "auml";
821: case 229: return "aring";
822: case 230: return "aelig";
823: case 231: return "ccedil";
824: case 232: return "egrave";
825: case 233: return "eacute";
826: case 234: return "ecirc";
827: case 235: return "euml";
828: case 236: return "igrave";
829: case 237: return "iacute";
830: case 238: return "icirc";
831: case 239: return "iuml";
832: case 240: return "eth";
833: case 241: return "ntilde";
834: case 242: return "ograve";
835: case 243: return "oacute";
836: case 244: return "ocirc";
837: case 245: return "otilde";
838: case 246: return "ouml";
839: case 247: return "divide";
840: case 248: return "oslash";
841: case 249: return "ugrave";
842: case 250: return "uacute";
843: case 251: return "ucirc";
844: case 252: return "uuml";
845: case 253: return "yacute";
846: case 254: return "thorn";
847: case 255: return "yuml";
848: default: return null;
849: }
850: }
851:
852: }