# Serializes a stream of treewalker tokens into an HTML string.
#
# Every token is indexed with :type and, depending on the type, :name and
# :data. Recognised types are :Doctype, :Characters, :SpaceCharacters,
# :StartTag, :EmptyTag, :EndTag and :Comment; the :data of any other token
# is handed to serialize_error.
#
# The optional filters (meta charset injection, whitespace stripping,
# sanitizing, optional tag omission) are wrapped around +treewalker+
# according to the serializer's instance variables before serialization.
#
# @param treewalker [#each] yields the tokens to serialize
# @param encoding [String, nil] target encoding name; when given and not
#   'utf-8', the joined output is transcoded from UTF-8 to this encoding
# @return [String] the serialized markup
#
# Side effects: resets @errors, and reports problems found while
# serializing through serialize_error.
def serialize(treewalker, encoding=nil)
  in_cdata = false
  @errors = []

  # Filters are required lazily so only the enabled ones get loaded.
  if encoding && @inject_meta_charset
    require 'html5/filters/inject_meta_charset'
    treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
  end

  if @strip_whitespace
    require 'html5/filters/whitespace'
    treewalker = Filters::WhitespaceFilter.new(treewalker)
  end

  if @sanitize
    require 'html5/filters/sanitizer'
    treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
  end

  if @omit_optional_tags
    require 'html5/filters/optionaltags'
    treewalker = Filters::OptionalTagFilter.new(treewalker)
  end

  # Characters whose presence forces an attribute value to be quoted.
  # Built once instead of once per attribute.
  must_quote = SPACE_CHARACTERS + %w(< > " ')

  result = []
  treewalker.each do |token|
    type = token[:type]

    case type
    when :Doctype
      result << "<!DOCTYPE %s>" % token[:name]

    when :Characters, :SpaceCharacters
      if type == :SpaceCharacters || in_cdata
        # Text inside a CDATA/RCDATA element is emitted verbatim, so a
        # literal "</" in it is reported as an error.
        if in_cdata && token[:data].include?("</")
          serialize_error("Unexpected </ in CDATA")
        end
        result << token[:data]
      else
        result << escape(token[:data])
      end

    when :StartTag, :EmptyTag
      name = token[:name]
      if RCDATA_ELEMENTS.include?(name) && !@escape_rcdata
        in_cdata = true
      elsif in_cdata
        serialize_error(_("Unexpected child element of a CDATA element"))
      end

      attributes = []
      # Attributes are emitted in sorted order for deterministic output.
      token[:data].to_a.sort.each do |k, v|
        attributes << ' ' << k

        # A minimized boolean attribute is written as a bare name.
        next if @minimize_boolean_attributes &&
                ((BOOLEAN_ATTRIBUTES[name] || []).include?(k) ||
                 BOOLEAN_ATTRIBUTES[:global].include?(k))

        attributes << "="

        # The quoting decision is taken on the raw value, before escaping.
        quote_attr = @quote_attr_values || v.empty? ||
                     must_quote.any? { |c| v.include?(c) }

        v = v.gsub("&", "&amp;")
        v = v.gsub("<", "&lt;") if @escape_lt_in_attrs

        unless quote_attr
          attributes << v
          next
        end

        quote_char = @quote_char
        if @use_best_quote_char
          # Prefer the quote character that does not occur in the value.
          if v.index("'") && !v.index('"')
            quote_char = '"'
          elsif v.index('"') && !v.index("'")
            quote_char = "'"
          end
        end

        # Escape only the quote character actually used as delimiter.
        v = if quote_char == "'"
              v.gsub("'", "&#39;")
            else
              v.gsub('"', "&quot;")
            end
        attributes << quote_char << v << quote_char
      end

      if VOID_ELEMENTS.include?(name) && @use_trailing_solidus
        attributes << (@space_before_trailing_solidus ? " /" : "/")
      end
      result << "<%s%s>" % [name, attributes.join('')]

    when :EndTag
      name = token[:name]
      if RCDATA_ELEMENTS.include?(name)
        in_cdata = false
      elsif in_cdata
        serialize_error(_("Unexpected child element of a CDATA element"))
      end
      result << "</#{name}>"

    when :Comment
      data = token[:data]
      serialize_error(_("Comment contains --")) if data.index("--")
      result << "<!--%s-->" % data

    else
      serialize_error(token[:data])
    end
  end

  output = result.join('')
  return output unless encoding && encoding != 'utf-8'

  if output.respond_to?(:encode)
    # Treat the joined output as UTF-8 and transcode it to the target.
    output.encode(encoding, 'utf-8')
  else
    # Fallback for interpreters without String#encode.
    require 'iconv'
    Iconv.iconv(encoding, 'utf-8', output).first
  end
end