# HTML Outliner 0.5 # http://termos.vemod.net/html-outliner # # See the tests at the bottom for usage examples. # # Copyright Christoffer Sawicki 2006-2008. # Licensed under the same terms as Ruby. # # Please send bug reports and improvements to # christoffer.sawicki@gmail.com. # # Noteworthy changes: # * 0.5: Should now handle malformed HTML gracefully. # * 0.4: An element can only have one id, # so pass the current id to the slugifier. require "hpricot" class SimpleTree attr_accessor :root, :subtrees def initialize(root, subtrees = []) @root = root @subtrees = subtrees end end class HTMLOutliner def initialize(hpricot_doc) @doc = hpricot_doc end def add_header_anchors!(slugifier = method(:default_slugifier)) headers.each_with_index do |header, index| header["id"] = slugifier.call(header.inner_html, index, header["id"]) end end def default_slugifier(string, index, current_id) # Just overwrite the current id "section-" + string.downcase.gsub(/\s+/, "_") end def headers find_headers(@doc.children) end def outline tree_stack = [] headers.inject([]) do |result, header| new_tree = SimpleTree.new(header) until tree_stack.empty? || tree_stack.last.root.name < header.name tree_stack.pop end if tree_stack.empty? tree_stack.push(new_tree) next(result + [ new_tree ]) else tree_stack.last.subtrees.push(new_tree) tree_stack.push(new_tree) next(result) end end end private def find_headers(nodes) nodes.inject([]) do |sum, node| if node.is_a?(Hpricot::Elem) && node.name.match(/^h[1-6]$/i) sum + [ node ] + find_headers(node.children) elsif node.respond_to?(:children) sum + find_headers(node.children) else sum end end end end if __FILE__ == $0 require "test/unit" class HTMLOutlinerTest < Test::Unit::TestCase def test_construction_with_root assert_nothing_raised do HTMLOutliner.new(Hpricot("")).headers end end def test_construction_without_root assert_nothing_raised do HTMLOutliner.new(Hpricot("")).headers end end def test_headers input = <<-END