#!/usr/bin/perl
#
# Copyright (c) 2005 Matthias Bauer
#
# This code is published under the MIT license
# (http://www.opensource.org/licenses/mit-license.php)
#
# Convert an HTML file with links to a YAML file for inclusion
# in Rubric (http://search.cpan.org/dist/Rubric/)
# or de.lirio.us (http://de.lirio.us/code).
# Works well for lynx bookmarks.
# If the input is of DOCTYPE NETSCAPE-Bookmark-file-1
# the additional attributes (id, add_date, last_modified)
# are passed on to Rubric.
#
# Usage: the HTML is read via the diamond operator (a file named on the
# command line, or STDIN) and the YAML document is written to STDOUT.

use strict;
use warnings "all";

use HTML::TokeParser;
use YAML;

# Read the input in a single slurp; $/ is only undefined inside the block.
my $bookmarks = do { local $/; <> };

# Empty input yields undef; normalise so the parser gets a defined string.
$bookmarks = '' unless defined $bookmarks;

my @entries;

# Fallback counter for links that do not carry their own 'id' attribute.
my $id = 1;

# HTML::TokeParser reports constructor failures through $!, not $@.
my $p = HTML::TokeParser->new(\$bookmarks)
    or die "Cannot create HTML parser: $!";

# The parser lowercases tag names, so matching "a" also covers <A ...>.
while (my $t = $p->get_tag("a")) {
    my $attr = $t->[1];    # hash ref of the tag's (lowercased) attributes
    my %lh;

    $lh{href} = $attr->{href}
        or die "'a' tag without href in bookmarks";

    # Netscape and older Mozilla Bookmarks have
    # 'id', 'add_date' and 'last_modified' attributes.
    # Missing values fall back to "now", the creation time, and a
    # running counter respectively.
    $lh{created}  = $attr->{add_date}      || scalar time;
    $lh{modified} = $attr->{last_modified} || $lh{created};
    $lh{id}       = $attr->{id}            || $id++;

    # The text between <a> and </a> is the title.  Read up to the closing
    # tag so that nested markup (<b>, <i>, ...) inside the link does not
    # truncate the title; also stop at the next <a> so that an unclosed
    # anchor cannot swallow the links that follow it.
    my $title = $p->get_text("/a", "a");
    $lh{description} = $title;

    $lh{tags} = [];

    push @entries, \%lh;
}

print YAML::Dump(\@entries);