#!/usr/bin/perl
#
# Import a given HTML file into Joomla
#
# Copyright (C) 2007 Paco Hope
# Distributed under the same license as Perl itself.
# (See the bottom of this file)
#
# Original from http://paco.to/?p=191
#
# Usage: pass exactly one argument, a readable directory.  Every plain file
# directly inside that directory is inserted as one article.
#
use strict;
use warnings;
use HTML::Parser;
use POSIX qw(strftime);
use DBI;
use DBD::mysql;

# Here's the MySQL database stuff you need to configure
$db::user      = "dbuser";
$db::passwd    = "dbpass";
$db::database  = "joomla";
$db::hostname  = "localhost";
$db::port      = "3306";
$db::tablename = "jos_content";

# state for all articles (1=published)
$j::state = 1;

# numeric Joomla section and category where you want the articles inserted
$j::section  = 1;
$j::category = 1;

# numeric creator ID (62 = admin) for all articles
$j::creator = 62;

###########
### No need to change anything below here
###########

# Default title for our articles, if one isn't defined in the HTML
$j::default_title = "Article";
$j::title         = $j::default_title;

# Start-tag handler, installed with the argspec "tagname,self".
# This first bit is right out of the HTML::Parser perldoc.
#
# On the opening <title> tag it installs two further handlers on the parser:
# a text handler that stores the decoded text in $j::title, and an end-tag
# handler that stops parsing at </title>.  Every other start tag is ignored.
sub title_handler {
    my ( $tagname, $parser ) = @_;
    return if $tagname ne "title";

    $parser->handler( text => sub { $j::title = shift }, "dtext" );
    $parser->handler(
        end => sub {
            my ( $end_tagname, $self ) = @_;
            $self->eof if $end_tagname eq "title";
        },
        "tagname,self"
    );
    return;
}

# Given a file name:
#   Parse it for its <title>
#   Get its date from the filesystem
#   Insert it into the Joomla Database
#
# Reads the globals $j::state, $j::section, $j::category, $j::creator and the
# prepared statement $db::sth; overwrites $j::title.  A file that cannot be
# stat'ed, opened or read is reported with warn() and skipped, so one bad file
# does not put an empty article in the database or abort the whole run.
sub insertFile {
    my $file = shift;

    # Get the size and mod time of the file, so we can set the creation time
    # of the Joomla article to that time (elements 7 and 9 of stat's list).
    my ( $size, $mtime ) = ( stat($file) )[ 7, 9 ];
    if ( !defined $mtime ) {
        warn "can't stat $file: $!";
        return;
    }

    # make a MySQL compatible date (same output as "%F %T", spelled out so it
    # does not depend on C99 strftime extensions)
    my $mysqlDate = strftime( "%Y-%m-%d %H:%M:%S", localtime($mtime) );

    # Open the file and stick its entire contents into $htmlBody
    my $htmlfile;
    if ( !open $htmlfile, '<', $file ) {
        warn "can't open $file: $!";
        return;
    }
    my $htmlBody = "";
    my $numread  = read $htmlfile, $htmlBody, $size;
    close $htmlfile;
    if ( !defined $numread ) {
        warn "read error on $file: $!";
        return;
    }
    if ( $numread != $size ) {
        warn "short read on $file ($numread instead of $size )";
    }

    # Start from the default so that a file without a <title> does not
    # inherit the title of the previously imported file.
    $j::title = $j::default_title;

    my $p = HTML::Parser->new( api_version => 3 );

    # Deliver the title text in one piece even when it straddles two of the
    # chunks that parse_file() feeds to the parser.
    $p->unbroken_text(1);
    $p->handler( start => \&title_handler, "tagname,self" );
    $p->parse_file($file);

    print "Title: \"$j::title\"\t";
    print "Date: \"$mysqlDate\"\n";

    # Bound in placeholder order: title, title_alias, introtext, state,
    # sectionid, catid, created, created_by, publish_up.
    $db::sth->execute(
        $j::title,   $j::title,    $htmlBody,
        $j::state,   $j::section,  $j::category,
        $mysqlDate,  $j::creator,  $mysqlDate
    );
    return;
}

# Insert every plain file found directly inside the given directory.
# Subdirectories and other non-file entries are skipped.  Dies if the
# directory cannot be opened.
sub processDir {
    my $dir = shift;

    opendir( my $dh, $dir ) or die "can't opendir $dir: $!";
    while ( defined( my $entry = readdir($dh) ) ) {
        next unless -f "$dir/$entry";
        print "$entry ";
        insertFile("$dir/$entry");
    }
    closedir $dh;
    return;
}

if ( @ARGV != 1 ) {
    die "need a directory name ($#ARGV)";
}
else {
    $j::dir = $ARGV[0];
    if ( !-r $j::dir ) {
        die "can't open $j::dir";
    }
    if ( !-d $j::dir ) {
        die "$j::dir is not a directory";
    }
}

# The configured port is part of the DSN so that $db::port actually takes
# effect.
$db::dsn =
  "DBI:mysql:database=$db::database;host=$db::hostname;port=$db::port";
$db::dbh = DBI->connect( $db::dsn, $db::user, $db::passwd )
  or die "can't connect to $db::dsn: $DBI::errstr";

# The jos_content schema (as of 1.0.15)
# `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
# `title` varchar(100) NOT NULL DEFAULT '',
# `title_alias` varchar(100) NOT NULL DEFAULT '',
# `introtext` mediumtext NOT NULL,
# `fulltext` mediumtext NOT NULL,
# `state` tinyint(3) NOT NULL DEFAULT '0',
# `sectionid` int(11) unsigned NOT NULL DEFAULT '0',
# `mask` int(11) unsigned NOT NULL DEFAULT '0',
# `catid` int(11) unsigned NOT NULL DEFAULT '0',
# `created` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
# `created_by` int(11) unsigned NOT NULL DEFAULT '0',
# `created_by_alias` varchar(100) NOT NULL DEFAULT '',
# `modified` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
# `modified_by` int(11) unsigned NOT NULL DEFAULT '0',
# `checked_out` int(11) unsigned NOT NULL DEFAULT '0',
# `checked_out_time` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
# `publish_up` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
# `publish_down` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
# `images` text NOT NULL,
# `urls` text NOT NULL,
# `attribs` text NOT NULL,
# `version` int(11) unsigned NOT NULL DEFAULT '1',
# `parentid` int(11) unsigned NOT NULL DEFAULT '0',
# `ordering` int(11) NOT NULL DEFAULT '0',
# `metakey` text NOT NULL,
# `metadesc` text NOT NULL,
# `access` int(11) unsigned NOT NULL DEFAULT '0',
# `hits` int(11) unsigned NOT NULL DEFAULT '0'

# Now build up the query.  The VALUES list is positional: one value per
# column of the schema above, in the same order.
my $q = "INSERT INTO `$db::tablename` VALUES ";

# first int is the autoincrement field. We assume that will be set by MySQL
# date: 2007-07-04 21:07:51
$q .= "(null, ?, ?, ?, '', ?, ?, 0, ?, ?, ?, '', '0000-00-00 00:00:00', ";
$q .= "0, 0, '0000-00-00 00:00:00', ?, '0000-00-00 00:00:00', '', '', ";

# The doubled backslashes put a literal \n into the SQL text (the `attribs`
# value).
$q .= "'pageclass_sfx=\\nback_button=\\nitem_title=1\\nlink_titles=\\nintrotext=1\\n";
$q .= "section=0\\nsection_link=0\\ncategory=0\\ncategory_link=0\\nrating=\\nauthor=\\n";
$q .= "createdate=\\nmodifydate=\\npdf=\\nprint=\\nemail=\\nkeyref=\\ndocbook_type=', ";
$q .= "1, 0, 1, '', '', 0, 0)";

# Prepare the query once. We'll execute it many times.
$db::sth = $db::dbh->prepare($q)
  or die "can't prepare insert statement: " . $db::dbh->errstr;

print "processing '$j::dir'\n";
processDir($j::dir);
$db::dbh->disconnect;

# License Terms for this file
# This is the Perl Artistic License from
# http://www.perl.com/pub/a/language/misc/Artistic.html
#
# Preamble
#
# The intent of this document is to state the conditions under which a Package
# may be copied, such that the Copyright Holder maintains some semblance of
# artistic control over the development of the package, while giving the users
# of the package the right to use and distribute the Package in a more-or-less
# customary fashion, plus the right to make reasonable modifications.
#
# Definitions
#
# "Package" refers to the collection of files distributed by the Copyright
# Holder, and derivatives of that collection of files created through textual
# modification.
#
# "Standard Version" refers to such a Package if it has not been modified, or
# has been modified in accordance with the wishes of the Copyright Holder as
# specified below.
#
# "Copyright Holder" is whoever is named in the copyright or copyrights for the
# package.
#
# "You" is you, if you're thinking about copying or distributing this Package.
#
# "Reasonable copying fee" is whatever you can justify on the basis of media
# cost, duplication charges, time of people involved, and so on. (You will not
# be required to justify it to the Copyright Holder, but only to the computing
# community at large as a market that must bear the fee.)
#
# "Freely Available" means that no fee is charged for the item itself, though
# there may be fees involved in handling the item. It also means that recipients
# of the item may redistribute it under the same conditions they received it.
#
# 1. You may make and give away verbatim copies of the source form of the
# Standard Version of this Package without restriction, provided that you
# duplicate all of the original copyright notices and associated disclaimers.
#
# 2.
# You may apply bug fixes, portability fixes and other modifications derived
# from the Public Domain or from the Copyright Holder. A Package modified in
# such a way shall still be considered the Standard Version.
#
# 3. You may otherwise modify your copy of this Package in any way, provided
# that you insert a prominent notice in each changed file stating how and when
# you changed that file, and provided that you do at least ONE of the following:
#
# 1. place your modifications in the Public Domain or otherwise make them
# Freely Available, such as by posting said modifications to Usenet or an
# equivalent medium, or placing the modifications on a major archive site such
# as uunet.uu.net, or by allowing the Copyright Holder to include your
# modifications in the Standard Version of the Package.
#
# 2. use the modified Package only within your corporation or organization.
#
# 3. rename any non-standard executables so the names do not conflict with
# standard executables, which must also be provided, and provide a separate
# manual page for each non-standard executable that clearly documents how it
# differs from the Standard Version.
#
# 4. make other distribution arrangements with the Copyright Holder.
#
# 4. You may distribute the programs of this Package in object code or
# executable form, provided that you do at least ONE of the following:
#
# 1. distribute a Standard Version of the executables and library files,
# together with instructions (in the manual page or equivalent) on where to get
# the Standard Version.
#
# 2. accompany the distribution with the machine-readable source of the Package
# with your modifications.
#
# 3. give non-standard executables non-standard names, and clearly document the
# differences in manual pages (or equivalent), together with instructions on
# where to get the Standard Version.
#
# 4. make other distribution arrangements with the Copyright Holder.
#
# 5.
# You may charge a reasonable copying fee for any distribution of this
# Package. You may charge any fee you choose for support of this Package. You
# may not charge a fee for this Package itself. However, you may distribute this
# Package in aggregate with other (possibly commercial) programs as part of a
# larger (possibly commercial) software distribution provided that you do not
# advertise this Package as a product of your own. You may embed this Package's
# interpreter within an executable of yours (by linking); this shall be
# construed as a mere form of aggregation, provided that the complete Standard
# Version of the interpreter is so embedded.
#
# 6. The scripts and library files supplied as input to or produced as output
# from the programs of this Package do not automatically fall under the
# copyright of this Package, but belong to whomever generated them, and may be
# sold commercially, and may be aggregated with this Package. If such scripts or
# library files are aggregated with this Package via the so-called "undump" or
# "unexec" methods of producing a binary executable image, then distribution of
# such an image shall neither be construed as a distribution of this Package nor
# shall it fall under the restrictions of Paragraphs 3 and 4, provided that you
# do not represent such an executable image as a Standard Version of this
# Package.
#
# 7. C subroutines (or comparably compiled subroutines in other languages)
# supplied by you and linked into this Package in order to emulate subroutines
# and variables of the language defined by this Package shall not be considered
# part of this Package, but are the equivalent of input as in Paragraph 6,
# provided these subroutines do not change the language in any way that would
# cause it to fail the regression tests for the language.
#
# 8.
# Aggregation of this Package with a commercial distribution is always
# permitted provided that the use of this Package is embedded; that is, when no
# overt attempt is made to make this Package's interfaces visible to the end
# user of the commercial distribution. Such use shall not be construed as a
# distribution of this Package.
#
# 9. The name of the Copyright Holder may not be used to endorse or promote
# products derived from this software without specific prior written permission.
#
# 10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# The End