package PositLogPlugin::HatenaDiary;

# --------------------------------------------------------
# HatenaDiary.pm:
#      module for retrieving PositLog dynamic sprites
#      from Hatena Diary
#
# Copyright (c) 2006 Hidekazu Kubota (Taro Sosui) All right reserved
#  <taro@summer.nifty.jp>
#   http://positlog.storybook.jp/
#
# --------------------------------------------------------

# --------------------------------------------------------
# This perl module generate a new srprite from HatenaDiary
# --------------------------------------------------------

use strict;
use HTTP::Lite;			# must be locally installed
use Time::Local;
use Storable qw(lock_retrieve lock_nstore);
use Encode qw/encode decode from_to/;
use PositLogConfig;

# prefix of serialized associtation data (url, spriteID)
my $serializedData = "hd_url_spriteid_";

sub getWidth
{
		return 320;
}

sub getCSS
{
		return "hatena_diary_contents.css";
}


sub getType
{
		return "replace";
}

sub clearCache
{
    my ($pageid, $sourceID, $args) = @_;

    my @argsArray = split(/,/, $args);
    my $url = $argsArray[0];
    my $urlenc = $url;
    $urlenc =~ s/([^\w ])/'%' . unpack('H2', $1)/eg;
    $urlenc =~ tr/ /+/;

    my $urlSpriteid = eval{ Storable::lock_retrieve($PositLogConfig::datapath . $pageid . "/dynamic/". $serializedData . $urlenc.".dat")} or {};

    if (exists($urlSpriteid->{$url})) {
				my $sid = $urlSpriteid->{$url};
				unlink $PositLogConfig::datapath . $pageid . "/static/" . $sid.".spr" or return "Delete error.";
				delete $urlSpriteid->{$url};

				if(!eval{Storable::lock_nstore $urlSpriteid, $PositLogConfig::datapath . $pageid . "/dynamic/" . $serializedData.$urlenc .".dat";}){ return "Save error."; }
				return "Succeed.";
    }
    else{
				return "No cache.";
    }
}


sub getSprites
{
    # get (URL of HatenaDiary)
    my ($pageid, $sourceID, $loginid, $loginpass, $args) = @_;

    my @argsArray = split(/,/, $args);
    my $url = $argsArray[0];
    my $usecache = "";
    if(scalar(@argsArray) >= 2)
    {
				# $usecache works if not ""
				$usecache = $argsArray[1];
    }

    my @spritesArray;

    # URL encoding for serialized data file name
    my $urlenc = $url;
    $urlenc =~ s/([^\w ])/'%' . unpack('H2', $1)/eg;
    $urlenc =~ tr/ /+/;

    # get hatena ID
    $url =~ /http:\/\/d.hatena.ne.jp\/(.*?)\//i;
    my $hatenaID = $1;


    # get sprite list
    my $spritesHash = eval{ Storable::lock_retrieve($PositLogConfig::datapath . $pageid . "/sprites.dat")};
    if($@){ warn $@; return \@spritesArray; }
    # get page config
    my $configHash = eval{Storable::lock_retrieve($PositLogConfig::datapath . $pageid . "/config.dat")};
    if($@){ warn $@; return \@spritesArray; }
    # get dynamic sprite table
    my $urlSpriteid = eval{ Storable::lock_retrieve($PositLogConfig::datapath . $pageid . "/dynamic/". $serializedData.$urlenc.".dat")} or {};

    # retrieve web page
    my $http = new HTTP::Lite;

    # success flag for retrieving
    my $httpsuccess = 1;

    # Check HTTP Status code 304
    if (exists($urlSpriteid->{$url})) {
				# spriteID for the requestted url already exists
				my $sid = $urlSpriteid->{$url};
				if(exists($spritesHash->{$sid}))
				{
						my $modifiedtime = $spritesHash->{$sid}{"modified_time"};
						$modifiedtime =~ /(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)/;
						my $sec = $6;	my $min = $5; 	my $hour = $4; 	my $mday = $3; 	my $mon = $2; 	my $year = $1;
						my $modifiedtimeStr = $year . "/" . $mon . "/" . $mday . " " . $hour . ":" . $min . ":" . $sec;
						$sec =~ s/0(\d)/$1/;
						$min =~ s/0(\d)/$1/;
						$hour =~ s/0(\d)/$1/;
						$mday =~ s/0(\d)/$1/;
						$mon =~ s/0(\d)/$1/;
						my $time = timelocal(scalar($sec), scalar($min), scalar($hour), scalar($mday), scalar($mon) - 1, scalar($year));
						my @DoW = qw(Sun Mon Tue Wed Thu Fri Sat);
						my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
						my %MonHash = ("Jan" => 1, "Feb" => 2, "Mar" => 3, "Apr" => 4, "May" => 5, "Jun" => 6, "Jul" => 7,  "Aug" => 8,  "Sep" => 9, "Oct" => 10,  "Nov" => 11, "Dec" => 12);
						my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($time);
						my $modifiedHTTPdate = sprintf("%s, %02d %s %04d %02d:%02d:%02d GMT",
																					 $DoW[$wday],
																					 $mday, $MoY[$mon], $year+1900,
																					 $hour, $min, $sec);

						$http->add_req_header('If-Modified-Since', $modifiedHTTPdate);

						# HatenaDiary does not return 304,
						# however HatenaDiary RSS can return 304.
						
						# HEAD request
						$http->method('HEAD');
						
						# timeout 10sec
						my $req = "";
						eval{
								local $SIG{ALRM} = sub {die "timeout"};
								alarm 10;
								$req = $http->request("http://d.hatena.ne.jp/" . $hatenaID . "/rss");
								alarm 0;
						};
						alarm 0;
						if($@)
						{
								if($@ =~ /timeout/)
								{
										$req = "408";
								}
						}


						# check Not Modified or Serve error
						if($req =~ /304/gi || $req =~ /4\d\d/gi || $req =~ /50\d/gi || $usecache ne "")
						{
								# return cached sprite
								my $spriteContents = eval{ Storable::lock_retrieve($PositLogConfig::datapath . $pageid . "/static/" . $sid.".spr")} or "";
								if($spriteContents ne "")
								{
										push(@spritesArray, {"modified_date" => $modifiedtime, "id" => $sid, "body" => $$spriteContents});
										return \@spritesArray;
								}
						}
				}
    }

    # GET Request
    $http->method('GET');
    # timeout 10sec
    my $req = "";
    eval{
				local $SIG{ALRM} = sub {die "timeout"};
				alarm 10;
				$req = $http->request($url)
						or $httpsuccess = 0;
				alarm 0;
    };
    alarm 0;
    if($@)
    {
				if($@ =~ /timeout/)
				{
						$httpsuccess = 0;
				}
    }


    my $page =  $http->body();

		if($page eq ""){
				$httpsuccess = 0;
		}

    # -----------------------------------
    # parse entry of Hatena Diary
    # (using XML module is more functional,
    #  but needs installing perl module)
    # -----------------------------------

    my $newContents = "";

    # parse day contents 
    my @contentsArray = ();
    foreach my $day ($page =~ /(<div class="day">.*?<!-- google_ad_section_end -->.*?<\/div>)/gis) 
    {
				$day .= "</div>";
				from_to($day, "euc-jp", "utf8");

        # footnote
				$day =~ s/" href="(\/$hatenaID\/.+?#.+?)"/" href="http:\/\/d.hatena.ne.jp$1"/gis;

        # hatena id
				$day =~ s/<a href="\/$hatenaID\//<a href="http:\/\/d.hatena.ne.jp\/$hatenaID\//gis;

				$day =~ s/src="(\/images.*?$hatenaID\/)/src="http:\/\/d.hatena.ne.jp$1/gis;
				
				$day =~ s/\<\!\-\- google\_ad\_section\_end \-\-\>//gi;
				$day =~ s/\<\!\-\- google\_ad\_section\_start \-\-\>//gi;

				push(@contentsArray, $day);
    }

    # parse comment
    my @commentArray = ();
    foreach my $comment ($page =~ /(<div class="comment">.*?<\/div>.*?)<div class="refererlist">/gis) {
				from_to($comment, "euc-jp", "utf8");
				$comment =~ s/href="\/(.*?)\//href="http:\/\/d.hatena.ne.jp\/$1\//gis;
				push(@commentArray, $comment);
    }

    # parse trackback
    my @trackbackArray = ();
    foreach my $tb ($page =~ /(<div class="refererlist">.*?<\/div>.*?<\/div>)/gis) {
				from_to($tb, "euc-jp", "utf8");
				$tb =~ s/href="\/(.*?)\//href="http:\/\/d.hatena.ne.jp\/$1\//gis;
				push(@trackbackArray, $tb);
    }

    while(@contentsArray)
    {
				$newContents .= "<div class='hatena_diary_contents'>";
				$newContents .= shift(@contentsArray);
				$newContents .= shift(@commentArray);
				$newContents .= shift(@trackbackArray);
				$newContents .= "</div>";
    }

		if($newContents eq ""){
				$httpsuccess = 0;
		}

    # change modified time
    my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime(time);
    my $modifiedtime = sprintf("%04d%02d%02d%02d%02d%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec);

    if (!$httpsuccess) {
				# can't retrieve page
				while ( (my $keyURL, my $valueSpriteid) = each %$urlSpriteid ) {
						push(@spritesArray, {"modified_date" => $modifiedtime, "id" => $valueSpriteid, "body" => "<div class='hatena_diary_contents'>Cannot retrieve contents</div>"});
				}
    } 
    else
    {
				my %newUrlSpriteid;

				if (exists($urlSpriteid->{$url})) {
						# spriteID for requested url already exists
						my $sid = $urlSpriteid->{$url};
						push(@spritesArray, {"modified_date" => $modifiedtime, "id" => $sid, "body" => $newContents});
						# save sprite
						if(!eval{Storable::lock_nstore \$newContents, $PositLogConfig::datapath . $pageid . "/static/" . $sid.".spr"}) { print "Cannot write .spr."; return \@spritesArray; }
						$spritesHash->{$sid}{"type"} = "dynamic";
						$spritesHash->{$sid}{"plugin_source"} = $sourceID;
						$spritesHash->{$sid}{"modified_time"} = $modifiedtime;
						$configHash->{"modified_time"} = $modifiedtime;

				} else {
						# generate new spriteID
						# id must start from alphabet in HTML4.01
						my $newSpriteID = "";
						do
						{
								my $rand = int (rand(1000));
								$rand = sprintf("%03d", $rand);
								# id must start from alphabet in HTML4.01
								$newSpriteID = "sprite_" . $modifiedtime . "_" . $rand;
						}while(exists($spritesHash->{$newSpriteID}));

						push(@spritesArray, {"modified_date" => $modifiedtime, "id" => $newSpriteID, "body" => $newContents});
						$newUrlSpriteid{$url} = $newSpriteID;

						# store new dynamic sprite table
						if(!eval{Storable::lock_nstore \%newUrlSpriteid, $PositLogConfig::datapath . $pageid . "/dynamic/" . $serializedData.$urlenc.".dat"}) { print "Cannot write dynamic sprite table."; return \@spritesArray; }

						# save sprite
						if(!eval{Storable::lock_nstore \$newContents, $PositLogConfig::datapath . $pageid . "/static/" . $newSpriteID.".spr"}) { print "Cannot write .spr."; return \@spritesArray; }
						$spritesHash->{$newSpriteID}{"type"} = "dynamic";
						$spritesHash->{$newSpriteID}{"plugin_source"} = $sourceID;
						$spritesHash->{$newSpriteID}{"created_time"} = $modifiedtime;
						$spritesHash->{$newSpriteID}{"modified_time"} = $modifiedtime;
						$configHash->{"modified_time"} = $modifiedtime;
				}

				# save modified time
				if(!eval{Storable::lock_nstore $spritesHash, $PositLogConfig::datapath . $pageid . "/sprites.dat"}){ print "Cannot write sprites.dat."; return \@spritesArray; }

				if(!eval{Storable::lock_nstore $configHash, $PositLogConfig::datapath . $pageid . "/config.dat"}) { print "Cannot write the page configuration."; return \@spritesArray; }

    }

    return \@spritesArray;
}
