package GCPlugins::GCbooks::GCAmazon;

###################################################
#
#  Copyright 2005-2009 Tian
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCbooks::GCbooksCommon;

{
    package GCPlugins::GCbooks::GCPluginAmazon;
    
    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
    use XML::Simple;
    use LWP::Simple qw($ua);
    use Encode;
    use HTML::Entities;
    use GCUtils;

    # Start-tag handler driving the parsing state machine.
    #
    # Parameters:
    #   $self     - plugin instance holding the state flags (isComment, isUrl, ...)
    #   $tagname  - name of the tag being opened
    #   $attr     - hash reference with the attributes of the tag
    #   $attrseq  - received but not used here
    #   $origtext - received but not used here
    #
    # The vark* pseudo-tags tested below are the markers that preProcess
    # substitutes into the page before it is parsed.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        # Count how many times each tag has been opened
        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            # Nothing is analysed while inside a comment
            return if ($self->{isComment} != 0);

            # Identify beginning of comments
            if ($tagname eq 'varkcomment')
            {
                $self->{isComment} = 1;
                return;
            }

            # Capture URL of book: first link after the start of a result entry
            if (($self->{isUrl} == 1) && ($tagname eq 'a'))
            {
                $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
                $self->{isUrl} = 0;
                # The next text is the title
                $self->{isTitle} = 1;
                return;
            }

            # Identify beginning of new book (<li id="result_N">)
            if (($tagname eq 'li') && defined($attr->{id}) && ($attr->{id} =~ /result_[0-9]+/))
            {
                # Create new entry
                $self->{itemIdx}++;
                $self->{isUrl} = 1;
                $self->{isAuthor} = 0;
                return;
            }

            # Identify end of authors list
            if (($tagname eq 'varkendauthors') && ($self->{isAuthor} != 0))
            {
                $self->{isAuthor} = 0;
            }
            return;
        }

        # Detection of book themes
        if (($self->{isTheme} == 0) && ($tagname eq 'varkgenre'))
        {
            $self->{isTheme} = 1;
            return;
        }

        # Detection of book page count
        if (($self->{isPage} == 0) && ($tagname eq 'varkdata'))
        {
            $self->{isPage} = 1;
            return;
        }

        # Detection of authors
        if ($tagname eq 'varkauthor')
        {
            $self->{isAuthor} = 1;
            return;
        }

        # Capture of image
        if ($tagname eq 'varkimage')
        {
            my $cover = $attr->{adress};
            if (defined($cover))
            {
                # The attribute may hold more than the image address
                # (presumably a list of URLs with their sizes - TODO confirm
                # against a live page): keep only the first .jpg URL, and
                # fall back to the raw value when none is found.
                if ($cover =~ /(http.*?\.jpg)/)
                {
                    $cover = $1;
                }
                $cover =~ s|https://images-na.ssl-images-amazon.com/images/I/|http://z2-ec2.images-amazon.com/images/I/|;
            }
            $self->{curInfo}->{cover} = $cover;
            return;
        }

        # Detection of book description: the marker first, then the
        # <div> that follows it
        if (($self->{isDescription} == 0) && ($tagname eq 'varkdescription'))
        {
            $self->{isDescription} = 1;
            return;
        }
        if (($self->{isDescription} == 1) && ($tagname eq 'div'))
        {
            $self->{isDescription} = 2;
            return;
        }

        # Detection title (state 2 is the one consumed by the text handler
        # when parsing an item page)
        if (($self->{isTitle} == 0) && ($tagname eq 'varktitle'))
        {
            $self->{isTitle} = 2;
            return;
        }
        return;
    }

    # End-tag handler: closes the states opened by the start handler.
    #
    # Parameters:
    #   $self    - plugin instance holding the state flags
    #   $tagname - name of the tag being closed
    sub end
    {
        my ($self, $tagname) = @_;

        # One less open tag of this name
        $self->{inside}->{$tagname}--;

        if (!$self->{parsingList})
        {
            if ($tagname eq 'li')
            {
                # Finishing themes analysis
                $self->{isTheme} = 0 if ($self->{isTheme} != 0);
            }
            elsif ($tagname eq 'div')
            {
                # Finishing description analysis
                $self->{isDescription} = 0 if ($self->{isDescription} != 0);
            }
            return;
        }

        # Identify end of comments
        if (($tagname eq 'varkcomment') && ($self->{isComment} == 1))
        {
            $self->{isComment} = 0;
        }
        return;
    }

    # Text handler: stores the text chunks selected by the state flags.
    #
    # Parameters:
    #   $self     - plugin instance holding the state flags and the results
    #   $origtext - text chunk found between two tags
    #
    # When parsing a result list the data goes to the current entry of
    # $self->{itemsList}; otherwise it goes to $self->{curInfo}.
    sub text
    {
        my ($self, $origtext) = @_;

        # Remove blanks before and after string (needed in both modes)
        $origtext =~ s/^\s+//;
        $origtext =~ s/\s+$//g;
        my $isBlank = ($origtext eq '');

        if ($self->{parsingList})
        {
            # Comments and blank chunks never carry data
            return if (($self->{isComment} != 0) || $isBlank);

            my $idx = $self->{itemIdx};

            if ($self->{isTitle} == 1)
            {
                # Capture of book title; the publication date comes next
                $self->{itemsList}[$idx]->{title} = $origtext;
                $self->{isTitle} = 0;
                $self->{isPublication} = 1;
            }
            elsif ($self->{isPublication} == 1)
            {
                # Capture of book publication date; authors come next
                $self->{itemsList}[$idx]->{publication} = $origtext;
                $self->{isAuthor} = 1;
                $self->{isPublication} = 0;
            }
            elsif ($self->{isAuthor} == 1)
            {
                # Avoid a text area before the first author
                $self->{isAuthor} = 2;
            }
            elsif ($self->{isAuthor} == 2)
            {
                # Capture of authors, separated by a blank
                my $known = $self->{itemsList}[$idx]->{authors};
                $self->{itemsList}[$idx]->{authors} =
                    ($known eq '') ? $origtext : $known . " " . $origtext;
            }
            return;
        }

        # Capture of title
        if (($self->{isTitle} == 2) && !$isBlank)
        {
            $self->{curInfo}->{title} = $origtext;
            $self->{isTitle} = 0;
            return;
        }

        # Capture of page number
        if (($self->{isPage} == 1) && ($origtext =~ /^[0-9]+/))
        {
            $self->{curInfo}->{pages} = $origtext;
            $self->{isPage} = 0;
            return;
        }

        # Fields announced by a label: the label arms the flag and the next
        # non-blank chunk is the value. The order of the rows matters, as
        # the first matching rule consumes the chunk.
        my @labelled = (
            # state flag    label id  field
            [ 'isEditor',   1,        'publisher' ],
            [ 'isLanguage', 2,        'language'  ],
            [ 'isISBN',     3,        'isbn'      ],
            [ 'isSize',     4,        'format'    ],
        );
        foreach my $entry (@labelled)
        {
            my ($flag, $labelId, $field) = @{$entry};

            if (($self->{$flag} == 0) && ($origtext eq $self->getTranslation($labelId)))
            {
                $self->{$flag} = 1;
                return;
            }
            next if (($self->{$flag} != 1) || $isBlank);

            if ($field eq 'publisher')
            {
                # Capture of editor and publication date: "editor (date)"
                my @parts = split('\(', $origtext);
                my ($editor, $date) = @parts[0, 1];
                $date =~ s/\)//g;
                $editor =~ s/^\s+//;
                $editor =~ s/\s+$//g;
                $editor =~ s/\;//g;
                $date =~ s/^\s+//;
                $date =~ s/\s+$//g;
                $self->{curInfo}->{publisher} = $editor;
                $self->{curInfo}->{publication} = $date;
            }
            elsif ($field eq 'isbn')
            {
                # The ISBN is stored without dashes
                $origtext =~ s|-||gi;
                $self->{curInfo}->{isbn} = $origtext;
            }
            else
            {
                $self->{curInfo}->{$field} = $origtext;
            }
            $self->{$flag} = 0;
            return;
        }

        # Detection of themes: a '>' announces the next theme
        if (($self->{isTheme} == 1) && ($origtext eq '>'))
        {
            $self->{isTheme} = 2;
            return;
        }

        # Capture of themes, separated by commas
        if (($self->{isTheme} == 2) && !$isBlank)
        {
            my $genres = $self->{curInfo}->{genre};
            $self->{curInfo}->{genre} =
                ($genres eq '') ? $origtext : $genres . ", " . $origtext;
            $self->{isTheme} = 1;
            return;
        }

        # Capture of authors (chunks containing "Ajax" are skipped)
        if (($self->{isAuthor} == 1) && !$isBlank && ($origtext =~ /^(?:(?!Ajax).)*$/))
        {
            # Lower case for author names, except for first letters
            $origtext =~ s/([[:alpha:]]+)/ucfirst(lc $1)/egi;
            my $authors = $self->{curInfo}->{authors};
            $self->{curInfo}->{authors} =
                ($authors eq '') ? $origtext : $authors . ", " . $origtext;
            $self->{isAuthor} = 0;
            return;
        }

        # Capture of description: chunks are appended without separator
        if (($self->{isDescription} == 2) && !$isBlank)
        {
            $self->{curInfo}->{description} .= $origtext;
            return;
        }
        return;
    }


    # Constructor: builds the plugin through the parent class and resets
    # the parsing state.
    #
    # Returns the new plugin instance.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless ($self, $class);

        $self->{hasField} = {
            title => 1,
            authors => 1,
            publication => 1,
            format => 0,
            edition => 0,
        };

        # Every state flag read by the start/end/text handlers starts at 0.
        # isSize is part of the list because the text handler tests it when
        # looking for the product dimensions.
        foreach my $flag (qw(
            isComment isUrl isTitle isPublication isAuthor isPage
            isEditor isISBN isSize isDescription isLanguage isTheme
        ))
        {
            $self->{$flag} = 0;
        }

        return $self;
    }
    
    # Returns the URL of an item page: the URL received is handed back
    # unchanged.
    sub getItemUrl
    {
        my $self = shift;
        my $url  = shift;

        return $url;
    }

    # Rewrites the downloaded page before it is parsed: the pieces of HTML
    # announcing interesting data are replaced with vark* pseudo-tags that
    # the start/end handlers recognise.
    #
    # Parameters:
    #   $self - plugin instance ($self->{parsingList} selects the rule set)
    #   $html - page content
    #
    # Returns the rewritten page content.
    sub preProcess
    {
        my ($self, $html) = @_;

        if ($self->{parsingList})
        {
            # Analysis of results must be disabled during comments
            $html =~ s|<!--|<varkcomment>|gi;
            $html =~ s|-->|</varkcomment>|gi;
            # Remove other commercial offers
            $html =~ s|END SPONSORED LINKS SCRIPT.*||s;
            # End of authors listing detection
            $html =~ s|</span></div></div><div class="a-row"><div class="a-column a-span7"><div class="a-row a-spacing-none">|<varkendauthors>|gi;
            $html =~ s|<h3 class="a-size-small a-color-null s-inline  a-text-normal">|<varkendauthors>|gi;
            $html =~ s|<div class="a-row a-spacing-mini">|<varkendauthors>|gi;
        }
        else
        {
            # Beginning of book data : pages, editor, publication date, ISBN, dimensions
            $html =~ s|<td class="bucket">|<varkdata>|gi;
            # Beginning and end of book description
            $html =~ s|<script id="bookDesc_override_CSS" type="text/undefined">|<varkdescription>|;
            #$html =~ s|<div id="bookDesc_outer_postBodyPS" style="overflow: hidden; z-index: 1; height: 0px; display: block;">|</varkdescription>|;
            # Beginning of book title
            $html =~ s|<div id="booksTitle" class="feature" data-feature-name="booksTitle">|<varktitle>|gi;
            # Beginning of book themes
            $html =~ s|<ul class="zg_hrsr">|<varkgenre>|gi;
            # Beginning of authors
            $html =~ s|<span class="author notFaded" data-width="">|<varkauthor>|gi;
            # Beginning of image. The left brace is escaped: an unescaped
            # literal "{" in a pattern is deprecated and rejected by some
            # Perl releases.
            $html =~ s|class="a-dynamic-image image-stretch-vertical frontImage" id="imgBlkFront" data-a-dynamic-image="\{&quot;|><varkimage adress="|;

            # Drop simple formatting tags and replace some characters
            $html =~ s|<BR>||gi;
            $html =~ s|<I>||gi;
            $html =~ s|</I>||gi;
            $html =~ s|\x{8C}|OE|gi;
            $html =~ s|\x{9C}|oe|gi;
            $html =~ s|&#146;|'|gi;
        }
        return $html;
    }

    # Builds the URL of the book search page.
    #
    # Parameters:
    #   $self - plugin instance (provides the host through baseWWWamazonUrl)
    #   $word - search keywords, appended as they are
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        my $host = $self->baseWWWamazonUrl;
        return join('', 'http://', $host, '/s/ref=nb_sb_noss_1?url=search-alias=stripbooks&field-keywords=', $word);
    }
    
    # Host name of the Amazon site queried by this plugin (used by
    # getSearchUrl).
    sub baseWWWamazonUrl
    {
        my $host = "www.amazon.com";
        return $host;
    }

    # Name of this plugin.
    sub getName
    {
        my $name = "Amazon (US)";
        return $name;
    }
    
    # Author of this plugin.
    sub getAuthor
    {
        my $author = 'Varkolak';
        return $author;
    }
    
    # Language code of this plugin (presumably the language of the site
    # it queries).
    sub getLang
    {
        my $lang = 'EN';
        return $lang;
    }

    # Character set declared by this plugin (presumably the one used to
    # decode the downloaded pages - confirm against the caller).
    sub getCharset
    {
        my ($self) = @_;

        my $charset = "ISO-8859-15";
        return $charset;
    }
    
    # Returns a reference to a new array naming the fields declared as
    # search fields by this plugin.
    sub getSearchFieldsArray
    {
        my @fields = ('title', 'authors', 'isbn');
        return \@fields;
    }

    # Gives the label used by the site for a piece of book data.
    #
    # The second argument selects the label:
    #   1 = editor, 2 = language, 3 = ISBN, 4 = product dimensions, 5 = series
    # An empty string is returned for any other value.
    sub getTranslation
    {
        my (undef, $param) = @_;

        # Labels in the order of their identifiers (the first one has id 1)
        my @labels = (
            'Publisher:',
            'Language:',
            'ISBN-13:',
            'Product Dimensions:',
            'Series:',
        );

        foreach my $id (1 .. scalar(@labels))
        {
            return $labels[$id - 1] if ($param == $id);
        }
        return '';
    }

}
1;