#!/usr/local/bin/perl

use strict;
use warnings;
use Lingua::JA::TFIDF;
use YAML::Syck;
use Data::Dumper;
use Getopt::Long;
use Term::ReadLine;
use HTML::Feature;

# Interactive TF-IDF console.
#
# Commands typed at the prompt:
#   text       read further lines until end-of-input (Control-D), then dump
#              the TF-IDF list computed from them
#   url <URL>  hand <URL> to HTML::Feature and dump the TF-IDF list computed
#              from the resulting title, description and text
#   q          print "bye!" and exit
# Any other input is silently ignored.

# load_config() returns undef when no --config option was given, so fall
# back to an empty option list instead of dereferencing undef.
my $config       = load_config();
my $calculator   = Lingua::JA::TFIDF->new( %{ $config || {} } );
my $html_feature = HTML::Feature->new;

# Term::ReadLine->new expects the application name as its argument.
my $term   = Term::ReadLine->new('tfidf');
my $prompt = "TFIDF > ";
my $out    = $term->OUT || \*STDOUT;

# The pattern text stays in a single-quoted string so that "$," and "@&"
# inside the character class are never treated as Perl variables. It is
# anchored at both ends so the *whole* argument has to look like a URL.
my $url_chars = 's?https?://[-_.!~*\'()a-zA-Z0-9;/?:@&=+$,%#]+';
my $url_re    = qr/\A(?:$url_chars)\z/;

COMMAND:
while ( defined( my $command = $term->readline($prompt) ) ) {
    if ( $command eq 'text' ) {
        my @lines;
      TEXT_LINE:
        while ( defined( my $input = $term->readline( $prompt . "input text and Control-D : " ) ) ) {

            # A literal Control-D character also ends the input; it has to
            # be written in double quotes to mean the control character.
            last TEXT_LINE if $input eq "\cD";
            push @lines, $input;
        }

        # Keep a line break between input lines so that the last word of
        # one line is not glued to the first word of the next.
        _print_tfidf( $calculator, join( "\n", @lines ) );
    }
    elsif ( $command =~ /\Aurl\s+(.+?)\s*\z/ ) {
        my $url = $1;
        if ( $url !~ $url_re ) {
            print "bad url\n";
            next COMMAND;
        }

        # parse() is given the URL itself (presumably fetching it - confirm
        # against HTML::Feature); trap exceptions so that one failing URL
        # does not end the whole session.
        my $result = eval { $html_feature->parse($url) };
        if ( !$result ) {
            warn "could not parse $url: " . ( $@ || "no result returned\n" );
            next COMMAND;
        }

        my @parts;
        for my $field (qw(title desc text)) {
            my $value = $result->$field;
            push @parts, $value if defined $value && length $value;
        }
        _print_tfidf( $calculator, join( "\n", @parts ) );
    }
    elsif ( $command eq 'q' ) {
        print $out "bye!\n";
        exit;
    }
}

# Compute the TF-IDF list of $text with $calculator and dump it to STDOUT,
# followed by a "----" separator line. Empty or whitespace-only text is
# reported and skipped instead of being passed to the calculator.
sub _print_tfidf {
    my ( $tfidf, $text ) = @_;

    if ( !defined $text || $text !~ /\S/ ) {
        print "no text\n";
        return;
    }

    my $list = $tfidf->tfidf($text)->list;
    print Dumper($list), "\n";
    print "----", "\n";
    return;
}

# Read the optional --config=FILE command-line option and load that YAML
# file.
#
# Options are parsed from, and removed from, @ARGV by Getopt::Long. When the
# command line contains an unknown or malformed option, usage is printed via
# Pod::Usage and the program exits with status 2.
#
# Returns the hash reference loaded from FILE, or undef when --config was
# not given. Dies if FILE does not contain a mapping at its top level, since
# the caller expands the result into constructor arguments.
sub load_config {
    my $config_file;

    if ( !Getopt::Long::GetOptions( 'config=s' => \$config_file ) ) {

        # Pod::Usage is loaded on demand here because the file does not
        # import it; without this the call would fail with "Undefined
        # subroutine" instead of printing usage.
        require Pod::Usage;
        Pod::Usage::pod2usage(2);
    }

    my $config;
    if ( defined $config_file && length $config_file ) {
        $config = LoadFile($config_file);
        die "$config_file: top level of the config must be a mapping\n"
            unless ref $config eq 'HASH';
    }
    return $config;
}

# Reached once the prompt loop above has ended on end-of-input; finish with
# a success status.
exit 0;