#!/usr/bin/perl

# Coded by Trizen
# Email: echo dHJpemVueEBnbWFpbC5jb20K | base64 -d
# Website: http://trizen.go.ro

menu();

# Show the main menu and run the extraction mode the user picks.
#
# Reads one line from STDIN and dispatches to site(), file() or
# listofurls() for the answers 1, 2 and 3.  Any other answer shows the
# menu again.  Returns without doing anything when STDIN reaches end of
# file, so a closed or exhausted input cannot loop forever.
sub menu {
    my %action_for = (
        1 => \&site,
        2 => \&file,
        3 => \&listofurls,
    );

    while (1) {
        print "
\t*******************************************
\t*         Simple Text Extracter           *
\t*                     Coded By Trizen     *
\t-------------------------------------------
\t*  Options:                               *
\t*       1 - Extract from URL              *
\t*       2 - Extract from Wordlist         *
\t*       3 - Extract from a list of URLs   *
\t*******************************************
\t
";
        print '=>> Pick one of: ';

        my $pick = <STDIN>;
        unless (defined $pick) {

            # End of input: there is nothing left to prompt for.
            print "\n";
            return;
        }

        # Tolerate stray whitespace (and a CR from Windows terminals).
        $pick =~ s/\A\s+|\s+\z//g;

        # Anything that is not exactly 1, 2 or 3 asks again, including
        # answers such as "abc", "-1" or "2.5".
        my $action = $action_for{$pick} or next;
        $action->();
        return;
    }
}
# Build one word list from every page named in a file of URLs.
#
# Prompts on STDIN for the name of a text file holding one URL per line.
# Each page is downloaded with LWP::UserAgent and every run of word
# characters (\w+) in its body is written, one per line, to '.temp2'.
# finish() then sorts that file, removes duplicates and appends the
# result to 'Wordlist made from URLs.txt'.
#
# Blank lines in the list are ignored.  Entries without a dot and pages
# that cannot be fetched are reported on STDERR and skipped, so one bad
# entry does not discard the words already collected.
#
# Dies when the list, the output file or '.temp2' cannot be opened.
sub listofurls {
    print "\n=>> Name of list with URLs: ";
    my $list = <STDIN>;
    die "\n\n(x_x) No list name given...\n\n" unless defined $list;
    chomp $list;
    open my $list_fh, '<', $list or die "Cannot open '$list': $!\n";

    # $file and OUT stay package globals: finish() reads both of them.
    $file = 'Wordlist made from URLs.txt';
    open OUT, '>>', $file or die "Cannot open '$file': $!\n";

    # Opened once, truncating, so leftovers from an aborted earlier run
    # never end up in this run's word list.
    open my $words_fh, '>', '.temp2' or die "Cannot open '.temp2': $!\n";

    require LWP::UserAgent;
    my $lwp = LWP::UserAgent->new;
    $lwp->agent('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.3) Gecko/20100402 Firefox/3.6.3');

    print "\n";
    URL:
    while (my $url = <$list_fh>) {
        $url =~ s/\A\s+|\s+\z//g;    # also strips a CR from CRLF files
        next URL if $url eq '';

        $url = 'http://' . $url unless $url =~ m{\Ahttps?://}i;
        unless ($url =~ /\./) {
            warn "(x_x) Invalid URL, skipping: $url\n";
            next URL;
        }

        print "Extracting: $url\n";
        my $response = $lwp->get($url);
        unless ($response->is_success) {

            # LWP puts its own error text into the body of a failed
            # response; harvesting words from it would pollute the list.
            warn "(x_x) Could not fetch $url: ", $response->status_line, "\n";
            next URL;
        }

        my $content = $response->content;
        while ($content =~ /(\w+)/g) {
            print {$words_fh} "$1\n";
        }
    }
    close $list_fh;
    close $words_fh or die "Cannot write '.temp2': $!\n";

    finish();
}
# Write the collected words to the output file, sorted and without repeats.
#
# Reads every line of '.temp2', sorts the lines as strings and prints each
# distinct line shorter than 30 characters (the trailing newline counts)
# to the OUT filehandle, which the caller must already have opened.
# Afterwards closes IN and OUT, deletes '.temp2' and '.temp', and tells
# the user the name stored in the package variable $file.
#
# Dies with the system error text when '.temp2' cannot be opened.
sub finish {
    print "\n\n(...) Sorting and removing duplicates...\n";

    open(IN, '<', '.temp2') or die $!;
    my @lines  = <IN>;
    my @sorted = sort @lines;

    # Sorting puts equal lines next to each other, so remembering the last
    # line that was written is enough to drop duplicates.
    my $previous = '';
    for my $line (@sorted) {
        next if $line eq $previous;
        next if length($line) >= 30;
        print OUT $line;
        $previous = $line;
    }

    close IN;
    close OUT;
    unlink '.temp2', '.temp';
    print qq[\n[*] Done... Check out "$file"\n\n];
}
# Extract words or e-mail addresses from a single web page.
#
# Prompts on STDIN for a URL and for what to extract:
#   1 (or an empty answer)  words, pattern ([\w]+)
#                           -> 'Words from URL.txt'
#   2                       e-mail addresses, delegated to emails()
#                           -> 'Emails from URL.txt'
#   3                       a regular expression typed by the user
#                           -> 'Words from URL - customized.txt'
#
# For options 1 and 3 the page is downloaded and processed line by line:
# every non-word character of a line is turned into a newline, and when
# the result matches the pattern all words of that line are kept.  The
# distinct words shorter than 29 characters are appended to the output
# file in sorted order.  No temporary files are used.
#
# Dies on an invalid URL, an unknown option, an empty or malformed custom
# pattern, a failed download, or when the output file cannot be written.
sub site {
    print "\n=>> Insert an URL: ";

    # $url stays a package global because emails() reads it.
    $url = <STDIN>;
    die "\n\n(x_x) Invalid URL...\n\n" unless defined $url;
    $url =~ s/\A\s+|\s+\z//g;
    $url = 'http://' . $url unless $url =~ m{\Ahttps?://}i;
    unless ($url =~ /\./) {
        die "\n\n(x_x) Invalid URL...\n\n";
    }

    print "
\t1 = Words (a-zA-Z0-9)
\t2 = Emails (a-zA-Z0-9_.-\@)
\t3 = Customize (advanced)
\t
";
    print "=>> What do you want to extract?\n> ";
    my $choice = <STDIN>;
    $choice = '' unless defined $choice;
    chomp $choice;

    # E-mail extraction is complete once emails() returns (it also closes
    # OUT), so stop here instead of downloading the page a second time.
    if ($choice eq '2') {
        $file = 'Emails from URL.txt';
        open OUT, '>>', $file or die "Cannot open '$file': $!\n";
        emails();
        print qq[\n[*] Done... Check out "$file"\n\n];
        return;
    }

    my $pattern;
    if ($choice eq '1' or $choice eq '') {
        $file    = 'Words from URL.txt';
        $pattern = '([\\w]+)';
    }
    elsif ($choice eq '3') {
        print "\nExample: ([a-zA-Z]+), ([^d]+), etc...\n";
        print "\n=>> Customize: ";
        $pattern = <STDIN>;
        $pattern = '' unless defined $pattern;
        chomp $pattern;
        $file = 'Words from URL - customized.txt';
    }
    else {
        die "\n\n(x_x) Invalid option...\n\n";
    }

    # Compile the pattern up front: a typo is reported before any network
    # traffic, and an empty pattern (which Perl would silently replace by
    # the last successful match) is rejected.
    die "\n\n(x_x) Invalid pattern: it is empty\n\n" unless length $pattern;
    my $regex = eval { qr/$pattern/ };
    die "\n\n(x_x) Invalid pattern: $@\n" unless defined $regex;

    require LWP::UserAgent;
    print "\n\n(...) Working...\n";
    my $lwp = LWP::UserAgent->new;
    $lwp->agent('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.3) Gecko/20100402 Firefox/3.6.3');
    my $response = $lwp->get($url);
    unless ($response->is_success) {
        die "\n\n(x_x) Could not fetch $url: " . $response->status_line . "\n\n";
    }

    my %seen;
    for my $line (split /^/, $response->content) {
        (my $tokens = $line) =~ s/\W/\n/g;
        chomp $tokens;
        next unless $tokens =~ $regex;

        # Collect only the non-empty words, so the list gets no blank entry.
        $seen{$_} = 1 for $tokens =~ /\w+/g;
    }

    open OUT, '>>', $file or die "Cannot open '$file': $!\n";
    my $max_word_length = 28;    # longer entries are treated as noise
    for my $word (sort keys %seen) {
        next if length($word) > $max_word_length;
        print OUT "$word\n";
    }
    close OUT or die "Cannot write '$file': $!\n";

    print qq[\n[*] Done... Check out "$file"\n\n];
}
# Download the page at $url and append every e-mail address found in it
# to the OUT filehandle, one address per line.
#
# Expects the caller to have set the package variable $url and to have
# opened OUT for writing; OUT is closed before returning.  Every address
# in the page body is extracted, not only the first one on each line.
#
# Dies when the page cannot be fetched or when OUT cannot be flushed.
sub emails {
    print "[...] Extracting...\n";
    require LWP::UserAgent;
    my $lwp = LWP::UserAgent->new;
    $lwp->agent('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.3) Gecko/20100402 Firefox/3.6.3');

    my $response = $lwp->get($url);
    unless ($response->is_success) {
        die "\n\n(x_x) Could not fetch $url: " . $response->status_line . "\n\n";
    }

    # One global scan replaces the former per-line split, which used the
    # bare ?PATTERN? form that Perl 5.22 and later reject at compile time.
    # Neither character class can match a newline, so an address never
    # spans two lines.
    my $content = $response->content;
    while ($content =~ /([\.\-\w]+)\@([\.\-\w]+)/g) {
        print OUT "$1\@$2\n";
    }
    close OUT or die "Cannot write e-mail list: $!\n";
}
# Extract words or e-mail addresses from a local text file.
#
# Prompts on STDIN for the input file and for what to extract:
#   1 (or an empty answer)  words, pattern ([\w]+)
#                           -> 'Words from LIST.txt'
#   2                       e-mail addresses, delegated to listemails()
#                           -> 'Emails from LIST.txt'
#   3                       a regular expression typed by the user
#                           -> 'Words from FILE - customized.txt'
#
# For options 1 and 3 every input line that matches the pattern is split
# into its words (runs of \w characters).  The distinct words shorter
# than 29 characters are appended to the output file in sorted order.
# No temporary files are used.
#
# Dies when a file cannot be opened or written, on an unknown option, or
# on an empty or malformed custom pattern.
sub file {
    print "\n=>> Input file: ";
    my $in = <STDIN>;
    die "\n\n(x_x) No input file given...\n\n" unless defined $in;
    chomp $in;

    # IN and OUT stay bareword handles because listemails() uses them.
    open IN, '<', $in or die "Cannot open '$in': $!\n";

    print "
\t1 = Words (a-zA-Z0-9)
\t2 = Emails (a-zA-Z0-9_.-\@)
\t3 = Customize (advanced)
\t
";
    print "What do you want to extract?\n> ";
    my $choice = <STDIN>;
    $choice = '' unless defined $choice;
    chomp $choice;

    # listemails() consumes IN and closes OUT, so the job is finished
    # once it returns.
    if ($choice eq '2') {
        $file = 'Emails from LIST.txt';
        open OUT, '>>', $file or die "Cannot open '$file': $!\n";
        listemails();
        close IN;
        print qq[\n[*] Done... Check out "$file"\n\n];
        return;
    }

    my $pattern;
    if ($choice eq '1' or $choice eq '') {
        $file = 'Words from LIST.txt';

        # Matches what the menu advertises (letters and digits); the
        # earlier lower-case-only pattern skipped lines such as "ADMIN"
        # or "12345".
        $pattern = '([\\w]+)';
    }
    elsif ($choice eq '3') {
        print "\nExample: ([a-zA-Z]+), ([^d]+), etc...\n";
        print "\n=>> Customize: ";
        $pattern = <STDIN>;
        $pattern = '' unless defined $pattern;
        chomp $pattern;
        $file = 'Words from FILE - customized.txt';
    }
    else {
        die "\n\n(x_x) Invalid option...\n\n";
    }

    # Compile once and fail early on a typo; an empty pattern would make
    # Perl silently reuse the last successful match instead.
    die "\n\n(x_x) Invalid pattern: it is empty\n\n" unless length $pattern;
    my $regex = eval { qr/$pattern/ };
    die "\n\n(x_x) Invalid pattern: $@\n" unless defined $regex;

    print "\n\n(...) Working...\n";
    my %seen;
    while (my $line = <IN>) {
        next unless $line =~ $regex;

        # Collect only the non-empty words, so the list gets no blank entry.
        $seen{$_} = 1 for $line =~ /\w+/g;
    }
    close IN;

    open OUT, '>>', $file or die "Cannot open '$file': $!\n";
    my $max_word_length = 28;    # longer entries are treated as noise
    for my $word (sort keys %seen) {
        next if length($word) > $max_word_length;
        print OUT "$word\n";
    }
    close OUT or die "Cannot write '$file': $!\n";

    print qq[\n[*] Done... Check out "$file"\n\n];
}
# Copy every e-mail address found on the IN filehandle to OUT.
#
# Expects the caller to have opened IN for reading and OUT for writing.
# Reads IN to its end, and for each address prints "Extracted: <address>"
# to STDOUT and writes the address on its own line to OUT.  All addresses
# on a line are extracted, not only the first one.  OUT is closed before
# returning.
#
# Dies when OUT cannot be flushed.
sub listemails {
    while (my $line = <IN>) {

        # A single global match also covers addresses wrapped in angle
        # brackets, since "<" and ">" are outside both character classes.
        while ($line =~ /([\.\-\w]+)\@([\.\-\w]+)/g) {
            my $address = "$1\@$2";
            print "Extracted: $address\n";
            print OUT "$address\n";
        }
    }
    close OUT or die "Cannot write e-mail list: $!\n";
}

