#!perl

=head1 NAME

ipgrep - grep files by CIDR

=head1 SYNOPSIS

    ipgrep [-1cfHhLlnVv] [long options...] [patterns] [files]

=head1 DESCRIPTION

This program will print lines from a file that matches IP addresses in the given netblocks.

Many of the options mirror similar options from L<grep>.

=head1 SEE ALSO

L<grep|https://www.gnu.org/software/grep/>

L<grepcidr|https://www.pc-tools.net/unix/grepcidr/>

=head1 SOURCE

The development version is on github at L<https://github.com/robrwo/perl-ipgrep> and may be cloned from
L<git://github.com:robrwo/perl-ipgrep.git>.

=head1 BUGS

Please report any bugs or feature requests on the bugtracker website
L<https://github.com/robrwo/perl-ipgrep/issues>.

When submitting a bug or request, please include a test-file or a
patch to an existing test-file that illustrates the bug or desired
feature.

=head1 AUTHOR

Robert Rothenberg <rrwo@cpan.org>

=head1 COPYRIGHT AND LICENCE

This software is copyright (c) 2023 by Robert Rothenberg.

This is free software, licensed under The Artistic License 2.0 (GPL Compatible).

=cut

use v5.18;
use warnings;

use Getopt::Long::Descriptive 0.105;
use IO::File;
use IO::Uncompress::Gunzip;
use List::Util 1.33 qw( any );
use Net::CIDR 0.20 qw( cidrvalidate range2cidr );
use NetAddr::IP;
use Sub::Quote 2.003000 qw( qsub );

our $VERSION = 'v0.7.0';

my ( $opt, $usage ) = describe_options(
    '%c %o [patterns] [files]',
    [],
    [ 'count|c'                  => 'print a count of matching lines for each input file' ],
    [ 'files-without-match|L'    => 'print the name of each input file from which no output would normally have been printed' ],
    [ 'files-with-match|l'       => 'print the name of each input file from which output would normally have been printed' ],
    [ 'invert-match|v'           => 'invert the sense of matching, to select non-matching lines' ],
    [ 'max-count|m=i'            => 'Stop reading a file after INT matching lines.' ],
    [ 'line-number|n'            => 'prefix each line of output with the line number within its input file' ],
    [ 'match-first-character|1!' => 'match IP address from the first character of the line only' ],
    [
        'filename' => 'hidden' => {
            one_of => [
                [ 'with-filename|H' => 'print the file name for each match' ],
                [ 'no-filename|h'   => 'suppress the prefixing of file names on output' ],
            ]
        }
    ],
    [ 'label=s'               => 'display input from stdin as coming from label', { default => '(standard input)' } ],
    [ 'file|f=s'              => 'obtain CIDRs from a file, one per line' ],
    [ 'only-matching|o'       => 'show only nonempty parts of lines that match' ],
    [ 'quiet|silent|q'        => 'do not output anything, exit immediately with zero status if any match is found' ],
    [ 'exclude-final-dot!'    => 'exclude matches with a final dot' ],
    [ 'ignore-bad-addresses!' => 'ignore bad addresses' ],
    [],
    [ 'version|V' => 'print the version and exit' ],
    [ 'help'      => 'print usage and exit' ],

);

if ( $opt->version ) {
    say "iprep ${VERSION}";
    exit 0;
}

die $usage if $opt->help;

my @patterns;

if ( my $file = $opt->file ) {

    my $fh = IO::File->new($file);
    while ( my $line = $fh->getline ) {
        chomp($line);
        push @patterns, validate_cidr($line);
    }
}
else {
    my $pattern = shift @ARGV or die $usage;
    push @patterns, validate_cidr($pattern);
}

my $re = make_regexp($opt);

my @files = @ARGV;
push @files, "-" unless @files;

unless ( $opt->filename ) {
    $opt->{filename}         = @files > 1 ? "with_filename" : "no_filename";
    $opt->{$_}               = 0 for qw( with_filename no_filename );
    $opt->{ $opt->filename } = 1;
}

my $callback;

unless ( $opt->count ) {
    my $cb_pattern;
    if ( $opt->only_matching ) {
        my $prefix = '""';
        $prefix     = '$_[0]->input_line_number . ":"' if $opt->line_number;
        $prefix     = '$_[1] . ":" . ' . $prefix       if $opt->with_filename;
        $cb_pattern = 'defined($_[3][0]) ? join("\n", map { ' . $prefix . ' . $_ } @{$_[3]}) . "\n" : ""';
    }
    else {
        $cb_pattern = '$_[2]';
        $cb_pattern = '$_[0]->input_line_number . ":" . ' . $cb_pattern if $opt->line_number;
        $cb_pattern = '$_[1] . ":" . ' . $cb_pattern                    if $opt->with_filename;
    }
    $callback = qsub 'print ' . $cb_pattern;
}

my $post;
if ( $opt->files_with_match || $opt->files_without_match ) {
    my $pattern = '$_[0]';
    $post = qsub 'say ' . $pattern;
}
elsif ( $opt->count ) {
    my $pattern = '$_[1]';
    $pattern = '$_[0] . ":" . ' . $pattern if $opt->with_filename;
    $post    = qsub 'say ' . $pattern;
}

if ( $opt->quiet ) {
    $callback = $post = sub { };
}

my $matched = 0;

my $match_fn = $opt->only_matching ? sub { [ grep defined, @_ ] } : sub {
    any { defined($_) } @_;
};

for my $file (@files) {

    my $class = $file =~ /\.gz\z/ ? "IO::Uncompress::Gunzip" : "IO::File";
    my $fh    = $class->new($file) or die "$!: $file\n";
    my $label = $file eq "-" ? $opt->label : $file;
    my $count = match_file( $opt, $re, $fh, $label, $callback );
    $matched ||= !!$count;
    exit(0)                  if $opt->quiet && $matched;
    next                     if ( $opt->files_with_match && !$count ) || ( $opt->files_without_match && $count );
    $post->( $file, $count ) if $post;

}

exit( $matched ? 0 : 1 );

sub match_file {
    my ( $opt, $re, $fh, $file, $callback ) = @_;
    my $count = 0;
    while ( my $line = <$fh> ) {
        my $matches = $match_fn->( $line =~ m/${re}/g );
        next unless !!$matches ne !!$opt->invert_match;
        if ( $opt->only_matching && !( $opt->files_with_match || $opt->files_without_match ) ) {
            $count += scalar(@$matches);
            splice @$matches, $opt->max_count if $opt->max_count;
        }
        else {
            $count++;
        }
        last                                       if $opt->files_with_match || $opt->files_without_match;
        $callback->( $fh, $file, $line, $matches ) if $callback;
        last                                       if $opt->max_count && $count >= $opt->max_count;
    }
    return $count;
}

sub make_regexp {
    my ($opt) = @_;
    my $ips   = join( "|", map { NetAddr::IP->new($_)->re } @patterns );
    my $start = $opt->match_first_character ? '^' : '\b';
    $ips .= '(?!\.)' if $opt->exclude_final_dot;
    my $re = $start . '(' . $ips . ')\b';
    return qr/${re}/;
}

sub validate_cidr {
    my ($arg) = @_;

    if ( my $cidr = cidrvalidate($arg) ) {
        return $cidr;
    }
    elsif ( my @cidrs = eval { range2cidr($arg) } ) {
        return @cidrs;
    }
    elsif ( $opt->ignore_bad_addresses ) {
        return;
    }
    else {
        die "'${arg}' does not look like an IP address, IP address range or network block\n";
    }
}
