# SPDX-License-Identifier: GPL-2.0-or-later

package Amavis::SpamControl::RspamdClient;
use strict;
use re 'taint';
use warnings;
use warnings FATAL => qw(utf8 void);
no warnings 'uninitialized';
use Scalar::Util qw(looks_like_number);

=pod

=head1 Amavis extension module to use Rspamd as a spam checker

Copyright (c) 2019 Ralph Seichter, partially based on the
SpamdClient extension. Released under GNU General Public
License; see Amavis LICENSE file for details.

=head2 Example configuration #1 (local Rspamd)

  # Rspamd running on the same machine as Amavis. Default URL
  # is http://127.0.0.1:11333/checkv2 , matching Rspamd's
  # "normal" worker defaults.
  @spam_scanners = ( [
      'Local Rspamd', 'Amavis::SpamControl::RspamdClient',

      # Adjust scores according to Rspamd's "required score"
      # setting (defaults to 15). Scores reported by Rspamd
      # will be multiplied with this factor. The following
      # adjusts Rspamd scores to SpamAssassin scores. While
      # this setting is technically optional, not adjusting
      # scores is prone to cause headaches.
      score_factor => $sa_tag2_level_deflt / 15.0,

      # MTA name is used to assess validity of existing
      # Authentication-Results headers, e.g. if DKIM/DMARC
      # validation has already happened.
      mta_name => 'mail.example.com',
  ] );

=head2 Example configuration #2 (remote Rspamd)

  # Rspamd running behind HTTPS-capable proxy using basic
  # authentication to control access.
  @spam_scanners = ( [
      'Remote Rspamd', 'Amavis::SpamControl::RspamdClient',
      url => 'https://rspamd-proxy.example.com/checkv2',

      # Response timeout in seconds. Default is 60, matching
      # Rspamd's standard config for the "normal" worker.
      timeout => 42,

      # SSL-options and -credentials passed to LWP::UserAgent,
      # see https://metacpan.org/pod/LWP::UserAgent . Default:
      # ssl_opts => { verify_hostname => 1 },
      credentials => {
          # The following <host>:<port> must match the 'url'
          # defined above or credentials won't be transmitted.
          netloc => 'rspamd-proxy.example.com:443',
          # Remote authentication realm
          realm => 'Rspamd restricted access',
          username => 'Marco',
          password => 'Polo',
      },

      # Don't scan messages remotely if the body size extends
      # the following limit (optional setting).
      mail_body_size_limit => 32 * 1024,

      score_factor => $sa_tag2_level_deflt / 15.0,
      mta_name => 'mail.example.com',
  ] );

=head2 Requirements

In addition to Amavis' core requirements, this extension needs
the following additional Perl modules:

  JSON
  HTTP::Message
  LWP::UserAgent
  LWP::Protocol::https
  Net::SSLeay

Should your host OS not provide the necessary packages, these
modules can be obtained via https://www.cpan.org .

=cut

BEGIN {
    require Exporter;
    use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
    $VERSION = '2.412';
    @ISA = qw(Exporter);
}

use JSON qw(decode_json);
use LWP::UserAgent;

use Amavis::rfc2821_2822_Tools qw(qquote_rfc2821_local);
use Amavis::Timing qw(section_time);
use Amavis::Util qw(do_log min prolong_timer);

sub new {
    my ($class, $scanner_name, $module, @args) = @_;
    my (%options) = @args;
    bless { scanner_name => $scanner_name, options => \%options }, $class;
}

# Pass meta information using Rspamd's non-standard HTTP headers.
sub pass_meta {
    my ($request, $name, $value) = @_;
    if (defined $value && $value ne '') {
        $request->header($name => $value);
    }
}

# Invoked by Amavis to spam-check one message.
sub check {
    my ($self, $msginfo) = @_;
    my ($which_section, $spam_level, $rspamd_action, $rspamd_rscore,
        $rspamd_skipped, $rspamd_tests, $rspamd_verdict, $size_limit);
    my $scanner_name = $self->{scanner_name};
    my $score_factor = $self->{options}->{'score_factor'} // 1;
    my $mbsl = $self->{options}->{'mail_body_size_limit'};
    if (defined $mbsl) {
        $size_limit = min(32 * 1024, $msginfo->orig_header_size) +
            min($mbsl, $msginfo->orig_body_size);
        # Allow slightly oversized messages to pass in full.
        undef $size_limit if $msginfo->msg_size < $size_limit + 5 * 1024;
    }
    my $per_recip_data = $msginfo->per_recip_data;
    $per_recip_data = [] if !$per_recip_data;

    my $msg = $msginfo->mail_text;
    my $msg_str_ref = $msginfo->mail_text_str; # In-memory copy available?
    $msg = $msg_str_ref if ref $msg_str_ref;
    eval {
        if (!defined $msg) {
            do_log(3, "Empty message");
        }
        elsif (ref $msg eq 'SCALAR') {
            $which_section = 'rspamd_connect';
            my $timeout = $self->{options}->{'timeout'};
            $timeout = 60 unless defined $timeout;
            my $url = $self->{options}->{'url'};
            $url = 'http://127.0.0.1:11333/checkv2' unless defined $url;
            do_log(3, "connecting to rspamd %s (timeout %s)", $url, $timeout);

            my $request = HTTP::Request->new(POST => $url);
            $request->content_type('application/octet-stream');
            $request->content(defined $size_limit ? substr($$msg, 0, $size_limit) : $$msg);
            pass_meta($request, 'Helo', $msginfo->client_helo);
            pass_meta($request, 'Hostname', $msginfo->client_name);
            pass_meta($request, 'IP', $msginfo->client_addr);
            pass_meta($request, 'MTA-Name', $self->{options}->{'mta_name'});
            pass_meta($request, 'From', $msginfo->sender_smtp);
            pass_meta($request, 'Queue-Id', $msginfo->queue_id);
            for my $rcpt (qquote_rfc2821_local(@{$msginfo->recips})) {
                pass_meta($request, 'Rcpt', $rcpt);
            }

            $which_section = 'rspamd_tx';
            my $ssl_opts = $self->{options}->{'ssl_opts'};
            $ssl_opts = { verify_hostname => 1 } unless defined $ssl_opts;
            my $user_agent = LWP::UserAgent->new(
                protocols_allowed => [ 'http', 'https' ],
                ssl_opts          => $ssl_opts
            );
            my $credentials = $self->{options}->{'credentials'};
            if (defined $credentials) {
                $user_agent->credentials(
                    $credentials->{'netloc'},
                    $credentials->{'realm'},
                    $credentials->{'username'},
                    $credentials->{'password'},
                )
            }
            $user_agent->agent('amavis/' . $VERSION);
            $user_agent->timeout($timeout);

            prolong_timer($which_section, undef, undef, $timeout);
            my $response = $user_agent->request($request);
            $response->is_success or die "Error calling rspamd: " . $response->status_line . ", stopped";
            my $content = $response->content;
            defined $content or die "Missing rspamd response, stopped";
            do_log(5, "Rspamd response: " . $content);
            my $rspamd = decode_json $content;
            $rspamd_skipped = $rspamd->{is_skipped};
            $spam_level = $rspamd->{score};
            $rspamd_rscore = $rspamd->{required_score};
            $rspamd_action = $rspamd->{action};
            my $rspamd_symbols = $rspamd->{symbols};
            if (defined $rspamd_symbols) {
                my @tests;
                while (my ($ignored, $symbol) = each %$rspamd_symbols) {
                    my $symbol_name = $symbol->{name};
                    my $symbol_score = $symbol->{score};
                    $symbol_name =~ tr/=,/__/;
                    my $t = looks_like_number($symbol_score)
                        ? sprintf('%s=%.2f', $symbol_name, $symbol_score * $score_factor)
                        : sprintf('%s=%.2f', $symbol_name, 0); # coerce into number, is re-evaluated
                                                               # arithmetically in structured_report
                    push(@tests, $t);
                }
                $rspamd_tests = join(',', @tests);
            }
            # Map Rspamd action to Amavis verdict
            my %action2verdict = (
                'add header'      => 'Spam',
                'no action'       => 'Ham',
                'reject'          => 'Spam',
                'rewrite subject' => 'Spam',
                # Rspamd 1.9 and later
                'discard'         => 'Spam',
                'quarantine'      => 'Spam',
            );
            $rspamd_verdict = exists $action2verdict{$rspamd_action} ?
                $action2verdict{$rspamd_action} : 'Unknown';
        }
        else {
            do_log(2, "%s skipping message type %s", $scanner_name, ref $msg);
            $rspamd_action = 'N/A';
            $rspamd_verdict = 'Unknown';
            $rspamd_skipped = 1;
            $rspamd_rscore = 0;
            $spam_level = 0;
        }

        1;

    } or do {
        my $eval_stat = $@ ne '' ? $@ : "errno=$!";
        chomp $eval_stat;
        do_log(-1, "%s client failed: %s", $scanner_name, $eval_stat);
    };

    section_time($which_section);
    if (defined $spam_level) {
        $spam_level *= $score_factor;
        $rspamd_rscore *= $score_factor;
    }
    do_log(2, "%s rspamd %sscore %.2f/%.2f (%s) %s", $scanner_name,
        $rspamd_skipped ? 'skipped/' : '',
        $spam_level, $rspamd_rscore, $rspamd_action, $rspamd_tests);
    $msginfo->supplementary_info('SCORE-' . $scanner_name, $spam_level);
    $msginfo->supplementary_info('VERDICT-' . $scanner_name, $rspamd_verdict);
    for my $r (@$per_recip_data) {
        $r->spam_level(($r->spam_level || 0) + $spam_level);
        if (!$r->spam_tests) {
            $r->spam_tests([ \$rspamd_tests ]);
        }
        else {
            push(@{$r->spam_tests}, \$rspamd_tests);
        }
    }
}

1;
