#!/usr/bin/perl

# cid2spf.pl - Converts a MS Caller-ID entry (XML) to a SPF entry
#
# (c) 2004 by Ernesto Baschny
#
# Date: 2004-02-25
# Version: 1.0
#
# Usage:
#  ./cid2spf.pl "<ep xmlns='http://ms.net/1'>...</ep>"
#
# Note that the 'include' directives will also have to be checked and
# "translated". Future versions of this script might be able to get a
# domain name as an argument and "crawl" the DNS for the necessary
# information.
#
# A complete reverse translation (SPF -> CID) might be impossible, since
# there are no way to handle:
# - PTR and EXISTS mechanism 
# - MX mechanism with an different domain as argument
# - macros
# 
# References:
# http://www.microsoft.com/mscorp/twc/privacy/spam_callerid.mspx
# http://spf.pobox.com/
#
# Known bugs:
# - Currently it won't handle the exclusions provided in the A and R
#   tags (prefix '!'). They will show up "as-is" in the SPF record
# - I really haven't read the MS-CID specs in-depth, so there are probably
#   other bugs too :)
#
# Ernesto Baschny <ernst@baschny.de>
#

use strict;
use warnings;

use XML::Parser;

my @spf = ();
my $action = '-all';
my $has_servers = undef;

# -------------------------------------------------------------------------
sub StartTag {
	my $hash = shift;
	my $tag = shift;
	my %attr = %_;
	if ($tag eq 'm') {
		if (defined $has_servers && ! $has_servers) {
			print STDERR "Declared <noMailServers\> and later <m>, this CID entry is not valid.\n";
			exit(1);
		}
		$has_servers = 1;
	}
	elsif ($tag eq 'noMailServers') {
		if (defined $has_servers && $has_servers) {
			print STDERR "Declared <m> and later <noMailServers\>, this CID entry is not valid.\n";
			exit(1);
		}
		$has_servers = 0;
	}
	elsif ($tag eq 'ep') {
		if (defined $attr{'testing'} && $attr{'testing'} eq 'true') {
			# A CID with 'testing' found:
			# From the MS-specs:
			#  "Documents in which such attribute is present with a true
			#  value SHOULD be entirely ignored (one should act as if the
			#  document were absent)"
			# From the SPF-specs:
			#  "Neutral (?): The SPF client MUST proceed as if a domain did
			#  not publish SPF data."
			# So we set SPF action to "neutral":
			$action = '?all';
		}
	}
	elsif ($tag eq 'mx') {
		# The empty MX-tag, same as SPF's MX-mechanism
		push @spf, 'mx';
	}
}

# -------------------------------------------------------------------------
sub EndTag {
	my $hash = shift;
	my $tag = shift;
	
	if ($tag eq 'ep') {
		# This is the end... print what we've got out
		my $spf_entry = '';
		$spf_entry .= 'v=spf1';
		if ($has_servers) {
			$spf_entry .= ' ' . join(' ', @spf);
		}
		$spf_entry .= ' ' . $action;
		print "$spf_entry\n";
	}
}

# -------------------------------------------------------------------------
sub Text {
	my $hash = shift;

	my $text = $hash->{'Text'};
	my @context = @{ $hash->{'Context'} };
	my $tag = $context[$#context];
	# Remove starting and trailing spaces from text:
	$text =~ s/^\s+//g;
	$text =~ s/\s+$//g;

	if ($tag eq 'a' || $tag eq 'r') {
		# The A and R tags from MS-CID are both handled by the 
		# ipv4/6-mechanisms from SPF:
		my $mechanism = 'ip4';
		if ($text =~ /:/) {
			$mechanism = 'ip6';
		}
		push @spf, $mechanism . ':' . $text;
	}
	if ($tag eq 'indirect') {
		# MS-CID's indirect is "sort of" the include from SPF:
		# Not really true, because the <indirect> tag from MS-CID also 
		# provides a fallback in case the included domain doesn't provide
		# _ep-records: The inbound MX-servers of the included domains
		# are added to the list of allowed outgoing mailservers for the
		# domain that declared the _ep-record with the <indirect> tag.
		# In SPF you would use the 'mx:domain' to handle this, but this
		# wouldn't depend on referred domain having or not SPF-records.
		push @spf, 'include:' . '_ep.' . $text;
	}
}

# -------------------------------------------------------------------------
# main

if (scalar(@ARGV) == 0) {
	printf("Usage: %s \"<ep xmlns='http://ms.net/1'>...</ep>\"\n", $0);
	exit(1);
}

my $cid_xml = $ARGV[0];

# Parse the beast. Any XML-problem will be reported by XML::Parser.
my $xmlp = new XML::Parser(Style => 'Stream');
$xmlp->parse($cid_xml);

1;

