#!@PERL5@
#
# $OpenBSD: mdoclint,v 1.48 2016/01/24 20:10:48 schwarze Exp $
# $NetBSD: mdoclint,v 1.77 2017/06/08 10:19:56 wiz Exp $
#
# Copyright (c) 2001-2017 Thomas Klausner
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR, THOMAS KLAUSNER,
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#

use strict;
use warnings;

$| = 1;

package Parser;
use Getopt::Std;

use constant {
	OPENBSD => 0,
	NETBSD => 1,
};

use vars qw(
	$opt_D $opt_d $opt_F $opt_h
	$opt_m $opt_o $opt_p $opt_r $opt_S $opt_v $opt_w $opt_x
);


my $arch=`uname -m`;
chomp($arch);
my $options="DdFhmoprSvwx";

sub usage
{
	print STDERR <<"EOF";
mdoclint: verify man page correctness
usage:	mdoclint [-$options] file ...
	-D	warn about bad casing and archs in .Dt
	-d	warn about bad date strings (in .Dd only)
	-F	fix whitespace problems (asks before overwriting)
	-h	display this help text
	-m	warn about man pages that are not in mdoc(7) format
	-o	warn about non-empty .Os strings
	-p	warn about punctuation problems
	-r	warn about missing RCS Id
	-S	warn about any .Sh weirdness
	-v	verbose output
	-w	show section header in warnings
	-x	warn about cross-references with missing targets
Default is -DdmoprSx if no flag is specified.
EOF
	exit(0);
}

# constants to build
my %sections;
my $arches_re;
my $sections_re;
my $esections_re;
my $valid_date_re;
# and the code that builds them
{
	my @sections = (
		"NAME",
		NETBSD ? "LIBRARY" : undef,
		"SYNOPSIS",
		"DESCRIPTION",
		OPENBSD ? "CONTEXT" : undef ,
		NETBSD ? "IMPLEMENTATION NOTES" : undef,
		"RETURN VALUES",
		"ENVIRONMENT",
		"FILES",
		"EXIT STATUS",
		"EXAMPLES",
		"DIAGNOSTICS",
		NETBSD ? "COMPATIBILITY" : undef,
		"ERRORS",
		NETBSD ? "CODE REFERENCES" : undef,
		"SEE ALSO",
		"STANDARDS",
		"HISTORY",
		"AUTHORS",
		"CAVEATS",
		"BUGS",
		NETBSD ? "SECURITY CONSIDERATIONS" : undef
	);

	my $i = 1;
	for my $sh (@sections) {
		if (defined $sh) {
			$sections{$sh} = $i++;
		}
	}
	my @arches;
	if (OPENBSD) {
		@arches =
		    (qw(alpha amd64 arm64 armv7 hppa i386
		    landisk loongson luna88k macppc mips64 octeon sgi
		    socppc sparc64));
	}
	if (NETBSD) {
		@arches =
		    (qw(acorn26 acorn32 algor alpha amiga arc atari
		    bebox cats cesfic cobalt dreamcast
		    emips evbarm evbmips evbppc
		    evbsh3 evbsh5 hp300 hpcarm hpcmips hpcsh hppa
		    i386 ibmnws luna68k mac68k macppc mipsco mmeye
		    mvme68k mvmeppc netwinder news68k newsmips next68k
		    pc532 playstation2 pmax pmppc prep sandpoint sbmips
		    sgimips shark sparc sparc64 sun2 sun3 vax walnut
		    x68k x86 x86_64 xen));
	}
	my $a = join('|', @arches);
	$arches_re = qr{(?:$a)}o;
	if (OPENBSD) {
		$sections_re = qr{(?:3p|[1-9])}o;
		$esections_re = qr{(?:3p|[0-9])}o;
	}
	if (NETBSD) {
		$sections_re = qr{[1-9](?:lua)?}o;
		$esections_re = qr{[0-9](?:lua)?}o;
	}
	if (OPENBSD) {
		$valid_date_re = qr{\$Mdocdate\b};
	}
	if (NETBSD) {
		$valid_date_re = qr{(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*[1-9][0-9]*,\s*(?:198[0-9]|199[0-9]|200[0-9]|201[0-7])$}o;
	}
}

sub debug
{
	my $self = shift;
    	print STDOUT "debug: $self->{fn}:$self->{ln}: @_\n" if $opt_v;
}

sub warning
{
	my $self = shift;
	my $extra = "";
	if ($opt_w) {
		$extra = $self->{current_section_header}.":";
	}
	print STDOUT "$self->{fn}:$extra$self->{ln}: ", join('', @_), "\n";
}

sub handle_options
{
	getopts($options);
	$opt_h and usage();

	# default to all warnings if no flag is set
	unless ($opt_D or $opt_d or $opt_m
	    or $opt_o or $opt_p or $opt_r or $opt_S or $opt_x) {
		$opt_D = $opt_d = $opt_m =
		    $opt_o = $opt_p = $opt_r = $opt_S = $opt_x = 1;
	}
}


sub verify_xref
{
	my ($self, $page, $section, $pre, $post) = @_;
	if ($self->{names}{$page.$section}) {
		$self->warning("Xref to itself (use .Nm instead)") if $opt_x;
	}
	# try to find corresponding man page
	if (OPENBSD) {
		open my $saveout, '>&', STDOUT;
		open my $saveerr, '>&', STDERR;
		open STDOUT, '>', '/dev/null';
		open STDERR, '>', '/dev/null';
		my $irc = system 'man', '-M', '/usr/share/man:/usr/X11R6/man',
		    '-s', $section, '-f', $page;
		open STDOUT, '>&', $saveout;
		open STDERR, '>&', $saveerr;
		return 1 unless $irc;
	} else {
		for my $dir ('/usr/share/man', '/usr/X11R7/man') {
			for my $a ('', $arch) {
				for my $page ("man$section/$a/$page.$section") {
					return 1 if -f "$dir/$page";
				}
			}
		}
	}
	return 1 if -f "./$page.$section";
	return 1 if -f "./$page.mdoc";

	$self->warning($pre."trailing Xref to $page($section)$post") if $opt_x;
	return 0;
}

sub new
{
	my ($class, $fn) = @_;

	my $o = {
		mandoc_p => 1,
		all => [],
		lastline => '',
		changes => 0,
		oxrcsidseen => 0,
		nxrcsidseen => 0,
		inliteral => 0,
		shseen => {},
		current_section_header => '',
		sec => '0',
		names => { $fn => 1 },
		fn => $fn
	};
	$o->{sec} = $1 if $fn =~ /\.(.+?)$/;
	open my $input, '<', $fn or die "can't open input file $fn";
	$o->{file} = $input;
	$o->{ln} = 0;
	bless $o, $class;
}

sub next_line
{
	my ($self) = @_;

	my $l = readline($self->{file});
	if (defined $l) {
		$self->{ln}++;
	}
	return $l;
}

sub close
{
	my ($self) = @_;

	close($self->{file});
}

sub parse_macro_args
{
	my ($s, $string) = @_;
	$_ = $string;
	my @params = ();
	while (!/^$/) {
		if (s/^\"(.*?)\"\s*//) {
			push(@params, $1);
		} elsif (s/^(\S+)\s*//) {
			push(@params, $1);
		}
	}
	return @params;
}

sub set_section_header
{
	my ($s, $section_header) = @_;
	$section_header = join(' ', $s->parse_macro_args($section_header));

	if (not $sections{$section_header}) {
		$s->warning("unknown section header: ",
		    "`$section_header'") if $opt_S;
	} else {
		$s->{shseen}->{$section_header} = 1;
	}

	$s->{current_section_header} = $section_header;
}

sub process_and_save_line
{
	my ($s, $input) = @_;
	my $result = $s->process_line($input);
	# note that process_line chomps \n, then re-adds it,
	# so we detect a change on last lines without a \n.
	if ($result ne "$input") {
		$s->{changes} = 1;
	}
	push(@{$s->{all}}, $result);
}

sub process_line
{
	my $s;
	($s, $_) = @_;
	chomp;
	# always cut trailing spaces
	s/\s+$//o;
	if (/\$OpenBSD\b.*\$/o) {
		$s->{oxrcsidseen}++;
		if (OPENBSD and ($s->{oxrcsidseen} > 1)) {
		    $s->warning("RCS Id seen twice") if $opt_r;
		}
		return "$_\n";
	}
	if (/[\$]NetBSD\b.*\$/o) {
		$s->{nxrcsidseen}++;
		if (NETBSD and ($s->{nxrcsidseen} > 1)) {
		    $s->warning("RCS Id seen twice") if $opt_r;
		}
		return "$_\n";
	}
	# comments
	if (/^\.\\\"/) {
		return "$_\n";
	}
	if (/^\.TH\s+/o) {
		$s->warning("not mandoc") if $opt_m;
		$s->{mandoc_p} = 0;
#	    	/^.TH\s*[\w-_".]+\s*([1-9])/;
#	    	$section = $1;
		return "$_\n";
	}
#	if (/^.Dt\s*[\w-_".]+\s*([1-9])/) {
#	    	$section = $1;
#	}
	if (/^\.Dt\s+/o) {
		if (/^\.Dt\s+(?:[A-Z\d._-]+)\s+($sections_re)(?:\s+$arches_re)?$/o)  {
			$s->{sec} = $1;
		} else {
			$s->warning("bad .Dt: `$_'") if $opt_D;
		}
	}

	if ($s->{mandoc_p}) {
		if (/^\.Sh\s+(.*)$/o) {
	        	my $line = $_;
			$s->set_section_header($1);
			return "$line\n";
		}
	} else {
		if (/^\.SH\s+(.*)$/o) {
			my $line = $_;
			$s->set_section_header($1);
			return "$line\n";
		}
	}

	if ($s->{current_section_header} eq "NAME") {
		if (/^\.Nm\s+(\S+)/o) {
			$s->{names}{$1.$s->{sec}} = 1;
		}
	}
	if (/^\.Os\s+(.+)/o) {
		$s->warning(".Os used with argument `$1'") if $opt_o;
	}
	if (/^\... .*[^\s][\.();,\[\]\{\}:]$/o
	    and not /\s\.\.\.$/o and not /\\&.$/o) {
		$s->warning("punctuation in format string ",
		    "without space: `$_'") if $opt_p;
	}
	if (/^\./o and /Ns [\.();,\[\]\{\}:]/o) {
		$s->warning("possible Ns abuse: `$_'") if $opt_p;
	}
	if ((/^([^\.]\w+)\(\)/o or /^[^\.].*\s(\w+)\(\)/o) and not $s->{inliteral}) {
		$s->warning("use .Fn or .Xr for functions: `$1()'") if $opt_p;
	}

	my $destruct = $_;
	if ($s->{mandoc_p}) {
		$destruct =~ s/\\\&([\w\.])/$1/o;
		if ($destruct =~ /^\.Xr\s+([\w\:\.\-\+\/]+)\s+($esections_re)(.*)/o) {
			$s->debug("Xref to $1($2) found: `$_'");
			$s->verify_xref($1, $2, "", "");
			if ($3 =~ /^\S/o) {
				$s->warning("No space after section number in Xref: `$_'") if $opt_x;
			}
		} elsif ($destruct =~ /^\.Xr/o) {
			$s->warning("Weird Xref found: `$_'") if $opt_x;
		}
	} else {
		$destruct =~ s/\\f.//go;
		if ($destruct !~ /^\.\\\"/o) {
			while ($destruct =~ s/([-\w.]+)\s*\(($esections_re)\)//o) {
				$s->debug("possible Xref to $1($2) found: `$_'");
				$s->verify_xref($1, $2, "possible ", ": `$_'");
				# so that we have a chance to find more than one
				# per line
				$destruct =~ s/(\w+)\s*\(($sections_re)\)//o;
			}
		}
	}

	if (/^\.Dd/o and not /^\.Dd\s+$valid_date_re/o) {
		$s->warning("Invalid date found: `$_'") if $opt_d;
	}

	if (/^\.Bd\b.*-(?:literal|unfilled)\b/o) {
		$s->{inliteral} = 1;
	}
	if ($s->{inliteral} == 1) {
		if (/^\.Ed\b/o) {
		    $s->{inliteral} = 0;
		}
	}

	$s->{lastline} = $_;
	return "$_\n";
}

sub finish
{
	my ($s) = @_;

	if (NETBSD and not $s->{nxrcsidseen}) {
		$s->warning("Missing RCS Id") if $opt_r;
	}
	if (OPENBSD and not $s->{oxrcsidseen}) {
		$s->warning("Missing RCS Id") if $opt_r;
	}
	if ($s->{mandoc_p}) {
		foreach my $i (qw(NAME SYNOPSIS DESCRIPTION)) {
			if (not ($s->{shseen}{$i})) {
				$s->warning("missing $i section") if $opt_S;
			}
		}
	}
}

package main;

sub handle_file
{
	my $parser = Parser->new($_[0]);

	while ($_ = $parser->next_line) {
		$parser->process_and_save_line($_);
	}

	$parser->finish;
	$parser->close;
	if ($Parser::opt_F and $parser->{changes}) {
		open OUT, ">$_[0].new" or
		    die "can't open output file `$_[0].new'";
		for my $l (@{$parser->{all}}) {
			print OUT $l
		}
		close OUT;
		system("mv -i $_[0].new $_[0]");
	}
}

Parser->handle_options;
foreach my $file (@ARGV) {
	handle_file($file);
}
