#!/usr/bin/perl
# firefox-specific:
# create a diff between two versions (NOT for js/ !!!)
# Optionally, change 'git' from 'no' to 'yes' to do a git diff
# That was intended to get rid of UTF-8 treated as ASCII, e.g. in rst files,
# but doesn't always work.  Between one release and the first beta of the
# next release it provides some information about file movements - for
# vendored, thirdparty and rust that doesn't do anything useful and it
# adds time and a lot of output.  Maybe useful between releases.
# but for normal beta diffs or esr point releases set 'git' to 'no'.
# For either, use an invocation of csplit to break that into each file
# then use perl to decide whether to put the file in a slim diff
# or to put it in a skip file.  Unfortunately, a lot of unwanted
# json and css files still get included in the 'slim' part.
# VERSION 2024-01-24

#  SPDX-License-Identifier: MIT
# License-Text:
#
# MIT License

# Copyright (c) 2023-2024 Ken Moffat
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# Known bugs :-(
# Some .css and .json files appear in the wanted output (fox*.slim).
# Not understood, and not important enough to debug at this time.

# Reviewing the output:
#
# For the files which are expected to be interesting, open the
# fox*.slim file in view, and if a file is not interesting to you
# and has a long set of changes, just use '/^diff' to go to the
# next file (and then '/' to repeat that).
#
# To review if files have been skipped which should not have been:
# grep '^diff ' fox*.skipped | awk '{ print $NF }' | less

use strict;
use warnings;
use File::Basename; # to split a filename into its parts

# Name of the mozilla tree (first arg) and the prefix for my diff names.
my ($package, $diffname);

# Counts of files ignored because of their type (file extension).
my ($fcss, $fgypi, $fjpg, $fjpeg, $fjson) = (0) x 5;
my ($fpng, $fscss, $fsvg, $fyaml, $fyml)  = (0) x 5;

# Counts of files ignored because of a directory name in their path.
#   arm                    - libtheora and dav1d
#   arm64, loong64, mips32 - in js/src
#   macia32, macx64, winia32 - in libvpx
#   accessmac              - accessible/mac; mac is not always for macOS,
#                            and some matches e.g. for rust modules might
#                            later match too much
#   win64                  - win64 is for MS, win is more general, not just MS
my ($android, $arm, $arm64, $cocoa, $installer, $loong64) = (0) x 6;
my ($accessmac, $macia32, $macx64, $mips32)               = (0) x 4;
my ($win64, $winia32, $winx64)                            = (0) x 3;

# Although most variants of test are under test/ or tests/, not all are.
my ($crashtests, $gtest, $gtests, $mochitests, $reftests) = (0) x 5;
my ($test, $tests, $testing)                              = (0) x 3;

# Totals: pieces of the diff seen, kept, and dropped.
my ($records, $passed, $skipped) = (0) x 3;

# Forward declarations, so that strict accepts the bare calls
# 'help;' and 'info;' before the subs are defined.
sub help;
sub info;

# Set to "yes" to use git diff (much more output).
my $git = 'no';

# start of main line
# Expect exactly three args: the package name, then the versions of the
# old and the new tree.  A version is whatever follows "<package>-" in the
# name of the directory holding the tree, e.g. 113.0b1 for firefox-113.0b1.
# The full diff is later created as <diffname>-<old>-<new>.

# Without all three args the tests below would be made on undef, which
# only adds 'uninitialized value' warnings in front of the usage text.
if (@ARGV != 3) {
	print "You need to specify a package and two versions\n";
	help;
}

# recognized packages, and the prefix used for the names of their diffs
my %prefix_for = (
	firefox     => 'fox',
	thunderbird => 'bird',
	seamonkey   => 'monkey',
);

my $arg0=$ARGV[0];
# if a recognized package, use it and look at the two versions
if ( exists $prefix_for{$arg0} ) {
	$package = $arg0;
	$diffname = $prefix_for{$arg0};
} else {
	print "You need to specify a recognized package as first arg\n";
	help;
}

# both trees must already exist as directories in the CWD;
# help does not return, it exits
my $old=$ARGV[1];
print "old version is $old\n";
if (-d "$package-$old") {
	print "$package-$old exists\n";
} else {
	print "$package-$old is not a directory\n";
	help;
}

my $new=$ARGV[2];
print "new version is $new\n";
if (-d "$package-$new") {
	print "$package-$new also exists\n";
} else {
	print "$package-$new is not a directory\n";
	help;
}

# Use the system diff command (or git diff, if $git is "yes") to create
# the full diff, called <diffname>-$old-$new, then use the system csplit
# command to break that into one xx?????? file for each file in the diff.
# start of good lines, comment to avoid rerunning
my $fulldiff = "$diffname-$old-$new";

# Only the command and the text which starts each file's diff vary.
# The csplit pattern is anchored: a line of content inside a diff always
# starts with ' ', '+' or '-', so only a real header can match.  Without
# the anchor, e.g. a patch file within the tree would be split wrongly.
my ($diffcmd, $splitpattern);
if ( "$git" eq "yes" ) {
	$diffcmd = "git diff --no-index -l 0";
	$splitpattern = '^diff --git ';
} else {
	$diffcmd = "diff -Nur";
	$splitpattern = '^diff -N';
}
$diffcmd .= " $package-$old $package-$new >$fulldiff";

print "Creating the full diff\n";
print "command will be $diffcmd\n";
# this also removes the .slim and .skipped files of an earlier run
system "rm -rf $fulldiff*";
system $diffcmd;
# Both diff and git diff --no-index exit with 1 when differences were
# found, which is the normal case here.  Anything else is only reported,
# because whatever did get written can still be processed.
if ($? == -1 or ($? & 127) or ($? >> 8) > 1) {
	warn "warning: the diff command reported trouble (status $?)\n";
}

# Pieces left behind by an interrupted run would be processed as if
# they were part of this diff.
unlink glob('xx??????');

# now use the system csplit command to break it down into files
print "Using csplit to break the diff into files\n";
# {*} is quoted so that the shell cannot treat it as a filename pattern
system "csplit -n 6 -s -z $fulldiff '/$splitpattern/' '{*}'";
# csplit also fails if nothing matched, e.g. for identical trees, so
# only report it: there will then be no pieces to process.
if ($? != 0) {
	warn "warning: csplit reported trouble (status $?)\n";
}


# Decide, for each piece of the diff, whether it goes to the .slim file
# (wanted) or to the .skipped file (uninteresting).
#
# Each rule below is: the pattern to look for, the label used in the
# progress messages, and a reference to the counter reported by info.
# The rules are tried in the order listed and the first match wins.

# Uninteresting file extensions.  The patterns are not anchored at the
# end, so an extension which merely starts with one of them also matches.
my @ext_rules = (
	[ qr/\.css/,  'CSS',  \$fcss  ],
	[ qr/\.gypi/, 'GYPI', \$fgypi ],
	[ qr/\.jpg/,  'JPG',  \$fjpg  ],
	[ qr/\.jpeg/, 'JPEG', \$fjpeg ],
	[ qr/\.json/, 'JSON', \$fjson ],
	[ qr/\.png/,  'PNG',  \$fpng  ],
	[ qr/\.scss/, 'SCSS', \$fscss ],
	[ qr/\.svg/,  'SVG',  \$fsvg  ],
	[ qr/\.yaml/, 'YAML', \$fyaml ],
	[ qr/\.yml/,  'YML',  \$fyml  ],
);

# Uninteresting directories - this will be slower, so the more-likely
# variants come first.  Note that testing is a top-level directory with
# a lot of tests, so it can fairly quickly match items.
my @dir_rules = (
	[ qr{/testing/},        'testing',        \$testing    ],
	[ qr{/test/},           'test',           \$test       ],
	[ qr{/tests/},          'tests',          \$tests      ],
	[ qr{/mochitest/},      'mochitest',      \$mochitests ],
	[ qr{/crashtests/},     'crashtests',     \$crashtests ],
	[ qr{/reftests/},       'reftests',       \$reftests   ],
	[ qr{/gtest/},          'gtest',          \$gtest      ],
	[ qr{/gtests/},         'gtests',         \$gtests     ],
	[ qr{/android/},        'android',        \$android    ],
	[ qr{/arm/},            'arm',            \$arm        ],
	[ qr{/arm64/},          'arm64',          \$arm64      ],
	[ qr{/cocoa/},          'cocoa',          \$cocoa      ],
	[ qr{/installer/},      'installer',      \$installer  ],
	[ qr{/loong64/},        'loong64',        \$loong64    ],
	[ qr{/accessible/mac/}, 'accessible mac', \$accessmac  ],
	# these two used to be counted as accessible/mac by mistake
	[ qr{/mac/ia32/},       'mac ia32',       \$macia32    ],
	[ qr{/mac/x64/},        'mac x64',        \$macx64     ],
	[ qr{/mips32/},         'mips32',         \$mips32     ],
	[ qr{/win64/},          'win64',          \$win64      ],
	[ qr{/win/ia32/},       'win ia32',       \$winia32    ],
	[ qr{/win/x64/},        'win x64',        \$winx64     ],
);

# need to process all xx?????? files in CWD
my @files = <xx??????>;

# What was decided for the previous piece, used for any piece which does
# not start with a diff command.  With nothing before it, keep the piece.
my $prev_goodness = 1;

foreach my $file (@files) {
	# only the first line is needed: the diff command naming both files
	open(my $fh, '<:encoding(UTF-8)', $file) or die "could not open diff $file\n";
	my $header = <$fh>;
	close($fh);
	++$records;
	my @fields = defined $header ? split(' ', $header) : ();

	# Assume this item should be skipped.
	# For efficiency, do not set after each test,
	# set at end of tests if wanted.
	my $goodness = 0;

	if (@fields < 4 or $fields[0] ne 'diff') {
		# Not the start of a file's diff, so there is no filename to
		# examine (fileparse would die on an undefined name).  Any such
		# text belongs with whatever came before it.
		warn "$file does not start with a diff command, treating it like the previous piece\n";
		$goodness = $prev_goodness;
	} else {
		# the fourth field is the name of the file in the new tree;
		# report the filename being processed
		my $newfile = $fields[3];
		print "$newfile\n";
		# $ext is the last '.' of the name and everything after it
		my (undef, $dir, $ext) = fileparse($newfile, qr/\.[^.]*/);

		# first see if the file extension is uninteresting
		print("start of exceptions: goodness is $goodness\n");
		my $matched;
		foreach my $rule (@ext_rules) {
			next unless $ext =~ $rule->[0];
			$matched = $rule;
			last;
		}

		# now look at the directory
		if (!$matched) {
			print("not dropped because of file type\n");
			foreach my $rule (@dir_rules) {
				next unless $dir =~ $rule->[0];
				$matched = $rule;
				last;
			}
		}

		if ($matched) {
			my (undef, $label, $counter) = @$matched;
			print("$label: goodness is $goodness\n");
			++$$counter;
		} else {
			print("fell through file types and directories: setting goodness to 1\n");
			$goodness = 1;
		}
	}

	# $goodness is declared afresh for every piece, so there is
	# nothing to clear after using it
	if ($goodness) {
		print "will use this diff\n";
		++$passed;
		system "cat $file >>$diffname-$old-$new.slim";
	} else {
		print "will skip this diff\n";
		++$skipped;
		system "cat $file >>$diffname-$old-$new.skipped";
	}
	$prev_goodness = $goodness;

	# for debugging, to stop after a limited number of pieces:
	#if ($records > 500) {
	#	info;
	#	exit (0);
	#}
}

# report the totals, then remove the pieces which csplit created
info();
system('rm -f xx*');
exit(0);

# end of main line

sub help {
	# Print the usage text and exit - this sub does not return.
	# This must exit with bad status, e.g. if wrong number of args,
	# so that a calling shell or script can tell that nothing was done.
	print "Remove uninteresting items from a mozilla diff\n";
	print "\n";
	print "usage: pass the package name and versions of the trees\n";
	print " e.g firefox 112.0 113.0\n";
	print " should work for firefox, seamonkey, thunderbird\n";
	exit (1);
}

sub info {
	# Print the totals, then how many diffs were skipped for each file
	# extension and for each directory name.  Only reads the file-level
	# counters; takes no arguments.
	print "Processed $records records\n";
	print "Passed: $passed  Skipped: $skipped\n";
	print "skipped because of uninteresting file extension:\n";
	printf("%-6s%6d  %-6s%6d\n",'css',$fcss,'gypi',$fgypi);
	printf("%-6s%6d  %-6s%6d\n",'jpeg',$fjpeg,'jpg',$fjpg);
	printf("%-6s%6d  %-6s%6d\n",'json',$fjson,'png',$fpng);
	printf("%-6s%6d  %-6s%6d\n",'scss',$fscss,'svg',$fsvg);
	printf("%-6s%6d  %-6s%6d\n",'yaml',$fyaml,'yml',$fyml);
	# remember - many files could match more than one of these
	# directories, so numbers will change with the order of testing
	print "skipped because of uninteresting directory:\n";
	# 21 of these, so three per line.  Each label is the directory text
	# which is actually matched: /mochitest/, /mac/x64/ and /win/x64/
	# were previously shown as /mochitests/, /mac/64/ and /winx64/.
	printf("%-17s%6d  %-17s%6d  %-17s%6d\n",'/testing/',$testing,'/test/',$test,'/tests/',$tests);
	printf("%-17s%6d  %-17s%6d  %-17s%6d\n",'/crashtests/',$crashtests,'/gtest/',$gtest,'/gtests/',$gtests);
	printf("%-17s%6d  %-17s%6d  %-17s%6d\n",'/mochitest/',$mochitests,'/reftests/',$reftests,'/android/',$android);
	printf("%-17s%6d  %-17s%6d  %-17s%6d\n",'/arm/',$arm,'/arm64/',$arm64,'/cocoa/',$cocoa);
	printf("%-17s%6d  %-17s%6d  %-17s%6d\n",'/installer/',$installer,'/loong64/',$loong64,'/accessible/mac/',$accessmac);
	printf("%-17s%6d  %-17s%6d  %-17s%6d\n",'/mac/ia32/',$macia32,'/mac/x64/',$macx64,'/mips32/',$mips32);
	printf("%-17s%6d  %-17s%6d  %-17s%6d\n",'/win64/',$win64,'/win/ia32/',$winia32,'/win/x64/',$winx64);
}

# vim: ts=4
