Patch to use GCC kung fu to make bzip2 go faster!

bzip2-shared, and libbz2.so, will go about 4.5% faster.
bzip2 (not linked to libbz2.so) will go about 7% faster (install this one).

These numbers don't account for -fomit-frame-pointer, so you should actually
get better performance than this.

How it works: each binary is built twice from all of its sources in a single
compiler invocation (--combine).  The first build is instrumented with
-fprofile-generate and is run over the bundled sample files to collect a
profile; the second build consumes that profile with -fprofile-use.

Both links of libbz2.so.1.0.4 use the soname libbz2.so.1.0, which is the name
of the symlink created at the end of the 'all' target in Makefile-libbz2_so.

NOTE(review): --combine is only accepted by older GCC releases (it was dropped
around GCC 4.6, as far as I recall -- confirm against the GCC release notes
before using this patch with a newer compiler).

NOTE(review): the whitespace inside the hunks below (tabs, column alignment,
trailing blanks) was reconstructed from a copy whose line structure had been
lost.  If a hunk is rejected, retry with 'patch -l' (ignore whitespace).

robert

diff -Naur bzip2-1.0.5.orig/Makefile bzip2-1.0.5/Makefile
--- bzip2-1.0.5.orig/Makefile	2008-02-14 12:39:18.000000000 +0000
+++ bzip2-1.0.5/Makefile	2010-03-01 09:07:36.000000000 +0000
@@ -21,7 +21,7 @@
 LDFLAGS=
 
 BIGFILES=-D_FILE_OFFSET_BITS=64
-CFLAGS=-Wall -Winline -O2 -g $(BIGFILES)
+CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer $(BIGFILES)
 
 # Where you want it installed when you do 'make install'
 PREFIX=/usr/local
@@ -37,8 +37,19 @@
 
 all: libbz2.a bzip2 bzip2recover test
 
-bzip2: libbz2.a bzip2.o
-	$(CC) $(CFLAGS) $(LDFLAGS) -o bzip2 bzip2.o -L. -lbz2
+bzip2:
+	$(CC) $(CFLAGS) $(LDFLAGS) -o bzip2 --combine -fprofile-generate \
+	blocksort.c huffman.c crctable.c randtable.c compress.c \
+	decompress.c bzlib.c bzip2.c
+	./bzip2 -1 < sample1.ref > sample1.rb2
+	./bzip2 -2 < sample2.ref > sample2.rb2
+	./bzip2 -3 < sample3.ref > sample3.rb2
+	./bzip2 -d < sample1.bz2 > sample1.tst
+	./bzip2 -d < sample2.bz2 > sample2.tst
+	./bzip2 -ds < sample3.bz2 > sample3.tst
+	$(CC) $(CFLAGS) $(LDFLAGS) -o bzip2 --combine -fprofile-use \
+	blocksort.c huffman.c crctable.c randtable.c compress.c \
+	decompress.c bzlib.c bzip2.c
 
 bzip2recover: bzip2recover.o
 	$(CC) $(CFLAGS) $(LDFLAGS) -o bzip2recover bzip2recover.o
diff -Naur bzip2-1.0.5.orig/Makefile-libbz2_so bzip2-1.0.5/Makefile-libbz2_so
--- bzip2-1.0.5.orig/Makefile-libbz2_so	2007-12-09 13:00:50.000000000 +0000
+++ bzip2-1.0.5/Makefile-libbz2_so	2010-03-01 09:05:56.000000000 +0000
@@ -24,36 +24,42 @@
 SHELL=/bin/sh
 CC=gcc
 BIGFILES=-D_FILE_OFFSET_BITS=64
-CFLAGS=-fpic -fPIC -Wall -Winline -O2 -g $(BIGFILES)
+CFLAGS=-Wall -Winline -O2 -fomit-frame-pointer $(BIGFILES)
+BZIP2_SHARED=LD_PRELOAD=./libbz2.so.1.0.4 ./bzip2-shared
 
-OBJS= blocksort.o  \
-      huffman.o    \
-      crctable.o   \
-      randtable.o  \
-      compress.o   \
-      decompress.o \
-      bzlib.o
-
-all: $(OBJS)
-	$(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.4 $(OBJS)
-	$(CC) $(CFLAGS) -o bzip2-shared bzip2.c libbz2.so.1.0.4
+all:
+	$(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.4 \
+	blocksort.c huffman.c crctable.c randtable.c compress.c \
+	decompress.c bzlib.c $(CFLAGS) -fPIC --combine -fprofile-generate
+	$(CC) $(CFLAGS) --combine -fprofile-generate \
+	-o bzip2-shared bzip2.c libbz2.so.1.0.4
+	$(BZIP2_SHARED) -1 < sample1.ref > sample1.rb2
+	$(BZIP2_SHARED) -2 < sample2.ref > sample2.rb2
+	$(BZIP2_SHARED) -3 < sample3.ref > sample3.rb2
+	$(BZIP2_SHARED) -d < sample1.bz2 > sample1.tst
+	$(BZIP2_SHARED) -d < sample2.bz2 > sample2.tst
+	$(BZIP2_SHARED) -ds < sample3.bz2 > sample3.tst
+	$(CC) -shared -Wl,-soname -Wl,libbz2.so.1.0 -o libbz2.so.1.0.4 \
+	blocksort.c huffman.c crctable.c randtable.c compress.c \
+	decompress.c bzlib.c $(CFLAGS) -fPIC --combine -fprofile-use
+	$(CC) $(CFLAGS) --combine -fprofile-use \
+	-o bzip2-shared bzip2.c libbz2.so.1.0.4
+	@cat words1
+	$(BZIP2_SHARED) -1 < sample1.ref > sample1.rb2
+	$(BZIP2_SHARED) -2 < sample2.ref > sample2.rb2
+	$(BZIP2_SHARED) -3 < sample3.ref > sample3.rb2
+	$(BZIP2_SHARED) -d < sample1.bz2 > sample1.tst
+	$(BZIP2_SHARED) -d < sample2.bz2 > sample2.tst
+	$(BZIP2_SHARED) -ds < sample3.bz2 > sample3.tst
+	cmp sample1.bz2 sample1.rb2
+	cmp sample2.bz2 sample2.rb2
+	cmp sample3.bz2 sample3.rb2
+	cmp sample1.tst sample1.ref
+	cmp sample2.tst sample2.ref
+	cmp sample3.tst sample3.ref
+	@cat words3
 	rm -f libbz2.so.1.0
 	ln -s libbz2.so.1.0.4 libbz2.so.1.0
 
 clean: 
-	rm -f $(OBJS) bzip2.o libbz2.so.1.0.4 libbz2.so.1.0 bzip2-shared
-
-blocksort.o: blocksort.c
-	$(CC) $(CFLAGS) -c blocksort.c
-huffman.o: huffman.c
-	$(CC) $(CFLAGS) -c huffman.c
-crctable.o: crctable.c
-	$(CC) $(CFLAGS) -c crctable.c
-randtable.o: randtable.c
-	$(CC) $(CFLAGS) -c randtable.c
-compress.o: compress.c
-	$(CC) $(CFLAGS) -c compress.c
-decompress.o: decompress.c
-	$(CC) $(CFLAGS) -c decompress.c
-bzlib.o: bzlib.c
-	$(CC) $(CFLAGS) -c bzlib.c
+	rm -f libbz2.so.1.0.4 libbz2.so.1.0 bzip2-shared