(* huffman compressor/decompressor $Revision: 20 $ (c) David Pethes (imcold) *) program huffman; {$mode objfpc} {define SDLTIMER} uses {$ifdef SDLTIMER} sdl, {$endif} sysutils, getopts, bitio; const END_OF_STREAM = 256; TOP_NODE = 513; //END_OF_STREAM*2 +1 BIT_IO_BUFFER_SIZE = 100000; //dobra hodnota type tree_node_t = record weight: integer; saved_weight: integer; child_0: integer; child_1: integer; end; vlc_code_t = record code: longword; code_bits: byte; code_str: string; end; huff_params_t = record counts: array of longword; nodes: array of tree_node_t; root_node: longword; codes: array of vlc_code_t; input, output: string; input_size: longword; instream: pbyte; bit_io_buffer: integer; no_headers, save_model: boolean; end; var huff_params: huff_params_t; verbose, decode: boolean; {$ifdef SDLTIMER} time: UInt32; {$endif} function GetFileSize (fname: string): integer; var f: file; begin if not(FileExists(fname)) then Result := 0 else begin AssignFile(f, fname); Reset(f, 1); Result := integer( FileSize(f) ); CloseFile(f); end; end; (******************************************************************************* huff_log vypisy priebehu, chyby, statistiky a pod. *******************************************************************************) procedure huff_log (s: string; no_line_ending: boolean = false); inline; begin if verbose then if not(no_line_ending) then writeln (stderr, s) else write (stderr, s + #13); end; (******************************************************************************* huff_save_model uloz model: vahy jednotlivych zakladnych uzlov a vygenerovane vlc kody *******************************************************************************) procedure huff_save_model(var params: huff_params_t; name: string); var t: text; c: integer; begin AssignFile(t, name); Rewrite(t); writeln(t, 'tree model for file: ', params.input, #13#10); for c := 0 to TOP_NODE do begin if params.nodes[c].saved_weight = 0 then continue; write(t, 'node ', c:3, ' val: ', params.nodes[c].saved_weight:4); if c > END_OF_STREAM then write(t, ' child 0/1: ', params.nodes[c].child_0,'/',params.nodes[c].child_1 ); writeln(t); end; writeln(t); writeln(t, 'vlc codes: '); for c := 0 to END_OF_STREAM do if params.codes[c].code_bits > 0 then writeln(t, c:4, params.nodes[c].saved_weight:5,'x ', binStr(params.codes[c].code, params.codes[c].code_bits), ' (',params.codes[c].code_bits,')' ); CloseFile(t); end; (******************************************************************************* huff_init inicializacia struktury kodera, nastavenie vychodzich parametrov *******************************************************************************) procedure huff_init (var params: huff_params_t); begin Setlength( params.counts, END_OF_STREAM+1 ); Setlength( params.nodes, (END_OF_STREAM+1)* 2 * sizeof(tree_node_t) ); SetLength( params.codes, (END_OF_STREAM+1) * sizeof(vlc_code_t) ); params.bit_io_buffer := BIT_IO_BUFFER_SIZE; params.root_node := 0; params.no_headers := false; params.save_model := false; end; (******************************************************************************* huff_free uvolnenie struktury kodera *******************************************************************************) procedure huff_free (var params: huff_params_t); begin params.counts := nil; params.nodes := nil; params.codes := nil; params.instream := nil; end; (******************************************************************************* huff_read_input nacita data zo vstupneho suboru do pamate *******************************************************************************) procedure huff_read_input (var params: huff_params_t); var f: file; begin AssignFile(f, params.input); Reset(f, 1); params.input_size := longword(FileSize(f)); params.instream := GetMem(params.input_size); blockread(f, params.instream^, params.input_size); CloseFile(f); end; (******************************************************************************* huff_free_input uvolni pamat s datami zo vstupneho suboru *******************************************************************************) procedure huff_free_input (var params: huff_params_t); begin Freemem(params.instream); end; (******************************************************************************* huff_count_symbols spocita vyskyt znakov vo vstupe *******************************************************************************) procedure huff_count_symbols (var params: huff_params_t); var c: integer; p: pbyte; begin p := params.instream; FillByte(params.counts[0], END_OF_STREAM, 0); for c := 0 to params.input_size-1 do params.counts[p[c]] += 1; end; (******************************************************************************* huff_scale_counts uprav pocetnost najdenych znakov do rozsahu 1...255 *******************************************************************************) procedure huff_scale_counts(var params: huff_params_t); var counts: array of longword; b: integer; max_count: longword; new_count: longword; ratio: double; begin counts := params.counts; max_count := 0; for b := 0 to 255 do if counts[b] > max_count then max_count := counts[b]; if max_count < 256 then Exit; ratio := double(max_count) / 255; for b := 0 to 255 do if counts[b] > 0 then begin new_count := round( counts[b] / ratio ); if new_count = 0 then counts[b] := 1 else counts[b] := new_count; end; end; (******************************************************************************* huff_build_tree zostav huffmanov strom, vrat index korenoveho uzla stromu *******************************************************************************) procedure huff_build_tree (var params: huff_params_t); var nodes: array of tree_node_t; counts: array of longword; next_free: integer; i: integer; min_1, min_2: integer; begin nodes := params.nodes; counts := params.counts; for i := 0 to END_OF_STREAM-1 do nodes[i].weight := counts[i]; nodes[END_OF_STREAM].weight := 1; nodes[TOP_NODE].weight := High(integer); next_free := END_OF_STREAM; while true do begin next_free := next_free + 1; min_1 := TOP_NODE; min_2 := TOP_NODE; for i := 0 to next_free do if nodes[i].weight > 0 then begin if nodes[i].weight < nodes[min_1].weight then begin min_2 := min_1 ; min_1 := i ; end else if nodes[i].weight < nodes[min_2].weight then min_2 := i; end; if min_2 = TOP_NODE then break; nodes[next_free].weight := nodes[min_1].weight + nodes[min_2].weight; nodes[next_free].child_0 := min_1; nodes[next_free].child_1 := min_2; nodes[min_1].saved_weight := nodes[min_1].weight; nodes[min_2].saved_weight := nodes[min_2].weight; nodes[min_1].weight := 0; nodes[min_2].weight := 0; end; params.root_node := next_free - 1; nodes[params.root_node].saved_weight := nodes[params.root_node].weight; end; (******************************************************************************* huff_convert_tree_to_code z korenoveho uzlu rekurzivne postupuj cez jednotlive listy a zapis cestu ako VLC kod a pocet jeho bitov do tabulky *******************************************************************************) procedure huff_convert_tree_to_code (var p: huff_params_t; code_so_far: longword; bits, node: integer); begin if node <= END_OF_STREAM then begin p.codes[node].code := code_so_far; p.codes[node].code_bits := bits; p.codes[node].code_str := binStr(p.codes[node].code, p.codes[node].code_bits); Exit; end; code_so_far := code_so_far shl 1; bits := bits + 1; huff_convert_tree_to_code (p, code_so_far, bits, p.nodes[node].child_0 ); huff_convert_tree_to_code (p, code_so_far + 1, bits, p.nodes[node].child_1 ); end; (******************************************************************************* huff_compress_data precitaj bajt zo vstupu a do vystupu zapis jeho VLC kod *******************************************************************************) procedure huff_compress_data (var params: huff_params_t); var fout: bitio_file_t; c: integer; instream: PByte; codes: array of vlc_code_t; //+progress progress_total, progress_part: integer; begin //+progress init progress_part := params.input_size div 100 + 1; progress_total := 0; instream := params.instream; codes := params.codes; bitio_open_file_write (fout, params.output, params.bit_io_buffer); //zapis velkost orig. suboru a vyskyty znakov if not(params.no_headers) then begin bitio_write_bits(fout, params.input_size, 32); for c := 0 to 255 do bitio_write_str(fout, binStr(params.counts[c], 8)); end; //write data for c := 0 to params.input_size-1 do begin bitio_write_str (fout, codes[instream[c]].code_str); //+progress if (c > progress_total) then begin progress_total += progress_part; huff_log( format('encoding: %d%%', [progress_total div progress_part]), true ); end; end; bitio_write_str (fout, codes[END_OF_STREAM].code_str); bitio_close_file(fout); huff_log (#10, true); end; (******************************************************************************* huff_encode zo vstupneho suboru vytvor skomprimovany vystup *******************************************************************************) procedure huff_encode(var params: huff_params_t); begin huff_log ('reading input...'); huff_read_input(params); huff_log ('counting symbol occurences...'); huff_count_symbols(params); huff_log ('scaling counts...'); huff_scale_counts(params); huff_log ('building the tree...'); huff_build_tree(params); huff_log ( format('converting tree to vlc codes (root node: %d)...', [params.root_node]) ); huff_convert_tree_to_code (params, 0, 0, params.root_node); huff_log ('compressing...'); huff_compress_data(params); huff_log ('freeing input'); huff_free_input(params); if params.save_model then begin huff_log ('saving model...'); huff_save_model(params, 'huffout_encode_model.txt'); end; end; (******************************************************************************* huff_read_counts citaj vyskyty jednotlivych hodnot zo skomprimovaneho suboru *******************************************************************************) procedure huff_read_counts(var params: huff_params_t); var f: file; c: integer; size: longword; begin AssignFile(f, params.input); Reset(f, 1); blockread(f, size, 4); for c := 0 to 255 do begin blockread(f, params.counts[c], 1); end; CloseFile(f); params.counts[END_OF_STREAM] := 1; end; (******************************************************************************* huff_expand_data citaj bity zo vstupu az kym nezostavis VLC kod, ktory sa nachadza v tabulke a zapis zodpovedajuci bajt do vystupu *******************************************************************************) procedure huff_expand_data (var params: huff_params_t); const OUTPUT_BUFFER_SIZE = 100000; //dobra hodnota var fin: bitio_file_t; fout: file; bit, node: integer; buff: PByte; buff_pos: integer; nodes: array of tree_node_t; c, dummy: longword; //+progress uncompressed_size: longword; bytes_decompressed, progress_total, progress_part: integer; begin AssignFile(fout, params.output); Rewrite(fout, 1); buff := Getmem(OUTPUT_BUFFER_SIZE); buff_pos := 0; bitio_open_file_read(fin, params.input, params.bit_io_buffer); //nacitaj velkost orig. suboru a preskoc tabulku s vyskytmi symbolov bitio_read_bits(fin, uncompressed_size, 32); huff_log( format('uncompressed size: %d', [uncompressed_size]) ); for c := 1 to 256 do bitio_read_bits(fin, dummy, 8); bytes_decompressed := 0; progress_part := uncompressed_size div 100 + 1; progress_total := 0; node := 0; nodes := params.nodes; while not(node = END_OF_STREAM) do begin node := params.root_node; while node > END_OF_STREAM do begin bitio_read(fin, bit); if bit = 0 then node := nodes[node].child_0 else node := nodes[node].child_1; end; if not(node = END_OF_STREAM) then begin buff[buff_pos] := node; buff_pos := buff_pos + 1; //progress inc(bytes_decompressed); if (bytes_decompressed > progress_total) then begin progress_total += progress_part; huff_log( format('decoding: %d%%', [progress_total div progress_part]), true); end; end; if buff_pos = OUTPUT_BUFFER_SIZE then begin blockwrite(fout, buff^, buff_pos); buff_pos := 0; end; end; //zapis pripadny zbytok bufferu if buff_pos > 0 then blockwrite(fout, buff^, buff_pos); bitio_close_file(fin); CloseFile(fout); Freemem(buff); huff_log (#10, true); end; (******************************************************************************* huff_decode zo vstupneho suboru vytvor dekomprimovany vystup *******************************************************************************) procedure huff_decode(var params: huff_params_t); begin huff_log ('reading counts from input...'); huff_read_counts(params); huff_log ('building the tree...'); huff_build_tree (params); huff_log ('decompressing...'); huff_expand_data(params); if params.save_model then begin huff_log ('saving model...'); huff_convert_tree_to_code (params, 0, 0, params.root_node); huff_save_model (params, 'huffout_decode_model.txt'); end; end; (******************************************************************************* Help vypis navod na pouzitie *******************************************************************************) procedure Help(); begin writeln(stdout, 'usage: huffman [options] input [output]'); writeln(stdout, 'default encoding output: huffout.huff'); writeln(stdout, 'default decoding output: huffout.dec'); writeln(stdout, 'options:'); writeln(stdout, ' -h, --help print help'); writeln(stdout, ' -d, --decode decode huffman compressed file'); writeln(stdout, ' -m, --model save coding model'); writeln(stdout, ' -n, --no-headers don''t save headers in output'); writeln(stdout, ' -s, --silent don''t show progress info'); writeln(stdout, ' -b <int>, --bit-buff <int> bitfile buffer size [', BIT_IO_BUFFER_SIZE,']'); end; (******************************************************************************* Parse spracuj prikazovy riadok a nastav parametre programu *******************************************************************************) procedure Parse(); const LONG_OPT_COUNT = 6; var opts: POption; opt_index: integer; c: char; function make_opt(name: string; has_arg: integer; flag: PChar; value: char) : TOption; begin Result.Name := name; Result.Flag := flag; Result.Has_arg := has_arg; Result.Value := value; end; begin opts := GetMem(LONG_OPT_COUNT*sizeof(TOption)); opts[0] := make_opt('decode', No_Argument, nil, 'd'); opts[1] := make_opt('help', No_Argument, nil, 'h'); opts[2] := make_opt('no-headers', No_Argument, nil, 'n'); opts[3] := make_opt('bit-buff', Required_Argument, nil, 'b'); opts[4] := make_opt('silent', No_Argument, nil, 's'); opts[LONG_OPT_COUNT-1] := make_opt('', 0, nil, #0); c := #0; repeat c := GetLongOpts('hdmnsb:', opts, opt_index); case c of 'h' : begin help(); Halt(); end; 'd' : begin decode := true; end; 'm' : begin huff_params.save_model := true; end; 'n' : begin huff_params.no_headers := true; huff_log ('[info] not writing value count headers, file won''t be decodable'); end; 's' : begin verbose := false; end; 'b' : begin huff_params.bit_io_buffer := StrToInt(OptArg); if huff_params.bit_io_buffer <= 0 then huff_params.bit_io_buffer := BIT_IO_BUFFER_SIZE; huff_log (format('[info] using %d bytes for bit IO operations', [huff_params.bit_io_buffer])); end; '?', ':' : huff_log ('[error] unknown option: '+optopt); end; until c = EndOfOptions; Freemem(opts); huff_params.input := ParamStr(OptInd); huff_params.output := ParamStr(OptInd+1); if huff_params.input = '' then begin Help(); huff_log ('[error] no input specified'); Halt(); end; if GetFileSize(huff_params.input) = 0 then begin huff_log ('[error] input size equals 0 or input doesn''t exist: ' + huff_params.input); Halt(); end; if huff_params.output = '' then if decode then huff_params.output := 'huffout.dec' else huff_params.output := 'huffout.huff'; if huff_params.input = huff_params.output then huff_log ('[error] output same as input'); end; (******************************************************************************* main nacitaj parametry, komprimuj/dekomprimuj subor *******************************************************************************) begin writeln(stdout, 'huffman compressor by imcold, ',{$I %DATE%}, ' fpc ', {$I %FPCVERSION%}); huff_init(huff_params); verbose := true; decode := false; Parse(); writeln(stdout, huff_params.input, ' -> ', huff_params.output); {$ifdef SDLTIMER} time := SDL_GetTicks(); {$endif} if decode then begin huff_decode(huff_params); writeln (stdout, 'decoding finished'); end else begin huff_encode(huff_params); writeln (stdout, format('encoding finished, %5.2f%% of the original size', [GetFileSize(huff_params.output) / (GetFileSize(huff_params.input) / 100)])); end; {$ifdef SDLTIMER} writeln (stdout, (SDL_GetTicks() - time) / 1000 :5:2, ' s'); {$endif} huff_free(huff_params); end.