(*
huffman compressor/decompressor
$Revision: 20 $
(c) David Pethes (imcold)

*)
program huffman;
{$mode objfpc}
{define SDLTIMER}

uses
  {$ifdef SDLTIMER}
  sdl,
  {$endif}
  sysutils, getopts, bitio;
  
const
  END_OF_STREAM = 256;
  TOP_NODE      = 513; //END_OF_STREAM*2 +1
  BIT_IO_BUFFER_SIZE = 100000;  //dobra hodnota
  
type
  tree_node_t = record
      weight: integer;
      saved_weight: integer;
      child_0: integer;
      child_1: integer;
  end;

  vlc_code_t = record
      code: longword;
      code_bits: byte;
      code_str:  string;
  end;
  
  huff_params_t = record
      counts: array of longword;
      nodes: array of tree_node_t;
      root_node: longword;
      codes: array of vlc_code_t;

      input,
      output: string;
      input_size: longword;
      instream: pbyte;

      bit_io_buffer: integer;
      no_headers,
      save_model: boolean;
  end;

  
var
  huff_params: huff_params_t;
  verbose,
  decode: boolean;
  {$ifdef SDLTIMER}
  time: UInt32;
  {$endif}

function GetFileSize (fname: string): integer;
var
  f: file;
begin
  if not(FileExists(fname)) then
      Result := 0
  else begin
      AssignFile(f, fname);
      Reset(f, 1);
      Result := integer( FileSize(f) );
      CloseFile(f);
  end;
end;


(*******************************************************************************
huff_log
    vypisy priebehu, chyby, statistiky a pod.
*******************************************************************************)
procedure huff_log (s: string; no_line_ending: boolean = false); inline;
begin
  if verbose then
      if not(no_line_ending) then
          writeln (stderr, s)
      else
          write (stderr, s + #13);
end;



(*******************************************************************************
huff_save_model
    uloz model: vahy jednotlivych zakladnych uzlov a vygenerovane vlc kody
*******************************************************************************)
procedure huff_save_model(var params: huff_params_t; name: string);
var
  t: text;
  c: integer;
begin
  AssignFile(t, name);
  Rewrite(t);
  writeln(t, 'tree model for file: ', params.input, #13#10);
  for c := 0 to TOP_NODE do begin
      if params.nodes[c].saved_weight = 0 then continue;
      write(t, 'node ', c:3, '  val: ', params.nodes[c].saved_weight:4);
      if c > END_OF_STREAM then
          write(t, '  child 0/1:  ', params.nodes[c].child_0,'/',params.nodes[c].child_1 );
      writeln(t);
  end;
  writeln(t);
  writeln(t, 'vlc codes: ');
  for c := 0 to END_OF_STREAM do
      if params.codes[c].code_bits > 0 then
          writeln(t, c:4, params.nodes[c].saved_weight:5,'x ',
              binStr(params.codes[c].code, params.codes[c].code_bits),
              ' (',params.codes[c].code_bits,')'
              );
  CloseFile(t);
end;



(*******************************************************************************
huff_init
    inicializacia struktury kodera, nastavenie vychodzich parametrov
*******************************************************************************)
procedure huff_init (var params: huff_params_t);
begin
  Setlength( params.counts, END_OF_STREAM+1 );
  Setlength( params.nodes, (END_OF_STREAM+1)* 2 * sizeof(tree_node_t) );
  SetLength( params.codes, (END_OF_STREAM+1) * sizeof(vlc_code_t) );
  params.bit_io_buffer := BIT_IO_BUFFER_SIZE;
  params.root_node  := 0;
  params.no_headers := false;
  params.save_model := false;
end;


(*******************************************************************************
huff_free
    uvolnenie struktury kodera
*******************************************************************************)
procedure huff_free (var params: huff_params_t);
begin
  params.counts := nil;
  params.nodes  := nil;
  params.codes  := nil;
  params.instream := nil;
end;



(*******************************************************************************
huff_read_input
    nacita data zo vstupneho suboru do pamate
*******************************************************************************)
procedure huff_read_input (var params: huff_params_t);
var
  f: file;
begin
  AssignFile(f, params.input);
  Reset(f, 1);
  params.input_size := longword(FileSize(f));
  params.instream := GetMem(params.input_size);
  blockread(f, params.instream^, params.input_size);
  CloseFile(f);
end;


(*******************************************************************************
huff_free_input
    uvolni pamat s datami zo vstupneho suboru
*******************************************************************************)
procedure huff_free_input (var params: huff_params_t);
begin
  Freemem(params.instream);
end;



(*******************************************************************************
huff_count_symbols
    spocita vyskyt znakov vo vstupe
*******************************************************************************)
procedure huff_count_symbols (var params: huff_params_t);
var
  c: integer;
  p: pbyte;
begin
  p := params.instream;
  FillByte(params.counts[0], END_OF_STREAM, 0);
  for c := 0 to params.input_size-1 do params.counts[p[c]] += 1;
end;



(*******************************************************************************
huff_scale_counts
    uprav pocetnost najdenych znakov do rozsahu 1...255
*******************************************************************************)
procedure huff_scale_counts(var params: huff_params_t);
var
  counts: array of longword;
  b: integer;
  max_count: longword;
  new_count: longword;
  ratio: double;
begin
  counts := params.counts;
  max_count := 0;
  for b := 0 to 255 do if counts[b] > max_count then max_count := counts[b];
  if max_count < 256 then Exit;
  ratio := double(max_count) / 255;
  for b := 0 to 255 do
      if counts[b] > 0 then begin
          new_count := round( counts[b] / ratio );
          if new_count = 0 then
              counts[b] := 1
          else
              counts[b] := new_count;
      end;
end;


(*******************************************************************************
huff_build_tree
    zostav huffmanov strom, vrat index korenoveho uzla stromu
*******************************************************************************)
procedure huff_build_tree (var params: huff_params_t);
var
  nodes:  array of tree_node_t;
  counts: array of longword;
  next_free: integer;
  i: integer;
  min_1,
  min_2: integer;
begin
  nodes  := params.nodes;
  counts := params.counts;
  for i := 0 to END_OF_STREAM-1 do nodes[i].weight := counts[i];
  nodes[END_OF_STREAM].weight := 1;
  nodes[TOP_NODE].weight := High(integer);
  next_free := END_OF_STREAM;
  
  while true do begin
      next_free := next_free + 1;
      min_1 := TOP_NODE;
      min_2 := TOP_NODE;

      for i := 0 to next_free do
          if nodes[i].weight > 0 then begin
              if nodes[i].weight < nodes[min_1].weight then begin
                  min_2 := min_1 ;
                  min_1 := i ;
              end else
                  if nodes[i].weight < nodes[min_2].weight then min_2 := i;
          end;

      if min_2 = TOP_NODE then break;
      nodes[next_free].weight := nodes[min_1].weight + nodes[min_2].weight;
      nodes[next_free].child_0 := min_1;
      nodes[next_free].child_1 := min_2;
      nodes[min_1].saved_weight := nodes[min_1].weight;
      nodes[min_2].saved_weight := nodes[min_2].weight;
      nodes[min_1].weight := 0;
      nodes[min_2].weight := 0;
  end;
  
  params.root_node := next_free - 1;
  nodes[params.root_node].saved_weight := nodes[params.root_node].weight;
end;


(*******************************************************************************
huff_convert_tree_to_code
    z korenoveho uzlu rekurzivne postupuj cez jednotlive listy a zapis cestu
    ako VLC kod a pocet jeho bitov do tabulky
*******************************************************************************)
procedure huff_convert_tree_to_code (var p: huff_params_t; code_so_far: longword; bits, node: integer);
begin
  if node <= END_OF_STREAM then begin
      p.codes[node].code := code_so_far;
      p.codes[node].code_bits := bits;
      p.codes[node].code_str  := binStr(p.codes[node].code, p.codes[node].code_bits);
      Exit;
  end;
  code_so_far := code_so_far shl 1;
  bits := bits + 1;
  huff_convert_tree_to_code (p, code_so_far, bits, p.nodes[node].child_0 );
  huff_convert_tree_to_code (p, code_so_far + 1, bits, p.nodes[node].child_1 );
end;


(*******************************************************************************
huff_compress_data
    precitaj bajt zo vstupu a do vystupu zapis jeho VLC kod
*******************************************************************************)
procedure huff_compress_data (var params: huff_params_t);
var
  fout: bitio_file_t;
  c: integer;
  instream: PByte;
  codes: array of vlc_code_t;
  //+progress
  progress_total,
  progress_part: integer;
begin
  //+progress init
  progress_part  := params.input_size div 100 + 1;
  progress_total := 0;

  instream := params.instream;
  codes := params.codes;
  bitio_open_file_write (fout, params.output, params.bit_io_buffer);
  //zapis velkost orig. suboru a vyskyty znakov
  if not(params.no_headers) then begin
      bitio_write_bits(fout, params.input_size, 32);
      for c := 0 to 255 do bitio_write_str(fout, binStr(params.counts[c], 8));
  end;
  //write data
  for c := 0 to params.input_size-1 do begin
      bitio_write_str (fout, codes[instream[c]].code_str);
      //+progress
      if (c > progress_total) then begin
          progress_total += progress_part;
          huff_log( format('encoding: %d%%', [progress_total div progress_part]), true );
      end;
  end;

  bitio_write_str (fout, codes[END_OF_STREAM].code_str);
  bitio_close_file(fout);

  huff_log (#10, true);
end;

         
(*******************************************************************************
huff_encode
    zo vstupneho suboru vytvor skomprimovany vystup
*******************************************************************************)
procedure huff_encode(var params: huff_params_t);
begin
  huff_log ('reading input...');
  huff_read_input(params);
  
  huff_log ('counting symbol occurences...');
  huff_count_symbols(params);
  
  huff_log ('scaling counts...');
  huff_scale_counts(params);
  
  huff_log ('building the tree...');
  huff_build_tree(params);
  
  huff_log ( format('converting tree to vlc codes (root node: %d)...', [params.root_node]) );
  huff_convert_tree_to_code (params, 0, 0, params.root_node);
  
  huff_log ('compressing...');
  huff_compress_data(params);
  
  huff_log ('freeing input');
  huff_free_input(params);
  
  if params.save_model then begin
      huff_log ('saving model...');
      huff_save_model(params, 'huffout_encode_model.txt');
  end;
end;


(*******************************************************************************
huff_read_counts
    citaj vyskyty jednotlivych hodnot zo skomprimovaneho suboru
*******************************************************************************)
procedure huff_read_counts(var params: huff_params_t);
var
  f: file;
  c: integer;
  size: longword;
begin
  AssignFile(f, params.input);
  Reset(f, 1);
  blockread(f, size, 4);
  for c := 0 to 255 do begin
      blockread(f, params.counts[c], 1);
  end;
  CloseFile(f);
  params.counts[END_OF_STREAM] := 1;
end;


(*******************************************************************************
huff_expand_data
    citaj bity zo vstupu az kym nezostavis VLC kod, ktory sa nachadza
    v tabulke a zapis zodpovedajuci bajt do vystupu
*******************************************************************************)
procedure huff_expand_data (var params: huff_params_t);
const
  OUTPUT_BUFFER_SIZE = 100000;  //dobra hodnota
var
  fin: bitio_file_t;
  fout: file;
  bit, node: integer;
  buff: PByte;
  buff_pos: integer;
  nodes: array of tree_node_t;
  c, dummy: longword;
  //+progress
  uncompressed_size: longword;
  bytes_decompressed,
  progress_total,
  progress_part: integer;
  
begin
  AssignFile(fout, params.output);
  Rewrite(fout, 1);
  buff := Getmem(OUTPUT_BUFFER_SIZE);
  buff_pos := 0;
  
  bitio_open_file_read(fin, params.input, params.bit_io_buffer);
  
  //nacitaj velkost orig. suboru a preskoc tabulku s vyskytmi symbolov
  bitio_read_bits(fin, uncompressed_size, 32);
  huff_log( format('uncompressed size: %d', [uncompressed_size]) );
  for c := 1 to 256 do bitio_read_bits(fin, dummy, 8);
  
  bytes_decompressed := 0;
  progress_part  := uncompressed_size div 100 + 1;
  progress_total := 0;

  node := 0;
  nodes := params.nodes;
  while not(node = END_OF_STREAM) do begin
      node := params.root_node;
      while node > END_OF_STREAM do begin
          bitio_read(fin, bit);
          if bit = 0 then
              node := nodes[node].child_0
          else
              node := nodes[node].child_1;
      end;

      if not(node = END_OF_STREAM) then begin
          buff[buff_pos] := node;
          buff_pos := buff_pos + 1;
          //progress
          inc(bytes_decompressed);
          if (bytes_decompressed > progress_total) then begin
              progress_total += progress_part;
              huff_log( format('decoding: %d%%', [progress_total div progress_part]), true);
          end;
      end;
      
      if buff_pos = OUTPUT_BUFFER_SIZE then begin
          blockwrite(fout, buff^, buff_pos);
          buff_pos := 0;
      end;
  end;
  //zapis pripadny zbytok bufferu
  if buff_pos > 0 then
      blockwrite(fout, buff^, buff_pos);
  
  bitio_close_file(fin);
  CloseFile(fout);
  Freemem(buff);

  huff_log (#10, true);
end;


(*******************************************************************************
huff_decode
    zo vstupneho suboru vytvor dekomprimovany vystup
*******************************************************************************)
procedure huff_decode(var params: huff_params_t);
begin
  huff_log ('reading counts from input...');
  huff_read_counts(params);

  huff_log ('building the tree...');
  huff_build_tree (params);
  
  huff_log ('decompressing...');
  huff_expand_data(params);
  
  if params.save_model then begin
      huff_log ('saving model...');
      huff_convert_tree_to_code (params, 0, 0, params.root_node);
      huff_save_model (params, 'huffout_decode_model.txt');
  end;
end;


(*******************************************************************************
Help
    vypis navod na pouzitie
*******************************************************************************)
procedure Help();
begin
  writeln(stdout, 'usage:    huffman [options] input [output]');
  writeln(stdout, 'default encoding output:  huffout.huff');
  writeln(stdout, 'default decoding output:  huffout.dec');
  writeln(stdout, 'options:');
  writeln(stdout, '  -h, --help          print help');  
  writeln(stdout, '  -d, --decode        decode huffman compressed file');
  writeln(stdout, '  -m, --model         save coding model');
  writeln(stdout, '  -n, --no-headers    don''t save headers in output');
  writeln(stdout, '  -s, --silent        don''t show progress info');
  writeln(stdout, '  -b <int>, --bit-buff <int>     bitfile buffer size [', BIT_IO_BUFFER_SIZE,']');
end;


(*******************************************************************************
Parse
    spracuj prikazovy riadok a nastav parametre programu
*******************************************************************************)
procedure Parse();
const
  LONG_OPT_COUNT = 6;
var
  opts: POption;
  opt_index: integer;
  c: char;
  
function make_opt(name: string; has_arg: integer; flag: PChar; value: char) : TOption;
  begin
    Result.Name := name;
    Result.Flag := flag;
    Result.Has_arg := has_arg;
    Result.Value := value;
  end;
  
begin
  opts := GetMem(LONG_OPT_COUNT*sizeof(TOption));
  opts[0] := make_opt('decode', No_Argument, nil, 'd');
  opts[1] := make_opt('help', No_Argument, nil, 'h');
  opts[2] := make_opt('no-headers', No_Argument, nil, 'n');
  opts[3] := make_opt('bit-buff', Required_Argument, nil, 'b');
  opts[4] := make_opt('silent', No_Argument, nil, 's');
  opts[LONG_OPT_COUNT-1] := make_opt('', 0, nil, #0);
  c := #0;
  repeat
      c := GetLongOpts('hdmnsb:', opts, opt_index);
      case c of
        'h' : begin
              help();
              Halt();
              end;
        'd' : begin
              decode := true;
              end;
        'm' : begin
              huff_params.save_model := true;
              end;
        'n' : begin
              huff_params.no_headers := true;
              huff_log ('[info] not writing value count headers, file won''t be decodable');
              end;
        's' : begin
              verbose := false;
              end;
        'b' : begin
              huff_params.bit_io_buffer := StrToInt(OptArg);
              if huff_params.bit_io_buffer <= 0 then huff_params.bit_io_buffer := BIT_IO_BUFFER_SIZE;
              huff_log (format('[info] using %d bytes for bit IO operations', [huff_params.bit_io_buffer]));
              end;
        '?', ':' :
              huff_log ('[error] unknown option: '+optopt);
      end;
  until c = EndOfOptions;
  Freemem(opts);
  huff_params.input := ParamStr(OptInd);
  huff_params.output := ParamStr(OptInd+1);
  if huff_params.input = '' then begin
      Help();
      huff_log ('[error] no input specified');
      Halt();
  end;
  if GetFileSize(huff_params.input) = 0 then begin
      huff_log ('[error] input size equals 0 or input doesn''t exist: ' + huff_params.input);
      Halt();
  end;
  if huff_params.output = '' then
      if decode then huff_params.output := 'huffout.dec'
      else huff_params.output := 'huffout.huff';
  if huff_params.input = huff_params.output then
      huff_log ('[error] output same as input');
end;


(*******************************************************************************
main
    nacitaj parametry, komprimuj/dekomprimuj subor
*******************************************************************************)
begin
writeln(stdout, 'huffman compressor by imcold,  ',{$I %DATE%}, ' fpc ', {$I %FPCVERSION%});
huff_init(huff_params);

verbose := true;
decode  := false;
Parse();
writeln(stdout, huff_params.input, ' -> ', huff_params.output);

{$ifdef SDLTIMER}
time := SDL_GetTicks();
{$endif}
if decode then begin
    huff_decode(huff_params);
    writeln (stdout, 'decoding finished');
end else begin
    huff_encode(huff_params);
    writeln (stdout, format('encoding finished, %5.2f%% of the original size',
      [GetFileSize(huff_params.output) / (GetFileSize(huff_params.input) / 100)]));
end;
{$ifdef SDLTIMER}
writeln (stdout, (SDL_GetTicks() - time) / 1000 :5:2, ' s');
{$endif}

huff_free(huff_params);
end.