diff --git a/huffman.lisp b/huffman.lisp index 1bda7a5..01db12a 100644 --- a/huffman.lisp +++ b/huffman.lisp @@ -28,7 +28,6 @@ (defun comp-interim (left right) "sort interum nodes" (< (car left) (car right))) - (defun freq2huffman (interim-nodes) (loop until (singlep interim-nodes) @@ -51,7 +50,7 @@ (setq code (ash code 1)))) (setq codelen ,codelen-form)) collect (cons ,key-form (cons code codelen))))) -;; (macroexpand '(mkcanon (for pair in sorted-huffman-list while pair) (car pair) (cdr (cdr pair)))) +(macroexpand '(mkcanon (for pair in sorted-huffman-list while pair) (car pair) (cdr (cdr pair)))) (defun comp-huffman (left right) "sort huffman mappings by code length & value" @@ -97,47 +96,35 @@ ; --- -(defun read-number (instream) - (let* ((right-mask (mk-mask 5)) - (first-byte (read-byte instream nil nil)) - (bytes-to-read (ash first-byte -5)) - (result (logand first-byte right-mask))) - (loop for i from 1 to bytes-to-read - for c = (read-byte instream nil nil) - do (setq result (logior (ash result 8) c))) - result)) - -(defun write-number (num outstream) - (let ((n 0) - (temp num)) - (loop while (> (integer-length temp) 5) - do (setq temp (ash temp -8)) - (incf n)) - (write-byte (logior (ash n 5) temp) outstream) - (loop for i from (- n 1) downto 0 - do (write-byte (logand (ash num (* i -8)) (mk-mask 8)) outstream)))) - -;; (with-open-file (outstream "test" :element-type '(unsigned-byte 8) :direction :output :if-exists :supersede) -;; (write-number 31 outstream) -;; (write-number 32 outstream) -;; (write-number 1024 outstream) -;; (write-number 1099511627776 outstream) -;; ) -;; ; -;; (with-open-file (instream "test" :element-type '(unsigned-byte 8) :direction :input) -;; (format t "~%~d" (read-number instream)) -;; (format t "~%~d" (read-number instream)) -;; (format t "~%~d" (read-number instream)) -;; (format t "~%~d" (read-number instream)) -;; ) - -; --- - -(defmacro stream-drain-bitstream (outstream bit-stream cmp) +(defmacro buf-drain-bitstream (buf bit-stream cmp) `(loop while (>= (cdr ,bit-stream) ,cmp) do (multiple-value-bind (new-bit-stream popped-byte) (bitstream-pop8 ,bit-stream) (setq ,bit-stream new-bit-stream) - (write-byte popped-byte ,outstream)))) + (push popped-byte ,buf)))) + +(defmacro build-encoded-message-block (encoding-map instream buf bit-stream) + `(loop + for i from 0 to 255 + for c = (read-byte ,instream nil nil) + while c + do (setq ,bit-stream (bitstream-push ,bit-stream (gethash c ,encoding-map))) + do (buf-drain-bitstream ,buf ,bit-stream 8) + finally (return i))) + +(defmacro write-encoded-message-block (encoding-map instream outstream) + `(let* ((bit-stream (cons 0 0)) + (buf (list)) + (len (build-encoded-message-block ,encoding-map ,instream buf bit-stream))) + (buf-drain-bitstream buf bit-stream 1) + (when (> len 0) + (write-byte (- len 1) ,outstream) + (write-sequence (reverse buf) ,outstream) + (format t "~%wrote Block with ~d codes" len) + ) + len)) + +(defmacro write-encoded-message (encoding-map instream outstream) + `(loop for blk-count from 1 while (= 256 (write-encoded-message-block ,encoding-map ,instream ,outstream)) finally (return blk-count))) (defun encode (instream outstream) (file-position instream :start) @@ -150,33 +137,40 @@ do (progn (write-byte (car pair) outstream); write the key (write-byte (cdr (cdr pair)) outstream); write the code-length + (bitstream-format (cdr pair)) + (format t " <-- ~:C" (code-char (car pair))) (setf (gethash (car pair) encoding-map) (cdr pair)) )) ;; Writing the contents (file-position instream :start) - (let ((bit-stream (cons 0 0))) - (write-number (file-length instream) outstream) - (loop for c = (read-byte instream nil nil) - while c - do (setq bit-stream (bitstream-push bit-stream (gethash c encoding-map))) - (stream-drain-bitstream outstream bit-stream 8)) - (stream-drain-bitstream outstream bit-stream 1))))) + (format t "~%wrote ~d blocks" (write-encoded-message encoding-map instream outstream)) + ))) (defun decode (instream outstream) (let ((canon (stream2canon instream)) (decoding-map (make-hash-table :test #'equal))) + (format t "~%Reading dictionary") (loop for pair in canon do (setf (gethash (cdr pair) decoding-map) (car pair))) - (let ((len (read-number instream)) (source (cons 0 0)) (sink (cons 0 0))) - (loop for i from 1 to len - do (loop until (gethash sink decoding-map) - do (when (= (cdr source) 0) - (setq source (cons (read-byte instream) 8))) - (multiple-value-bind (new-source popped) (bitstream-pop1 source) - (setq source new-source) - (setq sink (bitstream-push sink (cons popped 1)))) - finally (write-byte (gethash sink decoding-map) outstream) - (setq sink (cons 0 0))))))) + (loop for len = (read-byte instream nil nil) + while len + do (let ((source (cons 0 0))) + (format t "~%Expecting to see ~d codes" (+ len 1)) + (write-sequence + (loop for k from 0 to len + collect (let ((sink (cons 0 0))) + (loop until (gethash sink decoding-map) + do (progn + (when (= (cdr source) 0) + (setq source (cons (read-byte instream) 8))) + (multiple-value-bind (new-source popped) (bitstream-pop1 source) + (setq source new-source) + (setq sink (bitstream-push sink (cons popped 1)))) + ) + finally (return (gethash sink decoding-map))))) + outstream) + )) + )) (defmacro with-stream-translate (in-filename out-filename &body body) `(with-open-file (instream ,in-filename :element-type '(unsigned-byte 8) :direction :input) @@ -186,9 +180,8 @@ (defun main (&optional (args (or #+CLISP *args* #+SBCL *posix-argv* #+LISPWORKS system:*line-arguments-list* #+CMU extensions:*command-line-words* nil))) (with-stream-translate (nth 2 args) (nth 3 args) (funcall - (case (intern (string-upcase (nth 1 args))) (e #'encode) (d #'decode)) - instream outstream))) - + (case (intern (string-upcase (nth 1 args))) ('e #'encode) ('d #'decode)) + instream outstream))) ;(main (list "_" "e" "test-input.txt" "test-output.txt.hf")) ;(main (list "_" "d" "test-output.txt.hf" "test-output.txt"))