commit af938ba5ffb763a17aa296f533b0a6d003d78175 Author: Alan Daniels Date: Wed Jan 14 20:30:49 2026 +1100 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..355c023 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.txt* diff --git a/build/.gitignore b/build/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/build/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/huffman.lisp b/huffman.lisp new file mode 100644 index 0000000..01db12a --- /dev/null +++ b/huffman.lisp @@ -0,0 +1,189 @@ +(require :uiop) + +(defun buffer-freq (stream) + (let ((freq-map (make-hash-table))) + (loop for c = (read-byte stream nil nil) + while c + do (setf (gethash c freq-map) (+ 1 (gethash c freq-map 0)))) + (loop for key being the hash-keys of freq-map + using (hash-value value) + collect (cons value key)) + )) + +; --- + +(defun merge-interim-nodes (left right) + (cons (+ (car left) (car right)) (cons (cdr left) (cdr right)))) + +(defun singlep (lst) + (and (consp lst) (not (cdr lst)))) + +(defun huffman-tree-encode (huffman-tree value depth) + (if (consp huffman-tree) + (append + (huffman-tree-encode (car huffman-tree) (+ 1 (ash value 1)) (+ 1 depth)) + (huffman-tree-encode (cdr huffman-tree) (+ 0 (ash value 1)) (+ 1 depth))) + (list (cons huffman-tree (cons value depth))))) + +(defun comp-interim (left right) + "sort interum nodes" + (< (car left) (car right))) +(defun freq2huffman (interim-nodes) + (loop + until (singlep interim-nodes) + do (progn + (setq interim-nodes (sort interim-nodes #'comp-interim)) + (push (merge-interim-nodes (pop interim-nodes) (pop interim-nodes)) interim-nodes) + )) + (huffman-tree-encode (cdr (car interim-nodes)) 0 1)) + +; --- + +(defmacro mkcanon (loop-args key-form codelen-form) + `(let ((codelen nil) (code 0)) + (loop ,@loop-args + do (if codelen + (progn + (setq code (+ code 1)) + (when (> ,codelen-form codelen) + (setq codelen ,codelen-form) + (setq code (ash code 1)))) + (setq codelen ,codelen-form)) + collect (cons ,key-form (cons code codelen))))) +(macroexpand '(mkcanon (for pair in sorted-huffman-list while pair) (car pair) (cdr (cdr pair)))) + +(defun comp-huffman (left right) + "sort huffman mappings by code length & value" + (if (= (cdr (cdr right)) (cdr (cdr left))) + (< (car left) (car right)); sort by value + (> (cdr (cdr right)) (cdr (cdr left))); sort by code length + )) +(defmacro huffman2canon (unsorted-huffman-list) + `(mkcanon (for pair in (sort ,unsorted-huffman-list #'comp-huffman) while pair) (car pair) (cdr (cdr pair)))) +;(huffman2canon (list )) + +(defmacro stream2canon (instream) + `(let ((len (read-byte ,instream))) + (mkcanon (for i from 0 to len for pair = (cons (read-byte ,instream) (read-byte ,instream))) (car pair) (cdr pair)))) + +; --- + +(defmacro mk-mask (size) + (loop for i from 0 to (- size 1) sum (ash 1 i))) +;(format t "~b" (mk-mask 4)) + +(defun bitstream-format (source) + (format t "~%(~2d)~v,'0b" (cdr source) (cdr source) (car source))) +;(bitstream-format (cons 6 4)) + +(defun bitstream-push (sink source) + "Copies all bits from source to sink, returns new sink." + (cons (logior (ash (car sink) (cdr source)) (car source)) (+ (cdr sink) (cdr source)))) +;(bitstream-format (bitstream-push (cons 11 5) (cons 0 4))) + +(defmacro mk-bitstream-pop (source len) + "Splits the bitstream by 'len bits returns the new source & the popped bits" + `(let* ((new-len (- (cdr ,source) ,len)) + (mask (ash (mk-mask ,len) new-len))) + (values + (cons (logxor (logior (car ,source) mask) mask) new-len) + (ash (car ,source) (- ,len (cdr ,source)))))) + +(defun bitstream-pop8 (source) + (mk-bitstream-pop source 8)) +(defun bitstream-pop1 (source) + (mk-bitstream-pop source 1)) + +; --- + +(defmacro buf-drain-bitstream (buf bit-stream cmp) + `(loop while (>= (cdr ,bit-stream) ,cmp) + do (multiple-value-bind (new-bit-stream popped-byte) (bitstream-pop8 ,bit-stream) + (setq ,bit-stream new-bit-stream) + (push popped-byte ,buf)))) + +(defmacro build-encoded-message-block (encoding-map instream buf bit-stream) + `(loop + for i from 0 to 255 + for c = (read-byte ,instream nil nil) + while c + do (setq ,bit-stream (bitstream-push ,bit-stream (gethash c ,encoding-map))) + do (buf-drain-bitstream ,buf ,bit-stream 8) + finally (return i))) + +(defmacro write-encoded-message-block (encoding-map instream outstream) + `(let* ((bit-stream (cons 0 0)) + (buf (list)) + (len (build-encoded-message-block ,encoding-map ,instream buf bit-stream))) + (buf-drain-bitstream buf bit-stream 1) + (when (> len 0) + (write-byte (- len 1) ,outstream) + (write-sequence (reverse buf) ,outstream) + (format t "~%wrote Block with ~d codes" len) + ) + len)) + +(defmacro write-encoded-message (encoding-map instream outstream) + `(loop for blk-count from 1 while (= 256 (write-encoded-message-block ,encoding-map ,instream ,outstream)) finally (return blk-count))) + +(defun encode (instream outstream) + (file-position instream :start) + (let ((canon (huffman2canon (freq2huffman (buffer-freq instream)))) + (encoding-map (make-hash-table))) + (when (consp canon); if instream is empty, do mothing + ;; Writing the header + (write-byte (- (length canon) 1) outstream); write length of map (assuming 0 is unrepresentable) + (loop for pair in canon + do (progn + (write-byte (car pair) outstream); write the key + (write-byte (cdr (cdr pair)) outstream); write the code-length + (bitstream-format (cdr pair)) + (format t " <-- ~:C" (code-char (car pair))) + (setf (gethash (car pair) encoding-map) (cdr pair)) + )) + ;; Writing the contents + (file-position instream :start) + (format t "~%wrote ~d blocks" (write-encoded-message encoding-map instream outstream)) + ))) + +(defun decode (instream outstream) + (let ((canon (stream2canon instream)) + (decoding-map (make-hash-table :test #'equal))) + (format t "~%Reading dictionary") + (loop for pair in canon + do (setf (gethash (cdr pair) decoding-map) (car pair))) + (loop for len = (read-byte instream nil nil) + while len + do (let ((source (cons 0 0))) + (format t "~%Expecting to see ~d codes" (+ len 1)) + (write-sequence + (loop for k from 0 to len + collect (let ((sink (cons 0 0))) + (loop until (gethash sink decoding-map) + do (progn + (when (= (cdr source) 0) + (setq source (cons (read-byte instream) 8))) + (multiple-value-bind (new-source popped) (bitstream-pop1 source) + (setq source new-source) + (setq sink (bitstream-push sink (cons popped 1)))) + ) + finally (return (gethash sink decoding-map))))) + outstream) + )) + )) + +(defmacro with-stream-translate (in-filename out-filename &body body) + `(with-open-file (instream ,in-filename :element-type '(unsigned-byte 8) :direction :input) + (with-open-file (outstream ,out-filename :element-type '(unsigned-byte 8) :direction :output :if-exists :supersede) + (progn ,@body)))) + +(defun main (&optional (args (or #+CLISP *args* #+SBCL *posix-argv* #+LISPWORKS system:*line-arguments-list* #+CMU extensions:*command-line-words* nil))) + (with-stream-translate (nth 2 args) (nth 3 args) + (funcall + (case (intern (string-upcase (nth 1 args))) ('e #'encode) ('d #'decode)) + instream outstream))) +;(main (list "_" "e" "test-input.txt" "test-output.txt.hf")) +;(main (list "_" "d" "test-output.txt.hf" "test-output.txt")) + +(setq uiop:*image-entry-point* #'main) +(uiop:dump-image "build/huffman" :executable t)