Run Length Encoding
February 9, 2021
We will solve the problem three times in less than twenty-five minutes. Here is the first solution, which uses uniq-c from the Standard Prelude:
(define (f str) (uniq-c char=? (string->list str))) > (f "aaaabbbcca") ((#\a . 4) (#\b . 3) (#\c . 2) (#\a . 1))
The linked article that was the source of the problem gives this solution, which uses group-by from a previous exercise:
(define (chop-chop str)
(let ((xs (group-by char=? (string->list str))))
(map list (map (compose string car) xs)
(map length xs))))
> (chop-chop "aaaabbbcca")
(("a" 4) ("b" 3) ("c" 2) ("a" 1))
This is the third exercise where we used group-by, so I’ll add it to the Standard Prelude.
The third method scans the string from left to right, accumulating output every time the current character differs from its predecessor; this is probably the solution Alexey expects candidates to offer:
(define (rle str)
(let loop ((cs (string->list str))
(prev #f) (count 0)
(output (list)))
(cond ((not prev) ; start input
(if (null? cs) output
(loop (cdr cs) (car cs) 1 output)))
((null? cs) ; end of input
(reverse (cons (cons prev count) output)))
((char=? (car cs) prev) ; continue current run
(loop (cdr cs) prev (+ count 1) output))
(else ; end current run, start another
(loop (cdr cs) (car cs) 1
(cons (cons prev count) output))))))
> (rle "aaaabbbcca") ((#\a . 4) (#\b . 3) (#\c . 2) (#\a . 1))
You can run all three programs at https://ideone.com/gpOaZy.
First thing that came to mind. In Ruby.
def encode(input) char = nil count = 0 out = "" # bag = [["a", 4], ["b", 3], ["c",2], ["a",1]] bag = [] input.chars.each do |c| case char when nil char = c count = 1 when c count += 1 else bag << [char, count] char = c count = 1 end end if char bag << [char, count] end out << "[" out << bag.map {|a, b| "(\"#{a}\",#{b})"}.join(", ") out << "]" out end puts encode("aaaabbbcca")Output:
[("a",4), ("b",3), ("c",2), ("a",1)]Here is a short and simple solution in standard R7RS Scheme using fold
from SRFI 1.
(import (scheme base) (scheme write) (only (srfi 1) fold every)) (define samples ; ((input . output) ...) '(("aaaabbbcca" . ((#\a . 4) (#\b . 3) (#\c . 2) (#\a . 1))) ("" . ()) ("foo" . ((#\f . 1) (#\o . 2))))) (define (run-length-encode str) (reverse (fold (lambda (ch res) (if (and (pair? res) (char=? (caar res) ch)) (cons (cons ch (+ 1 (cdar res))) (cdr res)) (cons (cons ch 1) res))) '() (string->list str)))) (display (every (lambda (smpl) (equal? (run-length-encode (car smpl)) (cdr smpl))) samples)) (newline)Output:
Racket has splitf-at the same as split-while in your Prelude and I like :
#lang racket (define (run-length-encoding xs) (match xs [(cons x _) (define-values (ys zs) (splitf-at xs (curry equal? x))) (cons (list (first ys) (length ys)) (run-length-encoding zs))] [_ null])) (equal? (run-length-encoding (string->list "aaaabbbcca")) '((#\a 4) (#\b 3) (#\c 2) (#\a 1))) (equal? (run-length-encoding (string->list "")) '()) (equal? (run-length-encoding (string->list "aaa")) '((#\a 3))) (equal? (run-length-encoding (string->list "aaabcc")) '((#\a 3) (#\b 1) (#\c 2)))Interesting drill. Although I don’t find RLE a serious contender when it comes to compression algos, it’s a fun exercise. Here is my take on it in Julia 1.5: https://pastebin.com/3hiUsc4v
Cheers
Here’s a solution in Python.
from itertools import groupby def run_length_encode(string): return [(k, len(tuple(g))) for k, g in groupby(string)] print(run_length_encode('aaaabbbcca'))Output:
[('a', 4), ('b', 3), ('c', 2), ('a', 1)]Here’s a Haskell version.
$ ./rle [('a',4),('b',3),('c',2),('a',1)]A solution in Racket:
(define (run-length-encode str) (let next-char ((chars (string->list str)) (last-char null) (out empty)) (cond ((empty? chars) (display (string-join (map (λ (x) (~a "(\"" (car x) "\"," (cadr x) ")")) (reverse out)) ", " #:before-first "[" #:after-last "]"))) ((eq? (car chars) last-char) (next-char (cdr chars) last-char (cons (list (caar out) (add1 (cadar out))) (cdr out)))) (else (next-char (cdr chars) (car chars) (cons (list (car chars) 1) out))))))Example:
> (run-length-encode "aaaabbbcca") [("a",4), ("b",3), ("c",2), ("a",1)]Here’s are two solutions in C. The first streams the output while looping over the input string. The second creates an intermediate array with the character counts.
#include <assert.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> int main(int argc, char* argv[]) { assert(argc == 2); char* string = argv[1]; char previous = *string; char count = 0; printf("["); bool comma = false; do { char c = *string; if (c != previous) { if (comma) printf(", "); printf("(\"%c\",%d)", previous, count); count = 0; comma = true; } ++count; ++string; previous = c; } while (previous != '\0'); printf("]\n"); return EXIT_SUCCESS; }#include <assert.h> #include <stdio.h> #include <stdlib.h> typedef struct { char key; int count; } pair; void run_length_encode(char* string, pair** encoding, int* n) { char previous = '\0'; *n = 0; int strlen = 0; for (char* p = string; *p != '\0'; ++p) { ++strlen; char c = *p; if (c != previous) ++(*n); previous = c; } *encoding = malloc(sizeof(pair) * (*n)); previous = '\0'; int idx = -1; for (int i = 0; i < strlen; ++i) { char c = string[i]; if (c != previous) (*encoding)[++idx] = (pair){.key = c, .count = 0}; ++(*encoding)[idx].count; previous = c; } } int main(int argc, char* argv[]) { assert(argc == 2); int n; pair* encoding; run_length_encode(argv[1], &encoding, &n); printf("["); for (int i = 0; i < n; ++i) { if (i > 0) printf(", "); printf("(\"%c\",%d)", encoding[i].key, encoding[i].count); } printf("]\n"); free(encoding); return EXIT_SUCCESS; }Example usage (same for both programs):
$ ./a.out aaaabbbcca [("a",4), ("b",3), ("c",2), ("a",1)]package codekatas; import java.util.LinkedList; public class RunLengthEncoding { public static void main(String[] args) { // Output: [(“a”,4), (“b”,3), (“c”,2), (“a”,1)] // List<String> paroleTrovate RunLengthEncoding r = new RunLengthEncoding(); String in = "aaaabbbcca"; String tmp = ""; LinkedList<Sequenza> occorrenzeList = new LinkedList<Sequenza>(); int occorrenze = 0; RunLengthEncoding.Sequenza seq = null; for (int i = 0; i < in.length(); i++) { String charAt = in.charAt(i) + ""; if (tmp.trim().length() == 0) { occorrenze++; seq = r.new Sequenza(); seq.setLettera(charAt); seq.setOccorrenza(occorrenze); occorrenzeList.add(seq); } if (tmp.trim().length() > 0 && charAt.equals(tmp)) { occorrenze++; occorrenzeList.getLast().setOccorrenza(occorrenze); } if (tmp.trim().length() > 0 && !charAt.equals(tmp)) { occorrenze = 0; occorrenze++; seq = r.new Sequenza(); seq.setLettera(charAt); seq.setOccorrenza(occorrenze); occorrenzeList.add(seq); } tmp = charAt; } System.out.println(occorrenzeList); } public class Sequenza { String lettera; int occorrenza; public String getLettera() { return lettera; } public void setLettera(String lettera) { this.lettera = lettera; } public int getOccorrenza() { return occorrenza; } public void setOccorrenza(int occorrenza) { this.occorrenza = occorrenza; } @Override public String toString() { return "Sequenza [lettera=" + lettera + ", occorrenza=" + occorrenza + "]"; } } }