Run Length Encoding

February 9, 2021

We will solve the problem three times in less than twenty-five minutes. Here is the first solution, which uses uniq-c from the Standard Prelude:

(define (f str) (uniq-c char=? (string->list str)))

> (f "aaaabbbcca")
((#\a . 4) (#\b . 3) (#\c . 2) (#\a . 1))

The linked article that was the source of the problem gives this solution, which uses group-by from a previous exercise:

(define (chop-chop str)
   (let ((xs (group-by char=? (string->list str))))
     (map list (map (compose string car) xs)
               (map length xs))))

> (chop-chop "aaaabbbcca")
(("a" 4) ("b" 3) ("c" 2) ("a" 1))

This is the third exercise where we used group-by, so I’ll add it to the Standard Prelude.

The third method scans the string from left to right, accumulating output every time the current character differs from its predecessor; this is probably the solution Alexey expects candidates to offer:

(define (rle str)
   (let loop ((cs (string->list str))
              (prev #f) (count 0)
              (output (list)))
     (cond ((not prev) ; start input
             (if (null? cs) output
               (loop (cdr cs) (car cs) 1 output)))
           ((null? cs) ; end of input
             (reverse (cons (cons prev count) output)))
           ((char=? (car cs) prev) ; continue current run
             (loop (cdr cs) prev (+ count 1) output))
           (else ; end current run, start another
             (loop (cdr cs) (car cs) 1
                   (cons (cons prev count) output))))))

> (rle "aaaabbbcca")
((#\a . 4) (#\b . 3) (#\c . 2) (#\a . 1))

You can run all three programs at https://ideone.com/gpOaZy.

Posted by programmingpraxis

Filed in Exercises

9 Comments »

9 Responses to “Run Length Encoding”

V said

February 9, 2021 at 11:12 AM

First thing that came to mind. In Ruby.

  
def encode(input)
  char = nil
  count = 0
  out = ""
  # bag =  [["a", 4], ["b", 3], ["c",2], ["a",1]]
  bag =  []
  input.chars.each do |c|
    case char
    when nil
      char = c
      count = 1
    when c
      count += 1
    else
      bag << [char, count]
      char = c
      count = 1
    end
  end
  
  if char
    bag << [char, count]
  end
  
  out << "["
  out << bag.map {|a, b| "(\"#{a}\",#{b})"}.join(", ")
  out << "]"
  out
end

puts encode("aaaabbbcca")

Output:

[("a",4), ("b",3), ("c",2), ("a",1)]

chaw said

February 9, 2021 at 11:15 AM

Here is a short and simple solution in standard R7RS Scheme using fold
from SRFI 1.


(import (scheme base)
        (scheme write)
        (only (srfi 1) fold every))

(define samples ; ((input . output) ...)
  '(("aaaabbbcca" . ((#\a . 4) (#\b . 3) (#\c . 2) (#\a . 1)))
    ("" . ())
    ("foo" . ((#\f . 1) (#\o . 2)))))

(define (run-length-encode str)
  (reverse (fold (lambda (ch res)
                   (if (and (pair? res)
                            (char=? (caar res) ch))
                       (cons (cons ch (+ 1 (cdar res)))
                             (cdr res))
                       (cons (cons ch 1)
                             res)))
                 '()
                 (string->list str))))

(display (every (lambda (smpl)
                  (equal? (run-length-encode (car smpl))
                          (cdr smpl)))
                samples))
(newline)

Output:

#t

d1rge said

February 9, 2021 at 12:37 PM

Racket has splitf-at the same as split-while in your Prelude and I like :

#lang racket
(define (run-length-encoding xs)
  (match xs
    [(cons x _)
     (define-values (ys zs) (splitf-at xs (curry equal? x)))
     (cons (list (first ys) (length ys)) (run-length-encoding zs))]
    [_ null]))

(equal?
 (run-length-encoding (string->list "aaaabbbcca"))
 '((#\a 4) (#\b 3) (#\c 2) (#\a 1)))

(equal?
 (run-length-encoding (string->list ""))
 '())

(equal?
 (run-length-encoding (string->list "aaa"))
 '((#\a 3)))

(equal?
 (run-length-encoding (string->list "aaabcc"))
 '((#\a 3) (#\b 1) (#\c 2)))

Zack said
February 9, 2021 at 12:56 PM
Interesting drill. Although I don’t find RLE a serious contender when it comes to compression algos, it’s a fun exercise. Here is my take on it in Julia 1.5: https://pastebin.com/3hiUsc4v
Cheers

Daniel said

February 9, 2021 at 5:01 PM

Here’s a solution in Python.

from itertools import groupby

def run_length_encode(string):
    return [(k, len(tuple(g))) for k, g in groupby(string)]

print(run_length_encode('aaaabbbcca'))

Output:

[('a', 4), ('b', 3), ('c', 2), ('a', 1)]

Globules said

February 10, 2021 at 3:12 AM

Here’s a Haskell version.

import Control.Arrow ((&&&))
import Data.List (group)

rle :: Eq a => [a] -> [(a, Int)]
rle = map (head &&& length) . group

main :: IO ()
main = print $ rle "aaaabbbcca"

$ ./rle
[('a',4),('b',3),('c',2),('a',1)]

Kevin said

February 10, 2021 at 3:57 AM

A solution in Racket:

(define (run-length-encode str)
  (let next-char ((chars (string->list str)) (last-char null) (out empty))
    (cond
      ((empty? chars) (display (string-join (map (λ (x) (~a "(\"" (car x) "\"," (cadr x) ")")) (reverse out)) ", " #:before-first "[" #:after-last "]")))
      ((eq? (car chars) last-char) (next-char (cdr chars) last-char (cons (list (caar out) (add1 (cadar out))) (cdr out))))
      (else (next-char (cdr chars) (car chars) (cons (list (car chars) 1) out))))))

Example:

> (run-length-encode "aaaabbbcca")
[("a",4), ("b",3), ("c",2), ("a",1)]

Daniel said

February 10, 2021 at 7:55 PM

Here’s are two solutions in C. The first streams the output while looping over the input string. The second creates an intermediate array with the character counts.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char* argv[]) {
  assert(argc == 2);
  char* string = argv[1];
  char previous = *string;
  char count = 0;
  printf("[");
  bool comma = false;
  do {
    char c = *string;
    if (c != previous) {
      if (comma)
        printf(", ");
      printf("(\"%c\",%d)", previous, count);
      count = 0;
      comma = true;
    }
    ++count;
    ++string;
    previous = c;
  } while (previous != '\0');
  printf("]\n");
  return EXIT_SUCCESS;
}

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
  char key;
  int count;
} pair;

void run_length_encode(char* string, pair** encoding, int* n) {
  char previous = '\0';
  *n = 0;
  int strlen = 0;
  for (char* p = string; *p != '\0'; ++p) {
    ++strlen;
    char c = *p;
    if (c != previous)
      ++(*n);
    previous = c;
  }
  *encoding = malloc(sizeof(pair) * (*n));
  previous = '\0';
  int idx = -1;
  for (int i = 0; i < strlen; ++i) {
    char c = string[i];
    if (c != previous)
      (*encoding)[++idx] = (pair){.key = c, .count = 0};
    ++(*encoding)[idx].count;
    previous = c;
  }
}

int main(int argc, char* argv[]) {
  assert(argc == 2);
  int n;
  pair* encoding;
  run_length_encode(argv[1], &encoding, &n);
  printf("[");
  for (int i = 0; i < n; ++i) {
    if (i > 0)
      printf(", ");
    printf("(\"%c\",%d)", encoding[i].key, encoding[i].count);
  }
  printf("]\n");
  free(encoding);
  return EXIT_SUCCESS;
}

Example usage (same for both programs):

$ ./a.out aaaabbbcca
[("a",4), ("b",3), ("c",2), ("a",1)]

cagisw said

March 30, 2021 at 2:53 PM

package codekatas;

import java.util.LinkedList;

public class RunLengthEncoding {

	public static void main(String[] args) {

		// Output: [(“a”,4), (“b”,3), (“c”,2), (“a”,1)]
		// List<String> paroleTrovate
		RunLengthEncoding r = new RunLengthEncoding();
		String in = "aaaabbbcca";
		String tmp = "";
		LinkedList<Sequenza> occorrenzeList = new LinkedList<Sequenza>();
		int occorrenze = 0;
		RunLengthEncoding.Sequenza seq = null;
		for (int i = 0; i < in.length(); i++) {
			String charAt = in.charAt(i) + "";

			if (tmp.trim().length() == 0) {
				occorrenze++;
				seq = r.new Sequenza();
				seq.setLettera(charAt);
				seq.setOccorrenza(occorrenze);
				occorrenzeList.add(seq);
			}
			
			if (tmp.trim().length() > 0 && charAt.equals(tmp)) {
				occorrenze++;
				occorrenzeList.getLast().setOccorrenza(occorrenze);
			}

			if (tmp.trim().length() > 0 && !charAt.equals(tmp)) {
				occorrenze = 0;
				occorrenze++;
				seq = r.new Sequenza();
				seq.setLettera(charAt);
				seq.setOccorrenza(occorrenze);
				occorrenzeList.add(seq);
			}
			tmp = charAt;
		}
		System.out.println(occorrenzeList);
	}

	public class Sequenza {

		String lettera;
		int occorrenza;

		public String getLettera() {
			return lettera;
		}

		public void setLettera(String lettera) {
			this.lettera = lettera;
		}

		public int getOccorrenza() {
			return occorrenza;
		}

		public void setOccorrenza(int occorrenza) {
			this.occorrenza = occorrenza;
		}

		@Override
		public String toString() {
			return "Sequenza [lettera=" + lettera + ", occorrenza=" + occorrenza + "]";
		}
	}

}

S	M	T	W	T	F	S
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28

Programming Praxis