Run Length Encoding

February 9, 2021

Alexey Grigorev says:

Most candidates cannot solve this interview problem:
Input: “aaaabbbcca”
Output: [(“a”,4), (“b”,3), (“c”,2), (“a”,1)]
Write a function that converts the input to the output. I ask it in the screening inverview and give it 25 minutes. How would you solve it?

Your task is to write the requested function. When you are finished, you are welcome to read or run a suggested solution, or to post your own solution or discuss the exercise in the comments below.

Posted by programmingpraxis

Filed in Exercises

9 Comments »

9 Responses to “Run Length Encoding”

V said

February 9, 2021 at 11:12 AM

First thing that came to mind. In Ruby.

  
def encode(input)
  char = nil
  count = 0
  out = ""
  # bag =  [["a", 4], ["b", 3], ["c",2], ["a",1]]
  bag =  []
  input.chars.each do |c|
    case char
    when nil
      char = c
      count = 1
    when c
      count += 1
    else
      bag << [char, count]
      char = c
      count = 1
    end
  end
  
  if char
    bag << [char, count]
  end
  
  out << "["
  out << bag.map {|a, b| "(\"#{a}\",#{b})"}.join(", ")
  out << "]"
  out
end

puts encode("aaaabbbcca")

Output:

[("a",4), ("b",3), ("c",2), ("a",1)]

chaw said

February 9, 2021 at 11:15 AM

Here is a short and simple solution in standard R7RS Scheme using fold
from SRFI 1.


(import (scheme base)
        (scheme write)
        (only (srfi 1) fold every))

(define samples ; ((input . output) ...)
  '(("aaaabbbcca" . ((#\a . 4) (#\b . 3) (#\c . 2) (#\a . 1)))
    ("" . ())
    ("foo" . ((#\f . 1) (#\o . 2)))))

(define (run-length-encode str)
  (reverse (fold (lambda (ch res)
                   (if (and (pair? res)
                            (char=? (caar res) ch))
                       (cons (cons ch (+ 1 (cdar res)))
                             (cdr res))
                       (cons (cons ch 1)
                             res)))
                 '()
                 (string->list str))))

(display (every (lambda (smpl)
                  (equal? (run-length-encode (car smpl))
                          (cdr smpl)))
                samples))
(newline)

Output:

#t

d1rge said

February 9, 2021 at 12:37 PM

Racket has splitf-at the same as split-while in your Prelude and I like :

#lang racket
(define (run-length-encoding xs)
  (match xs
    [(cons x _)
     (define-values (ys zs) (splitf-at xs (curry equal? x)))
     (cons (list (first ys) (length ys)) (run-length-encoding zs))]
    [_ null]))

(equal?
 (run-length-encoding (string->list "aaaabbbcca"))
 '((#\a 4) (#\b 3) (#\c 2) (#\a 1)))

(equal?
 (run-length-encoding (string->list ""))
 '())

(equal?
 (run-length-encoding (string->list "aaa"))
 '((#\a 3)))

(equal?
 (run-length-encoding (string->list "aaabcc"))
 '((#\a 3) (#\b 1) (#\c 2)))

Zack said
February 9, 2021 at 12:56 PM
Interesting drill. Although I don’t find RLE a serious contender when it comes to compression algos, it’s a fun exercise. Here is my take on it in Julia 1.5: https://pastebin.com/3hiUsc4v
Cheers

Daniel said

February 9, 2021 at 5:01 PM

Here’s a solution in Python.

from itertools import groupby

def run_length_encode(string):
    return [(k, len(tuple(g))) for k, g in groupby(string)]

print(run_length_encode('aaaabbbcca'))

Output:

[('a', 4), ('b', 3), ('c', 2), ('a', 1)]

Globules said

February 10, 2021 at 3:12 AM

Here’s a Haskell version.

import Control.Arrow ((&&&))
import Data.List (group)

rle :: Eq a => [a] -> [(a, Int)]
rle = map (head &&& length) . group

main :: IO ()
main = print $ rle "aaaabbbcca"

$ ./rle
[('a',4),('b',3),('c',2),('a',1)]

Kevin said

February 10, 2021 at 3:57 AM

A solution in Racket:

(define (run-length-encode str)
  (let next-char ((chars (string->list str)) (last-char null) (out empty))
    (cond
      ((empty? chars) (display (string-join (map (λ (x) (~a "(\"" (car x) "\"," (cadr x) ")")) (reverse out)) ", " #:before-first "[" #:after-last "]")))
      ((eq? (car chars) last-char) (next-char (cdr chars) last-char (cons (list (caar out) (add1 (cadar out))) (cdr out))))
      (else (next-char (cdr chars) (car chars) (cons (list (car chars) 1) out))))))

Example:

> (run-length-encode "aaaabbbcca")
[("a",4), ("b",3), ("c",2), ("a",1)]

Daniel said

February 10, 2021 at 7:55 PM

Here’s are two solutions in C. The first streams the output while looping over the input string. The second creates an intermediate array with the character counts.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char* argv[]) {
  assert(argc == 2);
  char* string = argv[1];
  char previous = *string;
  char count = 0;
  printf("[");
  bool comma = false;
  do {
    char c = *string;
    if (c != previous) {
      if (comma)
        printf(", ");
      printf("(\"%c\",%d)", previous, count);
      count = 0;
      comma = true;
    }
    ++count;
    ++string;
    previous = c;
  } while (previous != '\0');
  printf("]\n");
  return EXIT_SUCCESS;
}

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
  char key;
  int count;
} pair;

void run_length_encode(char* string, pair** encoding, int* n) {
  char previous = '\0';
  *n = 0;
  int strlen = 0;
  for (char* p = string; *p != '\0'; ++p) {
    ++strlen;
    char c = *p;
    if (c != previous)
      ++(*n);
    previous = c;
  }
  *encoding = malloc(sizeof(pair) * (*n));
  previous = '\0';
  int idx = -1;
  for (int i = 0; i < strlen; ++i) {
    char c = string[i];
    if (c != previous)
      (*encoding)[++idx] = (pair){.key = c, .count = 0};
    ++(*encoding)[idx].count;
    previous = c;
  }
}

int main(int argc, char* argv[]) {
  assert(argc == 2);
  int n;
  pair* encoding;
  run_length_encode(argv[1], &encoding, &n);
  printf("[");
  for (int i = 0; i < n; ++i) {
    if (i > 0)
      printf(", ");
    printf("(\"%c\",%d)", encoding[i].key, encoding[i].count);
  }
  printf("]\n");
  free(encoding);
  return EXIT_SUCCESS;
}

Example usage (same for both programs):

$ ./a.out aaaabbbcca
[("a",4), ("b",3), ("c",2), ("a",1)]

cagisw said

March 30, 2021 at 2:53 PM

package codekatas;

import java.util.LinkedList;

public class RunLengthEncoding {

	public static void main(String[] args) {

		// Output: [(“a”,4), (“b”,3), (“c”,2), (“a”,1)]
		// List<String> paroleTrovate
		RunLengthEncoding r = new RunLengthEncoding();
		String in = "aaaabbbcca";
		String tmp = "";
		LinkedList<Sequenza> occorrenzeList = new LinkedList<Sequenza>();
		int occorrenze = 0;
		RunLengthEncoding.Sequenza seq = null;
		for (int i = 0; i < in.length(); i++) {
			String charAt = in.charAt(i) + "";

			if (tmp.trim().length() == 0) {
				occorrenze++;
				seq = r.new Sequenza();
				seq.setLettera(charAt);
				seq.setOccorrenza(occorrenze);
				occorrenzeList.add(seq);
			}
			
			if (tmp.trim().length() > 0 && charAt.equals(tmp)) {
				occorrenze++;
				occorrenzeList.getLast().setOccorrenza(occorrenze);
			}

			if (tmp.trim().length() > 0 && !charAt.equals(tmp)) {
				occorrenze = 0;
				occorrenze++;
				seq = r.new Sequenza();
				seq.setLettera(charAt);
				seq.setOccorrenza(occorrenze);
				occorrenzeList.add(seq);
			}
			tmp = charAt;
		}
		System.out.println(occorrenzeList);
	}

	public class Sequenza {

		String lettera;
		int occorrenza;

		public String getLettera() {
			return lettera;
		}

		public void setLettera(String lettera) {
			this.lettera = lettera;
		}

		public int getOccorrenza() {
			return occorrenza;
		}

		public void setOccorrenza(int occorrenza) {
			this.occorrenza = occorrenza;
		}

		@Override
		public String toString() {
			return "Sequenza [lettera=" + lettera + ", occorrenza=" + occorrenza + "]";
		}
	}

}

S	M	T	W	T	F	S
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28

Programming Praxis