Run Length Encoding
February 9, 2021
Alexey Grigorev says:
Most candidates cannot solve this interview problem:
Input: “aaaabbbcca”
Output: [(“a”,4), (“b”,3), (“c”,2), (“a”,1)]
Write a function that converts the input to the output. I ask it in the screening inverview and give it 25 minutes. How would you solve it?
Your task is to write the requested function. When you are finished, you are welcome to read or run a suggested solution, or to post your own solution or discuss the exercise in the comments below.
First thing that came to mind. In Ruby.
def encode(input) char = nil count = 0 out = "" # bag = [["a", 4], ["b", 3], ["c",2], ["a",1]] bag = [] input.chars.each do |c| case char when nil char = c count = 1 when c count += 1 else bag << [char, count] char = c count = 1 end end if char bag << [char, count] end out << "[" out << bag.map {|a, b| "(\"#{a}\",#{b})"}.join(", ") out << "]" out end puts encode("aaaabbbcca")Output:
[("a",4), ("b",3), ("c",2), ("a",1)]Here is a short and simple solution in standard R7RS Scheme using fold
from SRFI 1.
(import (scheme base) (scheme write) (only (srfi 1) fold every)) (define samples ; ((input . output) ...) '(("aaaabbbcca" . ((#\a . 4) (#\b . 3) (#\c . 2) (#\a . 1))) ("" . ()) ("foo" . ((#\f . 1) (#\o . 2))))) (define (run-length-encode str) (reverse (fold (lambda (ch res) (if (and (pair? res) (char=? (caar res) ch)) (cons (cons ch (+ 1 (cdar res))) (cdr res)) (cons (cons ch 1) res))) '() (string->list str)))) (display (every (lambda (smpl) (equal? (run-length-encode (car smpl)) (cdr smpl))) samples)) (newline)Output:
Racket has splitf-at the same as split-while in your Prelude and I like :
#lang racket (define (run-length-encoding xs) (match xs [(cons x _) (define-values (ys zs) (splitf-at xs (curry equal? x))) (cons (list (first ys) (length ys)) (run-length-encoding zs))] [_ null])) (equal? (run-length-encoding (string->list "aaaabbbcca")) '((#\a 4) (#\b 3) (#\c 2) (#\a 1))) (equal? (run-length-encoding (string->list "")) '()) (equal? (run-length-encoding (string->list "aaa")) '((#\a 3))) (equal? (run-length-encoding (string->list "aaabcc")) '((#\a 3) (#\b 1) (#\c 2)))Interesting drill. Although I don’t find RLE a serious contender when it comes to compression algos, it’s a fun exercise. Here is my take on it in Julia 1.5: https://pastebin.com/3hiUsc4v
Cheers
Here’s a solution in Python.
from itertools import groupby def run_length_encode(string): return [(k, len(tuple(g))) for k, g in groupby(string)] print(run_length_encode('aaaabbbcca'))Output:
[('a', 4), ('b', 3), ('c', 2), ('a', 1)]Here’s a Haskell version.
$ ./rle [('a',4),('b',3),('c',2),('a',1)]A solution in Racket:
(define (run-length-encode str) (let next-char ((chars (string->list str)) (last-char null) (out empty)) (cond ((empty? chars) (display (string-join (map (λ (x) (~a "(\"" (car x) "\"," (cadr x) ")")) (reverse out)) ", " #:before-first "[" #:after-last "]"))) ((eq? (car chars) last-char) (next-char (cdr chars) last-char (cons (list (caar out) (add1 (cadar out))) (cdr out)))) (else (next-char (cdr chars) (car chars) (cons (list (car chars) 1) out))))))Example:
> (run-length-encode "aaaabbbcca") [("a",4), ("b",3), ("c",2), ("a",1)]Here’s are two solutions in C. The first streams the output while looping over the input string. The second creates an intermediate array with the character counts.
#include <assert.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> int main(int argc, char* argv[]) { assert(argc == 2); char* string = argv[1]; char previous = *string; char count = 0; printf("["); bool comma = false; do { char c = *string; if (c != previous) { if (comma) printf(", "); printf("(\"%c\",%d)", previous, count); count = 0; comma = true; } ++count; ++string; previous = c; } while (previous != '\0'); printf("]\n"); return EXIT_SUCCESS; }#include <assert.h> #include <stdio.h> #include <stdlib.h> typedef struct { char key; int count; } pair; void run_length_encode(char* string, pair** encoding, int* n) { char previous = '\0'; *n = 0; int strlen = 0; for (char* p = string; *p != '\0'; ++p) { ++strlen; char c = *p; if (c != previous) ++(*n); previous = c; } *encoding = malloc(sizeof(pair) * (*n)); previous = '\0'; int idx = -1; for (int i = 0; i < strlen; ++i) { char c = string[i]; if (c != previous) (*encoding)[++idx] = (pair){.key = c, .count = 0}; ++(*encoding)[idx].count; previous = c; } } int main(int argc, char* argv[]) { assert(argc == 2); int n; pair* encoding; run_length_encode(argv[1], &encoding, &n); printf("["); for (int i = 0; i < n; ++i) { if (i > 0) printf(", "); printf("(\"%c\",%d)", encoding[i].key, encoding[i].count); } printf("]\n"); free(encoding); return EXIT_SUCCESS; }Example usage (same for both programs):
$ ./a.out aaaabbbcca [("a",4), ("b",3), ("c",2), ("a",1)]package codekatas; import java.util.LinkedList; public class RunLengthEncoding { public static void main(String[] args) { // Output: [(“a”,4), (“b”,3), (“c”,2), (“a”,1)] // List<String> paroleTrovate RunLengthEncoding r = new RunLengthEncoding(); String in = "aaaabbbcca"; String tmp = ""; LinkedList<Sequenza> occorrenzeList = new LinkedList<Sequenza>(); int occorrenze = 0; RunLengthEncoding.Sequenza seq = null; for (int i = 0; i < in.length(); i++) { String charAt = in.charAt(i) + ""; if (tmp.trim().length() == 0) { occorrenze++; seq = r.new Sequenza(); seq.setLettera(charAt); seq.setOccorrenza(occorrenze); occorrenzeList.add(seq); } if (tmp.trim().length() > 0 && charAt.equals(tmp)) { occorrenze++; occorrenzeList.getLast().setOccorrenza(occorrenze); } if (tmp.trim().length() > 0 && !charAt.equals(tmp)) { occorrenze = 0; occorrenze++; seq = r.new Sequenza(); seq.setLettera(charAt); seq.setOccorrenza(occorrenze); occorrenzeList.add(seq); } tmp = charAt; } System.out.println(occorrenzeList); } public class Sequenza { String lettera; int occorrenza; public String getLettera() { return lettera; } public void setLettera(String lettera) { this.lettera = lettera; } public int getOccorrenza() { return occorrenza; } public void setOccorrenza(int occorrenza) { this.occorrenza = occorrenza; } @Override public String toString() { return "Sequenza [lettera=" + lettera + ", occorrenza=" + occorrenza + "]"; } } }