Run Length Encoding

February 9, 2021

Alexey Grigorev says:

Most candidates cannot solve this interview problem:

Input: “aaaabbbcca”

Output: [(“a”,4), (“b”,3), (“c”,2), (“a”,1)]

Write a function that converts the input to the output. I ask it in the screening inverview and give it 25 minutes. How would you solve it?

Your task is to write the requested function. When you are finished, you are welcome to read or run a suggested solution, or to post your own solution or discuss the exercise in the comments below.

Pages: 1 2

9 Responses to “Run Length Encoding”

  1. V said

    First thing that came to mind. In Ruby.

      
    def encode(input)
      char = nil
      count = 0
      out = ""
      # bag =  [["a", 4], ["b", 3], ["c",2], ["a",1]]
      bag =  []
      input.chars.each do |c|
        case char
        when nil
          char = c
          count = 1
        when c
          count += 1
        else
          bag << [char, count]
          char = c
          count = 1
        end
      end
      
      if char
        bag << [char, count]
      end
      
      out << "["
      out << bag.map {|a, b| "(\"#{a}\",#{b})"}.join(", ")
      out << "]"
      out
    end
    
    puts encode("aaaabbbcca")
    

    Output:

    [("a",4), ("b",3), ("c",2), ("a",1)]  
    
  2. chaw said

    Here is a short and simple solution in standard R7RS Scheme using fold
    from SRFI 1.

    (import (scheme base)
            (scheme write)
            (only (srfi 1) fold every))
    
    (define samples ; ((input . output) ...)
      '(("aaaabbbcca" . ((#\a . 4) (#\b . 3) (#\c . 2) (#\a . 1)))
        ("" . ())
        ("foo" . ((#\f . 1) (#\o . 2)))))
    
    (define (run-length-encode str)
      (reverse (fold (lambda (ch res)
                       (if (and (pair? res)
                                (char=? (caar res) ch))
                           (cons (cons ch (+ 1 (cdar res)))
                                 (cdr res))
                           (cons (cons ch 1)
                                 res)))
                     '()
                     (string->list str))))
    
    (display (every (lambda (smpl)
                      (equal? (run-length-encode (car smpl))
                              (cdr smpl)))
                    samples))
    (newline)
    

    Output:

    #t
    

  3. d1rge said

    Racket has splitf-at the same as split-while in your Prelude and I like :

    #lang racket
    (define (run-length-encoding xs)
      (match xs
        [(cons x _)
         (define-values (ys zs) (splitf-at xs (curry equal? x)))
         (cons (list (first ys) (length ys)) (run-length-encoding zs))]
        [_ null]))
    
    (equal?
     (run-length-encoding (string->list "aaaabbbcca"))
     '((#\a 4) (#\b 3) (#\c 2) (#\a 1)))
    
    (equal?
     (run-length-encoding (string->list ""))
     '())
    
    (equal?
     (run-length-encoding (string->list "aaa"))
     '((#\a 3)))
    
    (equal?
     (run-length-encoding (string->list "aaabcc"))
     '((#\a 3) (#\b 1) (#\c 2)))
    
  4. Zack said

    Interesting drill. Although I don’t find RLE a serious contender when it comes to compression algos, it’s a fun exercise. Here is my take on it in Julia 1.5: https://pastebin.com/3hiUsc4v
    Cheers

  5. Daniel said

    Here’s a solution in Python.

    from itertools import groupby
    
    def run_length_encode(string):
        return [(k, len(tuple(g))) for k, g in groupby(string)]
    
    print(run_length_encode('aaaabbbcca'))
    

    Output:

    [('a', 4), ('b', 3), ('c', 2), ('a', 1)]
    
  6. Globules said

    Here’s a Haskell version.

    import Control.Arrow ((&&&))
    import Data.List (group)
    
    rle :: Eq a => [a] -> [(a, Int)]
    rle = map (head &&& length) . group
    
    main :: IO ()
    main = print $ rle "aaaabbbcca"
    
    $ ./rle
    [('a',4),('b',3),('c',2),('a',1)]
    
  7. Kevin said

    A solution in Racket:

    (define (run-length-encode str)
      (let next-char ((chars (string->list str)) (last-char null) (out empty))
        (cond
          ((empty? chars) (display (string-join (map (λ (x) (~a "(\"" (car x) "\"," (cadr x) ")")) (reverse out)) ", " #:before-first "[" #:after-last "]")))
          ((eq? (car chars) last-char) (next-char (cdr chars) last-char (cons (list (caar out) (add1 (cadar out))) (cdr out))))
          (else (next-char (cdr chars) (car chars) (cons (list (car chars) 1) out))))))
    

    Example:

    > (run-length-encode "aaaabbbcca")
    [("a",4), ("b",3), ("c",2), ("a",1)]
    
  8. Daniel said

    Here’s are two solutions in C. The first streams the output while looping over the input string. The second creates an intermediate array with the character counts.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    
    int main(int argc, char* argv[]) {
      assert(argc == 2);
      char* string = argv[1];
      char previous = *string;
      char count = 0;
      printf("[");
      bool comma = false;
      do {
        char c = *string;
        if (c != previous) {
          if (comma)
            printf(", ");
          printf("(\"%c\",%d)", previous, count);
          count = 0;
          comma = true;
        }
        ++count;
        ++string;
        previous = c;
      } while (previous != '\0');
      printf("]\n");
      return EXIT_SUCCESS;
    }
    
    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>
    
    typedef struct {
      char key;
      int count;
    } pair;
    
    void run_length_encode(char* string, pair** encoding, int* n) {
      char previous = '\0';
      *n = 0;
      int strlen = 0;
      for (char* p = string; *p != '\0'; ++p) {
        ++strlen;
        char c = *p;
        if (c != previous)
          ++(*n);
        previous = c;
      }
      *encoding = malloc(sizeof(pair) * (*n));
      previous = '\0';
      int idx = -1;
      for (int i = 0; i < strlen; ++i) {
        char c = string[i];
        if (c != previous)
          (*encoding)[++idx] = (pair){.key = c, .count = 0};
        ++(*encoding)[idx].count;
        previous = c;
      }
    }
    
    int main(int argc, char* argv[]) {
      assert(argc == 2);
      int n;
      pair* encoding;
      run_length_encode(argv[1], &encoding, &n);
      printf("[");
      for (int i = 0; i < n; ++i) {
        if (i > 0)
          printf(", ");
        printf("(\"%c\",%d)", encoding[i].key, encoding[i].count);
      }
      printf("]\n");
      free(encoding);
      return EXIT_SUCCESS;
    }
    

    Example usage (same for both programs):

    $ ./a.out aaaabbbcca
    [("a",4), ("b",3), ("c",2), ("a",1)]
    
  9. cagisw said
    package codekatas;
    
    import java.util.LinkedList;
    
    public class RunLengthEncoding {
    
    	public static void main(String[] args) {
    
    		// Output: [(“a”,4), (“b”,3), (“c”,2), (“a”,1)]
    		// List<String> paroleTrovate
    		RunLengthEncoding r = new RunLengthEncoding();
    		String in = "aaaabbbcca";
    		String tmp = "";
    		LinkedList<Sequenza> occorrenzeList = new LinkedList<Sequenza>();
    		int occorrenze = 0;
    		RunLengthEncoding.Sequenza seq = null;
    		for (int i = 0; i < in.length(); i++) {
    			String charAt = in.charAt(i) + "";
    
    			if (tmp.trim().length() == 0) {
    				occorrenze++;
    				seq = r.new Sequenza();
    				seq.setLettera(charAt);
    				seq.setOccorrenza(occorrenze);
    				occorrenzeList.add(seq);
    			}
    			
    			if (tmp.trim().length() > 0 && charAt.equals(tmp)) {
    				occorrenze++;
    				occorrenzeList.getLast().setOccorrenza(occorrenze);
    			}
    
    			if (tmp.trim().length() > 0 && !charAt.equals(tmp)) {
    				occorrenze = 0;
    				occorrenze++;
    				seq = r.new Sequenza();
    				seq.setLettera(charAt);
    				seq.setOccorrenza(occorrenze);
    				occorrenzeList.add(seq);
    			}
    			tmp = charAt;
    		}
    		System.out.println(occorrenzeList);
    	}
    
    	public class Sequenza {
    
    		String lettera;
    		int occorrenza;
    
    		public String getLettera() {
    			return lettera;
    		}
    
    		public void setLettera(String lettera) {
    			this.lettera = lettera;
    		}
    
    		public int getOccorrenza() {
    			return occorrenza;
    		}
    
    		public void setOccorrenza(int occorrenza) {
    			this.occorrenza = occorrenza;
    		}
    
    		@Override
    		public String toString() {
    			return "Sequenza [lettera=" + lettera + ", occorrenza=" + occorrenza + "]";
    		}
    	}
    
    }
    
    

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: