Run Length Encoding

February 9, 2021

We will solve the problem three times in less than twenty-five minutes. Here is the first solution, which uses uniq-c from the Standard Prelude:

(define (f str) (uniq-c char=? (string->list str)))

> (f "aaaabbbcca")
((#\a . 4) (#\b . 3) (#\c . 2) (#\a . 1)) 

The linked article that was the source of the problem gives this solution, which uses group-by from a previous exercise:

(define (chop-chop str)
   (let ((xs (group-by char=? (string->list str))))
     (map list (map (compose string car) xs)
               (map length xs))))
> (chop-chop "aaaabbbcca")
(("a" 4) ("b" 3) ("c" 2) ("a" 1)) 

This is the third exercise where we used group-by, so I’ll add it to the Standard Prelude.

The third method scans the string from left to right, accumulating output every time the current character differs from its predecessor; this is probably the solution Alexey expects candidates to offer:

(define (rle str)
   (let loop ((cs (string->list str))
              (prev #f) (count 0)
              (output (list)))
     (cond ((not prev) ; start input
             (if (null? cs) output
               (loop (cdr cs) (car cs) 1 output)))
           ((null? cs) ; end of input
             (reverse (cons (cons prev count) output)))
           ((char=? (car cs) prev) ; continue current run
             (loop (cdr cs) prev (+ count 1) output))
           (else ; end current run, start another
             (loop (cdr cs) (car cs) 1
                   (cons (cons prev count) output))))))
> (rle "aaaabbbcca")
((#\a . 4) (#\b . 3) (#\c . 2) (#\a . 1)) 

You can run all three programs at https://ideone.com/gpOaZy.

Advertisement

Pages: 1 2

9 Responses to “Run Length Encoding”

  1. V said

    First thing that came to mind. In Ruby.

      
    def encode(input)
      char = nil
      count = 0
      out = ""
      # bag =  [["a", 4], ["b", 3], ["c",2], ["a",1]]
      bag =  []
      input.chars.each do |c|
        case char
        when nil
          char = c
          count = 1
        when c
          count += 1
        else
          bag << [char, count]
          char = c
          count = 1
        end
      end
      
      if char
        bag << [char, count]
      end
      
      out << "["
      out << bag.map {|a, b| "(\"#{a}\",#{b})"}.join(", ")
      out << "]"
      out
    end
    
    puts encode("aaaabbbcca")
    

    Output:

    [("a",4), ("b",3), ("c",2), ("a",1)]  
    
  2. chaw said

    Here is a short and simple solution in standard R7RS Scheme using fold
    from SRFI 1.

    (import (scheme base)
            (scheme write)
            (only (srfi 1) fold every))
    
    (define samples ; ((input . output) ...)
      '(("aaaabbbcca" . ((#\a . 4) (#\b . 3) (#\c . 2) (#\a . 1)))
        ("" . ())
        ("foo" . ((#\f . 1) (#\o . 2)))))
    
    (define (run-length-encode str)
      (reverse (fold (lambda (ch res)
                       (if (and (pair? res)
                                (char=? (caar res) ch))
                           (cons (cons ch (+ 1 (cdar res)))
                                 (cdr res))
                           (cons (cons ch 1)
                                 res)))
                     '()
                     (string->list str))))
    
    (display (every (lambda (smpl)
                      (equal? (run-length-encode (car smpl))
                              (cdr smpl)))
                    samples))
    (newline)
    

    Output:

    #t
    

  3. d1rge said

    Racket has splitf-at the same as split-while in your Prelude and I like :

    #lang racket
    (define (run-length-encoding xs)
      (match xs
        [(cons x _)
         (define-values (ys zs) (splitf-at xs (curry equal? x)))
         (cons (list (first ys) (length ys)) (run-length-encoding zs))]
        [_ null]))
    
    (equal?
     (run-length-encoding (string->list "aaaabbbcca"))
     '((#\a 4) (#\b 3) (#\c 2) (#\a 1)))
    
    (equal?
     (run-length-encoding (string->list ""))
     '())
    
    (equal?
     (run-length-encoding (string->list "aaa"))
     '((#\a 3)))
    
    (equal?
     (run-length-encoding (string->list "aaabcc"))
     '((#\a 3) (#\b 1) (#\c 2)))
    
  4. Zack said

    Interesting drill. Although I don’t find RLE a serious contender when it comes to compression algos, it’s a fun exercise. Here is my take on it in Julia 1.5: https://pastebin.com/3hiUsc4v
    Cheers

  5. Daniel said

    Here’s a solution in Python.

    from itertools import groupby
    
    def run_length_encode(string):
        return [(k, len(tuple(g))) for k, g in groupby(string)]
    
    print(run_length_encode('aaaabbbcca'))
    

    Output:

    [('a', 4), ('b', 3), ('c', 2), ('a', 1)]
    
  6. Globules said

    Here’s a Haskell version.

    import Control.Arrow ((&&&))
    import Data.List (group)
    
    rle :: Eq a => [a] -> [(a, Int)]
    rle = map (head &&& length) . group
    
    main :: IO ()
    main = print $ rle "aaaabbbcca"
    
    $ ./rle
    [('a',4),('b',3),('c',2),('a',1)]
    
  7. Kevin said

    A solution in Racket:

    (define (run-length-encode str)
      (let next-char ((chars (string->list str)) (last-char null) (out empty))
        (cond
          ((empty? chars) (display (string-join (map (λ (x) (~a "(\"" (car x) "\"," (cadr x) ")")) (reverse out)) ", " #:before-first "[" #:after-last "]")))
          ((eq? (car chars) last-char) (next-char (cdr chars) last-char (cons (list (caar out) (add1 (cadar out))) (cdr out))))
          (else (next-char (cdr chars) (car chars) (cons (list (car chars) 1) out))))))
    

    Example:

    > (run-length-encode "aaaabbbcca")
    [("a",4), ("b",3), ("c",2), ("a",1)]
    
  8. Daniel said

    Here’s are two solutions in C. The first streams the output while looping over the input string. The second creates an intermediate array with the character counts.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    
    int main(int argc, char* argv[]) {
      assert(argc == 2);
      char* string = argv[1];
      char previous = *string;
      char count = 0;
      printf("[");
      bool comma = false;
      do {
        char c = *string;
        if (c != previous) {
          if (comma)
            printf(", ");
          printf("(\"%c\",%d)", previous, count);
          count = 0;
          comma = true;
        }
        ++count;
        ++string;
        previous = c;
      } while (previous != '\0');
      printf("]\n");
      return EXIT_SUCCESS;
    }
    
    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>
    
    typedef struct {
      char key;
      int count;
    } pair;
    
    void run_length_encode(char* string, pair** encoding, int* n) {
      char previous = '\0';
      *n = 0;
      int strlen = 0;
      for (char* p = string; *p != '\0'; ++p) {
        ++strlen;
        char c = *p;
        if (c != previous)
          ++(*n);
        previous = c;
      }
      *encoding = malloc(sizeof(pair) * (*n));
      previous = '\0';
      int idx = -1;
      for (int i = 0; i < strlen; ++i) {
        char c = string[i];
        if (c != previous)
          (*encoding)[++idx] = (pair){.key = c, .count = 0};
        ++(*encoding)[idx].count;
        previous = c;
      }
    }
    
    int main(int argc, char* argv[]) {
      assert(argc == 2);
      int n;
      pair* encoding;
      run_length_encode(argv[1], &encoding, &n);
      printf("[");
      for (int i = 0; i < n; ++i) {
        if (i > 0)
          printf(", ");
        printf("(\"%c\",%d)", encoding[i].key, encoding[i].count);
      }
      printf("]\n");
      free(encoding);
      return EXIT_SUCCESS;
    }
    

    Example usage (same for both programs):

    $ ./a.out aaaabbbcca
    [("a",4), ("b",3), ("c",2), ("a",1)]
    
  9. cagisw said
    package codekatas;
    
    import java.util.LinkedList;
    
    public class RunLengthEncoding {
    
    	public static void main(String[] args) {
    
    		// Output: [(“a”,4), (“b”,3), (“c”,2), (“a”,1)]
    		// List<String> paroleTrovate
    		RunLengthEncoding r = new RunLengthEncoding();
    		String in = "aaaabbbcca";
    		String tmp = "";
    		LinkedList<Sequenza> occorrenzeList = new LinkedList<Sequenza>();
    		int occorrenze = 0;
    		RunLengthEncoding.Sequenza seq = null;
    		for (int i = 0; i < in.length(); i++) {
    			String charAt = in.charAt(i) + "";
    
    			if (tmp.trim().length() == 0) {
    				occorrenze++;
    				seq = r.new Sequenza();
    				seq.setLettera(charAt);
    				seq.setOccorrenza(occorrenze);
    				occorrenzeList.add(seq);
    			}
    			
    			if (tmp.trim().length() > 0 && charAt.equals(tmp)) {
    				occorrenze++;
    				occorrenzeList.getLast().setOccorrenza(occorrenze);
    			}
    
    			if (tmp.trim().length() > 0 && !charAt.equals(tmp)) {
    				occorrenze = 0;
    				occorrenze++;
    				seq = r.new Sequenza();
    				seq.setLettera(charAt);
    				seq.setOccorrenza(occorrenze);
    				occorrenzeList.add(seq);
    			}
    			tmp = charAt;
    		}
    		System.out.println(occorrenzeList);
    	}
    
    	public class Sequenza {
    
    		String lettera;
    		int occorrenza;
    
    		public String getLettera() {
    			return lettera;
    		}
    
    		public void setLettera(String lettera) {
    			this.lettera = lettera;
    		}
    
    		public int getOccorrenza() {
    			return occorrenza;
    		}
    
    		public void setOccorrenza(int occorrenza) {
    			this.occorrenza = occorrenza;
    		}
    
    		@Override
    		public String toString() {
    			return "Sequenza [lettera=" + lettera + ", occorrenza=" + occorrenza + "]";
    		}
    	}
    
    }
    
    

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: