Entropy

January 22, 2016

Here is our entropy calculator:

(define (log2 x) (/ (log x) (log 2))) ; base-2 logarithm

(define (entropy filename)
  (with-input-from-file filename (lambda ()
    (let ((freqs (make-vector 256 0)) (ent 0.0))
      (let loop ((c (read-char)) (len 0))
        (if (not (eof-object? c))
            ; count this character, then keep reading
            (let ((i (char->integer c)))
              (vector-set! freqs i
                (+ (vector-ref freqs i) 1))
              (loop (read-char) (+ len 1)))
            ; at end of input, sum x * log2(x) over the symbols that occurred
            (do ((k 0 (+ k 1)))
                ((= k 256) (- ent))
              (let ((freq (vector-ref freqs k)))
                (when (positive? freq)
                  (let ((x (/ freq len)))
                    (set! ent (+ ent (* x (log2 x))))))))))))))

The outer loop (on let) reads the input one character at a time and counts each character in the freqs vector. The inner loop (on do) sums p * log2(p) for each symbol that appeared in the input, where p is that symbol's relative frequency; the negation of that sum is the Shannon entropy, measured in bits. Here's an example, where the input file entropy.ss contains the function shown above:

> (entropy "entropy.ss")
3.8011744425003404

You can run the program at http://ideone.com/GBPNhL.
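
The same counting-and-summing idea also works directly on an in-memory string. The sketch below is not part of the original post: entropy-of-string is a name introduced here just for illustration, and it assumes string-for-each (R7RS / SRFI-13) plus the log2 helper defined above.

; A sketch, not the original code: entropy of a string by the same method.
; entropy-of-string is a hypothetical helper introduced for illustration.
(define (entropy-of-string s)
  (let ((freqs (make-vector 256 0))
        (len (string-length s)))
    ; count each character, as the file-based version does
    (string-for-each
      (lambda (c)
        (let ((i (char->integer c)))
          (vector-set! freqs i (+ (vector-ref freqs i) 1))))
      s)
    ; sum p * log2(p) over the symbols that occurred, then negate
    (let loop ((k 0) (ent 0.0))
      (if (= k 256)
          (- ent)
          (let ((freq (vector-ref freqs k)))
            (loop (+ k 1)
                  (if (positive? freq)
                      (+ ent (* (/ freq len) (log2 (/ freq len))))
                      ent)))))))

For example, (entropy-of-string "banana") evaluates to roughly 1.4591 bits, which agrees with the H('banana') value quoted in the comments below.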

7 Responses to “Entropy”

  1. Jussi Piitulainen said

    Python has more entropy!

    from math import log
    from math import fsum as fum
    from collections import Counter as C
    
    def H(S):
        F = C(S)
        N = sum(F[s] for s in F)
        return - fum(p * log(p) for s in F for p in [F[s]/N]) / log(2)
    
    for data in ("banana", "nanana", "phenomenon"):
        print("H({}) = {:.4f}".format(repr(data), H(data)))
    
    print("H(H) = {:.4f}".format(H(open("H.py").read())))
    
    # Output before adding either of these comments:                                                                                                                        
    # H('banana') = 1.4591                                                                                                                                                  
    # H('nanana') = 1.0000                                                                                                                                                  
    # H('phenomenon') = 2.4464                                                                                                                                              
    # H(H) = 4.6956                                                                                                                                                         
    
    # Output after adding the above comment:                                                                                                                                
    # H('banana') = 1.4591                                                                                                                                                  
    # H('nanana') = 1.0000                                                                                                                                                  
    # H('phenomenon') = 2.4464                                                                                                                                              
    # H(H) = 4.9039                                                                                                                                                         
    
    # Afterthought, on Praxis' entropy.ss:                                                                                                                                  
    # H(entropy) = 3.8012
    
  2. Kooda said

    Another Scheme version, which works over arbitrary sorted lists:

    (define (count-occurences l)
      (define (count l last lastcount)
        (cond
          ((null? l) '())
          ((equal? (car l) last) (count (cdr l) last (add1 lastcount)))
          (else (cons lastcount (count (cdr l) (car l) 1)))))
      (if (null? l) '() (count (cdr l) (car l) 1)))

    (define (shannon-entropy l)
      (let ((len (length l)))
        (* -1
           (fold
             (lambda (occ last)
               (let ((pi (/ occ len)))
                 (+ last (* pi
                            (/ (log pi) (log 2))))))
             0
             (count-occurences l)))))

    (shannon-entropy (sort (string->list "Hello mine-turtle!") char<?))
    ;; -> 3.29883055667735

    (shannon-entropy '(red red blue black yellow purple))
    ;; -> 1.8208020839343

  3. bartek said

    My slightly modified Chicken Scheme version:

    (use vector-lib)

    (define (log2 x) (/ (log x) (log 2)))

    (define (filefreqs filename)
    (with-input-from-file filename (lambda ()
    (let ((freqs (make-vector 256 0)))
    (let loop ((c (read-char))
    (len 0))
    (if (eof-object? c)
    (vector-map (lambda (i x) (/ x len)) freqs)
    (let ((i (char->integer c)))
    (vector-set! freqs i (+ (vector-ref freqs i) 1))
    (loop (read-char) (+ 1 len)))))))))

    (define (entropy filename)
    (- (vector-fold (lambda (i prev x)
    (+ prev (* x (if (positive? x) (log2 x) 0))))
    0
    (filefreqs filename))))

    (display (entropy "entropy.scm"))

  4. bartek said

    Does formatting work?

    
    (use vector-lib)
    
    (define (log2 x) (/ (log x) (log 2)))
    
    (define (filefreqs filename)
      (with-input-from-file filename (lambda ()
        (let ((freqs (make-vector 256 0)))
          (let loop ((c (read-char))
                     (len 0))
            (if (eof-object? c)
              (vector-map (lambda (i x) (/ x len)) freqs)
              (let ((i (char->integer c)))
                (vector-set! freqs i (+ (vector-ref freqs i) 1))
                (loop (read-char) (+ 1 len)))))))))
    
    
    (define (entropy filename)
        (- (vector-fold (lambda (i prev x)
                      (+ prev (* x (if (positive? x) (log2 x) 0))))
                   0
                   (filefreqs filename))))
    
    (display (entropy "entropy.scm"))
    
    
  5. Jared said

    Haskell, using the nice little foldl library:

    import           Control.Foldl    (Fold (..))
    import qualified Control.Foldl    as L
    import           Data.Map.Strict  (Map)
    import qualified Data.Map.Strict  as Map
    
    count :: (Ord k, Num a) => Fold k (Map k a)
    count = Fold step mempty id where
      step m k = Map.insertWith (+) k 1 m
    
    distribution :: (Ord k, Fractional a) => Fold k (Map k a)
    distribution = divide <$> count <*> L.genericLength where
      divide m n = fmap (/ n) m
    
    entropize :: Floating a => a -> a
    entropize m = negate (m * logBase 2 m)
    
    entropy :: (Foldable f, Ord k, Floating a) => f k -> a
    entropy = L.fold (L.premap entropize L.sum) . L.fold distribution
    
  6. Jussi Piitulainen said

    Haskell has slightly less entropy than Python. Computed with my Python program above, running it on a file containing, in turn, just the Python source itself, Jared's Haskell, and Praxis' Scheme:

    H(python) = 4.6956
    H(haskell) = 4.6592
    H(scheme) = 3.8012
    
  7. mcmillhj said

    Almost got by without using a variable:

    #!/usr/bin/env perl
    
    use strict;
    use warnings;
    
    use List::Util qw(sum pairmap);
    use List::UtilsBy qw(count_by);
    
    sub H {
       my ($s) = @_;
      
       my $total_count = 0;
       return - sum(
          map      { $_ * log $_            } 
          map      { $_ / $total_count      } 
          pairmap  { $total_count += $b; $b } 
          count_by { $_                     } split //, $s
       ) / log 2;
    }
    
    my $text = do {
       open my $fh, '<', $ARGV[0] or die "$!";
       local $/ = undef; # slurp mode
       <$fh>;
    };
    
    print H("banana"), "\n";
    print H($text), "\n";
    
    __END__
    [hunter@apollo: 02]$ perl entropy.pl entropy.pl 
    1.45914791702725
    4.53554363380707
    
