Sliding Median

June 29, 2012

Our function is just a direct translation of the algorithm stated above, stopping after n items; the next function fetches the next item from the input stream each time it is called:

(define (sliding-median k next n)
  (let ((window (make-vector k)) (xs (make-dict <)))
    (do ((i 0 (+ i 1))) ((= i k))
      (let ((x (next)))
        (vector-set! window i x)
        (set! xs (xs 'insert x x))))
    (do ((i 0 (+ i 1))) ((= i n))
      (let ((x (next)))
        (display
          (if (odd? k)
              (car (xs 'nth (quotient k 2)))
              (/ (+ (car (xs 'nth (quotient k 2)))
                    (car (xs 'nth (+ (quotient k 2) 1))))
                 2)))
        (newline)
        (set! xs (xs 'delete (vector-ref window (modulo i k))))
        (set! xs (xs 'insert x x))
        (vector-set! window (modulo i k) x)))))

This will break if the sliding window ever contains two or more identical items; you could fix that with a custom dictionary that permits identical items. Here’s an example, with the input stream read from a string port:

> (with-input-from-string "13 28 94 34 32 78 12 10 84 93 45 66 67 52 24 49"
    (lambda () (sliding-median 5 read 12)))
32
34
34
32
32
78
45
66
67
66
52
52

You can run the program at http://programmingpraxis.codepad.org/rsJImeg0, where we used a random number generator to supply the stream of numbers, instead of a string, because codepad doesn’t support the with-input-from-string syntax.

Advertisement

Pages: 1 2

7 Responses to “Sliding Median”

  1. […] today’s Programming Praxis exercise, our goal is to determine the median values of a sliding window over a […]

  2. My Haskell solution (see http://bonsaicode.wordpress.com/2012/06/29/programming-praxis-sliding-median/ for a version with comments):

    import Data.List
    
    slidingMedian :: (Ord a, Fractional a) => Int -> Int -> [a] -> [a]
    slidingMedian size count =
        map ((\ ~(x:xs) -> if odd size then x else (x + head xs) / 2) .
             drop (div (size - 1) 2) . sort . take size) . take count . tails
    
  3. Mike said

    Fairly basic python implementation.
    It uses a sorted list for the ordered map.

    from bisect import insort
    from collections import deque
    
    def sliding_median(iterable, k):
        ''' generator returning median value in a sliding window k elements wide.
    
        >>> data = [13, 28, 94, 34, 32, 78, 12, 10, 84, 93, 45, 66, 67, 52, 24, 49]
        >>> list(sliding_median(data, 5))
        [32, 34, 34, 32, 32, 78, 45, 66, 67, 66, 52, 52]
        '''
        fifo = deque(maxlen=k)
        omap = []
    
        m = (k-1)//2
        median = (lambda:omap[m]) if k&1 else (lambda:(omap[m] + omap[m+1])/2.0)
    
        for x in iter(iterable):
            fifo.append(x)
            insort(omap, x)
            if len(fifo) == k:
                yield median()
                omap.remove(fifo[0])
    
  4. madscifi said

    templated c++ implementation.

    #include <iostream>
    #include <vector>
    #include <algorithm>
    #include <iterator>
    
    template<class I, class J>
    void SlidingMedian( I inIter, I inEnd, J output, int window )
    {
        typedef typename std::iterator_traits<I>::value_type T;
    
        std::vector<T> cq( window );
        std::vector<T> ordered( window );
     
        int index = 0;
        bool full = false;
        for( ; inIter != inEnd; ++inIter )
        {
            T in = *inIter;
            T old = cq[index];
            cq[index] = in;
            index = ( index+1 ) % window;
            if( index == 0 ) full = true;
            ordered.erase( std::lower_bound( ordered.begin(), ordered.end(), old ) );
            ordered.insert( std::upper_bound( ordered.begin(), ordered.end(), in ), in );
            if( full )
            {
                if( window % 2 ) *output++ = ordered[ window/2 ];
                else *output++ = ( ordered[ window/2-1 ] + ordered[ window/2 ] ) / T(2);
            }
        }
    }
    
    int main(int argc, char* argv[])
    {
        double d[] = { 13, 28, 94, 34, 32, 78, 12, 10, 84, 93, 45, 66 };    
        std::vector<double> output;
     
        SlidingMedian( &d[0], 
                       &d[ sizeof(d)/sizeof(d[0]) ], 
                       std::insert_iterator< std::vector<double> >( 
                           output, output.begin() ), 
                       5 );
    
        for( double v : output ) { std::cout << v << " "; }
        std::cout << std::endl;
     
        return 0;
    }
    
  5. ryan said

    SlidingMedian class uses three data structures:

    1) List which maintains the order of values as they arrive
    2) Two STL sets, which are implemented as balanced binary search trees. One set stores all values less than or equal to the median, the other set stores values greater than or equal to the median.

    Insert and median retrieval is therefore O(log n).

    #include <iostream>
    #include <iomanip>
    #include <set>
    #include <list>
    
    // ****************************************************
    // ****************************************************
    class SlidingMedian {
      public:
    
        /**
         * Construct a new SlidingMedian class
         * @param k the window size of the sliding median
         */
        SlidingMedian(size_t k);
    
        /**
         * Post a new value to the sliding median object
         * @param v the new value to post
         * @return the new median
         */
        float post(int v);
    
      private:
    
        /** store the window size */
        size_t window;
    
        /** store the orded list of values */
        std::list<int> list;
    
        /** store the lower half set of values */
        std::set<int> lowSet;
    
        /** store the upper half set of values */
        std::set<int> hghSet;
    };
    
    // ****************************************************
    // ****************************************************
    SlidingMedian::SlidingMedian(size_t k) :
      window(k)
    {
    
    }
    // ****************************************************
    // ****************************************************
    SlidingMedian::SlidingMedian(size_t k) :
      window(k)
    {
    
    }
    
    // ****************************************************
    // ****************************************************
    float SlidingMedian::post(int v) {
      // remove the old value
      if(this->list.size() == this->window) {
        int vOld(this->list.front());
    
        // erase the value from the list
        this->list.erase(this->list.begin());
    
        // see which set the value to remove is in
        if(this->lowSet.size() && vOld <= *(this->lowSet.rbegin())) {
          this->lowSet.erase(vOld);
        }
        else if(this->hghSet.size() && vOld >= *(this->hghSet.begin())) {
          this->hghSet.erase(vOld);
        }
        else {
          throw std::string("SlidingMedian::post Internal Error");
        }
      }
    
      // insert the new value 
      this->list.push_back(v);
    
      if(this->hghSet.size() && v >= (*this->hghSet.begin())) {
        this->hghSet.insert(v);
      }
      else {
        this->lowSet.insert(v);
      }
    
      // balance the two sets
      while(this->lowSet.size() > (this->hghSet.size())) {
        this->hghSet.insert(*this->lowSet.rbegin());
        this->lowSet.erase(*this->lowSet.rbegin());
      }
    
      while(this->hghSet.size() > (this->lowSet.size())) {
        this->lowSet.insert(*this->hghSet.begin());
        this->hghSet.erase(*this->hghSet.begin());
      }
    
      // return the median of the two sets 
      if(this->lowSet.size() == this->hghSet.size()) {
        return ((*this->lowSet.rbegin()+*this->hghSet.begin())/2.0f);
      }
      else {
        return *this->lowSet.rbegin();
      }
    }
    
    // ****************************************************
    // ****************************************************
    int main(int argc, char *argv[]) {
      SlidingMedian slm(17);
    
      for(size_t idx(0); idx < 100; ++idx) {
        float median(slm.post(rand() % 1000));
        std::cout << "Median: " << median << std::endl;
      }
    
      return 0;
    }
    
    
  6. madscifi said

    Of course, anyone that is actually paying attention would have used:

      std::inserter( output, output.begin() ),
    

    in place of

      std::insert_iterator< std::vector<double> >( output, output.begin() ),
    
  7. […] have previously studied algorithms for the streaming median and sliding median that calculate the median of a stream of numbers; the streaming median requires storage of all the […]

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: