Top Five Test Scores

July 3, 2018

We begin by creating a data set:

(define scores '((Bob 78) (Sue 75) (Bill 83) (Jill 88) (Joe 58)
  (Bob 93) (Sue 84) (Bill 81) (Jill 91) (Joe 75) (Bob 62) (Sue 93)
  (Bill 91) (Jill 99) (Joe 62) (Bob 92) (Sue 88) (Bill 88) (Jill 76)
  (Joe 61) (Bob 87) (Sue 92) (Bill 87) (Jill 85) (Joe 69) (Bob 85)
  (Sue 87) (Bill 93) (Jill 96) (Joe 65) (Bob 72) (Sue 88) (Bill 85)
  (Jill 83) (Joe 71)))

Our algorithm is to collect all the scores in an R6RS hash table keyed by student name with a list of scores for each student, then for each student sort the scores, take the top five, and report the result:

(define (final-score scores)
  (let ((ht (make-eq-hashtable)))
    (do ((scores scores (cdr scores)))
        ((null? scores)
          (call-with-values
            (lambda () (hashtable-entries ht))
            (lambda (names score-lists)
              (map (lambda (name score-list)
                     (cons name (/ (sum (take 5 (sort > score-list))) 5.0)))
                   (vector->list names) (vector->list score-lists)))))
      (hashtable-update! ht
        (caar scores)
        (lambda (score) (cons (cadar scores) score))
        '()))))

There are other ways to do that. You could use an association list instead of a hash table, or some kind of ordered tree. Instead of collecting all the scores in a list, you could collect the scores in a priority queue of fixed length, thus storing only five scores at any time. Or, since the number of stores test scores is so small, you could keep them in a sorted array and shift the array items as new scores arrive. And there are probably dozens of other alternatives, which I’m sure all of you clever folks will discuss in the comments. Here’s the output:

> (final-score scores)
((Bill . 88.8) (Bob . 87.0) (Jill . 91.8) (Sue . 89.6) (Joe . 68.4))

You can run the program at https://ideone.com/3a9opt.

Posted by programmingpraxis

Filed in Exercises

9 Comments »

9 Responses to “Top Five Test Scores”

Rutger said

July 3, 2018 at 10:49 AM

In Python, using a heapq for keeping the best 5 scores on the go. Should be more efficient than sorting the all results per student afterwards.

Also works for students with less than 5 scores (student3 example), although that might not be really fair :)

import heapq

scores = [  ('student1', 1),
			('student1', 2),
			('student1', 3),
			('student1', 4),
			('student1', 5),
			('student1', 6),
			('student2', 1),
			('student2', 3),
			('student2', 3),
			('student2', 3),
			('student2', 3),
			('student2', 4),
			('student3', 3),
			('student3', 4)
			]

studentheaps = {}
for student, score in scores:
	if student not in studentheaps:
		studentheaps[student] = []
	if len(studentheaps[student]) < 5:
		heapq.heappush(studentheaps[student], score)
	else:
		heapq.heappushpop(studentheaps[student], score)

for student in studentheaps:
	l = studentheaps[student]
	print(student, sum(l) / float(len(l)))

# output
# student1 4.0
# student2 3.2
# student3 3.5

chaw said

July 3, 2018 at 12:03 PM

Here is solution that I believe is asymptotically optimal O(nlog(k)),
with the usual hashing caveat. It uses R7RS Scheme, hash tables from
SRFI 69, and priority queues from SLIB. We could also replace the
hash table with a balanced-tree data structure to get guaranteed
O(nlog(n)) and output sorted by names if needed.


(import (scheme base)
        (scheme write)
        (srfi 69)
        (slib priority-queue))

(define scores
  '((Bob 78) (Sue 75) (Bill 83) (Jill 88) (Joe 58)
    (Bob 93) (Sue 84) (Bill 81) (Jill 91) (Joe 75) (Bob 62) (Sue 93)
    (Bill 91) (Jill 99) (Joe 62) (Bob 92) (Sue 88) (Bill 88) (Jill 76)
    (Joe 61) (Bob 87) (Sue 92) (Bill 87) (Jill 85) (Joe 69) (Bob 85)
    (Sue 87) (Bill 93) (Jill 96) (Joe 65) (Bob 72) (Sue 88) (Bill 85)
    (Jill 83) (Joe 71)))

(define (heap-extract-max-k heap k)
  (let loop ((i 0)
             (top-i '()))
    (if (or (>= i k)
            (zero? (heap-length heap)))
        top-i
        (loop (+ i 1)
              (cons (heap-extract-max! heap)
                    top-i)))))

(define (top-k-by-name k name-item-duos name=? name-hasher item>?)
  (let ((ht (make-hash-table name=? name-hasher)))
    (for-each (lambda (name-item)
                (hash-table-update!/default ht
                                            (car name-item)
                                            (lambda (heap)
                                              (heap-insert! heap
                                                            (cadr name-item))
                                              (when (> (heap-length heap) k)
                                                (heap-extract-max! heap))
                                              heap)
                                            (make-heap item>?)))
              name-item-duos)
    (hash-table-fold ht
                     (lambda (name heap ans)
                       (cons (list name (heap-extract-max-k heap k))
                             ans))
                     '())))

(define (list-mean nums)
  (let ((n (length nums)))
    (and (positive? n)
         (/ (apply + nums) n))))

(display (map (lambda (duo)
                (list (car duo)
                      (inexact (list-mean (cadr duo)))))
              (top-k-by-name 5 scores symbol=? hash >)))
(newline)


((Joe 68.4) (Jill 91.8) (Bill 88.8) (Sue 89.6) (Bob 87.0))

Steve said

July 3, 2018 at 6:19 PM

Klong version

l::[["Bob" 78] ["Sue" 75] ["Bill" 83] ["Jill" 88] ["Joe" 58] ["Bob" 93] ["Sue" 84] ["Bill" 81] ["Jill" 91] ["Joe" 75] ["Bob" 62] ["Sue" 93] ["Bill" 91] ["Jill" 99] ["Joe" 62] ["Bob" 92] ["Sue" 88] ["Bill" 88] ["Jill" 76] ["Joe" 61] ["Bob" 87] ["Sue" 92] ["Bill" 87] ["Jill" 85] ["Joe" 69] ["Bob" 85] ["Sue" 87] ["Bill" 93] ["Jill" 96] ["Joe" 65] ["Bob" 72] ["Sue" 88] ["Bill" 85] ["Jill" 83] ["Joe" 71]]

build::{:[:_d?x; d,x,y; {d,x,,((d?x),,y)}(x; y)]}

d:::{}; {build(x@0; x@1)}'l; "Building person-indexed dictionary"

gs::{[s l]; s::0; l::x; {s::s+l@x}'!5; s%5}

{.p((x@0)," = ",gs(l@>l::x@1))}'d;"end"

steve@steve-VirtualBox:~/klong/lib$ rlwrap ../kg -l scores.kg
[Bill  =  88.8]
[Sue  =  89.6]
[Bob  =  87.0]
[Jill  =  91.8]
[Joe  =  68.4]
        Klong 20180213

bookofstevegraham said

July 3, 2018 at 6:24 PM

Mumps version

vista@steve-VirtualBox:~/EHR/r$ cat scores.m 
scores(list)    ; get average of top 5 scores for each student
        ;
        n arr,glo,i,j,n,s,t
        ;
        f i=1:1:$l(list,"|") d
	. s n=$p(list,"|",i),s=$p(n,",",2),n=$p(n,",")
	. s arr(n,-s,$i(arr(n,-s)))="" ; Allow for duplicate scores
        ;
        w !,"Average of top 5 scores per student"
	s n=""
        f  s n=$o(arr(n)) q:n=""  d
        . s glo="arr("""_n_""")",(i,t)=0
        . f  s glo=$q(@glo) q:glo=""!($qs(glo,1)'=n)  i $ql(glo)=3 s s=$qs(glo,2),t=t+s q:$i(i)=5  ; Allow for duplicate scores
        . w !,n,": ",-t/5.0
	q
vista@steve-VirtualBox:~/EHR/r$ gtm

GTM>s list="Bob,78|Sue,75|Bill,83|Jill,88|Joe,58|Bob,93|Sue,84|Bill,81|Jill,91|Joe,75|Bob,62|Sue,93|Bill,91|Jill,99|Joe,62|Bob,92|Sue,88|Bill,88|Jill,76|Joe,61|Bob,87|Sue,92|Bill,87|Jill,85|Joe,69|Bob,85|Sue,87|Bill,93|Jill,96|Joe,65|Bob,72|Sue,88|Bill,85|Jill,83|Joe,71"

GTM>d ^scores(list)

Average of top 5 scores per student
Bill: 88.8
Bob: 87
Jill: 91.8
Joe: 68.4
Sue: 89.6
GTM>

Gambiteer said
July 3, 2018 at 8:36 PM
@programmingpraxis: In Scheme, numbers should be compared with eqv?, not eq?.

Daniel said

July 4, 2018 at 11:42 PM

Here’s a solution in C++11.

#include <algorithm>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <iostream>
#include <numeric>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using namespace std;

void average_top_scores(
        const unsigned num_top_scores,
        const vector<pair<string,int>>& data,
        unordered_map<string,float>* averages) {
    unordered_map<string,vector<int>> top_scores_lookup;
    for (pair<string,int> item : data) {
        string name = item.first;
        int score = item.second;
        vector<int>& scores = top_scores_lookup[name];
        if (scores.size() == num_top_scores && scores[0] < score) {
            pop_heap(scores.begin(), scores.end(), greater<int>());
            scores.pop_back();
        }
        if (scores.size() < num_top_scores) {
            scores.push_back(score);
            push_heap(scores.begin(), scores.end(), greater<int>());
        }
    }
    for (pair<string,vector<int>> item : top_scores_lookup) {
        vector<int>& scores = item.second;
        float average = accumulate(scores.begin(), scores.end(), 0.0) / scores.size();
        (*averages)[item.first] = average;
    }
}

int main(void) {
    vector<pair<string,int>> data = {
        {"Bob", 78}, {"Sue", 75}, {"Bill", 83}, {"Jill", 88}, {"Joe", 58},
        {"Bob", 93}, {"Sue", 84}, {"Bill", 81}, {"Jill", 91}, {"Joe", 75},
        {"Bob", 62}, {"Sue", 93}, {"Bill", 91}, {"Jill", 99}, {"Joe", 62},
        {"Bob", 92}, {"Sue", 88}, {"Bill", 88}, {"Jill", 76}, {"Joe", 61},
        {"Bob", 87}, {"Sue", 92}, {"Bill", 87}, {"Jill", 85}, {"Joe", 69},
        {"Bob", 85}, {"Sue", 87}, {"Bill", 93}, {"Jill", 96}, {"Joe", 65},
        {"Bob", 72}, {"Sue", 88}, {"Bill", 85}, {"Jill", 83}, {"Joe", 71}
    };
    unordered_map<string,float> averages;
    average_top_scores(5, data, &averages);
    for (pair<string,float> item : averages) {
        cout << item.first << ": " << fixed << setprecision(1) << item.second << endl;
    }
    return EXIT_SUCCESS;
}

Output:

Bob: 87.0
Sue: 89.6
Jill: 91.8
Bill: 88.8
Joe: 68.4

James Curtis-Smith said

July 5, 2018 at 9:18 AM

Probably my longest Perl ever here – we need to quickly implement an ordered queue to keep track of the top 5 scores for each candidate.

use strict;
use warnings;

my @input = (
  ['Bob', 78], ['Sue', 75], ['Bill', 83], ['Jill', 88], ['Joe', 58],
  ['Bob', 93], ['Sue', 84], ['Bill', 81], ['Jill', 91], ['Joe', 75],
  ['Bob', 62], ['Sue', 93], ['Bill', 91], ['Jill', 99], ['Joe', 62],
  ['Bob', 92], ['Sue', 88], ['Bill', 88], ['Jill', 76], ['Joe', 61],
  ['Bob', 87], ['Sue', 92], ['Bill', 87], ['Jill', 85], ['Joe', 69],
  ['Bob', 85], ['Sue', 87], ['Bill', 93], ['Jill', 96], ['Joe', 65],
  ['Bob', 72], ['Sue', 88], ['Bill', 85], ['Jill', 83], ['Joe', 71]
);
my ($N,$t) = (5 - 1,0);
my %scores;
foreach( @input ) {
  my($name,$v) = @{$_};
  foreach(0..$N) {
    next if $_ < @{$scores{$name}||[]} && $v < $scores{$name}[$_];
    splice @{$scores{$name}},$_,0,$v; ## Put in correct place
    splice @{$scores{$name}},$N+1;    ## Trim if longer than 5
    last;
  }
}
foreach my $name ( sort keys %scores ) {
  $t = 0;
  $t += $_ foreach @{$scores{$name}};
  printf "%7.3f\t%s\n", $t/@{$scores{$name}}, $name;
}

James Curtis-Smith said

July 5, 2018 at 9:35 AM

I think I prefer to sort by score descending – so we will have one sort here!

foreach my $name ( sort keys %scores ) {
  $t = 0;
  $t += $_ foreach @{$scores{$name}};
  $scores{$name} = $t/@{$scores{$name}};
}
foreach my $name ( reverse sort { $scores{$a} <=> $scores{$b} } keys %scores ) {
  printf "%7.3f\t%s\n", $scores{$name}, $name;
}

Globules said

July 15, 2018 at 4:28 AM

A Haskell version.

import Control.Foldl (fold, mean)
import Data.Function (on)
import Data.List (groupBy, sortBy)
import Data.Monoid ((<>))
import Data.Ord (comparing)
import Text.Printf

type Score = (String, Double)

scores :: [Score]
scores = [ ("Bob", 78), ("Sue", 75), ("Bill", 83), ("Jill", 88), ("Joe", 58)
         , ("Bob", 93), ("Sue", 84), ("Bill", 81), ("Jill", 91), ("Joe", 75)
         , ("Bob", 62), ("Sue", 93), ("Bill", 91), ("Jill", 99), ("Joe", 62)
         , ("Bob", 92), ("Sue", 88), ("Bill", 88), ("Jill", 76), ("Joe", 61)
         , ("Bob", 87), ("Sue", 92), ("Bill", 87), ("Jill", 85), ("Joe", 69)
         , ("Bob", 85), ("Sue", 87), ("Bill", 93), ("Jill", 96), ("Joe", 65)
         , ("Bob", 72), ("Sue", 88), ("Bill", 85), ("Jill", 83), ("Joe", 71)]

-- Given a list of scores for students, return the final score for each student,
-- based on their top n scores.  We assume n > 0.
finalScores :: Int -> [Score] -> [Score]
finalScores n = map (finalScore . take n) . groupBy ((==) `on` fst) . sortBy cmp
  where cmp ts1 ts2 = comparing fst ts1 ts2 <> comparing snd ts2 ts1
  
-- Given a non-empty list of scores for one student, return their final score.
finalScore :: [Score] -> Score
finalScore tss@((name, _) : _) = (name, fold mean $ map snd tss)

main :: IO ()
main = mapM_ (uncurry $ printf "%-4s %5.1f\n") $ finalScores 5 scores

$ ./top5scores
Bill  88.8
Bob   87.0
Jill  91.8
Joe   68.4
Sue   89.6

Programming Praxis