Three In A Row
January 9, 2018
In today’s exercise we are given a file and asked to find the userid’s of customers that appeared on three successive days. The input file contains a date in MM/DD/YYYY format, a tab character, and a userid (a four-digit integer):
01/11/2018\t0003 01/12/2018\t0003 01/13/2018\t0004 01/13/2018\t0003
In this case, customer 3 appeared on three successive days, 1/11, 1/12 and 1/13. You may assume the input is properly formed.
Your task is to write a program that finds customers who appeared on three successive days. When you are finished, you are welcome to read or run a suggested solution, or to post your own solution or discuss the exercise in the comments below.
Here’s a solution in C.
I’ve assumed the input is properly formatted, as that assumption was part of the problem. It wasn’t specified whether there would be duplicate entries (e.g., a customer appears multiple times on the same day), so I’ve handled that scenario in the code.
/* three_in_a_row.c */ #include <stdbool.h> #include <stdio.h> #include <stdlib.h> #define CHARS_PER_LINE 16 struct record { int julian_date; int userid; }; typedef struct record record_t; int calc_julian(int year, int month, int day) { --year; // astronomical year numbering int term1 = (1461*(year+4800)+(month-14)/12)/4; int term2 = (367*(month-2-12*((month-14)/12)))/12; int term3 = (-3 * ((year+4900+(month-14)/12)/100))/4; int term4 = day - 32075; return term1 + term2 + term3 + term4; } void parse_records(FILE* fp, record_t* records, long n) { for (int i = 0; i < n; ++i) { int year, month, day; int userid; fscanf(fp, "%2d/%2d/%4d\t%4d\n", &month, &day, &year, &userid); record_t rec; rec.julian_date = calc_julian(year, month, day); rec.userid = userid; records[i] = rec; } } // Compare by user id first, then by julian date. int compare_records(const void* a, const void* b) { record_t rec1 = *(record_t*)a; record_t rec2 = *(record_t*)b; int output = rec1.userid - rec2.userid; if (output == 0) output = rec1.julian_date - rec2.julian_date; return output; } // Calculates users that appeared on three successive days. // The records array is destructively sorted. // Allocated memory should be free'd by calling free_three_in_a_row. void calc_three_in_a_row(record_t* records, size_t n_records, int** usersp, size_t* n_usersp) { qsort(records, n_records, sizeof(record_t), compare_records); size_t n_users = 0; int* users = NULL; int count = 1; for (size_t i = 1; i < n_records; ++i) { record_t current = records[i]; record_t last = records[i-1]; if (current.userid != last.userid) { count = 1; } else if (current.julian_date == last.julian_date) { continue; } else if (current.julian_date == last.julian_date + 1) { ++count; } else { count = 1; } // Add user if this is the user appeared on three successive days and // hasn't been added yet. bool new_user = n_users == 0 || users[n_users-1] != current.userid; if (count == 3 && new_user) { ++n_users; users = realloc(users, n_users * sizeof(int)); users[n_users-1] = current.userid; } } *n_usersp = n_users; *usersp = users; } void free_three_in_a_row(int** usersp) { free(*usersp); *usersp = NULL; } int main(int argc, char* argv[]) { if (argc != 2) { fprintf(stderr, "Usage: three_in_a_row <FILE>\n"); return EXIT_FAILURE; } FILE* fp = fopen(argv[1], "r"); if (!fp) { fprintf(stderr, "%s: %s: Error reading file\n", argv[0], argv[1]); return EXIT_FAILURE; } fseek(fp, 0, SEEK_END); long n_bytes = ftell(fp); rewind(fp); size_t n_records = n_bytes / CHARS_PER_LINE; record_t* records = (record_t*)malloc(sizeof(record_t)*n_records); parse_records(fp, records, n_records); fclose(fp); int* users; size_t n_users; calc_three_in_a_row(records, n_records, &users, &n_users); for (size_t i = 0; i < n_users; ++i) { printf("%d\n", users[i]); } free_three_in_a_row(&users); free(records); return EXIT_SUCCESS; }Usage:
Output:
There’s a bug in my calc_julian function above. The second sum in term1, along with its operands, should be enclosed in parentheses. Here’s a corrected version of that function.
int calc_julian(int year, int month, int day) { int m1 = (month-14)/12; int y1 = year+4800; int term1 = 1461*(y1+m1)/4; int term2 = (367*(month-2-12*m1))/12; int term3 = (-3*((y1+m1+100)/100))/4; int term4 = day-32075; return term1+term2+term3+term4; }