# Adventures of a Programmer: Parser Writing Peril V

Part five and still no grammar? Oh, my…

Before I blow the dust off of libtomfloat I decided to try it with rationals first and armed myself with my good ol’ math-textbook from primary school and D. Knuth’s second volume of TAoCP.
I also try to get a bit more structure into it this time π

Apropos structure: the main data structure shall hold a fraction and a fraction consists of a numerator and a denominator, as every pupil knows. What not every pupil knows (well, some do!) is that there is a sign, too. So the structure is a C struct with some aptly named inhabitants:

typedef struct  {
int sign;
mp_int numerator;
mp_int denominator;
} mp_rat;


From the data type mp_int it is a reasonable assumption that I still use Libtommath as the big-number library. The name of this structure has been chosen very carefully—the ‘r’ sounds better after a ‘p’ than after a ‘f’.

The mp_int numbers need to get initialized before and freed after use, and so do the mp_rat numbers. The implementation makes use of the respective functions of libtommath to keep it simple.
Initialisation of a single mp_rat number is the simplest, we use libtommath’s mp_init_multi() directly.

int mpq_init(mp_rat *a){
int e;
if( (e = mp_init_multi(&a->numerator,&a->denominator,NULL) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}


initializing several rationals needs a bit more work, and the inclusion of stdarg.h, too. But that was all of the unpaid overtime, we can take the rest more or less verbatim from libtommath. Including the comments.

#include <stdarg.h>
int mpq_init_multi(mp_rat *mp, ...){
mp_err res = MP_OKAY;      /* Assume ok until proven otherwise */
int n = 0;                 /* Number of ok inits */
mp_rat* cur_arg = mp;
va_list args;

va_start(args, mp);        /* init args to next argument from caller */
while (cur_arg != NULL) {
if (mpq_init(cur_arg) != MP_OKAY) {
/* Oops - error! Back-track and mp_clear what we already
succeeded in init-ing, then return error.
*/
va_list clean_args;

/* end the current list */
va_end(args);

/* now start cleaning up */
cur_arg = mp;
va_start(clean_args, mp);
while (n--) {
mpq_clear(cur_arg);
cur_arg = va_arg(clean_args, mp_rat*);
}
va_end(clean_args);
res = MP_MEM;
break;
}
n++;
cur_arg = va_arg(args, mp_rat*);
}
va_end(args);
return res;                /* Assumed ok, if error flagged above. */
}


Freeing the numbers works similar. We take the liberty to not only steal code from libtommath but the names, too.

void mpq_clear(mp_rat *a){
mp_clear_multi(&a->numerator,&a->denominator,NULL);
a->sign = MP_ZPOS;
}

void mpq_clear_multi(mp_rat *mp, ...){
mp_rat* next_mp = mp;
va_list args;
va_start(args, mp);
while (next_mp != NULL) {
mpq_clear(next_mp);
next_mp = va_arg(args, mp_rat*);
}
va_end(args);
}


One of the most useful helper-functions is the printing function, of course, so let’s implement one.

void mpq_print(mp_rat * a){
/* If ypu wonder what the zero does, try it without */
printf("%c",(a->sign == MP_ZPOS)?0:'-');
mp_fwrite(&a->numerator,10,stdout);
putchar('/');
mp_fwrite(&a->denominator,10,stdout);
}


A reading function is not implemented, yet, at least not directly but it is easy to build one from the following functions. Let’s go from left to right because that’s how fractions get written in most of the western alphabets: ±x/y
So, at first, there need to be some functions to handle the sign.

/* sets sign of mp_rat a to sign given in "sign", returns old sign */
int mpq_set_sign(mp_rat *a, int sign){
int sgn;
sgn = a->sign;
a->sign = (sign < 0 || sign == MP_NEG)?MP_NEG:MP_ZPOS;
a->numerator.sign = MP_ZPOS;
a->denominator.sign = MP_ZPOS;
return sgn;
}
/* just for symmetry: returns the sign of mp_rat a */
int mpq_get_sign(mp_rat *a){
return a->sign;
}


We keep the numerator and the denominator strictly positive but that can can change in some of the calculations, so we need something to set the sign of mp_ratfrom the signs of the numerator and denominator.

void mpq_normalize_sign(mp_rat *a){
int sign_n, sign_d;
sign_n = a->numerator.sign;
sign_d = a->denominator.sign;
a->sign = (sign_n != sign_d)?MP_NEG:MP_ZPOS;
a->numerator.sign = MP_ZPOS;
a->denominator.sign = MP_ZPOS;
}


And while we are at it, we should implement negation, too:

int mpq_neg(mp_rat *a){
int sgn;
sgn =  mpq_get_sign(a);
sgn = (sgn == MP_NEG)?MP_ZPOS:MP_NEG;
a->numerator.sign = MP_ZPOS;
a->denominator.sign = MP_ZPOS;
return mpq_set_sign(a, sgn);
}


The next one to read is the numerator and after that the denominator. Reading the slash or whatever else denotes a fraction is not part of this library. But might be in the future, who knows?

/* sets given mp_int b as the numerator in mp_rat a */
int mpq_set_num(mp_rat *a, mp_int *b){
int e;
if( (e = mp_copy(b,&a->numerator) ) != MP_OKAY){
return e;
}
if( (e = mpq_reduce(a) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}

/* sets given mp_int b as the denominator in mp_rat a */
int mpq_set_den(mp_rat *a, mp_int *b){
int e;
if( (e = mp_copy(b,&a->denominator) ) != MP_OKAY){
return e;
}
if( (e = mpq_reduce(a) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}


And now all together because everybody just loooooves syntactic sugar!

/* sets given mp_int p,q as the numerator,denominator in mp_rat a */
int mpq_set(mp_rat *a, mp_int *p, mp_int *q){
int e;

if( (e = mp_copy(p,&a->numerator) ) != MP_OKAY){
return e;
}
if( (e = mp_copy(q,&a->denominator) ) != MP_OKAY){
return e;
}
if( (e = mpq_reduce(a) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}


More sweets? Ok, Halloween is near, so here are the methods and functions to handle normal C-99 data types, unsigned long as an example:

/* sets given unsigned long b as the denominator in mp_rat a */
int mpq_set_den_int(mp_rat *a, unsigned long b){
int e;

if( (e = mp_set_int(&a->denominator,b) ) != MP_OKAY){
return e;
}
if( (e = mpq_reduce(a) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}
/* sets given unsigned longs p,q as the numerator,denominator in mp_rat a */
int mpq_set_int(mp_rat *a, unsigned long p,unsigned long q){
int e;

if( (e = mp_set_int(&a->numerator,p) ) != MP_OKAY){
return e;
}
if( (e = mp_set_int(&a->denominator,q) ) != MP_OKAY){
return e;
}
if( (e = mpq_reduce(a) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}


You might have found out that one of the functions used mpq_reduce() which has not been handled, yet. That is correct. keeping the fractions in their reduced forms is essential to working with rationals, especially if done numerically. The numbers can get very large, very fast otherwise.

But before we come it we need two more functions to handle copying.
Deep copy:

int mpq_copy(mp_rat *a, mp_rat *b){
int e;
if( (e =    mp_copy(&a->numerator,&b->numerator) ) != MP_OKAY){
return e;
}
if( (e =    mp_copy(&a->denominator,&b->denominator) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}


Pointer exchange, only:

void mpq_exch(mp_rat *a, mp_rat *b ){
mp_exch(&a->numerator,&b->numerator);
mp_exch(&a->denominator,&b->denominator);
}


Reducing a fraction is simple but computationally expensive, so we not only need some checks and balances but some shortcuts, too. With the following equations in mind, we can write some of these short cuts.
$\displaystyle{ \frac{0}{x}= 0 }$

$\displaystyle{ \frac{x}{x}= 1 }$

But $\displaystyle{ \frac{0}{0}= \text{undefined} }$ because of $\displaystyle{ \frac{x}{0}= \text{undefined} }$

$\displaystyle{ \frac{x}{1}= 1 }$

All of the above can be used more often and are worth their own functions, we take three of them.

#define mp_isone(a) (((a)->used == 1 && (a)->dp[0] == 1) ? MP_YES : MP_NO)
int mpq_iszero(mp_rat *<span class="hiddenGrammarError" pre="returns "><span class="hiddenGrammarError" pre="returns ">a){
if</span></span>(     mp_iszero(&a->numerator)
&&  mp_isone(&a->denominator)){
return MP_YES;
}
return MP_NO;
}
int mpq_isone(mp_rat *a){
if(mp_cmp(&a->numerator,&a->denominator) == MP_EQ){
return MP_YES;
}
return MP_NO;
}
int mpq_isinteger(mp_rat *a){
if(mp_isone(&a->denominator)){
return MP_YES;
}
return MP_NO;
}


The actual reducing is simply
$\dfrac{p/\gcd\left(p,q\right)}{q/\gcd\left(p,q\right)}$

#define MPQ_DIVISION_BY_ZERO -997
int mpq_reduce(mp_rat *a){
mp_int gcd;
int e;
/* check if num is zero -> return 0 if it is*/
if(mp_iszero(&a->numerator)){
if( (e = mpq_set_int(a,0,1) ) != MP_OKAY){
return e;
}
if( (e = mpq_set_sign(a,MP_ZPOS) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}
/* check if den is 1 -> return if it is*/
if(mp_isone(&a->denominator)){
mpq_normalize_sign(a);
return MP_OKAY;
}
/* check if den is zero -> return undefined if it is*/
if(mp_iszero(&a->denominator)){
if( (e = mpq_set_int(a,0,0) ) != MP_OKAY){
return e;
}
return MPQ_DIVISION_BY_ZERO;
}
/* check if den = num -> return 1 */
if(mpq_isone(a)){
mpq_normalize_sign(a);
if( (e = mpq_set_int(a,1,1) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}
mp_init(&gcd);
/* compute gcd(num,den) */
if( (e = mp_gcd(&a->numerator,&a->denominator,&gcd) ) != MP_OKAY){
return e;
}
/* set num to num/gcd */
if( (e = mp_div(&a->numerator,&gcd,&a->numerator,NULL) ) != MP_OKAY){
return e;
}
/* set den to den/gcd */
if( (e = mp_div(&a->denominator,&gcd,&a->denominator,NULL) ) != MP_OKAY){
return e;
}
/* not to forget: */
mpq_normalize_sign(a);
return MP_OKAY;
}


It might not be the wish of the programmers using this library to do a full stop every time they approach a division by zero, it might be more useful sometimes (I have to admit, I couldn’t come up with a single example) to keep it and take it along the calculations so it might be a good idea (I doubt it, but who am I?) to write a test for it.

int mpq_isundefined(mp_rat *a){
if(     mp_iszero(&a->numerator)
&&  mp_iszero(&a->denominator)){
return MP_YES;
}
return MP_NO;
}


Ok, that were the functions for the household chores. Now for the real stuff.
The basic computations one should be able to do are:

• Subtraction
• Multiplication
• Division

Funnily, the simplest thing to do in integer calculation is the most complicated when handling fractions and vice versa.

int mpq_add(mp_rat *a, mp_rat *b, mp_rat *c){
int e;
mp_int gcd,tmp;
if(mp_iszero(&a->numerator)){
if( (e = mpq_copy(b,c) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}
if(mp_iszero(&b->numerator)){
if( (e = mpq_copy(a,c) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}
/* let libtommath take care of the signs */
(&a->numerator)->sign = a->sign;
(&b->numerator)->sign = b->sign;
if(mpq_isinteger(a) && mpq_isinteger(a) ){
if( (e = mp_add(&a->numerator,&b->numerator,&c->numerator) ) != MP_OKAY){
return e;
}
c->sign = (&c->numerator)->sign;
(&a->numerator)->sign = MP_ZPOS;
(&b->numerator)->sign = MP_ZPOS;
(&c->numerator)->sign = MP_ZPOS;
(&c->denominator)->sign = MP_ZPOS;
if( (e = mp_set_int(&c->denominator,1) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}
mp_init_multi(&gcd,&tmp,NULL);
/*gcd = gcd(b,d)*/
if( (e = mp_gcd(&a->denominator,&b->denominator,&gcd) ) != MP_OKAY){
return e;
}
/*c->numerator = ((A*D)/gcd) + ((C*B)/gcd)*/
/*c->numerator = ((A*D)/gcd) */
if( (e = mp_mul(&a->numerator,&b->denominator,&tmp) ) != MP_OKAY){
return e;
}
if( (e = mp_div(&tmp,&gcd,&c->numerator,NULL) ) != MP_OKAY){
return e;
}

/* ((C*B)/gcd)*/
if( (e = mp_mul(&b->numerator,&a->denominator,&tmp) ) != MP_OKAY){
return e;
}
if( (e = mp_div(&tmp,&gcd,&tmp,NULL) ) != MP_OKAY){
return e;
}
if( (e = mp_add(&tmp,&c->numerator,&c->numerator) ) != MP_OKAY){
return e;
}
if(mp_iszero(&c->numerator)){
if( (e = mp_set_int(&c->denominator,1) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}
/* c->denominator = (max(B,D)/gcd) * (min(B,D))*/
if( (e = mp_div(mp_max(&a->denominator, &b->denominator),&gcd,&tmp,NULL) )
!= MP_OKAY){
return e;
}
if( (e = mp_mul(&tmp, mp_min(&a->denominator, &b->denominator),
&c->denominator) ) != MP_OKAY){
return e;
}
c->sign = (&c->numerator)->sign;
if( (e = mpq_reduce(c) ) != MP_OKAY){
return e;
}
(&a->numerator)->sign = MP_ZPOS;
(&b->numerator)->sign = MP_ZPOS;
mp_clear_multi(&gcd,&tmp,NULL);
return MP_OKAY;
}


You remember it now? π
The two functions min() and max() respectively are additions of me to libtommath and are merged into my fork but they are short, so here they are.

/* returns a if equal, min value otherwise */
mp_int *mp_min(mp_int *a, mp_int *b){
int r;
r = mp_cmp(a,b);
if(r == MP_EQ ||  r == MP_LT){
return a;
}
return b;
}
/* returns a if equal, max value otherwise */
mp_int *mp_max(mp_int *a, mp_int *b){
int r;
r = mp_cmp(a,b);
if(r == MP_EQ || r == MP_GT ){
return a;
}
return b;
}


Subtraction, on the other side, is much simpler if we use addition instead, relying on $a- b = -a +b = a + (-b)$. We do not use deep copies of the arguments of the functions so we need not to forget to reverse every modifications we do to them as we have done so, although not commented, above.

int mpq_sub(mp_rat *a, mp_rat *b, mp_rat *c){
int e;
if(mp_iszero(&a->numerator)){
if( (e = mpq_neg(b) ) != MP_OKAY){
return e;
}
if( (e = mpq_copy(b,c) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}
if(mp_iszero(&b->numerator)){
if( (e = mpq_copy(a,c) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}

if(mpq_isinteger(a) && mpq_isinteger(a) ){
(&a->numerator)->sign = a->sign;
(&b->numerator)->sign = b->sign;
if( (e = mp_sub(&a->numerator,&b->numerator,&c->numerator) ) != MP_OKAY){
return e;
}
c->sign = (&c->numerator)->sign;
(&c->numerator)->sign = MP_ZPOS;
if( (e = mp_set_int(&c->denominator,1) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}

/* negate(b) */
if( (e = mpq_neg(b) ) != MP_OKAY){
return e;
}
/* c = a+ (-b) */
if( (e = mpq_add(a,b,c) ) != MP_OKAY){
return e;
}
/* reverse negation */
if( (e = mpq_neg(b) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}


Multiplication is even simpler

int mpq_mul(mp_rat *a, mp_rat *b, mp_rat *c){
int e;
if(mp_iszero(&a->numerator) || mp_iszero(&b->numerator)){
if( (e = mpq_set_int(c,0,1) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}
if( (e = mp_mul(&a->numerator,&b->numerator,&c->numerator) ) != MP_OKAY){
return e;
}
if( (e = mp_mul(&a->denominator,&b->denominator,&c->denominator) ) != MP_OKAY){
return e;
}
if( (e = mpq_reduce(c) ) != MP_OKAY){
return e;
}
c->sign = (a->sign != b->sign)?MP_NEG:MP_ZPOS;
return MP_OKAY;
}


And the simplest is, believe it or not, division.

int mpq_div(mp_rat *a, mp_rat *b, mp_rat *c){
int e;
if(mp_iszero(&a->numerator)){
if( (e = mpq_set_int(c,0,1) ) != MP_OKAY){
return e;
}
return MP_OKAY;
}
if(mp_iszero(&b->numerator)){
return MPQ_DIVISION_BY_ZERO;
}
if( (e = mpq_inverse(b) ) != MP_OKAY){
return e;
}
if( (e = mpq_mul(a,b,c) ) != MP_OKAY){
return e;
}
if( (e = mpq_inverse(b) ) != MP_OKAY){
return e;
}
if( (e = mpq_reduce(c) ) != MP_OKAY){
return e;
}
c->sign = (a->sign != b->sign)?MP_NEG:MP_ZPOS;
return MP_OKAY;
}


What I seem to have forgotten in the Tools section (don’t look for it, there is no explicitly named Tools section) is a method to compare two rationals. If comparing two big numbers is already a computationally expensive method if none of the shortcuts hit, comparing two rationals is even more so. The only way to do it for two fully reduced fractions is by way of the lowest common denominator, although the full algorithm is not needed, only the multiplication part.

Given two rationals $Q_1 = \frac{p_1}{q_1}$ and $Q_2 = \frac{p_2}{q_2}$ with $p_1 \nmid q_1$ and $p_2 \nmid q_2$ with a difference in magnitude $Q_1 - Q_2 = Q_m$ the sign of the difference ${\mathop{\mathrm{sgn}}}\; Q_m$ can be evaluated by multiplying out $P_1 = p_1q_2$ and $P_2 = p_2q_1$ and comparing $P_1$ and $P_2$.

It is should be obvious from the above that the multiplications can get quite costly if the involved parts of the fractions are very large!

Besides of the normal shortcuts by looking at the individual signs we can also use the fact that we are only interested in the magnitude of the results and not the exact values. If the magnitudes of the intermediary results are large enough in difference we don’t need to do the expensive multiplications.
I am using the number of digits here but one can go more fine-grained with the indices of the highest bits set.

int mpq_cmp(mp_rat *a, mp_rat *b){
mp_int p1,p2;
int e,wnuma,wdena,wnumb,wdenb,s1,s2;

/* a == b, actually the very same number */
if(a == b){
return MP_EQ;
}
/* a < b */
if(a->sign == MP_NEG && b->sign == MP_ZPOS){
return MP_LT;
}
/* a > b */
if(a->sign == MP_ZPOS && b->sign == MP_NEG){
return MP_GT;
}
/* some more shortcuts */
if(mpq_iszero(a)){
/* a == b (both zero) */
if(mpq_iszero(b)){
return MP_EQ;
}
/* a < b */
if(b->sign == MP_ZPOS){
return MP_LT;
}
/* a > b */
return MP_GT;
}
if(mpq_iszero(b)){
/* a > b */
if(a->sign == MP_ZPOS){
return MP_GT;
}
/* a < b */
return MP_LT;
}

if(mpq_isinteger(a) && mpq_isinteger(b)){
return mp_cmp(&a->numerator,&b->numerator);
}

/*
We can use a rough approximation of the result because we are only
interested in the magnitudes.
*/
wnuma = (&a->numerator)->used;
wdena = (&a->denominator)->used;
wnumb = (&b->numerator)->used;
wdenb = (&b->denominator)->used;
s1 = wnuma + wdenb;
s2 = wnumb + wdena;
if(s1 < s2 - 1){
return ( (a->sign == MP_NEG)?MP_ZPOS: MP_NEG);
}
if(s2 < s1 - 1){
return ( (a->sign == MP_NEG)?MP_NEG: MP_ZPOS);
}
/* This is gonna be brutal, so don't use cmp() frivolously! */
mp_init_multi(&p1,&p2,NULL);
if( (e = mp_mul(&a->numerator,&b->denominator,&p1)) != MP_OKAY){
return e;
}
if( (e = mp_mul(&b->numerator,&a->denominator,&p2)) != MP_OKAY){
mp_clear(&p2);
return e;
}

e = mp_cmp(&p1,&p2);
mp_clear_multi(&p1,&p2,NULL);
if(a->sign == MP_NEG){
return ( (e == MP_ZPOS)?MP_NEG: MP_ZPOS );
}
return e;
}


To test what we did above and if I didn’t any of my infamous C&P errors:

int main (int argc, char **argv){

mp_int num1,den1,num2,den2;
mp_rat q1,q2,q3,q4;

if (argc < 5) {
fprintf(stderr, "usage: %s integer integer integer integer \n", argv[0]);
exit(EXIT_FAILURE);
}

mp_init_multi(&num1,&den1,&num2,&den2,NULL);

mp_get_str(argv[1], &num1, 10);
mp_get_str(argv[2], &den1, 10);
mp_get_str(argv[3], &num2, 10);
mp_get_str(argv[4], &den2, 10);

printf("Numerator   1: ");mp_print(&num1);puts("");
printf("Denominator 1: ");mp_print(&den1);puts("");
printf("Numerator   2: ");mp_print(&num2);puts("");
printf("Denominator 2: ");mp_print(&den2);puts("");

mpq_init_multi(&q1,&q2,&q3,&q4,NULL);puts("000");

mpq_set(&q1,&num1,&den1);puts("111");
printf("Rational1: ");mpq_print(&q1);puts("");
mpq_set(&q2,&num2,&den2);puts("222");
printf("Rational2: ");mpq_print(&q2);puts("");

printf("R1 + R2 = ");mpq_print(&q3);puts("");
mpq_sub(&q1,&q2,&q3);
printf("R1 - R2 = ");mpq_print(&q3);puts("");
mpq_mul(&q1,&q2,&q3);
printf("R1 * R2 = ");mpq_print(&q3);puts("");
mpq_div(&q1,&q2,&q3);
printf("R1 / R2 = ");mpq_print(&q3);puts("");
printf("cmp(R1, R2) = %d\n",mpq_cmp(&q1,&q2));
printf("cmp(R2, R1) = %d\n",mpq_cmp(&q2,&q1));
printf("cmp(R1, R1) = %d\n",mpq_cmp(&q1,&q1));
exit(EXIT_SUCCESS);
}


The function mp_get_str() is a simple wrapper:

int mp_get_str(const char *str, mp_int * a, int radix)
{