#define x 0/**/
char*v,*y="33Yb59@&iBFApt;[[h3V19\\3<:4cJ!U 2eT18pC,Qqik4J:sh?HUXMrR(-l0R\"!eKZcI!@E(@B,C/*!aogn5LbK/e=2CmReb+6,]kD!iOC9DEOC9Dc1EV6976c?&s)Be;P;E^tl2eUYkg*#Yf:6^d[Mg_P;VGCr823^L_<X+j2,%nD20Ls lmpi&I(*hV=+p aTO`r.b1<i[/R\\t1,KBt)\\%;\\@27H>^#d6B!tb-on27d%i_OS5(W5eV-=M14kYO);Fe7k!N41<iX*T,kHW,&)_l&L)'0:Jj%j7^h+JU /9pZn&Td:&T%'TE<7>LW%m/R\\rON3-=G]^akjT778YCJ7B8e-5E#RX R=Ig8#/pDdAI;=a[ ISbp't+ZLJ;lUO71C)b5[Y)qTWmFJ)G1ehmS<.`n3RnE IG+G_A`CE=?hZU)bScgt7R3GNs+V(HQLL_R)n4;]#cUR.p>5!^4T3pQg^o//WLATCE18mSUme[Q<53e:')Q_%<L$1lKOnFD(R3%*jj85VW+#8Wt*Ud,1D7AKcdh<9r%igC$2</HD7X$K_0Rr/>L.*D2%;[0B+#8UANT1.tSd/^@L$&a6^g@jYNMC7O<rPWO5AfA;C'9WnLn9E:0d:R\\hAZ^m=/09d.R$Jd%:Gp hB-g4&N!VO)$;iED1:F`^`UrnWCSZ!L$Tdma!_hn7H0F$^MT(-Ln9F_Ljfp9*h8Er!<.g.h_7@b0l03?a+]`=dIoY>WCKOk&#[9FCQ#WFoga(tK<[PG6@5k2KY\\ oW':M;e//kd\"[l,_V?UlZ,m(Hh?O81mhM!G18 ]7m?X7e^[7EZYj[;kR,YXD\\X,!k6.HF8Z(V^^nBYea^NIL:]lG0(/\\Ik<m`>jPam;F-A,(tVN+bG@<L'J 1D8dE*hN87:TsccSKVE7KP+k8^5qZIkJG_fH_$MS--5(!*St/1:b+g8(/*",*T,*n=" $6I7H#v Jw\t{\v}\f;\t \v }\f }\f \f \f \v }\f \v\t } }\t ;\f }\f \f\t {\f\v } \f\t }\f ;\t \f\f \t\t {\f {\v\v \t \v\v\v \f }\v\v } }\v\v \f\v\v \f\v\v ;\v\v \v ;\v\v \t\v\v }\v\v \v\t ;\v\v \t\v\v }\v\v }\v\v ;\f }\t }\t }\v\v ;\f }\t \f\f \v\t \f\f \t\v\v }\v\v ;\v\v \t\v\v }\v\v {\t \f\f \f\v\v \t\v\v }\t ;\f }\f }\v\v \t\v\v ; \f\f ;\f }\f \t\f }\f ;\v\v \f\v\v {\t }\f \f\v\v \v {\f\v }\v\v ;\v\v }\f {\f\v ;\t {\f\v \f\v\v ;\t {\f\v \f\t }\f \v ;\v\v ;\v\v ;\t\v {\v\v {\f\v }\f\v }\v\v \t\f\v ;\v\v \t\v\v ;\v\v {\f\v \t\v\v } \f\v\v \f\f\v {\v} \f\v\v }\v\v \v\v} }\f \f\f {\t\v ;\f \f\f \v {\t \f\f \t\f ;\f } {\t \t\f } }\f \v\f} }\t \f \f \f\f ;\v} ;\t \f\t }\f }\t }\t\v \f\v} }\f} }\f \v\f} \f\t }\t {\f} :!#$%&*+./<=>?@\\^|-~\t\t\t \v\v\v \f\t \n\t\v\f} {\v\v {\v\v \f\f }\f --\t\t\t \f\t ;\f} ;\f \v\t ;\f }\f \v\t }\f \f\t \v\f\v ;\v\v {\f\v {\f\v \v\f\v } }\v\v \f\v\v }\f {\f\v \v\f} \t\v\v ;\v\v \t\t }\v\t \t\v\v \v\v\v \\'\"\t\t\f} {\f\v ;\f \t\t }\v{\f \v\v\v }\t} }\v\v \v\v\v \t\f} ;\v\f \v\f} }\f} \f\f\v \f\v\f }\f} }\f :\t\f\v} \v\t \v\v\v }\f }\t }\f \v\v\t \f\v\t }\f\v \v\f\v {\f\t \f\t{\f \v\f\v ;\f\f \v\v\t \f\v\f \t\v\t of\t\v\v\v ;\t\v \f\t\v } \f\f {\t\v }\t\v }\t\v \f\f \f\t }\f\v \f\v} {\f\v \t\v\f \f\f }\f \f\v\v ;\t\v \t\v\v \v\t {\f\t {\f\t }\f }\t ;\v} \t\v} {\v} ;\f }\f \f\f }\t\f \t\v\f }\v} \f\v} \f\v\f \f\f \v\v\v ;\t }\f} \t\v\v \t\f\v {\t ->\t\v\v\v \f\t{\f \t\t{\f \t\v\f \v\f\v \v\t\v ;\t \f\v} ;\t \f\f} }\f} \f\v; \t\f} \v\f\v \t\f\f }\t \f\f} }\f} \v\t \t\v} \t\t {\f} \t\f\v \v\f\f \f\f} \v\v; }\f\f \t\v} }\f} }\f} }\f} {\f\v }\v} \f\v} \f\v} ;\f\v \f\v} \v\f\v {\f\v }\v} ;\t\v ;\t\v {\f} ;\f\v ;\t\v \f\f} ;\f\f \f\f} }\v; {\t} {\f \t ;\f ; \v\t\v \t\v} \f\f\v }\f} }\f} }\f} }\f} ;\f} \f\f} \f\v; }\f\f {\t\f {\v} ;\f let\t;\f in\t\t\f }\f ;\f} \f\f} \v\v; }\f\f } {\v} ;\f case\t;\f }\v\v \t\f \v\f} \v\t ;\f} \f\f} }\t {\f} {\v\v \v\f} {\t ;\f} \f\f} \f\v; {\t\f {\v} {\t\v }\v\t {\t \f\f ;\t }\f} }\f} \f\f} }\f\v \t\t \t\f} ;\f} \f\f} ;\f} \f\f} }\f\f }\f\f \t\t} \f\f\f \t\t \v\t \t\v; \f\v; \t\v; \t \v } {\f; {\f; {\f; } \t } } \f\v; } }\f\f \v \f\v; } \v {\f ;\v\v {\f; } } \v\f; {\f; }\f; } }\f; \v }\f; \v \f\f }\f; \f\f \v\f; }\v; ;\v; \f\f }\f; ;\f \t ;\t }\f; } }\f ;\f} \f\v\v : <= * + - /\t}\f; {\f\v }\f \t\f} {\v} \v\t} \v\f; }\f\f \f\t\v {\t} \t\f} ;\f\v }\t data\t\v\f\f \v\t \v\t} \v\v; }\f; ;\t} {\v\v {\f; \v\t\f {\t\f \v\t\f }\f\f \v\v; ;\v; \f\t\v }\t} \v\t} }\f; \v\v\v \v\t} ;\f } ;\f} {\t} {\t} }\v\v \v\t }\f\v \v\t ;\v\v ;\t{\f \v\t} } {\t \v }\t} \v\t} {\f; \t\f ;\f} }\t} \v\t} {\t\f {\t\v \f\v\v \t\f \t\v\f ;\f; #define x \\";
#include <stdio.h>
typedef unsigned P;
enum{ L=1<<26} ;
P I,C,K=24,M,E
,j[L]={ x},*D=j+L/2,*i=j+L-3;
P w(P _){ return j[_[i]+1]; }
#define X(f,b,y,d) void f(){ D=j+b[i]; *D=y; *++D=d; i+=b; }
X(A,1,w(1),i[1])X(a,2,w(2),w(1))P l(P _,P E){ j[K++]=_; K++[j]=E; return K-2; }
int S;
X(t,1,9,(S=getchar())<0?8:l(l(17,l(1,S)),l(7,0)))P U(P _){ return j[w(_)+1]; }
X(_,2,9,U(1)>U(2)?l(8,9):8)X(b,2,w(2),i[1])P G(P _,P S){ return l(w(_),w(S)); }
#define B(f,o) X(f,2,1,U(1)o U(2))
B(p,*)X(c,3,w(1),G(2,3))X(u,3,G(1,3),w(2))P N(P l,P L,P _){
I=0;
while(*T-I[v])I++;
T++;
return I/6?l:N(l+I/_*L,L*6/_,3-_);
}
X(s,3,G(2,3),w(1))X(m,4,G(4,1),w(2))B(o,-)X(f,3,G(1,3),G(2,3))P Z(){
P _=*T++;
return _-9?l(l(17,l(1,_)),Z()):8;
}
X(d,2,w(1),w(2))X(R,2,l(w(2),printf(E?"%c":"%u,",U(1))),l(23,9))P W(P _){
if(S--); else{ M=0; C=5; while(C--)M=85*M+*y++-32; S=31; }
I=_*2+!!(M&1<<S);
for(_=0; (C=_[n]-9)&&C-I-23; _++);
return C?_?--_<2?C=N(0,1,1),_?l(_,C):C?C<8?C+15:7[D-C]:Z():_:(_=W(0),l(_,W(0))):W(I);
}
B(g,/)B(r,+)X(e,2,9,w(1))X(O,2,w(2),l(w(1),M-8))int main(){
v=12+n;
if(E=*j,E){
D=j+2;
K=E;
} else{
T=v+7;
while((*D=W(0)))D++;
puts(T);
}
*i=l(l(l(*--D,l(7,0)),0),l(23,9));
while(M=*i,M)M<24?(void(*[])()){
O,b,f,u,s,c,a,t,e,d,_,p,r,o,g,R,A,m
} [M*(M<18)](),7:(*--i=M[j]); return E||puts(y); }
An award-winning compiler
I may have failed that stupid Compilers exam but at least I can boast of authoring an award-winning compiler!
(Even if the award is from the 26th International Obfuscated C Code Contest.)
The source looks like the following. The original uses raw tabs, vertical tabs, and form feeds to take advantage of the character counting rules.
Try it out!
$ curl https://www.ioccc.org/2019/2019.tar.bz2 | tar xj $ cd 2019/lynn $ make
Thanks!
Many thanks to the contest organizers for volunteering so much time and energy. I’ve followed this incredible competition for years, and I’m thrilled to finally participate.
Spoilers
I implemented a variant of the ION machine along wth a variant of the "Parity" compiler. Rather than ION assembly, it emits the values that should be loaded directly into memory.
Instead of allowing up to 128 combinators and using ASCII codes to make combinator indices friendlier, the combinators are numbered consecutively starting from 0 so the main loop could use a jump table, and there are exactly 24 of them (so the heap starts at 24), but not all are in the jump table.
An arithmetic operator, say ()`, compiles to the index of the combinator
`()
plus 9, which is past the end of the jump table. The code detects this
and acts as if it encountered Q()`, where the `Q = B(BT)T`, that is, it
reduces the term `Q()xy
to y(x(+))
. This ad hoc scheme beats having an
explicit Q
combinator because it is less clear and it saves room. I chose Q
because Smullyan calls this Q4, the "quacky" combinator (Chapter 11).
Originally, I planned to represent the compiler as ION assembly in a C string. But soon it was obvious I needed more drastic measures. The prevalence of applications and certain combinators suggested entropy encoding. I chickened out of arithmetic coding and took refuge with Huffman.
The rules hint that high bits in the source are risky, so I represented the encoded output in base 85, using 5 bytes to represent a 32-bit word:
base85 :: Int -> String
base85 n = take 5 $ toEnum . (32+) . (`mod` 85) <$> iterate (`div` 85) n
Everything else in the program, that is, anything but an application or a
popular combinator, wound up in another buffer. String constants were stored
verbatim, while other values were encoded as a space-terminated mixed-radix
number, where the digits were carefully chosen to be invisible to iocccsize
and also to be legal in C string literals.
spaceOut :: Int -> [Char]
spaceOut n = spaceOut' n True where
spaceOut' n d = if n == 0 then "" else
((if d then "{\v}\f;\t" else "\v\f\t")!!b) : spaceOut' q (not d)
where (q, b) = divMod n (if d then 6 else 3)
Partitioning the code into two buffers meant the decoder had to frequently switch between them, aiding obfuscation.
Unlike ION assembly, references to previous definitions are relative to the
definition in which they appear: the number n
refers to the term that
appeared n
definitions ago. I hoped judicious ordering of definitions
would result in smaller numbers and hence smaller encodings.
On the Haskell side, I broke functions into small pieces and experimented with inlining and rewrite rules to reduce code size.
The order that a C function’s arguments are evaluated depends on the compiler, and hence so does the behaviour of my program. But on closer inspection, only heap allocation is affected. Whether one application cell gets allocated before another is of no consequence, so the code works either way.
By abuse of C block comments, concatenating the output of the compiler with the source of the compiler initializes the heap with ION machine code of the given program. It also initializes the first memory location to the address of the bottom of the heap, which must be at least 24, and the second to the entry point of the program.
The main()
function checks if the first memory location is zero. If so, it
decodes the compiler into the heap. Otherwise, it sets the heap pointer and
entry point. This check also determines whether to print a certain footer on
program termination.
Instead of T1
, the equivalent of wrapIO
uses what we might
call Q1I
. Since Q1Iht
reduces to I(h1)t
, this achieves the same effect
while being slightly more confusing.
The top memory cells are initially zero, and the stack begins slightly below
the top. Our version of the I combinator takes two arguments, lazily reducing
Ixy
to xy
, which overwrites the application cell at sp[2]
.
Then what if sp
is near the top and we reduce an I combinator? The details
are tricky, which is perfect for the competition, but less so when trying to
remember how it works. I believe in such cases, since the top of memory
contains zeroes, it overwrites the contents of the first two cells, which is
harmless because the heap starts from 24.
The Ideas of March
Shortly after the contest deadline, I found I could easily shrink the code further. Firstly, there were many expressions of the form:
f <$> x <*> y
I should have defined liftA2
to save room.
Perhaps defining (<)
instead of (⇐)
would have shaved off a byte.
A different mixed-radix encoding would still be invisible to
iocccsize
but takes less room. The corresponding decoder needs more C, but
the savings are worth it:
spaceOutDeluxe :: Int -> [Char]
spaceOutDeluxe n = zeros !! (2 * length cs) : cs where
cs = spaceOut' n True
spaceOut' n d = if n == 0 then "" else
(zeros!!(b*(if d then 1 else 2))) : spaceOut' q (not d)
where (q, b) = divMod n (if d then 7 else 4)
zeros = "\t{ }\v;\f"
It makes me wonder if I could have achieved my original aim of squeezing in a compiler with type checking.
Related work
Compilers and interpreters (for C, Basic, Lisp, Forth, APL, 6502(!), 8080(!!), PDP-7/11(!!!), …) are rife among previous IOCCC winners.
One previous winner is built around a recursion using the Y combinator, which is also how our compiler supports recursive definitions.
One previous winner even translates lambda calculus to combinatory logic, but luckily for me, was written before Kiselyov published his algorithm.