#!/bin/env perl

################################################################
#
# Create 1000 random categorical data points that describe individuals'
#
# - sex (p(male)=p(female)=0.5)
# - hair color (p(blonde)=0.3,p(brown)=0.4,p(black)=0.2,p(red)=0.1)
# - eye color (probability depends on hair color, see table below)
# - height (normally distributed, mean(male)=175, mean(female)=162, stdev=5, in cm)
#
################################################################


use Math::Random qw(random_normal);
use Math::Round qw(round);

use strict;
use warnings;

# Output format: every print emits one tab-separated record on its own line.
$\ = "\n";    # output record separator, appended after each print
$, = "\t";    # output field separator, inserted between print arguments

# Numeric code printed next to each eye color name.
my $eyecolor = {
    code => {
        brown => 0,
        blue  => 1,
        green => 2,
        grey  => 3,
    },
};

# One entry per hair color:
#   p    - probability of this hair color
#   eye  - probability of each eye color, given this hair color
#   code - numeric code printed next to the hair color name
my $haircolor = {
    blonde => {
        p   => 0.3,
        eye => {
            brown => 0.05,
            blue  => 0.7,
            green => 0.05,
            grey  => 0.2,
        },
        code => 0,
    },
    brown => {
        p   => 0.4,
        eye => {
            brown => 0.6,
            blue  => 0.15,
            green => 0.1,
            grey  => 0.15,
        },
        code => 1,
    },
    black => {
        p   => 0.2,
        eye => {
            brown => 0.8,
            blue  => 0.1,
            green => 0.02,
            grey  => 0.08,
        },
        code => 2,
    },
    red => {
        p   => 0.1,
        eye => {
            brown => 0.15,
            blue  => 0.2,
            green => 0.5,
            grey  => 0.15,
        },
        code => 3,
    },
};

# Number of records to generate.
my $samples = 1000;

# Height model: normally distributed, mean depends on sex (values in cm).
my %mean_height = (
    male   => 175,
    female => 162,
);
my $height_sd = 5;

# Emit one record per individual with the columns:
#   index, sex, first letter of sex, hair color, hair color code,
#   eye color, eye color code, height rounded to the nearest integer.
for my $i (0 .. $samples - 1) {
    my $hc  = pick_hair_color();
    my $ec  = pick_eye_color($hc);
    my $sex = rand() < 0.5 ? "male" : "female";

    # Scalar assignment, so random_normal is asked for a single deviate.
    my $height = random_normal(1, $mean_height{$sex}, $height_sd);

    print $i, $sex, substr($sex, 0, 1),
        $hc, $haircolor->{$hc}{code},
        $ec, $eyecolor->{code}{$ec},
        round($height);
}

# Draw a random eye color, conditional on a hair color.
#
# Argument: $hc - hair color name (a key of %$haircolor).
# Returns:  an eye color name, chosen with the probabilities listed under
#           $haircolor->{$hc}{eye}; nothing (undef in scalar context) when
#           $hc is not a known hair color.
sub pick_eye_color {
    my ($hc) = @_;

    # Bail out before touching the table, so an unknown hair color does not
    # autovivify an empty entry in the shared $haircolor structure.
    return unless defined $hc && exists $haircolor->{$hc};

    my $p_of = $haircolor->{$hc}{eye};
    my $x    = rand();    # uniform in [0, 1)

    my $cumulative = 0;
    my $fallback;

    # Sorted keys give a stable draw for a given random number; plain hash
    # order differs from run to run.
    for my $ec (sort keys %$p_of) {
        my $p = $p_of->{$ec};
        next unless $p > 0;    # a zero-probability color can never be drawn

        $cumulative += $p;
        return $ec if $x < $cumulative;
        $fallback = $ec;
    }

    # Floating-point rounding can leave the cumulative sum just below 1, so
    # a draw very close to 1 falls through the loop; give it to the last
    # color that has a non-zero probability instead of returning undef.
    return $fallback;
}

# Draw a random hair color.
#
# Takes no arguments.
# Returns: a hair color name (a key of %$haircolor), chosen with the
#          probability stored under that key's "p" entry.
sub pick_hair_color {
    my $x = rand();    # uniform in [0, 1)

    my $cumulative = 0;
    my $fallback;

    # Sorted keys give a stable draw for a given random number; plain hash
    # order differs from run to run.
    for my $hc (sort keys %$haircolor) {
        my $p = $haircolor->{$hc}{p};
        next unless defined $p && $p > 0;    # skip colors that cannot occur

        $cumulative += $p;
        return $hc if $x < $cumulative;
        $fallback = $hc;
    }

    # Floating-point rounding can leave the cumulative sum just below 1, so
    # a draw very close to 1 falls through the loop; give it to the last
    # color that has a non-zero probability instead of returning undef.
    return $fallback;
}
    
			 
			     
