% Code to reproduce the results and figures from "What gender gap in chess
% achievement" by Wei Ji Ma for ChessBase, Oct 7, 2020.
%
% To run this code, first download a rating list, convert it to CSV, and
% make sure that the columns are:
%   A: Federation
%   B: Gender
%   C: Rating
%   D: Birth year
%   E: Name
% Column E (Name) is barely used, so if you don't include it, just comment
% out the few lines in which it is used. Column D is not needed if you
% don't want to exclude juniors.
%
% Statements without a trailing semicolon are intentional: they print the
% corresponding statistic to the command window.

clear; close all;
set(0,'DefaultLineLineWidth',2);
set(0,'DefaultAxesFontSize',16);

%% Thought experiment
% Draw nA (resp. nB) uniform integers in 1..100, ndraws times, and compare
% the expected maximum of the larger group with that of the smaller group.
nA = 10;
nB = 2;
ndraws = 1e6;
% Take the maximum explicitly along dimension 1 (within each draw) so the
% result is still one value per draw when a group size is set to 1.
max_A = max(randi(100,[nA ndraws]), [], 1);
max_B = max(randi(100,[nB ndraws]), [], 1);
mean(max_A)
mean(max_B)

%% Read data
csv_file     = 'standard_rating_list_edited.csv';
federation   = "IND";   % federation code to analyse
birth_cutoff = 2000;    % keep players born strictly before this year

% With 'OutputType','string', readmatrix already returns string arrays, so
% no cell2mat/string conversion is needed.
country = readmatrix(csv_file,'range','A:A','OutputType','string');
gender  = readmatrix(csv_file,'range','B:B','OutputType','string');
rating  = readmatrix(csv_file,'range','C:C');
birth   = readmatrix(csv_file,'range','D:D');
name    = readmatrix(csv_file,'range','E:E','OutputType','string');

% Keep only non-junior players of the selected federation. Rows with a
% missing birth year (NaN) fail the comparison and are dropped as well.
idx = birth<birth_cutoff & strcmp(country,federation);
gender = gender(idx);
rating = rating(idx);
name = name(idx);

% Splitting by gender
is_M = strcmp(gender,"M");
is_F = strcmp(gender,"F");
rating_M = rating(is_M);
rating_F = rating(is_F);

%% Basic stats
n = length(rating)
n_M = sum(is_M)
n_F = sum(is_F)
pF = n_F/n * 100

max_M = round(max(rating_M))
max_F = round(max(rating_F))
% Restrict the lookup to the matching gender and compare against the
% unrounded maximum; otherwise best_F could list men whose rating happens
% to equal the top woman's rating.
best_M = name(is_M & rating==max(rating_M))
best_F = name(is_F & rating==max(rating_F))

% Gender and name of the highest-rated players (at most 20).
[~,I] = sort(rating,'descend');
I_top = I(1:min(20,n));
gender(I_top)
name(I_top)

mu_M = mean(rating_M)
mu_F = mean(rating_F)
std_M = round(std(rating_M))
std_F = round(std(rating_F))

% Rating histograms in 50-point bins, raw (h_*) and normalised (hn_*).
rating_edges = 1000:50:2800;
rating_centers = rating_edges(1:end-1)+25;
h_M = histcounts(rating_M, rating_edges);
h_F = histcounts(rating_F, rating_edges);
hn_M = h_M/n_M;
hn_F = h_F/n_F;

%% Permutation test
% Shuffle all ratings, split them into groups of the observed sizes, and
% record the best rating in each group.
ndraws = 100000;
n_large = n_M;
n_small = n_F;
max_large = NaN(1,ndraws);
max_small = NaN(1,ndraws);
for i = 1:ndraws
    rating_perm = rating(randperm(n));
    draw_large = rating_perm(1:n_large);
    draw_small = rating_perm(n_large + 1:end);
    max_large(i) = max(draw_large);
    max_small(i) = max(draw_small);
end
mean(max_large)
mean(max_small)
delta = max_large - max_small;
delta_mean = mean(delta)
delta_std = std(delta)

%% Plots
figure;
plot(rating_centers, [h_M; h_F],'o-')
ylabel('Number of players')
xlabel('Rating (binned)')
xlim([1000 2800])
set(gca,'LineWidth',1)
grid on;
legend('M','F')
title('Rating distributions of Indian players by gender')

figure;
plot(rating_centers, [hn_M; hn_F],'o-')
ylabel('Proportion of players')
xlabel('Rating (binned)')
grid on;
set(gca,'ytick',0:0.02:0.1)
set(gca,'LineWidth',1)
legend('M','F')
title('Normalized rating distributions of Indian players by gender')
xlim([1000 2800])

figure;
% Bin with hist (bin centres every 50 points) and scale the counts by 1000
% so the bar heights match the 'Frequency/1000' axis label.
delta_centers = -100:50:500;
delta_counts = hist(delta, delta_centers);
bar(delta_centers, delta_counts/1000, 1)
set(gca,'xtick',-100:100:500)
xlabel('Difference between best M and best F')
ylabel('Frequency/1000')