First, read the input tables as pandas data frames and filter out any unnecessary columns and rows.
# Import required modules.
import random

import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
# Show every column when previewing data frames in the notebook.
pd.set_option('display.max_columns', None)

# Input/output local file paths - update these to match your system.
dissemination_area_lyr = r'/home/jovyan/work/data/census/lda_000b16a_e/lda_000b16a_e.shp'
census_csv = r'/home/jovyan/work/data/census/98-400-X2016055_ENG_CSV/98-400-X2016055_English_CSV_data.csv'
out_csv = r'/home/jovyan/work/blog/language-dot-map/language-dots-100.csv'

# Load the dissemination-area polygons, keeping only the area ID and its geometry.
da_df = gpd.read_file(dissemination_area_lyr)[['DAUID', 'geometry']]
da_df.head()
# Read the language census data in chunks, keeping only the totals for
# dissemination areas (GEO_LEVEL 4). May take a while to complete.
census_cols = ['GEO_CODE (POR)', 'Dim: Knowledge of official languages (5): Member ID: [2]: English only', 'Dim: Knowledge of official languages (5): Member ID: [3]: French only', 'Dim: Knowledge of official languages (5): Member ID: [4]: English and French']
filtered_chunks = []
for chunk in pd.read_csv(census_csv, iterator=True, chunksize=1000):
    # Keep only the "Total" rows for sex and mother tongue at the DA level.
    keep = (
        (chunk['GEO_LEVEL'] == 4)
        & (chunk['DIM: Sex (3)'] == 'Total - Sex')
        & (chunk['DIM: Mother tongue (269)'] == 'Total - Mother tongue')
    )
    filtered_chunks.append(chunk.loc[keep, census_cols])
census_df = pd.concat(filtered_chunks)
# Shorten the verbose census column names.
census_df.columns = ['DAUID', 'English', 'French', 'English and French']
census_df.head()
Next, join the two data frames on the common key DAUID to assign geometries to the census data, and calculate the bounding box (bbox) for each geometry.
# DAUID is read as the "object" dtype; cast it to integer so the join keys match.
da_df['DAUID'] = da_df['DAUID'].astype('int')
# Attach each geometry to its census record, then rebuild as a geopandas data frame.
merged_df = gpd.GeoDataFrame(census_df.merge(da_df, on='DAUID'), geometry='geometry')
# Append the per-geometry bounding-box columns (minx, miny, maxx, maxy).
merged_df = pd.concat([merged_df, merged_df.bounds], axis=1)
merged_df.head()
The basic methodology for a dot map like this is to randomly distribute point coordinates within the administrative boundary, one to represent each value to be mapped. In this case, we will allocate a coordinate for every 100 people present in each language category.
To distribute the dots, a random location within the bounding box is calculated and then tested against the actual administrative boundary geometry. If the coordinate falls within the geometry, it is kept; if not, a new random location is calculated, and the process is repeated until a coordinate is found that does intersect.
# Convert person counts to dot counts: one dot per 100 persons,
# rounded to the nearest 100.
factor = 100
language_cols = ['English', 'French', 'English and French']
merged_df[language_cols] = (merged_df[language_cols].astype('int') / factor).round().astype('int')
# Define a function to randomly distribute coordinates within a geometry per value for each language.
def random_coordinates(row):
    """Generate random dot coordinates inside a row's geometry.

    For each language column, draws `row[language]` points via rejection
    sampling: candidate points are sampled uniformly within the row's
    bounding box (minx/miny/maxx/maxy columns) and kept only when they
    fall inside the actual geometry.

    Parameters
    ----------
    row : pandas.Series
        A merged row carrying 'English', 'French', 'English and French'
        dot counts, the 'geometry' polygon, and its bounds columns.

    Returns
    -------
    pandas.DataFrame
        One row per generated dot, with columns ('language', 'x', 'y').

    Note: requires `Point` from shapely.geometry (imported at module top);
    a degenerate/empty geometry with a nonzero count would loop forever.
    """
    results = []
    for language in ('English', 'French', 'English and French'):
        count = 0
        val = row[language]
        while count < val:
            # Sample uniformly within the bbox, keep only points inside the polygon.
            x = random.uniform(row['minx'], row['maxx'])
            y = random.uniform(row['miny'], row['maxy'])
            pt = Point(x, y)
            if pt.within(row['geometry']):
                count += 1
                results.append([language, x, y])
    return pd.DataFrame(results, columns=('language', 'x', 'y'))
# Apply the function to every row of the data frame, returning a series with a
# data frame for each row. This step also takes a while to run.
# NOTE: raw must be left at its default (False) -- random_coordinates indexes
# the row by column label ('minx', 'geometry', ...), which requires a Series;
# raw=True would pass a bare ndarray and fail on label indexing.
results = merged_df.apply(random_coordinates, axis=1)
# Unpack the series and concatenate the per-row data frames into one table of dots.
results = pd.concat(results.tolist(), ignore_index=True)
# Write the dot coordinates to file for mapping.
results.to_csv(out_csv, index=False)