Predicting Employee Job Type Using Keras 2.8 on Windows 11

One of my standard examples for neural network classification is to predict employee job type (mgmt, supp, tech) from sex, age, city (anaheim, boulder, concord), and income. The data is artificial. I redo the example every few months to make sure that no breaking changes have been introduced by new versions of code libraries. Here’s an example using Keras 2.8 on Windows 10/11.

Note: I did the same problem using PyTorch 1.10 — see https://jamesmccaffreyblog.com/2022/04/29/predicting-employee-job-type-using-pytorch-1-10-on-windows-11/.

My data has 200 training items and 40 test items. The data looks like:

-1   0.39   0  0  1   0.5120   1
 1   0.27   0  1  0   0.2860   2
-1   0.19   0  0  1   0.3270   0
. . . 

Sex is encoded as M = -1, F = +1. Age is normalized by dividing by 100. City is one-hot encoded as anaheim = (1,0,0), boulder = (0,1,0), concord = (0,0,1). Income is normalized by dividing by $100,000. The variable-to-predict, Job Type, is ordinal encoded as mgmt = 0, supp = 1, tech = 2.

My demo neural network has 6-(10-10)-3 architecture with tanh() hidden activation. There is softmax() activation on the output nodes because the demo uses categorical_crossentropy loss. The two hidden layers are explicitly initialized using glorot_uniform on the weights and zeros on the biases; the output layer relies on the Keras Dense defaults, which are the same two initializers.

The demo trains the network for 1000 epochs using stochastic gradient descent with a fixed learning rate of 0.01 and a batch size of 10. The demo saves checkpoints every 100 epochs.



Left: Job Type = chemist. Center: Job Type = scientist. Right: Job Type = nurse.


Demo code:

# employee_job_tfk.py
# predict job type from sex, age, city, income
# Anaconda3-2020.02  (Python 3.7.6)
# TensorFlow 2.8.0 (includes KerasTF 2.8.0)
# Windows 10/11

import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import numpy as np
import tensorflow as tf
from tensorflow import keras as K

# -----------------------------------------------------------

class MyLogger(K.callbacks.Callback):
  """Keras callback that prints loss and accuracy every n epochs.

  Args:
    n: reporting interval; a line is printed whenever the
      zero-based epoch index is a multiple of n.
  """

  def __init__(self, n):
    # initialize the Keras Callback base class before adding state
    super().__init__()
    self.n = n   # print loss and acc every n epochs

  def on_epoch_end(self, epoch, logs=None):
    """Print one progress line if this epoch is a reporting epoch.

    Args:
      epoch: zero-based index of the epoch that just finished.
      logs: metrics dict supplied by Keras; may be None.
    """
    if epoch % self.n != 0:
      return

    # None default avoids sharing one mutable dict across calls
    logs = logs or {}

    # fall back to nan so a missing metric prints as "nan" rather
    # than raising TypeError on None * 100
    curr_loss = logs.get('loss', float('nan'))
    curr_acc = logs.get('accuracy', float('nan')) * 100
    print("epoch = %4d  |  loss = %0.6f  |  acc = %0.2f%%" %
      (epoch, curr_loss, curr_acc))

# -----------------------------------------------------------

def main():
  """Run the employee job-type classification demo end to end.

  Loads the tab-delimited training and test files, builds a
  6-(10-10)-3 tanh/softmax network, trains it with SGD, prints
  accuracy on both data sets, and prints the class pseudo-
  probabilities for one hand-built input. Takes no arguments and
  returns None. Data files are read from a Data directory and
  checkpoints are written under a Log directory, both relative
  to the current working directory.
  """
  # 0. get started
  print("\nBegin Employee job type using Keras/TensorFlow ")
  np.random.seed(1)
  tf.random.set_seed(1)

  # hyperparameters are defined once so the values used for
  # training and the values reported on screen cannot drift apart
  n_classes = 3      # mgmt, supp, tech
  bat_size = 10
  max_epochs = 1000
  lrn_rate = 0.01
  ckpt_epochs = 100  # write a checkpoint every this many epochs

  # 1. load data
  print("\nLoading Employee data into memory ")
  # sex (-1=M), age, city (anaheim, boulder, concord)
  # income, job type (mgmt, supp, tech)
  # -1  0.39  0 0 1  0.5120   1
  #  1  0.50  0 1 0  0.5650   1

  # os.path.join yields the same ".\Data\..." path on Windows and
  # a valid path on other platforms
  train_file = os.path.join(".", "Data",
    "employee_train.txt")  # ordinal encoded
  train_xy = np.loadtxt(train_file, usecols=[0,1,2,3,4,5,6],
    delimiter="\t", comments="#", dtype=np.float32)
  train_x = train_xy[:,0:6]
  train_y = train_xy[:,6]
  # fix the class count so train and test one-hot widths always
  # agree, even if a file happens to lack the highest label
  train_y = K.utils.to_categorical(train_y,
    num_classes=n_classes)  # to one-hot

  test_file = os.path.join(".", "Data",
    "employee_test.txt")  # ordinal encoded
  test_xy = np.loadtxt(test_file, usecols=[0,1,2,3,4,5,6],
    delimiter="\t", comments="#", dtype=np.float32)
  test_x = test_xy[:,0:6]
  test_y = test_xy[:,6]
  test_y = K.utils.to_categorical(test_y,
    num_classes=n_classes)  # to one-hot

  # -----------------------------------------------------------

  # 1b. prepare checkpoints
  # an integer save_freq is counted in batches, so convert the
  # epoch interval to batches; kept as a plain Python int
  steps_per_epoch = (len(train_x) + bat_size - 1) // bat_size
  checkpoint = K.callbacks.ModelCheckpoint(
    filepath=os.path.join(".", "Log", "checkpoint"),
    save_freq=ckpt_epochs * steps_per_epoch,
    save_weights_only=True)

  # -----------------------------------------------------------

  # 2. create network
  print("\nCreating 6-(10-10)-3 neural network ")
  net = K.models.Sequential()
  net.add(K.layers.Dense(input_dim=6, units=10,
    activation='tanh', kernel_initializer='glorot_uniform',
    bias_initializer='zeros'))
  net.add(K.layers.Dense(units=10,
    activation='tanh', kernel_initializer='glorot_uniform',
    bias_initializer='zeros'))
  net.add(K.layers.Dense(units=n_classes, activation='softmax'))
  opt = K.optimizers.SGD(learning_rate=lrn_rate)
  net.compile(loss='categorical_crossentropy',
    optimizer=opt, metrics=['accuracy'])

  my_logger = MyLogger(n=100)  # progress every 100 epochs

  # -----------------------------------------------------------

  # 3. train model
  print("\nbat_size = %d " % bat_size)
  print("loss = categorical_crossentropy ")
  print("optimizer = SGD")
  print("max_epochs = %d " % max_epochs)
  print("lrn_rate = %0.3f " % lrn_rate)
  print("\nStarting training ")
  net.fit(train_x, train_y, batch_size=bat_size,
    epochs=max_epochs, verbose=0,
    callbacks=[my_logger, checkpoint])
  print("Done ")

  # -----------------------------------------------------------

  # 4. evaluate model accuracy
  # evaluate() returns [loss, accuracy] given the compiled metrics
  print("\nComputing model accuracy")
  train_metrics = net.evaluate(train_x, train_y, verbose=0)
  print("Accuracy on training data: %0.4f " % train_metrics[1] )
  test_metrics = net.evaluate(test_x, test_y, verbose=0)
  print("Accuracy on test data: %0.4f " % test_metrics[1] )

  # 5. make a prediction
  print("\nPredicting job for M  30  concord  $50,000: ")
  X = np.array([[-1, 0.30,  0,0,1,  0.5000]], dtype=np.float32)
  np.set_printoptions(precision=4, suppress=True)
  probs = net.predict(X)
  print(probs)  # pseudo-probs

  # 6. save model (full model approach)
  # NOTE: both save calls below are disabled, so nothing is
  # written to disk here; uncomment one to persist the model
  print("\nSaving trained model weights ")
  # net.save_weights(".\\Models\\employee_model.h5")
  # net.save(".\\Models\\employee_model.h5")  # entire

  print("\nEnd Employee predict job demo ")

# run the demo only when this file is executed as a script
if __name__=="__main__":
  main()

Training data:

# employee_train.txt
#
# sex (M = -1, F = 1),
# age / 100,
# city (anaheim = 100, boulder = 010, concord = 001),
# income / 100_000
# job (mgmt = 0, supp = 1, tech = 2)
#
1	0.24	1	0	0	0.2950	2
-1	0.39	0	0	1	0.5120	1
1	0.63	0	1	0	0.7580	0
-1	0.36	1	0	0	0.4450	1
1	0.27	0	1	0	0.2860	2
1	0.50	0	1	0	0.5650	1
1	0.50	0	0	1	0.5500	1
-1	0.19	0	0	1	0.3270	0
1	0.22	0	1	0	0.2770	1
-1	0.39	0	0	1	0.4710	2
1	0.34	1	0	0	0.3940	1
-1	0.22	1	0	0	0.3350	0
1	0.35	0	0	1	0.3520	2
-1	0.33	0	1	0	0.4640	1
1	0.45	0	1	0	0.5410	1
1	0.42	0	1	0	0.5070	1
-1	0.33	0	1	0	0.4680	1
1	0.25	0	0	1	0.3000	1
-1	0.31	0	1	0	0.4640	0
1	0.27	1	0	0	0.3250	2
1	0.48	1	0	0	0.5400	1
-1	0.64	0	1	0	0.7130	2
1	0.61	0	1	0	0.7240	0
1	0.54	0	0	1	0.6100	0
1	0.29	1	0	0	0.3630	0
1	0.50	0	0	1	0.5500	1
1	0.55	0	0	1	0.6250	0
1	0.40	1	0	0	0.5240	0
1	0.22	1	0	0	0.2360	2
1	0.68	0	1	0	0.7840	0
-1	0.60	1	0	0	0.7170	2
-1	0.34	0	0	1	0.4650	1
-1	0.25	0	0	1	0.3710	0
-1	0.31	0	1	0	0.4890	1
1	0.43	0	0	1	0.4800	1
1	0.58	0	1	0	0.6540	2
-1	0.55	0	1	0	0.6070	2
-1	0.43	0	1	0	0.5110	1
-1	0.43	0	0	1	0.5320	1
-1	0.21	1	0	0	0.3720	0
1	0.55	0	0	1	0.6460	0
1	0.64	0	1	0	0.7480	0
-1	0.41	1	0	0	0.5880	1
1	0.64	0	0	1	0.7270	0
-1	0.56	0	0	1	0.6660	2
1	0.31	0	0	1	0.3600	1
-1	0.65	0	0	1	0.7010	2
1	0.55	0	0	1	0.6430	0
-1	0.25	1	0	0	0.4030	0
1	0.46	0	0	1	0.5100	1
-1	0.36	1	0	0	0.5350	0
1	0.52	0	1	0	0.5810	1
1	0.61	0	0	1	0.6790	0
1	0.57	0	0	1	0.6570	0
-1	0.46	0	1	0	0.5260	1
-1	0.62	1	0	0	0.6680	2
1	0.55	0	0	1	0.6270	0
-1	0.22	0	0	1	0.2770	1
-1	0.50	1	0	0	0.6290	0
-1	0.32	0	1	0	0.4180	1
-1	0.21	0	0	1	0.3560	0
1	0.44	0	1	0	0.5200	1
1	0.46	0	1	0	0.5170	1
1	0.62	0	1	0	0.6970	0
1	0.57	0	1	0	0.6640	0
-1	0.67	0	0	1	0.7580	2
1	0.29	1	0	0	0.3430	2
1	0.53	1	0	0	0.6010	0
-1	0.44	1	0	0	0.5480	1
1	0.46	0	1	0	0.5230	1
-1	0.20	0	1	0	0.3010	1
-1	0.38	1	0	0	0.5350	1
1	0.50	0	1	0	0.5860	1
1	0.33	0	1	0	0.4250	1
-1	0.33	0	1	0	0.3930	1
1	0.26	0	1	0	0.4040	0
1	0.58	1	0	0	0.7070	0
1	0.43	0	0	1	0.4800	1
-1	0.46	1	0	0	0.6440	0
1	0.60	1	0	0	0.7170	0
-1	0.42	1	0	0	0.4890	1
-1	0.56	0	0	1	0.5640	2
-1	0.62	0	1	0	0.6630	2
-1	0.50	1	0	0	0.6480	1
1	0.47	0	0	1	0.5200	1
-1	0.67	0	1	0	0.8040	2
-1	0.40	0	0	1	0.5040	1
1	0.42	0	1	0	0.4840	1
1	0.64	1	0	0	0.7200	0
-1	0.47	1	0	0	0.5870	2
1	0.45	0	1	0	0.5280	1
-1	0.25	0	0	1	0.4090	0
1	0.38	1	0	0	0.4840	0
1	0.55	0	0	1	0.6000	1
-1	0.44	1	0	0	0.6060	1
1	0.33	1	0	0	0.4100	1
1	0.34	0	0	1	0.3900	1
1	0.27	0	1	0	0.3370	2
1	0.32	0	1	0	0.4070	1
1	0.42	0	0	1	0.4700	1
-1	0.24	0	0	1	0.4030	0
1	0.42	0	1	0	0.5030	1
1	0.25	0	0	1	0.2800	2
1	0.51	0	1	0	0.5800	1
-1	0.55	0	1	0	0.6350	2
1	0.44	1	0	0	0.4780	2
-1	0.18	1	0	0	0.3980	0
-1	0.67	0	1	0	0.7160	2
1	0.45	0	0	1	0.5000	1
1	0.48	1	0	0	0.5580	1
-1	0.25	0	1	0	0.3900	1
-1	0.67	1	0	0	0.7830	1
1	0.37	0	0	1	0.4200	1
-1	0.32	1	0	0	0.4270	1
1	0.48	1	0	0	0.5700	1
-1	0.66	0	0	1	0.7500	2
1	0.61	1	0	0	0.7000	0
-1	0.58	0	0	1	0.6890	1
1	0.19	1	0	0	0.2400	2
1	0.38	0	0	1	0.4300	1
-1	0.27	1	0	0	0.3640	1
1	0.42	1	0	0	0.4800	1
1	0.60	1	0	0	0.7130	0
-1	0.27	0	0	1	0.3480	0
1	0.29	0	1	0	0.3710	0
-1	0.43	1	0	0	0.5670	1
1	0.48	1	0	0	0.5670	1
1	0.27	0	0	1	0.2940	2
-1	0.44	1	0	0	0.5520	0
1	0.23	0	1	0	0.2630	2
-1	0.36	0	1	0	0.5300	2
1	0.64	0	0	1	0.7250	0
1	0.29	0	0	1	0.3000	2
-1	0.33	1	0	0	0.4930	1
-1	0.66	0	1	0	0.7500	2
-1	0.21	0	0	1	0.3430	0
1	0.27	1	0	0	0.3270	2
1	0.29	1	0	0	0.3180	2
-1	0.31	1	0	0	0.4860	1
1	0.36	0	0	1	0.4100	1
1	0.49	0	1	0	0.5570	1
-1	0.28	1	0	0	0.3840	0
-1	0.43	0	0	1	0.5660	1
-1	0.46	0	1	0	0.5880	1
1	0.57	1	0	0	0.6980	0
-1	0.52	0	0	1	0.5940	1
-1	0.31	0	0	1	0.4350	1
-1	0.55	1	0	0	0.6200	2
1	0.50	1	0	0	0.5640	1
1	0.48	0	1	0	0.5590	1
-1	0.22	0	0	1	0.3450	0
1	0.59	0	0	1	0.6670	0
1	0.34	1	0	0	0.4280	2
-1	0.64	1	0	0	0.7720	2
1	0.29	0	0	1	0.3350	2
-1	0.34	0	1	0	0.4320	1
-1	0.61	1	0	0	0.7500	2
1	0.64	0	0	1	0.7110	0
-1	0.29	1	0	0	0.4130	0
1	0.63	0	1	0	0.7060	0
-1	0.29	0	1	0	0.4000	0
-1	0.51	1	0	0	0.6270	1
-1	0.24	0	0	1	0.3770	0
1	0.48	0	1	0	0.5750	1
1	0.18	1	0	0	0.2740	0
1	0.18	1	0	0	0.2030	2
1	0.33	0	1	0	0.3820	2
-1	0.20	0	0	1	0.3480	0
1	0.29	0	0	1	0.3300	2
-1	0.44	0	0	1	0.6300	0
-1	0.65	0	0	1	0.8180	0
-1	0.56	1	0	0	0.6370	2
-1	0.52	0	0	1	0.5840	1
-1	0.29	0	1	0	0.4860	0
-1	0.47	0	1	0	0.5890	1
1	0.68	1	0	0	0.7260	2
1	0.31	0	0	1	0.3600	1
1	0.61	0	1	0	0.6250	2
1	0.19	0	1	0	0.2150	2
1	0.38	0	0	1	0.4300	1
-1	0.26	1	0	0	0.4230	0
1	0.61	0	1	0	0.6740	0
1	0.40	1	0	0	0.4650	1
-1	0.49	1	0	0	0.6520	1
1	0.56	1	0	0	0.6750	0
-1	0.48	0	1	0	0.6600	1
1	0.52	1	0	0	0.5630	2
-1	0.18	1	0	0	0.2980	0
-1	0.56	0	0	1	0.5930	2
-1	0.52	0	1	0	0.6440	1
-1	0.18	0	1	0	0.2860	1
-1	0.58	1	0	0	0.6620	2
-1	0.39	0	1	0	0.5510	1
-1	0.46	1	0	0	0.6290	1
-1	0.40	0	1	0	0.4620	1
-1	0.60	1	0	0	0.7270	2
1	0.36	0	1	0	0.4070	2
1	0.44	1	0	0	0.5230	1
1	0.28	1	0	0	0.3130	2
1	0.54	0	0	1	0.6260	0

Test data:

# employee_test.txt
#
-1	0.51	1	0	0	0.6120	1
-1	0.32	0	1	0	0.4610	1
1	0.55	1	0	0	0.6270	0
1	0.25	0	0	1	0.2620	2
1	0.33	0	0	1	0.3730	2
-1	0.29	0	1	0	0.4620	0
1	0.65	1	0	0	0.7270	0
-1	0.43	0	1	0	0.5140	1
-1	0.54	0	1	0	0.6480	2
1	0.61	0	1	0	0.7270	0
1	0.52	0	1	0	0.6360	0
1	0.30	0	1	0	0.3350	2
1	0.29	1	0	0	0.3140	2
-1	0.47	0	0	1	0.5940	1
1	0.39	0	1	0	0.4780	1
1	0.47	0	0	1	0.5200	1
-1	0.49	1	0	0	0.5860	1
-1	0.63	0	0	1	0.6740	2
-1	0.30	1	0	0	0.3920	0
-1	0.61	0	0	1	0.6960	2
-1	0.47	0	0	1	0.5870	1
1	0.30	0	0	1	0.3450	2
-1	0.51	0	0	1	0.5800	1
-1	0.24	1	0	0	0.3880	1
-1	0.49	1	0	0	0.6450	1
1	0.66	0	0	1	0.7450	0
-1	0.65	1	0	0	0.7690	0
-1	0.46	0	1	0	0.5800	0
-1	0.45	0	0	1	0.5180	1
-1	0.47	1	0	0	0.6360	0
-1	0.29	1	0	0	0.4480	0
-1	0.57	0	0	1	0.6930	2
-1	0.20	1	0	0	0.2870	2
-1	0.35	1	0	0	0.4340	1
-1	0.61	0	0	1	0.6700	2
-1	0.31	0	0	1	0.3730	1
1	0.18	1	0	0	0.2080	2
1	0.26	0	0	1	0.2920	2
-1	0.28	1	0	0	0.3640	2
-1	0.59	0	0	1	0.6940	2
This entry was posted in Keras, Miscellaneous. Bookmark the permalink.