# Sampling strategies for drawing one outcome from a score/probability vector.
#
# Options:
#   0 = No modification, input must be a probability distribution (default)
#   1 = Greedy: return the outcome of maximum probability (softmax at T=0)
#   2 = Softmax with temperature (temperature of 1.0 is ordinary softmax)
#   3 = Top-K: choose among the K most probable options (softmax applied first)
#   4 = Nucleus (Top-p): choose among the smallest set of top options whose
#       cumulative probability just exceeds p (so in general the kept mass is
#       slightly more than p)
import math
import numpy as np
from numpy.random import choice
from scipy.special import softmax


def sample_choice(outcomes, distribution, option=3, temperature=0.3, K=5, p=0.25):
    """Sample one element of ``outcomes`` according to ``distribution``.

    Parameters
    ----------
    outcomes : array-like
        Candidate values, same length as ``distribution``.
    distribution : array-like
        Raw scores/logits (options 1-4) or an actual probability
        distribution (option 0).
    option : int
        Sampling strategy; see the module header for the five choices.
    temperature : float
        Softmax temperature for option 2 (lower = greedier).
    K : int
        Number of top candidates kept for option 3.
    p : float
        Cumulative-probability cutoff for option 4.

    Returns
    -------
    One element of ``outcomes``.

    Raises
    ------
    ValueError
        If ``option`` is not in {0, 1, 2, 3, 4}.
    """
    outcomes = np.asarray(outcomes)
    distribution = np.asarray(distribution, dtype=float)

    if option == 0:
        # Renormalize defensively so small numeric drift in the caller's
        # distribution does not trip numpy.random.choice's sum check.
        return choice(a=outcomes, p=distribution / distribution.sum())
    if option == 1:
        # Greedy: deterministic argmax. (The original also built an unused
        # one-hot vector here; that dead code is removed.)
        return outcomes[np.argmax(distribution)]
    if option == 2:
        # scipy's softmax subtracts the max internally, so large scores no
        # longer overflow the way the old math.exp(x / temperature) did.
        return choice(a=outcomes, p=softmax(distribution / temperature))
    if option == 3:
        probs = softmax(distribution)
        top = np.argsort(probs)[-K:]  # indices of the K largest probabilities
        return choice(a=outcomes[top], p=probs[top] / probs[top].sum())
    if option == 4:
        probs = softmax(distribution)
        order = np.argsort(probs)  # ascending; walk from the top downward
        total = 0.0
        for C in range(len(probs) - 1, -1, -1):
            total += probs[order[C]]
            if total > p:
                break
        keep = order[C:]  # the nucleus: top entries whose mass just exceeds p
        return choice(a=outcomes[keep], p=probs[keep] / probs[keep].sum())
    raise ValueError(f"unknown sampling option {option!r}")