# Sampling strategies for drawing one outcome from a score/probability vector.
#
# Options:
#   0 = No modification, input must be a probability distribution (default)
#   1 = Greedy: return the outcome of maximum probability (softmax at T=0)
#   2 = Softmax with temperature (temperature of 1.0 is ordinary softmax)
#   3 = Top-K: choose among the K most probable options (softmax applied first)
#   4 = Nucleus (Top-p): choose among the smallest set of top options whose
#       cumulative probability just exceeds p (so in general the kept mass is
#       slightly more than p)
import math
import numpy as np
from numpy.random import choice
from scipy.special import softmax


def sample_choice(outcomes, distribution, option=3, temperature=0.3, K=5, p=0.25):
    """Sample one element of ``outcomes`` according to ``distribution``.

    Parameters
    ----------
    outcomes : array-like
        Candidate values, same length as ``distribution``.
    distribution : array-like
        Raw scores/logits (options 1-4) or an actual probability
        distribution (option 0).
    option : int
        Sampling strategy; see the module header for the five choices.
    temperature : float
        Softmax temperature for option 2 (lower = greedier).
    K : int
        Number of top candidates kept for option 3.
    p : float
        Cumulative-probability cutoff for option 4.

    Returns
    -------
    One element of ``outcomes``.

    Raises
    ------
    ValueError
        If ``option`` is not in {0, 1, 2, 3, 4}.
    """
    outcomes = np.asarray(outcomes)
    distribution = np.asarray(distribution, dtype=float)

    if option == 0:
        # Renormalize defensively so small numeric drift in the caller's
        # distribution does not trip numpy.random.choice's sum check.
        return choice(a=outcomes, p=distribution / distribution.sum())
    if option == 1:
        # Greedy: deterministic argmax. (The original also built an unused
        # one-hot vector here; that dead code is removed.)
        return outcomes[np.argmax(distribution)]
    if option == 2:
        # scipy's softmax subtracts the max internally, so large scores no
        # longer overflow the way the old math.exp(x / temperature) did.
        return choice(a=outcomes, p=softmax(distribution / temperature))
    if option == 3:
        probs = softmax(distribution)
        top = np.argsort(probs)[-K:]  # indices of the K largest probabilities
        return choice(a=outcomes[top], p=probs[top] / probs[top].sum())
    if option == 4:
        probs = softmax(distribution)
        order = np.argsort(probs)  # ascending; walk from the top downward
        total = 0.0
        for C in range(len(probs) - 1, -1, -1):
            total += probs[order[C]]
            if total > p:
                break
        keep = order[C:]  # the nucleus: top entries whose mass just exceeds p
        return choice(a=outcomes[keep], p=probs[keep] / probs[keep].sum())
    raise ValueError(f"unknown sampling option {option!r}")