6.14 compute a random subset, can't we use reservoir sampling to construct random set?

#1

my implementation looks something like this in C++

 vector<int> generate_random_distinct_indicies(int k, int n){
 vector<int> result(k);
 iota(result.begin(), result.end(), 0);
 default_random_engine gen(random_device{}());
 for (int i = k; i < n; i++){
     int index_to_replace = uniform_int_distribution<int>{0, i}(gen);
     if (index_to_replace < k){
         result[index_to_replace] = i;
     }
 }
 return result;}

would this be a valid solution? if not, why?

here is my other implementation that mimics 6.11 sample offline data solution

 vector<int> generate_random_distinct_indicies(int k, int n){
 vector<int> result(k);
 default_random_engine gen(random_device{}());
 unordered_map<int, int> mymap; // index to val
 
 for (int i = 0; i < k; i++){
     int index_to_replace = uniform_int_distribution<int>{i, n - 1}(gen);
     if (mymap.find(index_to_replace) != mymap.end()){
         // we picked the same index again
         // result[i] is assigned the correct value
         result[i] = mymap[index_to_replace];
         // update the index again to keep mimicking solution 6.11
         mymap[index_to_replace] = i;
     }else{
         //populate result with currently generated index
         result[i] = index_to_replace;
         // populate mymap with the index we just replaced 
         // to the the current i, that way, if we pick that
         // index again correct value gets assigned in our result vector
         mymap[index_to_replace] = i;
     }
 }
 return result;}

would this also be a valid solution?

0 Likes