Two [or more] containers with same underlying data, but different views on the data

Question

MyClass below represents a data structure that I need to be able to search very fast in two ways. So say I store the MyClass in and std::vector so that similar names in it can be quickly deleted and inserted continuously. But, I also need to be able to sort the contents of MyClass based on anInt. That is where an intrusive container (or Multimap) would sort the contents of a [very likely] unsorted std::vector. Two containers performing two very different duties on the same underlying items. Also, if I deleted the items from the std::vector they would also automatically disappear from the intrusive container.

Here is sort of the idea

#include <vector>
#include <iostream>
#include <vector>
#include <functional>

#include <boost/intrusive/unordered_set.hpp>

namespace bic = boost::intrusive;

std::hash<std::string> hash_fn;

struct MyClass : bic::unordered_set_base_hook<bic::link_mode<bic::auto_unlink>>
{
    std::string name;
    int anInt1;
    mutable bool bIsMarkedToDelete;

    MyClass(std::string name, int i) : name(name), anInt1(i), bIsMarkedToDelete(false) {}

    bool operator==(MyClass const& o) const
    {
        return anInt1 == o.anInt1; //  need the items in the intrusive container to sort on number
    }

    struct hasher
    {
        size_t operator()(MyClass const& o) const
        {
            //return hash_fn(o.name);
            return o.anInt1; //  need the items in the intrusive container to sort on number
        }
    };
};

std::ostream& operator << (std::ostream& out, const MyClass& ac)
{
    out << ac.name << " " << ac.anInt1;

    return out;
}

typedef bic::unordered_set<MyClass, bic::hash<MyClass::hasher>, bic::constant_time_size<false> > HashTable;

int main()
{
    std::vector<MyClass> values
    {
        MyClass { "John",     0 },
        MyClass { "Mike",     0 },
        MyClass { "Dagobart", 25 },
        MyClass { "John",     5 },
        MyClass { "Mike",     25 },
        MyClass { "Dagobart", 26 },
        MyClass { "John",     10 },
        MyClass { "Mike",     25 },
        MyClass { "Dagobart", 27 },
        MyClass { "John",     15 },
        MyClass { "Mike",     27 }
    };

    HashTable::bucket_type buckets[100];
    HashTable hashtable(values.begin(), values.end(), HashTable::bucket_traits(buckets, 100));

    std::cout << "\nContents of std::vector<MyClass> values\n";

    for(auto& e: values)
        std::cout << e << " ";

    std::cout << "\nContents of HashTable hashtable\n";

    for(auto& b : hashtable)
        std::cout << b << '\n';

std::cout << "\nContents of HashTable hashtable\n";

    for(auto& b : hashtable)
        std::cout << b << '\n';

#if 0 // This code won't compile since there is no operator [] for hashtable
    for(int bucket = 0; bucket < 27; bucket++)
    {
        auto hit(hashtable[bucket].rbegin());
        auto hite(hashtable[bucket].rend());

        while (hit != hite)
        {
            MyClass mc = *hit;

            std::cout << mc << " ";

            hit++;
        }

        std::cout << '\n';
    }
#endif // 0
    std::cout << '\n';
    std::cout << "values size first " << values.size() << '\n';
    std::cout << "hash size fist " << hashtable.size() << '\n';

    for(auto& e: values)
        e.bIsMarkedToDelete |= ("Mike" == e.name);

    std::cout << "removing all bIsMarkedToDelete";
    for(auto& e: values)
        if(e.bIsMarkedToDelete)
            std::cout << e << " ";

    std::cout << '\n';

    // Note how easy and performance fast it is to delete items from the std::vector view
    // I need the intrsive view to automatically update as well
    values.erase(
        std::remove_if(std::begin(values), std::end(values), std::mem_fn(&MyClass::bIsMarkedToDelete)),
                       std::end(values));

#if 0 // This code won't compile since there is no operator [] for hashtable
      // If I did this now, it should show the "Mike" itens gone and the 
      /// rest of the items hanging off the same bucket still there.
    for(int bucket = 0; bucket < 27; bucket++)
    {
        auto hit(hashtable[bucket].rbegin());
        auto hite(hashtable[bucket].rend());

        while (hit != hite)
        {
            MyClass mc = *hit;

            std::cout << mc << " ";

            hit++;
        }

        std::cout << '\n';
    }
#endif // 0

    std::cout << "values size now " << values.size() << '\n';
    std::cout << "hash size now " << hashtable.size() << '\n';

    std::cout << "Contents of value after removing elements " << '\n';
    for(auto& e: values)
        std::cout << e << " ";


    std::cout << '\n';

    values.clear();

    std::cout << values.size() << '\n';
    std::cout << hashtable.size() << '\n';

    std::cout << "Done\n";

    int j;
    std::cin >> j;
}

The question still contains the XY-problem. E.g. "they would also automatically disappear from the intrusive container" assumes you'd use Boost Intrusive. In fact, I'd always consider Boost Intrusive an optimization (or a hack to work with legacy code). Using BI is kinda... intrusive though. Seriously, it's complicated and ties up your code. So you use it when you know exactly what you need and how. — sehe

sehe sehe · Accepted Answer · 2015-03-23T09:17:32

You need fast lookups using different indices. This makes me think of Boost MultiIndex.

So say I store the MyClass in and std::vector so that similar names in it can be quickly deleted and inserted continuously

combined with

Also, if I deleted the items from the std::vector they would also automatically disappear

makes it clear you cannot escape the cost of keeping all indices up to date anyways. In which case, Boost Multi Index is as good as it gets.

Here's a sample:

Live On Coliru

#include <iostream>
#include <vector>
#include <boost/tuple/tuple_comparison.hpp>
#include <boost/range/iterator_range.hpp>

#include <boost/multi_index_container.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/random_access_index.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/ordered_index.hpp>

namespace bmi = boost::multi_index;

struct MyClass
{
    std::string name;
    int         anInt1;

    MyClass(std::string name, int i) : name(name), anInt1(i) {}

    //bool operator==(MyClass const& o) const { return boost::tie(name, anInt1) == boost::tie(o.name, o.anInt1); }
    //bool operator< (MyClass const& o) const { return boost::tie(name, anInt1) <  boost::tie(o.name, o.anInt1); }

    friend std::ostream& operator << (std::ostream& out, const MyClass& ac) {
        return out << ac.name << " " << ac.anInt1;
    }
};

using Table = bmi::multi_index_container<MyClass,
      bmi::indexed_by<
        bmi::random_access<bmi::tag<struct as_vector> >,
        bmi::ordered_non_unique<bmi::tag<struct by_number>,
            bmi::member<MyClass, int, &MyClass::anInt1>
        >,
        bmi::hashed_non_unique<bmi::tag<struct by_name>,
            bmi::member<MyClass, std::string, &MyClass::name>
        >
      >
    >;

void alternative_remove_mikes(Table& values);

int main()
{
    Table values {
        { "John",     0 },
        { "Mike",     0 },
        { "Dagobart", 25 },
        { "John",     5 },
        { "Mike",     25 },
        { "Dagobart", 26 },
        { "John",     10 },
        { "Mike",     25 },
        { "Dagobart", 27 },
        { "John",     15 },
        { "Mike",     27 },
    };

    auto& name_idx = values.get<by_name>();

    std::cout << "\nBEFORE: Ordered by number:\n";
    for(auto& e: values.get<by_number>()) 
        std::cout << e << "\n";

    std::cout << "\nBEFORE: O(1) lookup by name:\n";
    for(auto& e : boost::make_iterator_range(name_idx.equal_range("Mike")))
        std::cout << e << '\n';

    std::cout << "removing all Mikes\n";
    name_idx.erase("Mike");
    // alternative_remove_mikes(values);

    std::cout << "\nAFTER: Ordered by number:\n";
    for(auto& e: values.get<by_number>()) 
        std::cout << e << "\n";

    std::cout << "\nAFTER: O(1) lookup by name:\n";
    for(auto& e : boost::make_iterator_range(name_idx.equal_range("Mike")))
        std::cout << e << '\n';

    values.clear();

    std::cout << "Done\n----------------------------\n";
}

If you want to keep the code to remove similar to the code you had (which is not optimal, but hey, just in case):

void alternative_remove_mikes(Table& values) {
    // this uses the same `remove_if` approach together with the `rearrange()` feature of `random_access` index
    std::vector<boost::reference_wrapper<MyClass const> > refs(values.begin(), values.end());

    // remove_if is not good enough since it will leave the removed end unspecified
    auto it = std::partition(refs.begin(), refs.end(), [](MyClass const& mc) { return "Mike" != mc.name; });

    std::cout << "Removing " << std::distance(it, refs.end()) << " items:\n";

    values.rearrange(refs.begin());

    auto newend = values.begin() + std::distance(refs.begin(), it);
    for (auto& e : boost::make_iterator_range(newend, values.end()))
        std::cout << " -- removing " << e << "\n";

    values.erase(newend, values.end());
}

Output:

BEFORE: Ordered by number:
John 0
Mike 0
John 5
John 10
John 15
Dagobart 25
Mike 25
Mike 25
Dagobart 26
Dagobart 27
Mike 27

BEFORE: O(1) lookup by name:
Mike 27
Mike 25
Mike 25
Mike 0
removing all Mikes

AFTER: Ordered by number:
John 0
John 5
John 10
John 15
Dagobart 25
Dagobart 26
Dagobart 27

AFTER: O(1) lookup by name:
Done
----------------------------

Two [or more] containers with same underlying data, but different views on the data

1 Answers