Source: workloads/bestbuy_analytics.js

/**
 * @file
 * Measure performance of analytics workloads on the
 * {@link https://bestbuyapis.github.io/api-documentation/#overview|BestBuy Developer API data}.
 * The workload size can be scaled with the testScale variable.  When set to testScale=4, we should
 * see CPU bound behavior with our standard-sized instances and see IO-bound behavior when running
 * on small instances with only 4gb of RAM.
 *
 * With enableColumnstore = "True", a columnstore index will be created before running queries.
 *
 * Results are reported as ops / second.
 *
 * ### *Pre-requisite*
 * The dataset must be installed on the target cluster before running the test. The data can be
 * downloaded from
 * {@link
 * https://s3-us-west-2.amazonaws.com/dsi-donot-remove/AggPerformance/bestbuyproducts.bson.gz|here}
 * and installed using mongorestore (mongorestore --gzip --archive=bestbuyproducts.bson.gz).
 *
 * ### *Setup*
 * None
 *
 * ### *Owning-team*
 * mongodb/product-query
 *
 * ### *Keywords*
 * columnstore, analytics
 *
 * @module workloads/bestbuy_analytics
 */

(function() {
"use strict";

load("utils/exec_helpers.js");  // For 'ExecHelpers'.

// The number of seconds to use when measuring throughput with
// 'ExecHelpers.measurePerformance()'. This test is designed mainly to be run on low-memory
// instances (4GB RAM).
var TEST_SECONDS = 2 * 60;

// Handles to the database and the collection that the aggregation pipelines are run against.
var testDb = db.getSiblingDB(testDbName);
var testColl = testDb[testCollName];

/**
 * Measures one aggregation pipeline against 'testColl' and reports its mean throughput.
 *
 * The pipeline is handed to 'ExecHelpers.measurePerformance()' wrapped in a callback that runs it
 * and drains the resulting cursor with 'itcount()'. The 'meanThroughput' of the returned object is
 * then passed to 'reportThroughput()' with a thread count of 1.
 *
 * @param {String} testName - The name the result is reported under.
 * @param {Object[]} pipeline - The aggregation pipeline to measure.
 */
function measurePerformanceUsingAgg(testName, pipeline) {
    var runPipelineOnce = function() {
        testColl.aggregate(pipeline).itcount();
    };
    var stats = ExecHelpers.measurePerformance(runPipelineOnce, TEST_SECONDS);
    reportThroughput(testName, stats.meanThroughput, {nThread: 1});
}

// Returns a new '$group' stage that counts the documents seen for each distinct 'type' value.
function countPerType() {
    return {$group: {_id: "$type", count: {$sum: 1}}};
}

// Returns a new '$group' stage that reduces its input to a single average of 'salePrice'.
function averageSalePrice() {
    return {$group: {_id: null, avgPrice: {$avg: "$salePrice"}}};
}

// Returns new predicates on 'salePrice', 'type' and 'digital' shared by the "match_*" tests.
function softwarePricePredicates() {
    return [
        {salePrice: {$gt: 10, $lt: 100}},
        {type: {$eq: "Software"}},
        {digital: {$eq: false}}
    ];
}

// Returns the new leading stages shared by the "sort_limit_*" tests: sort by descending
// 'salePrice', then keep the first ten documents.
function topTenBySalePrice() {
    return [{$sort: {salePrice: -1}}, {$limit: 10}];
}

// Each entry pairs the name a result is reported under with the aggregation pipeline to measure.
var tests = [
    {name: "1_col", pipeline: [countPerType()]},
    // The following cannot be satisfied by a traditional index, due to SERVER-12869.
    {name: "1_col_exists", pipeline: [{$match: {type: {$exists: true}}}, countPerType()]},
    {name: "1_col_nomatch_scalar", pipeline: [{$match: {type: "asdf"}}, countPerType()]},
    {
        name: "2_col_nomatch_array",
        pipeline: [{$match: {"categoryPath.name": "asdf"}}, countPerType()]
    },
    {
        name: "2_col_selective",
        pipeline: [{$match: {"categoryPath.name": "Pop", type: "Movie"}}, countPerType()]
    },
    {
        name: "2_col_unselective",
        pipeline: [{$match: {"categoryPath.name": "Pop", type: "Music"}}, countPerType()]
    },
    {
        name: "group_2_col",
        pipeline: [
            {
                $group: {
                    _id: {classId: "$classId", subclassId: "$subclassId"},
                    class: {$first: "$class"},
                    subclass: {$first: "$subclass"},
                    count: {$sum: 1}
                }
            }
        ]
    },
    // Simple group with complex operation on each doc.
    // Counts the products updated in each hour of the day.
    {
        name: "group_hour_in_date",
        pipeline: [
            {
                $group: {
                    _id: {
                        $hour: {
                            date: {
                                $dateFromString: {
                                    dateString: "$itemUpdateDate",
                                    format: "%Y-%m-%dT%H:%M:%S",
                                    timezone: "UTC"
                                }
                            },
                            timezone: "GMT"
                        }
                    },
                    count: {$sum: 1}
                }
            }
        ]
    },
    // Match on several fields.
    // Traditional indexes in the original Best Buy dump would help with this one.
    {
        name: "match_3_column",
        pipeline: [{$match: {$and: softwarePricePredicates()}}, averageSalePrice()]
    },
    {
        name: "match_10_column",
        pipeline: [
            {
                $match: {
                    $and: [
                        {sku: {$gt: 10000000}},
                        {productId: {$gt: 1000}},
                        {new: {$eq: false}},
                        {active: {$eq: true}},
                        {advertisedPriceRestriction: {$eq: false}},
                        {freeShipping: {$eq: false}},
                        {onlineAvailability: {$eq: true}}
                    ].concat(softwarePricePredicates())
                }
            },
            averageSalePrice()
        ]
    },
    {
        name: "sort_limit_project_4",
        pipeline: topTenBySalePrice().concat(
            [{$project: {_id: 0, name: 1, type: 1, department: 1, salePrice: 1}}])
    },
    {
        name: "sort_limit_project_10",
        pipeline: topTenBySalePrice().concat([{
            $project: {
                _id: 0,
                sku: 1,
                productId: 1,
                new: 1,
                active: 1,
                advertisedPriceRestriction: 1,
                freeShipping: 1,
                onlineAvailability: 1,
                salePrice: 1,
                type: 1,
                digital: 1
            }
        }])
    },
    {name: "sort_limit_full_doc", pipeline: topTenBySalePrice()}
];

// Measure and report each pipeline in turn, in the order the entries appear in 'tests'.
tests.forEach(function(testCase) {
    measurePerformanceUsingAgg(testCase.name, testCase.pipeline);
});
}());