Apply sampling rates to Honeycomb events

Oops, we get a _lot_ of outfit image requests, and it's pushing the limits of our free Honeycomb plan! But I don't really need all that much detail, because there are so many of them. So, here we apply sampling! `api/outfitImage` is getting a 1/10 rate, and for GraphQL, `ApiOutfitImage` is getting 1/10, and `SearchPanel` is getting 1/5. I had to add an `addTraceContext` call, to make all the child events aware of which operation they're being called in, too! I haven't actually tested that this is working-working, just that the endpoints still return good data. We'll see how it shakes out in prod! But I did add `console.log(sampleRate, shouldSample, data);` to the `samplerHook` briefly, to see the data flow through, and I reloaded a `SearchPanel` request a few times and observed a plausible sampling rate of roughly 20%.
2021-05-26 18:50:19 -07:00 · 2021-05-26 18:50:19 -07:00 · 7f0a450480
commit 7f0a450480
parent e829cc5525
3 changed files with 38 additions and 0 deletions
--- a/api/graphql.js
+++ b/api/graphql.js
@ -5,14 +5,48 @@ const beeline = require("honeycomb-beeline")({
      ? "Dress to Impress (2020)"
      : "Dress to Impress (2020, dev)",
  serviceName: "impress-2020-gql-server",
+  samplerHook,
 });

 const { ApolloServer } = require("../src/server/lib/apollo-server-vercel");
 const { config } = require("../src/server");
+const crypto = require("crypto");

 const server = new ApolloServer(config);
 const serverHandler = server.createHandler();

+// Honeycomb sample rates per GraphQL operation, keyed by the client-defined
+// query name. A rate of N means we save 1 out of every N and ignore the rest.
+// Operations not listed here fall back to a rate of 1 in `samplerHook` (we
+// save all their events); only especially heavy-load operations are listed!
+const OPERATION_SAMPLE_RATES = {
+  ApiOutfitImage: 10, // save 1 out of every 10, ignore the others
+  SearchPanel: 5, // save 1 out of every 5, ignore the others
+};
+function samplerHook(data) {
+  // Beeline sampler hook: pick the rate for this event's `app.operation_name`
+  // from the table above. Unlisted or missing names default to 1 (keep all).
+  let sampleRate = OPERATION_SAMPLE_RATES[data["app.operation_name"]] || 1;
+
+  // Decide with `deterministicSampler`, keyed on the trace ID rather than on
+  // this individual event. This might be a child event of a higher-level
+  // trace, and we want to make sure that we always return all child events of
+  // traces we've sampled, and no child events of traces we haven't. Hashing
+  // the trace ID gives the same verdict for every event that carries that ID.
+  // NOTE(review): assumes `trace.trace_id` is always set on `data` — confirm.
+  // This strategy is outlined in: https://docs.honeycomb.io/getting-data-in/javascript/beeline-nodejs/#sampling-events.
+  const shouldSample = deterministicSampler(data["trace.trace_id"], sampleRate);
+
+  return { shouldSample, sampleRate }; // the keep/drop verdict plus the rate it was made at
+}
+function deterministicSampler(traceId, sampleRate) {
+  // Hashes the trace ID, so the same ID and rate always give the same answer. Copied from https://docs.honeycomb.io/getting-data-in/javascript/beeline-nodejs/#sampling-events
+  const MAX_UINT32 = Math.pow(2, 32) - 1; // largest unsigned 32-bit integer
+  const sum = crypto.createHash("sha1").update(traceId).digest(); // SHA-1 digest of the trace ID, as a Buffer
+  const upperBound = (MAX_UINT32 / sampleRate) >>> 0; // 1/sampleRate of the uint32 range; `>>> 0` drops the fraction
+  return sum.readUInt32BE(0) <= upperBound; // true when the digest's first 4 bytes (big-endian) are within the bound
+}
+
 async function handle(req, res) {
  await serverHandler(req, res);

--- a/api/outfitImage.js
+++ b/api/outfitImage.js
@ -29,6 +29,7 @@ const beeline = require("honeycomb-beeline")({
      ? "Dress to Impress (2020)"
      : "Dress to Impress (2020, dev)",
  serviceName: "impress-2020-gql-server",
+  sampleRate: 10,
 });

 import fetch from "node-fetch";
--- a/src/server/lib/beeline-graphql.js
+++ b/src/server/lib/beeline-graphql.js
@ -9,6 +9,9 @@ const beelinePlugin = {
          name: operationName,
          operation_name: operationName,
        });
+        beeline.addTraceContext({
+          operation_name: operationName,
+        });
      },
      willSendResponse() {
        beeline.finishTrace(trace);