Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions src/algolia/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import algoliasearch from 'algoliasearch';

/**
 * Build an Algolia client and open one index on it.
 *
 * @param {string} appId Algolia application id, forwarded to `algoliasearch`.
 * @param {string} apiKey Algolia API key; must be truthy.
 * @param {string} indexName Name handed to `client.initIndex`.
 * @returns {{index: object, client: object}} The opened index and the client that owns it.
 * @throws {Error} When `apiKey` is falsy.
 */
function createClient(appId, apiKey, indexName) {
  // Guard first: nothing is instantiated without an API key.
  if (!apiKey) {
    throw new Error(
      'npm-search: Please provide the `apiKey` env variable and restart'
    );
  }

  const client = algoliasearch(appId, apiKey);
  const index = client.initIndex(indexName);

  return { index, client };
}

/**
 * Prepare Algolia for indexing.
 *
 * Opens the main and bootstrap indices named in `config`, then calls an empty
 * `setSettings()` on each of them.
 *
 * @param {object} config Reads `appId`, `apiKey`, `indexName` and `bootstrapIndexName`.
 * @returns {Promise<{client: object, mainIndex: object, bootstrapIndex: object}>}
 *   The client created for the main index, plus both indices.
 * @throws {Error} Propagates the error from `createClient` when `config.apiKey` is falsy.
 */
async function prepare(config) {
  // Get main index and bootstrap the Algolia client
  const { index: mainIndex, client } = createClient(
    config.appId,
    config.apiKey,
    config.indexName
  );
  const { index: bootstrapIndex } = createClient(
    config.appId,
    config.apiKey,
    config.bootstrapIndexName
  );

  // Ensure indices exist by calling an empty setSettings().
  // The two calls target different indices, so they are issued together
  // instead of one after the other.
  // NOTE(review): only the setSettings() promises are awaited here, not the
  // resulting tasks — confirm that is enough for the callers' needs.
  await Promise.all([
    mainIndex.setSettings({}),
    bootstrapIndex.setSettings({}),
  ]);

  return {
    client,
    mainIndex,
    bootstrapIndex,
  };
}

/**
 * Push the default settings, synonyms and rules from `config` to an index.
 *
 * Calls, in order: `setSettings`, `batchSynonyms` (replacing existing
 * synonyms) and `batchRules` (replacing existing rules), then waits on the
 * task returned by `batchRules`.
 *
 * @param {AlgoliasearchIndex} index Index to configure.
 * @param {object} config Reads `indexSettings`, `indexSynonyms` and `indexRules`.
 * @returns {Promise<void>} Settles once `index.waitTask` has settled.
 * @throws {TypeError} When `config` is `null` or `undefined`.
 */
async function putDefaultSettings(index, config) {
  // Without this guard a forgotten second argument surfaces as an opaque
  // "cannot read properties of undefined" TypeError.
  if (config == null) {
    throw new TypeError(
      'npm-search: putDefaultSettings() requires a `config` object as its second argument'
    );
  }

  await index.setSettings(config.indexSettings);

  await index.batchSynonyms(config.indexSynonyms, {
    replaceExistingSynonyms: true,
  });
  const { taskID } = await index.batchRules(config.indexRules, {
    replaceExistingRules: true,
  });

  // Only the task of the last operation is waited on.
  await index.waitTask(taskID);
}

export { prepare, putDefaultSettings };
15 changes: 0 additions & 15 deletions src/createAlgoliaIndex.js

This file was deleted.

59 changes: 29 additions & 30 deletions src/index.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import ms from 'ms';
import cargo from 'async/cargo.js';
import queue from 'async/queue.js';

import createStateManager from './createStateManager.js';
import saveDocs from './saveDocs.js';
import createAlgoliaIndex from './createAlgoliaIndex.js';
import config from './config.js';
import * as algolia from './algolia/index.js';
import * as npm from './npm/index.js';
import log from './log.js';
import datadog from './datadog.js';
Expand All @@ -14,9 +15,7 @@ log.info('🗿 npm ↔️ Algolia replication starts ⛷ 🐌 🛰');

let loopStart = Date.now();

const { index: mainIndex, client } = createAlgoliaIndex(config.indexName);
const { index: bootstrapIndex } = createAlgoliaIndex(config.bootstrapIndexName);
const stateManager = createStateManager(mainIndex);
let stateManager;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this just to have the variable global? I'd rather pass it around than have a mutable (maybe undefined) variable :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just to have the variable global for the moment; I'll change that in another PR.
I can do it now if you want.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would make sense here to me :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did it, plus added some test 🙆‍♂

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


/**
* Main process
Expand All @@ -28,43 +27,43 @@ async function main() {
let start = Date.now();
// first we make sure the bootstrap index has the correct settings
log.info('💪 Setting up Algolia');
await setSettings(bootstrapIndex);
const {
client: algoliaClient,
mainIndex,
bootstrapIndex,
} = await algolia.prepare(config);
datadog.timing('main.init_algolia', Date.now() - start);

// Create State Manager that holds progression of indexing
stateManager = createStateManager(mainIndex);

// then we run the bootstrap
// after a bootstrap is done, it's moved to main (with settings)
// if it was already finished, we will set the settings on the main index
start = Date.now();
log.info('⛷ Bootstraping');
await bootstrap(await stateManager.check());
await bootstrap(
await stateManager.check(),
algoliaClient,
mainIndex,
bootstrapIndex
);
datadog.timing('main.bootsrap', Date.now() - start);

// then we figure out which updates we missed since
// the last time main index was updated
start = Date.now();
log.info('🚀 Launching Replicate');
await replicate(await stateManager.get());
await replicate(await stateManager.get(), mainIndex);
datadog.timing('main.replicate', Date.now() - start);

// then we watch 👀 for all changes happening in the ecosystem
log.info('👀 Watching...');
return watch(await stateManager.get());
return watch(await stateManager.get(), mainIndex);
}

main().catch(error);

async function setSettings(index) {
await index.setSettings(config.indexSettings);
await index.batchSynonyms(config.indexSynonyms, {
replaceExistingSynonyms: true,
});
const { taskID } = await index.batchRules(config.indexRules, {
replaceExistingRules: true,
});

return index.waitTask(taskID);
}

async function logUpdateProgress(seq, nbChanges, emoji) {
const npmInfo = await npm.getInfo();
const ratePerSecond = nbChanges / ((Date.now() - loopStart) / 1000);
Expand Down Expand Up @@ -95,13 +94,13 @@ async function logBootstrapProgress(offset, nbDocs) {
loopStart = Date.now();
}

async function bootstrap(state) {
async function bootstrap(state, algoliaClient, mainIndex, bootstrapIndex) {
await stateManager.save({
stage: 'bootstrap',
});

if (state.seq > 0 && state.bootstrapDone === true) {
await setSettings(mainIndex);
await algolia.putDefaultSettings(mainIndex);
log.info('⛷ Bootstrap: done');
return state;
}
Expand All @@ -114,7 +113,7 @@ async function bootstrap(state) {
log.info('⛷ Bootstrap: starting from the first doc');
// first time this launches, we need to remember the last seq our bootstrap can trust
await stateManager.save({ seq });
await setSettings(bootstrapIndex);
await algolia.putDefaultSettings(bootstrapIndex);
} else {
log.info('⛷ Bootstrap: starting at doc %s', state.bootstrapLastId);
}
Expand All @@ -125,7 +124,7 @@ async function bootstrap(state) {

let lastProcessedId = state.bootstrapLastId;
while (lastProcessedId !== null) {
lastProcessedId = await bootstrapLoop(lastProcessedId);
lastProcessedId = await bootstrapLoop(lastProcessedId, bootstrapIndex);
}

log.info('-----');
Expand All @@ -135,15 +134,15 @@ async function bootstrap(state) {
bootstrapLastDone: Date.now(),
});

return await moveToProduction();
return await moveToProduction(algoliaClient);
}

/**
* Execute one loop for bootstrap,
* Fetch N packages from `lastId`, process and save them to Algolia
* @param {string} lastId
*/
async function bootstrapLoop(lastId) {
async function bootstrapLoop(lastId, bootstrapIndex) {
const start = Date.now();
log.info('loop()', '::', lastId);

Expand Down Expand Up @@ -181,16 +180,16 @@ async function bootstrapLoop(lastId) {
return newLastId;
}

async function moveToProduction() {
async function moveToProduction(algoliaClient) {
log.info('🚚 starting move to production');

const currentState = await stateManager.get();
await client.copyIndex(config.bootstrapIndexName, config.indexName);
await algoliaClient.copyIndex(config.bootstrapIndexName, config.indexName);

await stateManager.save(currentState);
}

async function replicate({ seq }) {
async function replicate({ seq }, mainIndex) {
log.info(
'🐌 Replicate: Asking for %d changes since sequence %d',
config.replicateConcurrency,
Expand Down Expand Up @@ -256,7 +255,7 @@ async function replicate({ seq }) {
});
}

async function watch({ seq }) {
async function watch({ seq }, mainIndex) {
log.info(
`🛰 Watch: 👍 We are in sync (or almost). Will now be 🔭 watching for registry updates, since ${seq}`
);
Expand Down