
Commit cbc545d

Author: Samuel Bodin
fix: improve logging + remove catchup (#647)
1 parent 5a48ad3 commit cbc545d

7 files changed: +26 additions, -89 deletions


README.md

Lines changed: 1 addition & 2 deletions
@@ -233,8 +233,7 @@ See [config.js](./config.js):
 - `apiKey`: [Algolia](https://www.algolia.com/) apiKey - **required**
 - `appId`: [Algolia](https://www.algolia.com/) appId - _default `OFCNCOG2CU`_
 - `indexName`: [Algolia](https://www.algolia.com/) indexName - _default `npm-search`_
-- `bootstrapConcurrency`: How many docs to grab from npm registry at once in the bootstrap phase - _default `100`_
-- `replicateConcurrency`: How many changes to grab from npm registry at once in the replicate phase - _default `10`_
+- `bootstrapConcurrency`: How many docs to grab from npm registry at once in the bootstrap phase - _default `25`_
 - `seq`: npm registry first [change sequence](http://docs.couchdb.org/en/2.0.0/json-structure.html#changes-information-for-a-database)
   to start replication. In normal operations you should never have to use this. - _default `0`_
 - `npmRegistryEndpoint`: npm registry endpoint to replicate from - _default `https://replicate.npmjs.com/registry`_
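
For readers skimming the diff, here is a rough sketch of the documented defaults after this change; the object shape is an assumption loosely mirroring `src/config.ts` below, not the actual config.js:

// Illustration only: documented defaults after this commit.
// `replicateConcurrency` is removed; `bootstrapConcurrency` now defaults to 25.
const config = {
  apiKey: '', // required, must be supplied at runtime
  appId: 'OFCNCOG2CU',
  indexName: 'npm-search',
  bootstrapConcurrency: 25,
  seq: 0, // the README documents 0; src/config.ts below starts from undefined
  npmRegistryEndpoint: 'https://replicate.npmjs.com/registry',
};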

src/bootstrap.ts

Lines changed: 1 addition & 3 deletions
@@ -107,12 +107,10 @@ async function loop(

   const newLastId = res.rows[res.rows.length - 1].id;

-  const saved = await saveDocs({ docs: res.rows, index: bootstrapIndex });
+  await saveDocs({ docs: res.rows, index: bootstrapIndex });
   await stateManager.save({
     bootstrapLastId: newLastId,
   });
-  log.info(` - saved ${saved} packages`);
-
   await logProgress(res.offset, res.rows.length);

   datadog.timing('loop', Date.now() - start);

src/config.ts

Lines changed: 0 additions & 1 deletion
@@ -163,7 +163,6 @@ export const config = {
   apiKey: '',
   indexName: 'npm-search',
   bootstrapIndexName: 'npm-search-bootstrap',
-  replicateConcurrency: 1,
   bootstrapConcurrency: 25,
   timeToRedoBootstrap: ms('2 weeks'),
   seq: undefined,

src/jsDelivr/__test__/index.test.ts

Lines changed: 5 additions & 4 deletions
@@ -6,6 +6,7 @@ jest.mock('../../utils/log', () => {
     log: {
       info: jest.fn(),
       warn: jest.fn(),
+      error: jest.fn(),
     },
   };
 });
@@ -85,8 +86,8 @@ describe('files', () => {
       version: '3.33.0',
     });
     expect(files).toEqual([]);
-    expect(log.warn.mock.calls[0][0].message).toMatchInlineSnapshot(
-      `"Response code 404 (Not Found)"`
+    expect(log.error.mock.calls[0][0]).toEqual(
+      'Failed to fetch https://data.jsdelivr.com/v1/package/npm/[email protected]/flat'
     );
   });
 });
@@ -111,8 +112,8 @@ describe('files', () => {
       },
     ]);
     expect(files).toMatchSnapshot();
-    expect(log.warn.mock.calls[0][0].message).toMatchInlineSnapshot(
-      `"Response code 404 (Not Found)"`
+    expect(log.error.mock.calls[0][0]).toEqual(
+      'Failed to fetch https://data.jsdelivr.com/v1/package/npm/[email protected]/flat'
     );
   });
 });

src/jsDelivr/index.ts

Lines changed: 6 additions & 8 deletions
@@ -26,7 +26,7 @@ export async function loadHits(): Promise<void> {
       hits.set(pkg.name, pkg.hits);
     });
   } catch (e) {
-    log.error(e);
+    log.error('Failed to fetch', e);
   }

   datadog.timing('jsdelivr.loadHits', Date.now() - start);
@@ -85,16 +85,14 @@ export async function getFilesList(
   }

   let files: File[] = [];
+  const url = `${config.jsDelivrPackageEndpoint}/${pkg.name}@${pkg.version}/flat`;
   try {
-    const response = await request<{ default: string; files: File[] }>(
-      `${config.jsDelivrPackageEndpoint}/${pkg.name}@${pkg.version}/flat`,
-      {
-        responseType: 'json',
-      }
-    );
+    const response = await request<{ default: string; files: File[] }>(url, {
+      responseType: 'json',
+    });
     files = response.body.files;
   } catch (e) {
-    log.warn(e);
+    log.error(`Failed to fetch ${url}`, e);
   }

   datadog.timing('jsdelivr.getFilesList', Date.now() - start);
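
The getFilesList change above follows one pattern: build the URL once so the request and the failure log agree on what was fetched, and log at error level with context instead of passing the bare exception to log.warn. A standalone sketch of that pattern, using got as a stand-in for the repo's request helper (the File shape is trimmed to what the sketch needs):

import got from 'got';

interface File {
  name: string; // other fields of the jsDelivr flat response omitted here
}

// Sketch only: compute the URL once, then reference it in the error log.
async function fetchFlatFileList(
  endpoint: string,
  pkg: { name: string; version: string }
): Promise<File[]> {
  const url = `${endpoint}/${pkg.name}@${pkg.version}/flat`;
  try {
    const response = await got<{ default: string; files: File[] }>(url, {
      responseType: 'json',
    });
    return response.body.files;
  } catch (e) {
    console.error(`Failed to fetch ${url}`, e);
    return [];
  }
}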

src/saveDocs.ts

Lines changed: 6 additions & 2 deletions
@@ -41,17 +41,21 @@ export default async function saveDocs({
     log.info('🔍 No pkgs found in response.');
     return Promise.resolve(0);
   }
-
-  log.info(`👔 Saving... ${names.length} packages`, names);
+  log.info(' => ', names);
+  log.info(' Adding metadata...');

   let start2 = Date.now();
   const pkgs = await addMetaData(rawPkgs);
   datadog.timing('saveDocs.addMetaData', Date.now() - start2);

+  log.info(` Saving...`);
+
   start2 = Date.now();
   await index.saveObjects(pkgs);
   datadog.timing('saveDocs.saveObjects', Date.now() - start2);

+  log.info(` Saved`);
+
   datadog.timing('saveDocs', Date.now() - start);
   return pkgs.length;
 }
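
The saveDocs change swaps the single "👔 Saving..." line for a step-by-step trail (package names, metadata, save, done). A minimal sketch of that flow, with addMetaData and the Algolia index passed in as stand-ins rather than the real imports:

interface RawPkg {
  name: string;
}

// Sketch of the new logging sequence; the real function also reports timings
// to Datadog around each step.
async function saveDocsSketch(
  rawPkgs: RawPkg[],
  index: { saveObjects: (objects: object[]) => Promise<unknown> },
  addMetaData: (pkgs: RawPkg[]) => Promise<object[]>
): Promise<number> {
  const names = rawPkgs.map((pkg) => pkg.name);
  console.info(' => ', names);
  console.info(' Adding metadata...');
  const pkgs = await addMetaData(rawPkgs);

  console.info(' Saving...');
  await index.saveObjects(pkgs);
  console.info(' Saved');

  return pkgs.length;
}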

src/watch.ts

Lines changed: 7 additions & 69 deletions
@@ -1,28 +1,20 @@
 import type { SearchIndex } from 'algoliasearch';
 import type { QueueObject } from 'async';
 import { queue } from 'async';
-import ms from 'ms';
 import type { DatabaseChangesResultItem, DocumentLookupFailure } from 'nano';

 import type { StateManager } from './StateManager';
-import { config } from './config';
 import * as npm from './npm';
 import saveDocs from './saveDocs';
 import { datadog } from './utils/datadog';
 import { log } from './utils/log';
 import * as sentry from './utils/sentry';

-let loopStart = Date.now();
 let totalSequence: number; // Cached npmInfo.seq
 let changesConsumer: QueueObject<DatabaseChangesResultItem>;

 /**
- * Run watch and catchup.
- *
- * --- Catchup ?
- * If the bootstrap is long or the process has been stopped long enough,
- * we are lagging behind few changes.
- * Catchup() will paginate through changes that we have missed.
+ * Run watch.
  *
  * --- Watch ?
  * Watch is "Long Polled. This mode is not paginated and the event system in CouchDB send
@@ -43,12 +35,6 @@ let changesConsumer: QueueObject<DatabaseChangesResultItem>;
  * until an other package is updated.
  * It will never be up to date because he receive event at the same pace
  * as they arrive in listener A, even if it's not the same package.
- *
- *
- * --- We could use catchup with a timeout between poll then?
- * Yes !
- * When we are catched up, we could await between poll and we will receive N changes.
- * But long-polling is more efficient in term of bandwidth and more reactive.
  */
 async function run(
   stateManager: StateManager,
@@ -60,54 +46,11 @@ async function run(

   changesConsumer = createChangeConsumer(stateManager, mainIndex);

-  await catchup(stateManager);
-
-  log.info('🚀 Index is up to date, watch mode activated');
-
   await watch(stateManager);

   log.info('🚀 watch is done');
 }

-/**
- * Loop through all changes that may have been missed.
- */
-async function catchup(stateManager: StateManager): Promise<void> {
-  let hasCaughtUp: boolean = false;
-
-  while (!hasCaughtUp) {
-    loopStart = Date.now();
-
-    try {
-      const npmInfo = await npm.getInfo();
-      totalSequence = npmInfo.seq;
-
-      const { seq } = await stateManager.get();
-
-      log.info('🚀 Catchup: continue since sequence [%d]', seq);
-
-      // Get one chunk of changes from registry
-      const changes = await npm.getChanges({
-        since: seq,
-        limit: config.replicateConcurrency,
-        include_docs: true,
-      });
-
-      for (const change of changes.results) {
-        changesConsumer.push(change);
-      }
-      await changesConsumer.drain();
-
-      const newState = await stateManager.get();
-      if (newState.seq! >= totalSequence) {
-        hasCaughtUp = true;
-      }
-    } catch (err) {
-      sentry.report(err);
-    }
-  }
-}
-
 /**
  * Active synchronous mode with Registry.
  * Changes are polled with a keep-alived connection.
@@ -144,7 +87,7 @@ async function watch(stateManager: StateManager): Promise<true> {
 }

 /**
- * Process changes.
+ * Process changes in order.
  */
 async function loop(
   mainIndex: SearchIndex,
@@ -180,20 +123,15 @@ async function loop(
 }

 /**
- * Log our process through catchup/watch.
+ * Log our process through watch.
  *
  */
-function logProgress(seq: number, nbChanges: number): void {
-  const ratePerSecond = nbChanges / ((Date.now() - loopStart) / 1000);
-  const remaining = ((totalSequence - seq) / ratePerSecond) * 1000 || 0;
-
+function logProgress(seq: number): void {
   log.info(
-    `🚀 Synced %d/%d changes (%d%), current rate: %d changes/s (%s remaining)`,
+    `🚀 Synced %d/%d changes (%d%)`,
     seq,
     totalSequence,
-    Math.floor((Math.max(seq, 1) / totalSequence) * 100),
-    Math.round(ratePerSecond),
-    ms(remaining)
+    Math.floor((Math.max(seq, 1) / totalSequence) * 100)
   );
 }

@@ -220,7 +158,7 @@ function createChangeConsumer(
       await stateManager.save({
         seq,
       });
-      logProgress(seq, 1);
+      logProgress(seq);
     } catch (err) {
       sentry.report(err);
     }
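
With catchup gone, watch.ts relies entirely on the long-polled changes feed plus an async queue with concurrency 1, so each change is handled strictly in arrival order before the saved sequence moves forward. A sketch of that consumer shape, using the same async queue the file imports; handleChange and changesFeed are stand-ins for the real saveDocs/stateManager work and the CouchDB listener, not exports of this repo:

import { queue } from 'async';
import type { QueueObject } from 'async';

interface Change {
  id: string;
  seq: number;
}

// Concurrency 1: one change at a time, in the order the feed delivers them.
function createChangesConsumerSketch(
  handleChange: (change: Change) => Promise<void>
): QueueObject<Change> {
  return queue<Change>(async (change) => {
    try {
      await handleChange(change); // e.g. saveDocs + stateManager.save({ seq })
    } catch (err) {
      console.error('Failed to process change', change.id, err);
    }
  }, 1);
}

// Usage sketch:
// const consumer = createChangesConsumerSketch(async (change) => { /* ... */ });
// changesFeed.on('change', (change) => consumer.push(change));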
