Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use header-generator to order headers #36

Merged
merged 1 commit into from
Aug 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
],
"dependencies": {
"got-cjs": "12.0.0-beta.3",
"header-generator": "^1.0.0",
"header-generator": "^1.1.0-beta.1",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should be careful with depending on non stable things, we already got burned recently with a beta of our own package (browser-pool), many actors are not using lock files and would use the latest beta automatically on next run

i am quite fine with depending on beta, but i'd suggest to pin it

cc @mnmkng

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, we're aware. We did not plan to release it with beta. But it's a good point that pinning it is much safer anyway.

"http-proxy-agent": "^4.0.1",
"http2-wrapper": "^2.1.2",
"https-proxy-agent": "^5.0.0",
Expand Down
57 changes: 12 additions & 45 deletions src/agent/transform-headers-agent.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
/* eslint-disable no-underscore-dangle */
const HeaderGenerator = require('header-generator');
const http = require('http');
const WrappedAgent = require('./wrapped-agent');

const { _storeHeader } = http.OutgoingMessage.prototype;

const generator = new HeaderGenerator();

/**
* @description Transforms the casing of the headers to Pascal-Case.
*/
Expand All @@ -13,9 +16,9 @@ class TransformHeadersAgent extends WrappedAgent {
* @description Transforms the request via header normalization.
* @see {TransformHeadersAgent.toPascalCase}
* @param {http.ClientRequest} request
* @param {string[]} sortedHeaders - headers in order, optional
* @param {boolean} sortHeaders - if the headers should be sorted or not
*/
transformRequest(request, sortedHeaders) {
transformRequest(request, sortHeaders) {
const headers = {};
const hasConnection = request.hasHeader('connection');
const hasContentLength = request.hasHeader('content-length');
Expand All @@ -30,7 +33,7 @@ class TransformHeadersAgent extends WrappedAgent {
headers[this.toPascalCase(key)] = request.getHeader(key);
}

if (sortedHeaders) {
if (sortHeaders) {
// Removal is required in order to change the order of the properties
request.removeHeader(key);
}
Expand Down Expand Up @@ -58,22 +61,22 @@ class TransformHeadersAgent extends WrappedAgent {
}
}

const normalizedKeys = Object.keys(headers);
const sorted = sortedHeaders ? normalizedKeys.sort(this.createSort(sortedHeaders)) : normalizedKeys;
const transformedHeaders = sortHeaders ? generator.orderHeaders(headers) : headers;

for (const key of sorted) {
request.setHeader(key, headers[key]);
// eslint-disable-next-line no-restricted-syntax, guard-for-in
for (const key in transformedHeaders) {
request.setHeader(key, transformedHeaders[key]);
}
}

addRequest(request, options) {
// See https:/nodejs/node/blob/533cafcf7e3ab72e98a2478bc69aedfdf06d3a5e/lib/_http_outgoing.js#L373
// Note: This overrides the private `_storeHeader`.
// This is required, because the function directly copies
// This is required, because the function copies
// the `connection`, `content-length` and `trasfer-encoding` headers
// directly to the underlying buffer.
request._storeHeader = (...args) => {
this.transformRequest(request, options.sortedHeaders);
this.transformRequest(request, true);

return _storeHeader.call(request, ...args);
};
Expand All @@ -93,42 +96,6 @@ class TransformHeadersAgent extends WrappedAgent {
return part[0].toUpperCase() + part.slice(1).toLowerCase();
}).join('-');
}

/**
*
* @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/sort
* @param {string} a - header a
* @param {string} b - header b
* @param {string[]} sortedHeaders - array of headers in order
* @returns header a or header b, depending which one is more important
*/
sort(a, b, sortedHeaders) {
const rawA = sortedHeaders.indexOf(a);
const rawB = sortedHeaders.indexOf(b);
const indexA = rawA === -1 ? Number.POSITIVE_INFINITY : rawA;
const indexB = rawB === -1 ? Number.POSITIVE_INFINITY : rawB;

if (indexA < indexB) {
return -1;
}

if (indexA > indexB) {
return 1;
}

return 0;
}

/**
*
* @param {string[]} sortedHeaders - array of headers in order
* @returns {Function} - sort function
*/
createSort(sortedHeaders) {
const sortWithSortedHeaders = (a, b) => this.sort(a, b, sortedHeaders);

return sortWithSortedHeaders;
}
}

module.exports = TransformHeadersAgent;
36 changes: 0 additions & 36 deletions test/agent.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -119,42 +119,6 @@ describe('TransformHeadersAgent', () => {
request.end();
});

test('sorted headers', (done) => {
const transformAgent = new TransformHeadersAgent(new http.Agent({
keepAlive: true,
}));

const request = http.request(`http://localhost:${port}/headers`, {
agent: transformAgent,
sortedHeaders: ['Host', 'Connection', 'Foo', 'Bar'],
headers: {
bar: 'foo',
foo: 'bar',
},
}, async (response) => {
const body = await getStream(response);
const headers = JSON.parse(body);

const keys = Object.keys(headers);

expect(keys[0]).toBe('Host');
expect(keys[1]).toBe('Connection');
expect(keys[2]).toBe('Foo');
expect(keys[3]).toBe('Bar');

expect(headers.Host).toBe(`localhost:${port}`);
expect(headers.Connection).toBe('keep-alive');
expect(headers.Foo).toBe('bar');
expect(headers.Bar).toBe('foo');

transformAgent.destroy();

done();
});

request.end();
});

describe('respects native behavior', () => {
test('content-length removal', (done) => {
const transformAgent = new TransformHeadersAgent(new http.Agent({
Expand Down
33 changes: 33 additions & 0 deletions test/main.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@ describe('GotScraping', () => {
});

describe('Integration', () => {
test('should order headers', async () => {
const { rawHeaders } = await gotScraping({ url: 'https://api.apify.com/v2/browser-info?rawHeaders=1' }).json();
expect(rawHeaders[0].toLowerCase()).toBe('connection');
});

test('should use http2 first', async () => {
const response = await gotScraping({ url: 'https://apify.com/' });
expect(response.statusCode).toBe(200);
Expand Down Expand Up @@ -174,6 +179,16 @@ describe('GotScraping', () => {
expect(responseProxy.httpVersion).toBe('1.1');
});

test('should order headers with proxyUrl and http1', async () => {
const { rawHeaders } = await gotScraping({
url: 'https://api.apify.com/v2/browser-info?rawHeaders=1',
proxyUrl: `http://groups-SHADER:${process.env.APIFY_PROXY_PASSWORD}@proxy.apify.com:8000`,
http2: false,
}).json();

expect(rawHeaders[0].toLowerCase()).toBe('connection');
});

test('should work with proxyUrl and http2', async () => {
const response = await gotScraping({
responseType: 'json',
Expand Down Expand Up @@ -212,6 +227,13 @@ describe('GotScraping', () => {
});

describe('same thing with streams', () => {
test('should order headers', async () => {
const body = await getStream(gotScraping.stream({ url: 'https://api.apify.com/v2/browser-info?rawHeaders=1' }));
const { rawHeaders } = JSON.parse(body);

expect(rawHeaders[0].toLowerCase()).toBe('connection');
});

test('should allow passing custom properties', async () => {
const requestOptions = {
isStream: true,
Expand Down Expand Up @@ -331,6 +353,17 @@ describe('GotScraping', () => {
expect(JSON.parse(responseBody)).toEqual(body);
});

test('should order headers with proxyUrl and http1', async () => {
const body = await getStream(gotScraping.stream({
url: 'https://api.apify.com/v2/browser-info?rawHeaders=1',
proxyUrl: `http://groups-SHADER:${process.env.APIFY_PROXY_PASSWORD}@proxy.apify.com:8000`,
http2: false,
}));
const { rawHeaders } = JSON.parse(body);

expect(rawHeaders[0].toLowerCase()).toBe('connection');
});

describe('Integration', () => {
test('should use http2 first', async () => {
const stream = gotScraping.stream({ url: 'https://apify.com/' });
Expand Down