Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sbt-package): Cache bad URLs during brute-force release fetching #31877

Merged
merged 3 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions lib/modules/datasource/sbt-package/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,13 @@ describe('modules/datasource/sbt-package/index', () => {
200,
codeBlock`
<a href="empty/">empty_2.12/</a>
<a href="empty_but_invalid/">???</a>
`,
)
.get('/maven2/com/example/empty/')
.reply(200, '')
.get('/maven2/com/example/empty_but_invalid/')
.reply(404, '')
.get('/maven2/com/example/empty/maven-metadata.xml')
.reply(404)
.get('/maven2/com/example/empty/index.html')
Expand Down Expand Up @@ -112,12 +115,11 @@ describe('modules/datasource/sbt-package/index', () => {
`,
)
.get('/org/example/example/1.2.3/example-1.2.3.pom')
.twice()
.reply(200, '')
.reply(404)
.get('/org/example/example_2.12/1.2.3/example-1.2.3.pom')
.reply(200, '')
.reply(404)
.get('/org/example/example_2.12/1.2.3/example_2.12-1.2.3.pom')
.reply(200, '');
.reply(404);

const res = await getPkgReleases({
versioning: mavenVersioning.id,
Expand Down
143 changes: 117 additions & 26 deletions lib/modules/datasource/sbt-package/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as upath from 'upath';
import { XmlDocument } from 'xmldoc';
import { logger } from '../../../logger';
import * as packageCache from '../../../util/cache/package';
import { Http } from '../../../util/http';
import { regEx } from '../../../util/regex';
import { ensureTrailingSlash, trimTrailingSlash } from '../../../util/url';
Expand Down Expand Up @@ -58,11 +59,23 @@ export class SbtPackageDatasource extends MavenDatasource {

const groupIdSplit = groupId.split('.');
const repoRootUrl = ensureTrailingSlash(registryUrl);
const packageRootUrlWith = (sep: string): string =>
`${repoRootUrl}${groupIdSplit.join(sep)}`;

const validRootUrlKey = `valid-root-url:${registryUrl}:${packageName}`;
const validRootUrl = await packageCache.get<string>(
'datasource-sbt-package',
validRootUrlKey,
);

const packageRootUrls: string[] = [];
packageRootUrls.push(ensureTrailingSlash(packageRootUrlWith('/')));
packageRootUrls.push(ensureTrailingSlash(packageRootUrlWith('.')));
// istanbul ignore if: not easily testable
if (validRootUrl) {
packageRootUrls.push(validRootUrl);
} else {
const packageRootUrlWith = (sep: string): string =>
`${repoRootUrl}${groupIdSplit.join(sep)}`;
packageRootUrls.push(ensureTrailingSlash(packageRootUrlWith('/')));
packageRootUrls.push(ensureTrailingSlash(packageRootUrlWith('.')));
}

let dependencyUrl: string | undefined;
let packageUrls: string[] | undefined;
Expand All @@ -72,6 +85,13 @@ export class SbtPackageDatasource extends MavenDatasource {
continue;
}

await packageCache.set(
'datasource-sbt-package',
validRootUrlKey,
packageRootUrl,
30 * 24 * 60,
);

dependencyUrl = trimTrailingSlash(packageRootUrl);

const rootPath = new URL(packageRootUrl).pathname;
Expand Down Expand Up @@ -110,15 +130,23 @@ export class SbtPackageDatasource extends MavenDatasource {
return null;
}

const validPackageUrls: string[] = [];
const invalidPackageUrlsKey = `invalid-package-urls:${registryUrl}:${packageName}`;
const invalidPackageUrls = new Set(
await packageCache.get<string[]>(
'datasource-sbt-package',
invalidPackageUrlsKey,
),
);
packageUrls = packageUrls.filter((url) => !invalidPackageUrls.has(url));

const allVersions = new Set<string>();
for (const pkgUrl of packageUrls) {
const res = await downloadHttpProtocol(this.http, pkgUrl);
// istanbul ignore if
if (!res) {
invalidPackageUrls.add(pkgUrl);
continue;
}
validPackageUrls.push(pkgUrl);

const rootPath = new URL(pkgUrl).pathname;
const versions = extractPageLinks(res.body, (href) => {
Expand All @@ -135,13 +163,37 @@ export class SbtPackageDatasource extends MavenDatasource {
}
}

if (invalidPackageUrls.size > 0) {
await packageCache.set(
'datasource-sbt-package',
invalidPackageUrlsKey,
[...invalidPackageUrls],
30 * 24 * 60,
);
}

if (packageUrls.length > 0) {
const packageUrlsKey = `package-urls:${registryUrl}:${packageName}`;
await packageCache.set(
'datasource-sbt-package',
packageUrlsKey,
packageUrls,
30 * 24 * 60,
);
}

const versions = [...allVersions];
if (!versions.length) {
return null;
}

const latestVersion = getLatestVersion(versions);
const pomInfo = await this.getPomInfo(packageUrls, latestVersion);
const pomInfo = await this.getPomInfo(
registryUrl,
packageName,
latestVersion,
packageUrls,
);

const releases: Release[] = [...allVersions]
.sort(compare)
Expand All @@ -150,11 +202,22 @@ export class SbtPackageDatasource extends MavenDatasource {
}

async getPomInfo(
packageUrls: string[],
registryUrl: string,
packageName: string,
version: string | null,
pkgUrls?: string[],
): Promise<Pick<ReleaseResult, 'homepage' | 'sourceUrl'>> {
const result: Pick<ReleaseResult, 'homepage' | 'sourceUrl'> = {};

const packageUrlsKey = `package-urls:${registryUrl}:${packageName}`;
// istanbul ignore next: will be covered later
const packageUrls =
pkgUrls ??
(await packageCache.get<string[]>(
'datasource-sbt-package',
packageUrlsKey,
));

// istanbul ignore if
if (!packageUrls?.length) {
return result;
Expand All @@ -165,37 +228,65 @@ export class SbtPackageDatasource extends MavenDatasource {
return result;
}

const invalidPomFilesKey = `invalid-pom-files:${registryUrl}:${packageName}:${version}`;
const invalidPomFiles = new Set(
await packageCache.get<string[]>(
'datasource-sbt-package',
invalidPomFilesKey,
),
);

const saveCache = async (): Promise<void> => {
if (invalidPomFiles.size > 0) {
await packageCache.set(
'datasource-sbt-package',
invalidPomFilesKey,
[...invalidPomFiles],
30 * 24 * 60,
);
}
};

for (const packageUrl of packageUrls) {
const artifactDir = upath.basename(packageUrl);
const [artifact] = artifactDir.split('_');

for (const pomFilePrefix of [artifactDir, artifact]) {
const pomFileName = `${pomFilePrefix}-${version}.pom`;
const pomUrl = `${packageUrl}${version}/${pomFileName}`;
if (invalidPomFiles.has(pomUrl)) {
continue;
}

const res = await downloadHttpProtocol(this.http, pomUrl);
const content = res?.body;
if (content) {
const pomXml = new XmlDocument(content);

const homepage = pomXml.valueWithPath('url');
if (homepage) {
result.homepage = homepage;
}

const sourceUrl = pomXml.valueWithPath('scm.url');
if (sourceUrl) {
result.sourceUrl = sourceUrl
.replace(regEx(/^scm:/), '')
.replace(regEx(/^git:/), '')
.replace(regEx(/^git@github.com:/), 'https:/')
.replace(regEx(/\.git$/), '');
}

return result;
if (!content) {
invalidPomFiles.add(pomUrl);
continue;
}

const pomXml = new XmlDocument(content);

const homepage = pomXml.valueWithPath('url');
if (homepage) {
result.homepage = homepage;
}

const sourceUrl = pomXml.valueWithPath('scm.url');
if (sourceUrl) {
result.sourceUrl = sourceUrl
.replace(regEx(/^scm:/), '')
.replace(regEx(/^git:/), '')
.replace(regEx(/^git@github.com:/), 'https:/')
.replace(regEx(/\.git$/), '');
}

await saveCache();
return result;
}
}

await saveCache();
return result;
}

Expand Down
1 change: 1 addition & 0 deletions lib/util/cache/package/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ export type PackageCacheNamespace =
| 'datasource-repology'
| 'datasource-ruby-version'
| 'datasource-rubygems'
| 'datasource-sbt-package'
| 'datasource-terraform-module'
| 'datasource-terraform-provider'
| 'datasource-terraform'
Expand Down